From 1d8fff907342d2339796dbd27ea47d0e76a6a2d0 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 21 Jul 2015 10:43:54 +0200 Subject: ip_tunnel: Make ovs_tunnel_info and ovs_key_ipv4_tunnel generic Rename the tunnel metadata data structures currently internal to OVS and make them generic for use by all IP tunnels. Both structures are kernel internal and will stay that way. Their members are exposed to user space through individual Netlink attributes by OVS. It will therefore be possible to extend/modify these structures without affecting user ABI. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/openvswitch/actions.c') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 8a8c0b8b4f63..27c1687cfd92 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -611,7 +611,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr, const struct nlattr *actions, int actions_len) { - struct ovs_tunnel_info info; + struct ip_tunnel_info info; struct dp_upcall_info upcall; const struct nlattr *a; int rem; -- cgit v1.2.3 From 34ae932a40369be6bd6ea97d66b6686361b4370d Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 21 Jul 2015 10:44:03 +0200 Subject: openvswitch: Make tunnel set action attach a metadata dst Utilize the new metadata dst to attach encapsulation instructions to the skb. The existing egress_tun_info via the OVS_CB() is left in place until all tunnel vports have been converted to the new method. Signed-off-by: Thomas Graf Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 10 ++++++- net/openvswitch/datapath.c | 8 +++--- net/openvswitch/flow.h | 5 ++++ net/openvswitch/flow_netlink.c | 64 +++++++++++++++++++++++++++++++++++++----- net/openvswitch/flow_netlink.h | 1 + net/openvswitch/flow_table.c | 4 ++- 6 files changed, 79 insertions(+), 13 deletions(-) (limited to 'net/openvswitch/actions.c') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 27c1687cfd92..cf04c2f8b32a 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -733,7 +733,15 @@ static int execute_set_action(struct sk_buff *skb, { /* Only tunnel set execution is supported without a mask. */ if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) { - OVS_CB(skb)->egress_tun_info = nla_data(a); + struct ovs_tunnel_info *tun = nla_data(a); + + skb_dst_drop(skb); + dst_hold((struct dst_entry *)tun->tun_dst); + skb_dst_set(skb, (struct dst_entry *)tun->tun_dst); + + /* FIXME: Remove when all vports have been converted */ + OVS_CB(skb)->egress_tun_info = &tun->tun_dst->u.tun_info; + return 0; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index ff8c4a4c1609..02082107c74c 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1018,7 +1018,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } ovs_unlock(); - ovs_nla_free_flow_actions(old_acts); + ovs_nla_free_flow_actions_rcu(old_acts); ovs_flow_free(new_flow, false); } @@ -1030,7 +1030,7 @@ err_unlock_ovs: ovs_unlock(); kfree_skb(reply); err_kfree_acts: - kfree(acts); + ovs_nla_free_flow_actions(acts); err_kfree_flow: ovs_flow_free(new_flow, false); error: @@ -1157,7 +1157,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) if (reply) ovs_notify(&dp_flow_genl_family, reply, info); if (old_acts) - ovs_nla_free_flow_actions(old_acts); + ovs_nla_free_flow_actions_rcu(old_acts); return 0; @@ -1165,7 +1165,7 @@ err_unlock_ovs: ovs_unlock(); kfree_skb(reply); err_kfree_acts: - kfree(acts); + ovs_nla_free_flow_actions(acts); error: return error; } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index cadc6c5c3545..b62cdb3e3589 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -33,6 +33,7 @@ #include #include #include +#include struct sk_buff; @@ -45,6 +46,10 @@ struct sk_buff; #define TUN_METADATA_OPTS(flow_key, opt_len) \ ((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len))) +struct ovs_tunnel_info { + struct metadata_dst *tun_dst; +}; + #define OVS_SW_FLOW_KEY_METADATA_SIZE \ (offsetof(struct sw_flow_key, recirc_id) + \ FIELD_SIZEOF(struct sw_flow_key, recirc_id)) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ecfa530d3461..e7906dfb8814 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1548,11 +1548,48 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) return sfa; } +static void ovs_nla_free_set_action(const struct nlattr *a) +{ + const struct nlattr *ovs_key = nla_data(a); + struct ovs_tunnel_info *ovs_tun; + + switch (nla_type(ovs_key)) { + case OVS_KEY_ATTR_TUNNEL_INFO: + ovs_tun = nla_data(ovs_key); + dst_release((struct dst_entry *)ovs_tun->tun_dst); + break; + } +} + +void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) +{ + const struct nlattr *a; + int rem; + + if (!sf_acts) + return; + + nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) { + switch (nla_type(a)) { + case OVS_ACTION_ATTR_SET: + ovs_nla_free_set_action(a); + break; + } + } + + kfree(sf_acts); +} + +static void __ovs_nla_free_flow_actions(struct rcu_head *head) +{ + ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu)); +} + /* Schedules 'sf_acts' to be freed after the next RCU grace period. * The caller must hold rcu_read_lock for this to be sensible. */ -void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) +void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts) { - kfree_rcu(sf_acts, rcu); + call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions); } static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, @@ -1746,7 +1783,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, { struct sw_flow_match match; struct sw_flow_key key; + struct metadata_dst *tun_dst; struct ip_tunnel_info *tun_info; + struct ovs_tunnel_info *ovs_tun; struct nlattr *a; int err = 0, start, opts_type; @@ -1771,12 +1810,22 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, if (start < 0) return start; + tun_dst = metadata_dst_alloc(key.tun_opts_len, GFP_KERNEL); + if (!tun_dst) + return -ENOMEM; + a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, - sizeof(*tun_info) + key.tun_opts_len, log); - if (IS_ERR(a)) + sizeof(*ovs_tun), log); + if (IS_ERR(a)) { + dst_release((struct dst_entry *)tun_dst); return PTR_ERR(a); + } + + ovs_tun = nla_data(a); + ovs_tun->tun_dst = tun_dst; - tun_info = nla_data(a); + tun_info = &tun_dst->u.tun_info; + tun_info->mode = IP_TUNNEL_INFO_TX; tun_info->key = key.tun_key; tun_info->options_len = key.tun_opts_len; @@ -2177,7 +2226,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr, err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type, key->eth.tci, log); if (err) - kfree(*sfa); + ovs_nla_free_flow_actions(*sfa); return err; } @@ -2227,7 +2276,8 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) switch (key_type) { case OVS_KEY_ATTR_TUNNEL_INFO: { - struct ip_tunnel_info *tun_info = nla_data(ovs_key); + struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key); + struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info; start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); if (!start) diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index ec53eb6e632b..acd074408f0a 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -69,5 +69,6 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb); void ovs_nla_free_flow_actions(struct sw_flow_actions *); +void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *); #endif /* flow_netlink.h */ diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 4613df8c8290..b70d845e7efb 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -18,6 +18,7 @@ #include "flow.h" #include "datapath.h" +#include "flow_netlink.h" #include #include #include @@ -143,7 +144,8 @@ static void flow_free(struct sw_flow *flow) if (ovs_identifier_is_key(&flow->id)) kfree(flow->id.unmasked_key); - kfree((struct sw_flow_actions __force *)flow->sf_acts); + if (flow->sf_acts) + ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts); for_each_node(node) if (flow->stats[node]) kmem_cache_free(flow_stats_cache, -- cgit v1.2.3 From e05176a3283822bd32a1f3d929ce2050232299a8 Mon Sep 17 00:00:00 2001 From: Wenyu Zhang Date: Wed, 5 Aug 2015 00:30:47 -0700 Subject: openvswitch: Make 100 percents packets sampled when sampling rate is 1. When sampling rate is 1, the sampling probability is UINT32_MAX. The packet should be sampled even the prandom32() generate the number of UINT32_MAX. And none packet need be sampled when the probability is 0. Signed-off-by: Wenyu Zhang Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/openvswitch/actions.c') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index cf04c2f8b32a..a0ac410e9570 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -669,9 +669,12 @@ static int sample(struct datapath *dp, struct sk_buff *skb, for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { + u32 probability; + switch (nla_type(a)) { case OVS_SAMPLE_ATTR_PROBABILITY: - if (prandom_u32() >= nla_get_u32(a)) + probability = nla_get_u32(a); + if (!probability || prandom_u32() > probability) return 0; break; -- cgit v1.2.3 From 4b048d6d9d0b0b90e1e94f2393796bbf1fa8df4e Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 17 Aug 2015 13:42:25 -0700 Subject: net: Change pseudohdr argument of inet_proto_csum_replace* to be a bool inet_proto_csum_replace4,2,16 take a pseudohdr argument which indicates the checksum field carries a pseudo header. This argument should be a boolean instead of an int. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/checksum.h | 6 +++--- net/core/filter.c | 2 +- net/core/utils.c | 4 ++-- net/ipv4/netfilter/ipt_ECN.c | 2 +- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 4 ++-- net/ipv4/netfilter/nf_nat_proto_icmp.c | 2 +- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 4 ++-- net/ipv6/netfilter/nf_nat_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_seqadj.c | 9 +++++---- net/netfilter/nf_nat_proto_dccp.c | 2 +- net/netfilter/nf_nat_proto_tcp.c | 2 +- net/netfilter/nf_nat_proto_udp.c | 2 +- net/netfilter/nf_nat_proto_udplite.c | 2 +- net/netfilter/nf_synproxy_core.c | 2 +- net/netfilter/xt_TCPMSS.c | 8 ++++---- net/netfilter/xt_TCPOPTSTRIP.c | 2 +- net/openvswitch/actions.c | 12 ++++++------ net/sched/act_nat.c | 7 ++++--- 18 files changed, 38 insertions(+), 36 deletions(-) (limited to 'net/openvswitch/actions.c') diff --git a/include/net/checksum.h b/include/net/checksum.h index 2d1d73cb773e..619f3445d57e 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -140,14 +140,14 @@ static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) struct sk_buff; void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, - __be32 from, __be32 to, int pseudohdr); + __be32 from, __be32 to, bool pseudohdr); void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, - int pseudohdr); + bool pseudohdr); static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, __be16 from, __be16 to, - int pseudohdr) + bool pseudohdr) { inet_proto_csum_replace4(sum, skb, (__force __be32)from, (__force __be32)to, pseudohdr); diff --git a/net/core/filter.c b/net/core/filter.c index f8184222465e..83f08cefeab7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1349,7 +1349,7 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = { static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; - u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags); + bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags); int offset = (int) r2; __sum16 sum, *ptr; diff --git a/net/core/utils.c b/net/core/utils.c index a7732a068043..cd7d202f340e 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -301,7 +301,7 @@ out: EXPORT_SYMBOL(in6_pton); void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, - __be32 from, __be32 to, int pseudohdr) + __be32 from, __be32 to, bool pseudohdr) { if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_replace4(sum, from, to); @@ -318,7 +318,7 @@ EXPORT_SYMBOL(inet_proto_csum_replace4); void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, - int pseudohdr) + bool pseudohdr) { __be32 diff[] = { ~from[0], ~from[1], ~from[2], ~from[3], diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 4bf3dc49ad1e..270765236f5e 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -72,7 +72,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) tcph->cwr = einfo->proto.tcp.cwr; inet_proto_csum_replace2(&tcph->check, skb, - oldval, ((__be16 *)tcph)[6], 0); + oldval, ((__be16 *)tcph)[6], false); return true; } diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index e59cc05c09e9..22f4579b0c2a 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -120,7 +120,7 @@ static void nf_nat_ipv4_csum_update(struct sk_buff *skb, oldip = iph->daddr; newip = t->dst.u3.ip; } - inet_proto_csum_replace4(check, skb, oldip, newip, 1); + inet_proto_csum_replace4(check, skb, oldip, newip, true); } static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, @@ -151,7 +151,7 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, } } else inet_proto_csum_replace2(check, skb, - htons(oldlen), htons(datalen), 1); + htons(oldlen), htons(datalen), true); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 4557b4ab8342..7b98baa13ede 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -67,7 +67,7 @@ icmp_manip_pkt(struct sk_buff *skb, hdr = (struct icmphdr *)(skb->data + hdroff); inet_proto_csum_replace2(&hdr->checksum, skb, - hdr->un.echo.id, tuple->src.u.icmp.id, 0); + hdr->un.echo.id, tuple->src.u.icmp.id, false); hdr->un.echo.id = tuple->src.u.icmp.id; return true; } diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index e76900e0aa92..70fbaed49edb 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -124,7 +124,7 @@ static void nf_nat_ipv6_csum_update(struct sk_buff *skb, newip = &t->dst.u3.in6; } inet_proto_csum_replace16(check, skb, oldip->s6_addr32, - newip->s6_addr32, 1); + newip->s6_addr32, true); } static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, @@ -155,7 +155,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, } } else inet_proto_csum_replace2(check, skb, - htons(oldlen), htons(datalen), 1); + htons(oldlen), htons(datalen), true); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c index 2205e8eeeacf..57593b00c5b4 100644 --- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c @@ -73,7 +73,7 @@ icmpv6_manip_pkt(struct sk_buff *skb, hdr->icmp6_type == ICMPV6_ECHO_REPLY) { inet_proto_csum_replace2(&hdr->icmp6_cksum, skb, hdr->icmp6_identifier, - tuple->src.u.icmp.id, 0); + tuple->src.u.icmp.id, false); hdr->icmp6_identifier = tuple->src.u.icmp.id; } return true; diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index ce3e840c8704..dff0f0cc59e4 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -103,9 +103,9 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb, ntohl(sack->end_seq), ntohl(new_end_seq)); inet_proto_csum_replace4(&tcph->check, skb, - sack->start_seq, new_start_seq, 0); + sack->start_seq, new_start_seq, false); inet_proto_csum_replace4(&tcph->check, skb, - sack->end_seq, new_end_seq, 0); + sack->end_seq, new_end_seq, false); sack->start_seq = new_start_seq; sack->end_seq = new_end_seq; sackoff += sizeof(*sack); @@ -193,8 +193,9 @@ int nf_ct_seq_adjust(struct sk_buff *skb, newseq = htonl(ntohl(tcph->seq) + seqoff); newack = htonl(ntohl(tcph->ack_seq) - ackoff); - inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); - inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); + inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false); + inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, + false); pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c index b8067b53ff3a..15c47b246d0d 100644 --- a/net/netfilter/nf_nat_proto_dccp.c +++ b/net/netfilter/nf_nat_proto_dccp.c @@ -69,7 +69,7 @@ dccp_manip_pkt(struct sk_buff *skb, l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype); inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, - 0); + false); return true; } diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c index 37f5505f4529..4f8820fc5148 100644 --- a/net/netfilter/nf_nat_proto_tcp.c +++ b/net/netfilter/nf_nat_proto_tcp.c @@ -70,7 +70,7 @@ tcp_manip_pkt(struct sk_buff *skb, return true; l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); - inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); + inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false); return true; } diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c index b0ede2f0d8bc..b1e627227b6e 100644 --- a/net/netfilter/nf_nat_proto_udp.c +++ b/net/netfilter/nf_nat_proto_udp.c @@ -57,7 +57,7 @@ udp_manip_pkt(struct sk_buff *skb, l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, - 0); + false); if (!hdr->check) hdr->check = CSUM_MANGLED_0; } diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c index 368f14e01e75..58340c97bd83 100644 --- a/net/netfilter/nf_nat_proto_udplite.c +++ b/net/netfilter/nf_nat_proto_udplite.c @@ -56,7 +56,7 @@ udplite_manip_pkt(struct sk_buff *skb, } l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); - inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); + inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, false); if (!hdr->check) hdr->check = CSUM_MANGLED_0; diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index d7f168527903..14f8b43ec5a7 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -225,7 +225,7 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb, synproxy->tsoff); } inet_proto_csum_replace4(&th->check, skb, - old, *ptr, 0); + old, *ptr, false); return 1; } optoff += op[1]; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 8c3190e2fc6a..8c02501a530f 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -144,7 +144,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, inet_proto_csum_replace2(&tcph->check, skb, htons(oldmss), htons(newmss), - 0); + false); return 0; } } @@ -185,18 +185,18 @@ tcpmss_mangle_packet(struct sk_buff *skb, memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr)); inet_proto_csum_replace2(&tcph->check, skb, - htons(len), htons(len + TCPOLEN_MSS), 1); + htons(len), htons(len + TCPOLEN_MSS), true); opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; opt[3] = newmss & 0x00ff; - inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0); + inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), false); oldval = ((__be16 *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; inet_proto_csum_replace2(&tcph->check, skb, - oldval, ((__be16 *)tcph)[6], 0); + oldval, ((__be16 *)tcph)[6], false); return TCPOLEN_MSS; } diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 625fa1d636a0..eb92bffff11c 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -80,7 +80,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, n <<= 8; } inet_proto_csum_replace2(&tcph->check, skb, htons(o), - htons(n), 0); + htons(n), false); } memset(opt + i, TCPOPT_NOP, optl); } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 14da52ddd327..4f4200717bef 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -284,14 +284,14 @@ static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, if (nh->protocol == IPPROTO_TCP) { if (likely(transport_len >= sizeof(struct tcphdr))) inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb, - addr, new_addr, 1); + addr, new_addr, true); } else if (nh->protocol == IPPROTO_UDP) { if (likely(transport_len >= sizeof(struct udphdr))) { struct udphdr *uh = udp_hdr(skb); if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace4(&uh->check, skb, - addr, new_addr, 1); + addr, new_addr, true); if (!uh->check) uh->check = CSUM_MANGLED_0; } @@ -316,14 +316,14 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, if (l4_proto == NEXTHDR_TCP) { if (likely(transport_len >= sizeof(struct tcphdr))) inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb, - addr, new_addr, 1); + addr, new_addr, true); } else if (l4_proto == NEXTHDR_UDP) { if (likely(transport_len >= sizeof(struct udphdr))) { struct udphdr *uh = udp_hdr(skb); if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace16(&uh->check, skb, - addr, new_addr, 1); + addr, new_addr, true); if (!uh->check) uh->check = CSUM_MANGLED_0; } @@ -331,7 +331,7 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, } else if (l4_proto == NEXTHDR_ICMP) { if (likely(transport_len >= sizeof(struct icmp6hdr))) inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum, - skb, addr, new_addr, 1); + skb, addr, new_addr, true); } } @@ -498,7 +498,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, static void set_tp_port(struct sk_buff *skb, __be16 *port, __be16 new_port, __sum16 *check) { - inet_proto_csum_replace2(check, skb, *port, new_port, 0); + inet_proto_csum_replace2(check, skb, *port, new_port, false); *port = new_port; } diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 5be0b3c1c5b0..b7c4ead8b5a8 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -162,7 +162,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, goto drop; tcph = (void *)(skb_network_header(skb) + ihl); - inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1); + inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, + true); break; } case IPPROTO_UDP: @@ -178,7 +179,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, udph = (void *)(skb_network_header(skb) + ihl); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace4(&udph->check, skb, addr, - new_addr, 1); + new_addr, true); if (!udph->check) udph->check = CSUM_MANGLED_0; } @@ -231,7 +232,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, iph->saddr = new_addr; inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, - 0); + false); break; } default: -- cgit v1.2.3 From be26b9a88fcee570796c67701f50800039e25aec Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 26 Aug 2015 11:31:45 -0700 Subject: openvswitch: Move MASKED* macros to datapath.h This will allow the ovs-conntrack code to reuse these macros. Signed-off-by: Joe Stringer Acked-by: Thomas Graf Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 52 ++++++++++++++++++++++------------------------ net/openvswitch/datapath.h | 4 ++++ 2 files changed, 29 insertions(+), 27 deletions(-) (limited to 'net/openvswitch/actions.c') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 4f4200717bef..520438b77dc8 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -185,10 +185,6 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, return 0; } -/* 'KEY' must not have any bits set outside of the 'MASK' */ -#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) -#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK)) - static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, const __be32 *mpls_lse, const __be32 *mask) { @@ -201,7 +197,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, return err; stack = (__be32 *)skb_mpls_header(skb); - lse = MASKED(*stack, *mpls_lse, *mask); + lse = OVS_MASKED(*stack, *mpls_lse, *mask); if (skb->ip_summed == CHECKSUM_COMPLETE) { __be32 diff[] = { ~(*stack), lse }; @@ -244,9 +240,9 @@ static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_) const u16 *src = (const u16 *)src_; const u16 *mask = (const u16 *)mask_; - SET_MASKED(dst[0], src[0], mask[0]); - SET_MASKED(dst[1], src[1], mask[1]); - SET_MASKED(dst[2], src[2], mask[2]); + OVS_SET_MASKED(dst[0], src[0], mask[0]); + OVS_SET_MASKED(dst[1], src[1], mask[1]); + OVS_SET_MASKED(dst[2], src[2], mask[2]); } static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, @@ -338,10 +334,10 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4], const __be32 mask[4], __be32 masked[4]) { - masked[0] = MASKED(old[0], addr[0], mask[0]); - masked[1] = MASKED(old[1], addr[1], mask[1]); - masked[2] = MASKED(old[2], addr[2], mask[2]); - masked[3] = MASKED(old[3], addr[3], mask[3]); + masked[0] = OVS_MASKED(old[0], addr[0], mask[0]); + masked[1] = OVS_MASKED(old[1], addr[1], mask[1]); + masked[2] = OVS_MASKED(old[2], addr[2], mask[2]); + masked[3] = OVS_MASKED(old[3], addr[3], mask[3]); } static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, @@ -358,15 +354,15 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask) { /* Bits 21-24 are always unmasked, so this retains their values. */ - SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); - SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); - SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); + OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); + OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); + OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); } static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl, u8 mask) { - new_ttl = MASKED(nh->ttl, new_ttl, mask); + new_ttl = OVS_MASKED(nh->ttl, new_ttl, mask); csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8)); nh->ttl = new_ttl; @@ -392,7 +388,7 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key, * makes sense to check if the value actually changed. */ if (mask->ipv4_src) { - new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src); + new_addr = OVS_MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src); if (unlikely(new_addr != nh->saddr)) { set_ip_addr(skb, nh, &nh->saddr, new_addr); @@ -400,7 +396,7 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key, } } if (mask->ipv4_dst) { - new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst); + new_addr = OVS_MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst); if (unlikely(new_addr != nh->daddr)) { set_ip_addr(skb, nh, &nh->daddr, new_addr); @@ -488,7 +484,8 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); } if (mask->ipv6_hlimit) { - SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit); + OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit, + mask->ipv6_hlimit); flow_key->ip.ttl = nh->hop_limit; } return 0; @@ -517,8 +514,8 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key, uh = udp_hdr(skb); /* Either of the masks is non-zero, so do not bother checking them. */ - src = MASKED(uh->source, key->udp_src, mask->udp_src); - dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst); + src = OVS_MASKED(uh->source, key->udp_src, mask->udp_src); + dst = OVS_MASKED(uh->dest, key->udp_dst, mask->udp_dst); if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { if (likely(src != uh->source)) { @@ -558,12 +555,12 @@ static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key, return err; th = tcp_hdr(skb); - src = MASKED(th->source, key->tcp_src, mask->tcp_src); + src = OVS_MASKED(th->source, key->tcp_src, mask->tcp_src); if (likely(src != th->source)) { set_tp_port(skb, &th->source, src, &th->check); flow_key->tp.src = src; } - dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst); + dst = OVS_MASKED(th->dest, key->tcp_dst, mask->tcp_dst); if (likely(dst != th->dest)) { set_tp_port(skb, &th->dest, dst, &th->check); flow_key->tp.dst = dst; @@ -590,8 +587,8 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, old_csum = sh->checksum; old_correct_csum = sctp_compute_cksum(skb, sctphoff); - sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src); - sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst); + sh->source = OVS_MASKED(sh->source, key->sctp_src, mask->sctp_src); + sh->dest = OVS_MASKED(sh->dest, key->sctp_dst, mask->sctp_dst); new_csum = sctp_compute_cksum(skb, sctphoff); @@ -770,12 +767,13 @@ static int execute_masked_set_action(struct sk_buff *skb, switch (nla_type(a)) { case OVS_KEY_ATTR_PRIORITY: - SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *)); + OVS_SET_MASKED(skb->priority, nla_get_u32(a), + *get_mask(a, u32 *)); flow_key->phy.priority = skb->priority; break; case OVS_KEY_ATTR_SKB_MARK: - SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *)); + OVS_SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *)); flow_key->phy.skb_mark = skb->mark; break; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 6b28c5cedb23..487a85f7d967 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -200,6 +200,10 @@ void ovs_dp_notify_wq(struct work_struct *work); int action_fifos_init(void); void action_fifos_exit(void); +/* 'KEY' must not have any bits set outside of the 'MASK' */ +#define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) +#define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK)) + #define OVS_NLERR(logging_allowed, fmt, ...) \ do { \ if (logging_allowed && net_ratelimit()) \ -- cgit v1.2.3 From 7f8a436eaa2c3ddd8e1ff2fbca267e6275085536 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 26 Aug 2015 11:31:48 -0700 Subject: openvswitch: Add conntrack action Expose the kernel connection tracker via OVS. Userspace components can make use of the CT action to populate the connection state (ct_state) field for a flow. This state can be subsequently matched. Exposed connection states are OVS_CS_F_*: - NEW (0x01) - Beginning of a new connection. - ESTABLISHED (0x02) - Part of an existing connection. - RELATED (0x04) - Related to an established connection. - INVALID (0x20) - Could not track the connection for this packet. - REPLY_DIR (0x40) - This packet is in the reply direction for the flow. - TRACKED (0x80) - This packet has been sent through conntrack. When the CT action is executed by itself, it will send the packet through the connection tracker and populate the ct_state field with one or more of the connection state flags above. The CT action will always set the TRACKED bit. When the COMMIT flag is passed to the conntrack action, this specifies that information about the connection should be stored. This allows subsequent packets for the same (or related) connections to be correlated with this connection. Sending subsequent packets for the connection through conntrack allows the connection tracker to consider the packets as ESTABLISHED, RELATED, and/or REPLY_DIR. The CT action may optionally take a zone to track the flow within. This allows connections with the same 5-tuple to be kept logically separate from connections in other zones. If the zone is specified, then the "ct_zone" match field will be subsequently populated with the zone id. IP fragments are handled by transparently assembling them as part of the CT action. The maximum received unit (MRU) size is tracked so that refragmentation can occur during output. IP frag handling contributed by Andy Zhou. Based on original design by Justin Pettit. Signed-off-by: Joe Stringer Signed-off-by: Justin Pettit Signed-off-by: Andy Zhou Acked-by: Thomas Graf Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 40 ++++ net/openvswitch/Kconfig | 11 + net/openvswitch/Makefile | 2 + net/openvswitch/actions.c | 175 ++++++++++++++- net/openvswitch/conntrack.c | 454 +++++++++++++++++++++++++++++++++++++++ net/openvswitch/conntrack.h | 78 +++++++ net/openvswitch/datapath.c | 66 ++++-- net/openvswitch/datapath.h | 6 + net/openvswitch/flow.c | 2 + net/openvswitch/flow.h | 6 + net/openvswitch/flow_netlink.c | 69 ++++-- net/openvswitch/flow_netlink.h | 4 +- net/openvswitch/vport.c | 1 + 13 files changed, 877 insertions(+), 37 deletions(-) create mode 100644 net/openvswitch/conntrack.c create mode 100644 net/openvswitch/conntrack.h (limited to 'net/openvswitch/actions.c') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index d6b885460187..55f599792673 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -164,6 +164,9 @@ enum ovs_packet_cmd { * %OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute, which is sent only if the * output port is actually a tunnel port. Contains the output tunnel key * extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes. + * @OVS_PACKET_ATTR_MRU: Present for an %OVS_PACKET_CMD_ACTION and + * %OVS_PACKET_ATTR_USERSPACE action specify the Maximum received fragment + * size. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_PACKET_* commands. @@ -180,6 +183,7 @@ enum ovs_packet_attr { OVS_PACKET_ATTR_UNUSED2, OVS_PACKET_ATTR_PROBE, /* Packet operation is a feature probe, error logging should be suppressed. */ + OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */ __OVS_PACKET_ATTR_MAX }; @@ -319,6 +323,8 @@ enum ovs_key_attr { OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls. * The implementation may restrict * the accepted length of the array. */ + OVS_KEY_ATTR_CT_STATE, /* u8 bitmask of OVS_CS_F_* */ + OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -431,6 +437,15 @@ struct ovs_key_nd { __u8 nd_tll[ETH_ALEN]; }; +/* OVS_KEY_ATTR_CT_STATE flags */ +#define OVS_CS_F_NEW 0x01 /* Beginning of a new connection. */ +#define OVS_CS_F_ESTABLISHED 0x02 /* Part of an existing connection. */ +#define OVS_CS_F_RELATED 0x04 /* Related to an established + * connection. */ +#define OVS_CS_F_INVALID 0x20 /* Could not track connection. */ +#define OVS_CS_F_REPLY_DIR 0x40 /* Flow is in the reply direction. */ +#define OVS_CS_F_TRACKED 0x80 /* Conntrack has occurred. */ + /** * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow @@ -594,6 +609,28 @@ struct ovs_action_hash { uint32_t hash_basis; }; +/** + * enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action. + * @OVS_CT_ATTR_FLAGS: u32 connection tracking flags. + * @OVS_CT_ATTR_ZONE: u16 connection tracking zone. + */ +enum ovs_ct_attr { + OVS_CT_ATTR_UNSPEC, + OVS_CT_ATTR_FLAGS, /* u8 bitmask of OVS_CT_F_*. */ + OVS_CT_ATTR_ZONE, /* u16 zone id. */ + __OVS_CT_ATTR_MAX +}; + +#define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1) + +/* + * OVS_CT_ATTR_FLAGS flags - bitmask of %OVS_CT_F_* + * @OVS_CT_F_COMMIT: Commits the flow to the conntrack table. This allows + * future packets for the same connection to be identified as 'established' + * or 'related'. + */ +#define OVS_CT_F_COMMIT 0x01 + /** * enum ovs_action_attr - Action types. * @@ -623,6 +660,8 @@ struct ovs_action_hash { * indicate the new packet contents. This could potentially still be * %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there * is no MPLS label stack, as determined by ethertype, no action is taken. + * @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related + * entries in the flow key. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -648,6 +687,7 @@ enum ovs_action_attr { * data immediately followed by a mask. * The data must be zero for the unmasked * bits. */ + OVS_ACTION_ATTR_CT, /* One nested OVS_CT_ATTR_* . */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 422dc0567de9..98f343d0d6dd 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -31,6 +31,17 @@ config OPENVSWITCH If unsure, say N. +config OPENVSWITCH_CONNTRACK + bool "Open vSwitch conntrack action support" + depends on OPENVSWITCH + depends on NF_CONNTRACK + default OPENVSWITCH + ---help--- + If you say Y here, then Open vSwitch module will be able to pass + packets through conntrack. + + Say N to exclude this support and reduce the binary size. + config OPENVSWITCH_GRE tristate "Open vSwitch GRE tunneling support" depends on OPENVSWITCH diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index 6e1701de04d8..5b5913b06f54 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile @@ -15,6 +15,8 @@ openvswitch-y := \ vport-internal_dev.o \ vport-netdev.o +openvswitch-$(CONFIG_OPENVSWITCH_CONNTRACK) += conntrack.o + obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 520438b77dc8..72ca2c491b0a 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include #include +#include #include #include #include @@ -38,6 +40,7 @@ #include "datapath.h" #include "flow.h" +#include "conntrack.h" #include "vport.h" static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, @@ -52,6 +55,20 @@ struct deferred_action { struct sw_flow_key pkt_key; }; +#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN) +struct ovs_frag_data { + unsigned long dst; + struct vport *vport; + struct ovs_skb_cb cb; + __be16 inner_protocol; + __u16 vlan_tci; + __be16 vlan_proto; + unsigned int l2_len; + u8 l2_data[MAX_L2_LEN]; +}; + +static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage); + #define DEFERRED_ACTION_FIFO_SIZE 10 struct action_fifo { int head; @@ -602,14 +619,145 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, return 0; } -static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port) +static int ovs_vport_output(struct sock *sock, struct sk_buff *skb) +{ + struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage); + struct vport *vport = data->vport; + + if (skb_cow_head(skb, data->l2_len) < 0) { + kfree_skb(skb); + return -ENOMEM; + } + + __skb_dst_copy(skb, data->dst); + *OVS_CB(skb) = data->cb; + skb->inner_protocol = data->inner_protocol; + skb->vlan_tci = data->vlan_tci; + skb->vlan_proto = data->vlan_proto; + + /* Reconstruct the MAC header. */ + skb_push(skb, data->l2_len); + memcpy(skb->data, &data->l2_data, data->l2_len); + ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len); + skb_reset_mac_header(skb); + + ovs_vport_send(vport, skb); + return 0; +} + +static unsigned int +ovs_dst_get_mtu(const struct dst_entry *dst) +{ + return dst->dev->mtu; +} + +static struct dst_ops ovs_dst_ops = { + .family = AF_UNSPEC, + .mtu = ovs_dst_get_mtu, +}; + +/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is + * ovs_vport_output(), which is called once per fragmented packet. + */ +static void prepare_frag(struct vport *vport, struct sk_buff *skb) +{ + unsigned int hlen = skb_network_offset(skb); + struct ovs_frag_data *data; + + data = this_cpu_ptr(&ovs_frag_data_storage); + data->dst = skb->_skb_refdst; + data->vport = vport; + data->cb = *OVS_CB(skb); + data->inner_protocol = skb->inner_protocol; + data->vlan_tci = skb->vlan_tci; + data->vlan_proto = skb->vlan_proto; + data->l2_len = hlen; + memcpy(&data->l2_data, skb->data, hlen); + + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + skb_pull(skb, hlen); +} + +static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, + __be16 ethertype) +{ + if (skb_network_offset(skb) > MAX_L2_LEN) { + OVS_NLERR(1, "L2 header too long to fragment"); + return; + } + + if (ethertype == htons(ETH_P_IP)) { + struct dst_entry ovs_dst; + unsigned long orig_dst; + + prepare_frag(vport, skb); + dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1, + DST_OBSOLETE_NONE, DST_NOCOUNT); + ovs_dst.dev = vport->dev; + + orig_dst = skb->_skb_refdst; + skb_dst_set_noref(skb, &ovs_dst); + IPCB(skb)->frag_max_size = mru; + + ip_do_fragment(skb->sk, skb, ovs_vport_output); + refdst_drop(orig_dst); + } else if (ethertype == htons(ETH_P_IPV6)) { + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + unsigned long orig_dst; + struct rt6_info ovs_rt; + + if (!v6ops) { + kfree_skb(skb); + return; + } + + prepare_frag(vport, skb); + memset(&ovs_rt, 0, sizeof(ovs_rt)); + dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, + DST_OBSOLETE_NONE, DST_NOCOUNT); + ovs_rt.dst.dev = vport->dev; + + orig_dst = skb->_skb_refdst; + skb_dst_set_noref(skb, &ovs_rt.dst); + IP6CB(skb)->frag_max_size = mru; + + v6ops->fragment(skb->sk, skb, ovs_vport_output); + refdst_drop(orig_dst); + } else { + WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", + ovs_vport_name(vport), ntohs(ethertype), mru, + vport->dev->mtu); + kfree_skb(skb); + } +} + +static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, + struct sw_flow_key *key) { struct vport *vport = ovs_vport_rcu(dp, out_port); - if (likely(vport)) - ovs_vport_send(vport, skb); - else + if (likely(vport)) { + u16 mru = OVS_CB(skb)->mru; + + if (likely(!mru || (skb->len <= mru + ETH_HLEN))) { + ovs_vport_send(vport, skb); + } else if (mru <= vport->dev->mtu) { + __be16 ethertype = key->eth.type; + + if (!is_flow_key_valid(key)) { + if (eth_p_mpls(skb->protocol)) + ethertype = skb->inner_protocol; + else + ethertype = vlan_get_protocol(skb); + } + + ovs_fragment(vport, skb, mru, ethertype); + } else { + kfree_skb(skb); + } + } else { kfree_skb(skb); + } } static int output_userspace(struct datapath *dp, struct sk_buff *skb, @@ -623,6 +771,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_ACTION; + upcall.mru = OVS_CB(skb)->mru; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { @@ -816,6 +965,11 @@ static int execute_masked_set_action(struct sk_buff *skb, err = set_mpls(skb, flow_key, nla_data(a), get_mask(a, __be32 *)); break; + + case OVS_KEY_ATTR_CT_STATE: + case OVS_KEY_ATTR_CT_ZONE: + err = -EINVAL; + break; } return err; @@ -885,7 +1039,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC); if (out_skb) - do_output(dp, out_skb, prev_port); + do_output(dp, out_skb, prev_port, key); prev_port = -1; } @@ -942,6 +1096,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, key, a, attr, len); break; + + case OVS_ACTION_ATTR_CT: + err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key, + nla_data(a)); + + /* Hide stolen IP fragments from user space. */ + if (err == -EINPROGRESS) + return 0; + break; } if (unlikely(err)) { @@ -951,7 +1114,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, } if (prev_port != -1) - do_output(dp, skb, prev_port); + do_output(dp, skb, prev_port, key); else consume_skb(skb); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c new file mode 100644 index 000000000000..1189fd50f1cf --- /dev/null +++ b/net/openvswitch/conntrack.c @@ -0,0 +1,454 @@ +/* + * Copyright (c) 2015 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include + +#include "datapath.h" +#include "conntrack.h" +#include "flow.h" +#include "flow_netlink.h" + +struct ovs_ct_len_tbl { + size_t maxlen; + size_t minlen; +}; + +/* Conntrack action context for execution. */ +struct ovs_conntrack_info { + struct nf_conntrack_zone zone; + struct nf_conn *ct; + u32 flags; + u16 family; +}; + +static u16 key_to_nfproto(const struct sw_flow_key *key) +{ + switch (ntohs(key->eth.type)) { + case ETH_P_IP: + return NFPROTO_IPV4; + case ETH_P_IPV6: + return NFPROTO_IPV6; + default: + return NFPROTO_UNSPEC; + } +} + +/* Map SKB connection state into the values used by flow definition. */ +static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) +{ + u8 ct_state = OVS_CS_F_TRACKED; + + switch (ctinfo) { + case IP_CT_ESTABLISHED_REPLY: + case IP_CT_RELATED_REPLY: + case IP_CT_NEW_REPLY: + ct_state |= OVS_CS_F_REPLY_DIR; + break; + default: + break; + } + + switch (ctinfo) { + case IP_CT_ESTABLISHED: + case IP_CT_ESTABLISHED_REPLY: + ct_state |= OVS_CS_F_ESTABLISHED; + break; + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + ct_state |= OVS_CS_F_RELATED; + break; + case IP_CT_NEW: + case IP_CT_NEW_REPLY: + ct_state |= OVS_CS_F_NEW; + break; + default: + break; + } + + return ct_state; +} + +static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, + const struct nf_conntrack_zone *zone) +{ + key->ct.state = state; + key->ct.zone = zone->id; +} + +/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has + * previously sent the packet to conntrack via the ct action. + */ +static void ovs_ct_update_key(const struct sk_buff *skb, + struct sw_flow_key *key, bool post_ct) +{ + const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + u8 state = 0; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) { + state = ovs_ct_get_state(ctinfo); + if (ct->master) + state |= OVS_CS_F_RELATED; + zone = nf_ct_zone(ct); + } else if (post_ct) { + state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; + } + __ovs_ct_update_key(key, state, zone); +} + +void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) +{ + ovs_ct_update_key(skb, key, false); +} + +int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) +{ + if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) + return -EMSGSIZE; + + if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && + nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone)) + return -EMSGSIZE; + + return 0; +} + +static int handle_fragments(struct net *net, struct sw_flow_key *key, + u16 zone, struct sk_buff *skb) +{ + struct ovs_skb_cb ovs_cb = *OVS_CB(skb); + + if (key->eth.type == htons(ETH_P_IP)) { + enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; + int err; + + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + err = ip_defrag(skb, user); + if (err) + return err; + + ovs_cb.mru = IPCB(skb)->frag_max_size; + } else if (key->eth.type == htons(ETH_P_IPV6)) { +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; + struct sk_buff *reasm; + + memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); + reasm = nf_ct_frag6_gather(skb, user); + if (!reasm) + return -EINPROGRESS; + + if (skb == reasm) + return -EINVAL; + + key->ip.proto = ipv6_hdr(reasm)->nexthdr; + skb_morph(skb, reasm); + consume_skb(reasm); + ovs_cb.mru = IP6CB(skb)->frag_max_size; +#else + return -EPFNOSUPPORT; +#endif + } else { + return -EPFNOSUPPORT; + } + + key->ip.frag = OVS_FRAG_TYPE_NONE; + skb_clear_hash(skb); + skb->ignore_df = 1; + *OVS_CB(skb) = ovs_cb; + + return 0; +} + +static struct nf_conntrack_expect * +ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, + u16 proto, const struct sk_buff *skb) +{ + struct nf_conntrack_tuple tuple; + + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple)) + return NULL; + return __nf_ct_expect_find(net, zone, &tuple); +} + +/* Determine whether skb->nfct is equal to the result of conntrack lookup. */ +static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb, + const struct ovs_conntrack_info *info) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return false; + if (!net_eq(net, read_pnet(&ct->ct_net))) + return false; + if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct))) + return false; + + return true; +} + +static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key, + const struct ovs_conntrack_info *info, + struct sk_buff *skb) +{ + /* If we are recirculating packets to match on conntrack fields and + * committing with a separate conntrack action, then we don't need to + * actually run the packet through conntrack twice unless it's for a + * different zone. + */ + if (!skb_nfct_cached(net, skb, info)) { + struct nf_conn *tmpl = info->ct; + + /* Associate skb with specified zone. */ + if (tmpl) { + if (skb->nfct) + nf_conntrack_put(skb->nfct); + nf_conntrack_get(&tmpl->ct_general); + skb->nfct = &tmpl->ct_general; + skb->nfctinfo = IP_CT_NEW; + } + + if (nf_conntrack_in(net, info->family, NF_INET_PRE_ROUTING, + skb) != NF_ACCEPT) + return -ENOENT; + } + + return 0; +} + +/* Lookup connection and read fields into key. */ +static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, + const struct ovs_conntrack_info *info, + struct sk_buff *skb) +{ + struct nf_conntrack_expect *exp; + + exp = ovs_ct_expect_find(net, &info->zone, info->family, skb); + if (exp) { + u8 state; + + state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; + __ovs_ct_update_key(key, state, &info->zone); + } else { + int err; + + err = __ovs_ct_lookup(net, key, info, skb); + if (err) + return err; + + ovs_ct_update_key(skb, key, true); + } + + return 0; +} + +/* Lookup connection and confirm if unconfirmed. */ +static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, + const struct ovs_conntrack_info *info, + struct sk_buff *skb) +{ + u8 state; + int err; + + state = key->ct.state; + if (key->ct.zone == info->zone.id && + ((state & OVS_CS_F_TRACKED) && !(state & OVS_CS_F_NEW))) { + /* Previous lookup has shown that this connection is already + * tracked and committed. Skip committing. + */ + return 0; + } + + err = __ovs_ct_lookup(net, key, info, skb); + if (err) + return err; + if (nf_conntrack_confirm(skb) != NF_ACCEPT) + return -EINVAL; + + ovs_ct_update_key(skb, key, true); + + return 0; +} + +int ovs_ct_execute(struct net *net, struct sk_buff *skb, + struct sw_flow_key *key, + const struct ovs_conntrack_info *info) +{ + int nh_ofs; + int err; + + /* The conntrack module expects to be working at L3. */ + nh_ofs = skb_network_offset(skb); + skb_pull(skb, nh_ofs); + + if (key->ip.frag != OVS_FRAG_TYPE_NONE) { + err = handle_fragments(net, key, info->zone.id, skb); + if (err) + return err; + } + + if (info->flags & OVS_CT_F_COMMIT) + err = ovs_ct_commit(net, key, info, skb); + else + err = ovs_ct_lookup(net, key, info, skb); + + skb_push(skb, nh_ofs); + return err; +} + +static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { + [OVS_CT_ATTR_FLAGS] = { .minlen = sizeof(u32), + .maxlen = sizeof(u32) }, + [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), + .maxlen = sizeof(u16) }, +}; + +static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, + bool log) +{ + struct nlattr *a; + int rem; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + int maxlen = ovs_ct_attr_lens[type].maxlen; + int minlen = ovs_ct_attr_lens[type].minlen; + + if (type > OVS_CT_ATTR_MAX) { + OVS_NLERR(log, + "Unknown conntrack attr (type=%d, max=%d)", + type, OVS_CT_ATTR_MAX); + return -EINVAL; + } + if (nla_len(a) < minlen || nla_len(a) > maxlen) { + OVS_NLERR(log, + "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)", + type, nla_len(a), maxlen); + return -EINVAL; + } + + switch (type) { + case OVS_CT_ATTR_FLAGS: + info->flags = nla_get_u32(a); + break; +#ifdef CONFIG_NF_CONNTRACK_ZONES + case OVS_CT_ATTR_ZONE: + info->zone.id = nla_get_u16(a); + break; +#endif + default: + OVS_NLERR(log, "Unknown conntrack attr (%d)", + type); + return -EINVAL; + } + } + + if (rem > 0) { + OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem); + return -EINVAL; + } + + return 0; +} + +bool ovs_ct_verify(enum ovs_key_attr attr) +{ + if (attr == OVS_KEY_ATTR_CT_STATE) + return true; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && + attr == OVS_KEY_ATTR_CT_ZONE) + return true; + + return false; +} + +int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, + const struct sw_flow_key *key, + struct sw_flow_actions **sfa, bool log) +{ + struct ovs_conntrack_info ct_info; + u16 family; + int err; + + family = key_to_nfproto(key); + if (family == NFPROTO_UNSPEC) { + OVS_NLERR(log, "ct family unspecified"); + return -EINVAL; + } + + memset(&ct_info, 0, sizeof(ct_info)); + ct_info.family = family; + + nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID, + NF_CT_DEFAULT_ZONE_DIR, 0); + + err = parse_ct(attr, &ct_info, log); + if (err) + return err; + + /* Set up template for tracking connections in specific zones. */ + ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL); + if (!ct_info.ct) { + OVS_NLERR(log, "Failed to allocate conntrack template"); + return -ENOMEM; + } + + err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info, + sizeof(ct_info), log); + if (err) + goto err_free_ct; + + __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); + nf_conntrack_get(&ct_info.ct->ct_general); + return 0; +err_free_ct: + nf_conntrack_free(ct_info.ct); + return err; +} + +int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, + struct sk_buff *skb) +{ + struct nlattr *start; + + start = nla_nest_start(skb, OVS_ACTION_ATTR_CT); + if (!start) + return -EMSGSIZE; + + if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags)) + return -EMSGSIZE; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && + nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) + return -EMSGSIZE; + + nla_nest_end(skb, start); + + return 0; +} + +void ovs_ct_free_action(const struct nlattr *a) +{ + struct ovs_conntrack_info *ct_info = nla_data(a); + + if (ct_info->ct) + nf_ct_put(ct_info->ct); +} diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h new file mode 100644 index 000000000000..e812ee64a718 --- /dev/null +++ b/net/openvswitch/conntrack.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2015 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef OVS_CONNTRACK_H +#define OVS_CONNTRACK_H 1 + +#include "flow.h" + +struct ovs_conntrack_info; +enum ovs_key_attr; + +#if defined(CONFIG_OPENVSWITCH_CONNTRACK) +bool ovs_ct_verify(enum ovs_key_attr attr); +int ovs_ct_copy_action(struct net *, const struct nlattr *, + const struct sw_flow_key *, struct sw_flow_actions **, + bool log); +int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *); + +int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *, + const struct ovs_conntrack_info *); + +void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); +int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); +void ovs_ct_free_action(const struct nlattr *a); +#else +#include + +static inline bool ovs_ct_verify(int attr) +{ + return false; +} + +static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla, + const struct sw_flow_key *key, + struct sw_flow_actions **acts, bool log) +{ + return -ENOTSUPP; +} + +static inline int ovs_ct_action_to_attr(const struct ovs_conntrack_info *info, + struct sk_buff *skb) +{ + return -ENOTSUPP; +} + +static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb, + struct sw_flow_key *key, + const struct ovs_conntrack_info *info) +{ + return -ENOTSUPP; +} + +static inline void ovs_ct_fill_key(const struct sk_buff *skb, + struct sw_flow_key *key) +{ + key->ct.state = 0; + key->ct.zone = 0; +} + +static inline int ovs_ct_put_key(const struct sw_flow_key *key, + struct sk_buff *skb) +{ + return 0; +} + +static inline void ovs_ct_free_action(const struct nlattr *a) { } +#endif +#endif /* ovs_conntrack.h */ diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d5b547375887..72e63726efa0 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -275,6 +275,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_MISS; upcall.portid = ovs_vport_find_upcall_portid(p, skb); + upcall.mru = OVS_CB(skb)->mru; error = ovs_dp_upcall(dp, skb, key, &upcall); if (unlikely(error)) kfree_skb(skb); @@ -400,9 +401,23 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, if (upcall_info->actions_len) size += nla_total_size(upcall_info->actions_len); + /* OVS_PACKET_ATTR_MRU */ + if (upcall_info->mru) + size += nla_total_size(sizeof(upcall_info->mru)); + return size; } +static void pad_packet(struct datapath *dp, struct sk_buff *skb) +{ + if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { + size_t plen = NLA_ALIGN(skb->len) - skb->len; + + if (plen > 0) + memset(skb_put(skb, plen), 0, plen); + } +} + static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, const struct sw_flow_key *key, const struct dp_upcall_info *upcall_info) @@ -492,6 +507,16 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, nla_nest_cancel(user_skb, nla); } + /* Add OVS_PACKET_ATTR_MRU */ + if (upcall_info->mru) { + if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, + upcall_info->mru)) { + err = -ENOBUFS; + goto out; + } + pad_packet(dp, user_skb); + } + /* Only reserve room for attribute header, packet data is added * in skb_zerocopy() */ if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { @@ -505,12 +530,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, goto out; /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */ - if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { - size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len; - - if (plen > 0) - memset(skb_put(user_skb, plen), 0, plen); - } + pad_packet(dp, user_skb); ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; @@ -527,6 +547,7 @@ out: static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) { struct ovs_header *ovs_header = info->userhdr; + struct net *net = sock_net(skb->sk); struct nlattr **a = info->attrs; struct sw_flow_actions *acts; struct sk_buff *packet; @@ -535,6 +556,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct ethhdr *eth; struct vport *input_vport; + u16 mru = 0; int len; int err; bool log = !a[OVS_PACKET_ATTR_PROBE]; @@ -564,6 +586,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) else packet->protocol = htons(ETH_P_802_2); + /* Set packet's mru */ + if (a[OVS_PACKET_ATTR_MRU]) { + mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]); + packet->ignore_df = 1; + } + OVS_CB(packet)->mru = mru; + /* Build an sw_flow for sending this packet. */ flow = ovs_flow_alloc(); err = PTR_ERR(flow); @@ -575,7 +604,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (err) goto err_flow_free; - err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], + err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS], &flow->key, &acts, log); if (err) goto err_flow_free; @@ -586,7 +615,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) packet->mark = flow->key.phy.skb_mark; rcu_read_lock(); - dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); + dp = get_dp_rcu(net, ovs_header->dp_ifindex); err = -ENODEV; if (!dp) goto err_unlock; @@ -598,6 +627,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (!input_vport) goto err_unlock; + packet->dev = input_vport->dev; OVS_CB(packet)->input_vport = input_vport; sf_acts = rcu_dereference(flow->sf_acts); @@ -624,6 +654,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG }, + [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 }, }; static const struct genl_ops dp_packet_genl_ops[] = { @@ -880,6 +911,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { + struct net *net = sock_net(skb->sk); struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; struct sw_flow *flow = NULL, *new_flow; @@ -929,8 +961,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_kfree_flow; /* Validate actions. */ - error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, - &acts, log); + error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS], + &new_flow->key, &acts, log); if (error) { OVS_NLERR(log, "Flow actions may not be safe on all matching packets."); goto err_kfree_flow; @@ -944,7 +976,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } ovs_lock(); - dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + dp = get_dp(net, ovs_header->dp_ifindex); if (unlikely(!dp)) { error = -ENODEV; goto err_unlock_ovs; @@ -1038,7 +1070,8 @@ error: } /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */ -static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, +static struct sw_flow_actions *get_flow_actions(struct net *net, + const struct nlattr *a, const struct sw_flow_key *key, const struct sw_flow_mask *mask, bool log) @@ -1048,7 +1081,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, int error; ovs_flow_mask_key(&masked_key, key, mask); - error = ovs_nla_copy_actions(a, &masked_key, &acts, log); + error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log); if (error) { OVS_NLERR(log, "Actions may not be safe on all matching packets"); @@ -1060,6 +1093,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) { + struct net *net = sock_net(skb->sk); struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key; @@ -1091,8 +1125,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) /* Validate actions. */ if (a[OVS_FLOW_ATTR_ACTIONS]) { - acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask, - log); + acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key, + &mask, log); if (IS_ERR(acts)) { error = PTR_ERR(acts); goto error; @@ -1108,7 +1142,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) } ovs_lock(); - dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + dp = get_dp(net, ovs_header->dp_ifindex); if (unlikely(!dp)) { error = -ENODEV; goto err_unlock_ovs; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 487a85f7d967..d24ba98024be 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -27,6 +27,7 @@ #include #include +#include "conntrack.h" #include "flow.h" #include "flow_table.h" #include "vport.h" @@ -97,10 +98,13 @@ struct datapath { * NULL if the packet is not being tunneled. * @input_vport: The original vport packet came in on. This value is cached * when a packet is received by OVS. + * @mru: The maximum received fragement size; 0 if the packet is not + * fragmented. */ struct ovs_skb_cb { struct ip_tunnel_info *egress_tun_info; struct vport *input_vport; + u16 mru; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) @@ -113,6 +117,7 @@ struct ovs_skb_cb { * then no packet is sent and the packet is accounted in the datapath's @n_lost * counter. * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY. + * @mru: If not zero, Maximum received IP fragment size. */ struct dp_upcall_info { const struct ip_tunnel_info *egress_tun_info; @@ -121,6 +126,7 @@ struct dp_upcall_info { int actions_len; u32 portid; u8 cmd; + u16 mru; }; /** diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 8db22ef73626..376ca8738fd4 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -49,6 +49,7 @@ #include "datapath.h" #include "flow.h" #include "flow_netlink.h" +#include "conntrack.h" u64 ovs_flow_used_time(unsigned long flow_jiffies) { @@ -707,6 +708,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->phy.priority = skb->priority; key->phy.in_port = OVS_CB(skb)->input_vport->port_no; key->phy.skb_mark = skb->mark; + ovs_ct_fill_key(skb, key); key->ovs_flow_hash = 0; key->recirc_id = 0; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 082a87bac819..312c7d755b9b 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -111,6 +111,12 @@ struct sw_flow_key { } nd; } ipv6; }; + struct { + /* Connection tracking fields. */ + u16 zone; + u8 state; + } ct; + } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ struct sw_flow_key_range { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index c182b28c0884..4e795b289eb7 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -281,7 +281,7 @@ size_t ovs_key_attr_size(void) /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 24); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -290,6 +290,8 @@ size_t ovs_key_attr_size(void) + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ + + nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */ + + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -339,6 +341,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, .next = ovs_tunnel_key_lens, }, [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, + [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) }, + [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, }; static bool is_all_zero(const u8 *fp, size_t size) @@ -768,6 +772,21 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } + + if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) && + ovs_ct_verify(OVS_KEY_ATTR_CT_STATE)) { + u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]); + + SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); + } + if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) && + ovs_ct_verify(OVS_KEY_ATTR_CT_ZONE)) { + u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]); + + SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE); + } return 0; } @@ -1266,6 +1285,7 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr, memset(&match, 0, sizeof(match)); match.key = key; + memset(&key->ct, 0, sizeof(key->ct)); key->phy.in_port = DP_MAX_PORTS; return metadata_from_nlattrs(&match, &attrs, a, false, log); @@ -1314,6 +1334,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) goto nla_put_failure; + if (ovs_ct_put_key(output, skb)) + goto nla_put_failure; + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); if (!nla) goto nla_put_failure; @@ -1574,6 +1597,9 @@ void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) case OVS_ACTION_ATTR_SET: ovs_nla_free_set_action(a); break; + case OVS_ACTION_ATTR_CT: + ovs_ct_free_action(a); + break; } } @@ -1647,8 +1673,8 @@ static struct nlattr *__add_action(struct sw_flow_actions **sfa, return a; } -static int add_action(struct sw_flow_actions **sfa, int attrtype, - void *data, int len, bool log) +int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data, + int len, bool log) { struct nlattr *a; @@ -1663,7 +1689,7 @@ static inline int add_nested_action_start(struct sw_flow_actions **sfa, int used = (*sfa)->actions_len; int err; - err = add_action(sfa, attrtype, NULL, 0, log); + err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log); if (err) return err; @@ -1679,12 +1705,12 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, a->nla_len = sfa->actions_len - st_offset; } -static int __ovs_nla_copy_actions(const struct nlattr *attr, +static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log); -static int validate_and_copy_sample(const struct nlattr *attr, +static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log) @@ -1716,15 +1742,15 @@ static int validate_and_copy_sample(const struct nlattr *attr, start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); if (start < 0) return start; - err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, - nla_data(probability), sizeof(u32), log); + err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, + nla_data(probability), sizeof(u32), log); if (err) return err; st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); if (st_acts < 0) return st_acts; - err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa, + err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa, eth_type, vlan_tci, log); if (err) return err; @@ -2058,7 +2084,7 @@ static int copy_action(const struct nlattr *from, return 0; } -static int __ovs_nla_copy_actions(const struct nlattr *attr, +static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log) @@ -2082,7 +2108,8 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, [OVS_ACTION_ATTR_SET] = (u32)-1, [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1, [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, - [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash) + [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), + [OVS_ACTION_ATTR_CT] = (u32)-1, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -2189,13 +2216,20 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, break; case OVS_ACTION_ATTR_SAMPLE: - err = validate_and_copy_sample(a, key, depth, sfa, + err = validate_and_copy_sample(net, a, key, depth, sfa, eth_type, vlan_tci, log); if (err) return err; skip_copy = true; break; + case OVS_ACTION_ATTR_CT: + err = ovs_ct_copy_action(net, a, key, sfa, log); + if (err) + return err; + skip_copy = true; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; @@ -2214,7 +2248,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, } /* 'key' must be the masked key. */ -int ovs_nla_copy_actions(const struct nlattr *attr, +int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log) { @@ -2225,7 +2259,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr, return PTR_ERR(*sfa); (*sfa)->orig_len = nla_len(attr); - err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type, + err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type, key->eth.tci, log); if (err) ovs_nla_free_flow_actions(*sfa); @@ -2350,6 +2384,13 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) if (err) return err; break; + + case OVS_ACTION_ATTR_CT: + err = ovs_ct_action_to_attr(nla_data(a), skb); + if (err) + return err; + break; + default: if (nla_put(skb, type, nla_len(a), nla_data(a))) return -EMSGSIZE; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index acd074408f0a..c0b484b237c9 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -62,9 +62,11 @@ int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, const struct sw_flow_key *key, bool log); u32 ovs_nla_get_ufid_flags(const struct nlattr *attr); -int ovs_nla_copy_actions(const struct nlattr *attr, +int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log); +int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, + void *data, int len, bool log); int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index d73e5a16e7ca..e2dc9dac59e6 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -484,6 +484,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, OVS_CB(skb)->input_vport = vport; OVS_CB(skb)->egress_tun_info = NULL; + OVS_CB(skb)->mru = 0; /* Extract flow from 'skb' into 'key'. */ error = ovs_flow_key_extract(tun_info, skb, &key); if (unlikely(error)) { -- cgit v1.2.3 From 182e3042e15de759e81618d11fe4f62f5259d982 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 26 Aug 2015 11:31:49 -0700 Subject: openvswitch: Allow matching on conntrack mark Allow matching and setting the ct_mark field. As with ct_state and ct_zone, these fields are populated when the CT action is executed. To write to this field, a value and mask can be specified as a nested attribute under the CT action. This data is stored with the conntrack entry, and is executed after the lookup occurs for the CT action. The conntrack entry itself must be committed using the COMMIT flag in the CT action flags for this change to persist. Signed-off-by: Justin Pettit Signed-off-by: Joe Stringer Acked-by: Thomas Graf Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 5 +++ net/openvswitch/actions.c | 1 + net/openvswitch/conntrack.c | 67 ++++++++++++++++++++++++++++++++++++++-- net/openvswitch/conntrack.h | 1 + net/openvswitch/flow.h | 1 + net/openvswitch/flow_netlink.c | 12 ++++++- 6 files changed, 83 insertions(+), 4 deletions(-) (limited to 'net/openvswitch/actions.c') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 55f599792673..7a185b554343 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -325,6 +325,7 @@ enum ovs_key_attr { * the accepted length of the array. */ OVS_KEY_ATTR_CT_STATE, /* u8 bitmask of OVS_CS_F_* */ OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */ + OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -613,11 +614,15 @@ struct ovs_action_hash { * enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action. * @OVS_CT_ATTR_FLAGS: u32 connection tracking flags. * @OVS_CT_ATTR_ZONE: u16 connection tracking zone. + * @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the + * mask, the corresponding bit in the value is copied to the connection + * tracking mark field in the connection. */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, OVS_CT_ATTR_FLAGS, /* u8 bitmask of OVS_CT_F_*. */ OVS_CT_ATTR_ZONE, /* u16 zone id. */ + OVS_CT_ATTR_MARK, /* mark to associate with this connection. */ __OVS_CT_ATTR_MAX }; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 72ca2c491b0a..9741d2c703df 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -968,6 +968,7 @@ static int execute_masked_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_CT_STATE: case OVS_KEY_ATTR_CT_ZONE: + case OVS_KEY_ATTR_CT_MARK: err = -EINVAL; break; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 1189fd50f1cf..e53dafccf21f 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -28,12 +28,19 @@ struct ovs_ct_len_tbl { size_t minlen; }; +/* Metadata mark for masked write to conntrack mark */ +struct md_mark { + u32 value; + u32 mask; +}; + /* Conntrack action context for execution. */ struct ovs_conntrack_info { struct nf_conntrack_zone zone; struct nf_conn *ct; u32 flags; u16 family; + struct md_mark mark; }; static u16 key_to_nfproto(const struct sw_flow_key *key) @@ -84,10 +91,12 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) } static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, - const struct nf_conntrack_zone *zone) + const struct nf_conntrack_zone *zone, + const struct nf_conn *ct) { key->ct.state = state; key->ct.zone = zone->id; + key->ct.mark = ct ? ct->mark : 0; } /* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has @@ -110,7 +119,7 @@ static void ovs_ct_update_key(const struct sk_buff *skb, } else if (post_ct) { state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; } - __ovs_ct_update_key(key, state, zone); + __ovs_ct_update_key(key, state, zone, ct); } void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) @@ -127,6 +136,35 @@ int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone)) return -EMSGSIZE; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && + nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark)) + return -EMSGSIZE; + + return 0; +} + +static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, + u32 ct_mark, u32 mask) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + u32 new_mark; + + if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) + return -ENOTSUPP; + + /* The connection could be invalid, in which case set_mark is no-op. */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return 0; + + new_mark = ct_mark | (ct->mark & ~(mask)); + if (ct->mark != new_mark) { + ct->mark = new_mark; + nf_conntrack_event_cache(IPCT_MARK, ct); + key->ct.mark = new_mark; + } + return 0; } @@ -247,7 +285,7 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, u8 state; state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; - __ovs_ct_update_key(key, state, &info->zone); + __ovs_ct_update_key(key, state, &info->zone, exp->master); } else { int err; @@ -310,7 +348,13 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, err = ovs_ct_commit(net, key, info, skb); else err = ovs_ct_lookup(net, key, info, skb); + if (err) + goto err; + if (info->mark.mask) + err = ovs_ct_set_mark(skb, key, info->mark.value, + info->mark.mask); +err: skb_push(skb, nh_ofs); return err; } @@ -320,6 +364,8 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { .maxlen = sizeof(u32) }, [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), .maxlen = sizeof(u16) }, + [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), + .maxlen = sizeof(struct md_mark) }, }; static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, @@ -354,6 +400,14 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, case OVS_CT_ATTR_ZONE: info->zone.id = nla_get_u16(a); break; +#endif +#ifdef CONFIG_NF_CONNTRACK_MARK + case OVS_CT_ATTR_MARK: { + struct md_mark *mark = nla_data(a); + + info->mark = *mark; + break; + } #endif default: OVS_NLERR(log, "Unknown conntrack attr (%d)", @@ -377,6 +431,9 @@ bool ovs_ct_verify(enum ovs_key_attr attr) if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && attr == OVS_KEY_ATTR_CT_ZONE) return true; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && + attr == OVS_KEY_ATTR_CT_MARK) + return true; return false; } @@ -439,6 +496,10 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) return -EMSGSIZE; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && + nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark), + &ct_info->mark)) + return -EMSGSIZE; nla_nest_end(skb, start); diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index e812ee64a718..87b289c58978 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -65,6 +65,7 @@ static inline void ovs_ct_fill_key(const struct sk_buff *skb, { key->ct.state = 0; key->ct.zone = 0; + key->ct.mark = 0; } static inline int ovs_ct_put_key(const struct sw_flow_key *key, diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 312c7d755b9b..e05e69711ce1 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -114,6 +114,7 @@ struct sw_flow_key { struct { /* Connection tracking fields. */ u16 zone; + u32 mark; u8 state; } ct; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 4e795b289eb7..b17a4ec348f9 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -281,7 +281,7 @@ size_t ovs_key_attr_size(void) /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 24); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 25); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -292,6 +292,7 @@ size_t ovs_key_attr_size(void) + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ + nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */ + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ + + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -343,6 +344,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) }, [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, + [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, }; static bool is_all_zero(const u8 *fp, size_t size) @@ -787,6 +789,13 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE); } + if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) && + ovs_ct_verify(OVS_KEY_ATTR_CT_MARK)) { + u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]); + + SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK); + } return 0; } @@ -1919,6 +1928,7 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: + case OVS_KEY_ATTR_CT_MARK: case OVS_KEY_ATTR_ETHERNET: break; -- cgit v1.2.3 From c2ac667358708d7cce64c78f58af6adf4c1e848b Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 26 Aug 2015 11:31:52 -0700 Subject: openvswitch: Allow matching on conntrack label Allow matching and setting the ct_label field. As with ct_mark, this is populated by executing the CT action. The label field may be modified by specifying a label and mask nested under the CT action. It is stored as metadata attached to the connection. Label modification occurs after lookup, and will only persist when the conntrack entry is committed by providing the COMMIT flag to the CT action. Labels are currently fixed to 128 bits in size. Signed-off-by: Joe Stringer Acked-by: Thomas Graf Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 10 +++ net/openvswitch/actions.c | 1 + net/openvswitch/conntrack.c | 128 ++++++++++++++++++++++++++++++++++++++- net/openvswitch/conntrack.h | 11 +++- net/openvswitch/datapath.c | 18 +++--- net/openvswitch/datapath.h | 3 + net/openvswitch/flow.c | 4 +- net/openvswitch/flow.h | 3 +- net/openvswitch/flow_netlink.c | 46 +++++++++----- net/openvswitch/flow_netlink.h | 9 +-- 10 files changed, 199 insertions(+), 34 deletions(-) (limited to 'net/openvswitch/actions.c') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 7a185b554343..9d52058a9330 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -326,6 +326,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_STATE, /* u8 bitmask of OVS_CS_F_* */ OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */ OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */ + OVS_KEY_ATTR_CT_LABEL, /* 16-octet connection tracking label */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -438,6 +439,11 @@ struct ovs_key_nd { __u8 nd_tll[ETH_ALEN]; }; +#define OVS_CT_LABEL_LEN 16 +struct ovs_key_ct_label { + __u8 ct_label[OVS_CT_LABEL_LEN]; +}; + /* OVS_KEY_ATTR_CT_STATE flags */ #define OVS_CS_F_NEW 0x01 /* Beginning of a new connection. */ #define OVS_CS_F_ESTABLISHED 0x02 /* Part of an existing connection. */ @@ -617,12 +623,16 @@ struct ovs_action_hash { * @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the * mask, the corresponding bit in the value is copied to the connection * tracking mark field in the connection. + * @OVS_CT_ATTR_LABEL: %OVS_CT_LABEL_LEN value followed by %OVS_CT_LABEL_LEN + * mask. For each bit set in the mask, the corresponding bit in the value is + * copied to the connection tracking label field in the connection. */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, OVS_CT_ATTR_FLAGS, /* u8 bitmask of OVS_CT_F_*. */ OVS_CT_ATTR_ZONE, /* u16 zone id. */ OVS_CT_ATTR_MARK, /* mark to associate with this connection. */ + OVS_CT_ATTR_LABEL, /* label to associate with this connection. */ __OVS_CT_ATTR_MAX }; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 9741d2c703df..736a113a75c3 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -969,6 +969,7 @@ static int execute_masked_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_CT_STATE: case OVS_KEY_ATTR_CT_ZONE: case OVS_KEY_ATTR_CT_MARK: + case OVS_KEY_ATTR_CT_LABEL: err = -EINVAL; break; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e53dafccf21f..a0417fb33d1d 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -34,6 +35,12 @@ struct md_mark { u32 mask; }; +/* Metadata label for masked write to conntrack label. */ +struct md_label { + struct ovs_key_ct_label value; + struct ovs_key_ct_label mask; +}; + /* Conntrack action context for execution. */ struct ovs_conntrack_info { struct nf_conntrack_zone zone; @@ -41,6 +48,7 @@ struct ovs_conntrack_info { u32 flags; u16 family; struct md_mark mark; + struct md_label label; }; static u16 key_to_nfproto(const struct sw_flow_key *key) @@ -90,6 +98,24 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) return ct_state; } +static void ovs_ct_get_label(const struct nf_conn *ct, + struct ovs_key_ct_label *label) +{ + struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; + + if (cl) { + size_t len = cl->words * sizeof(long); + + if (len > OVS_CT_LABEL_LEN) + len = OVS_CT_LABEL_LEN; + else if (len < OVS_CT_LABEL_LEN) + memset(label, 0, OVS_CT_LABEL_LEN); + memcpy(label, cl->bits, len); + } else { + memset(label, 0, OVS_CT_LABEL_LEN); + } +} + static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, const struct nf_conntrack_zone *zone, const struct nf_conn *ct) @@ -97,6 +123,7 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, key->ct.state = state; key->ct.zone = zone->id; key->ct.mark = ct ? ct->mark : 0; + ovs_ct_get_label(ct, &key->ct.label); } /* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has @@ -140,6 +167,11 @@ int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark)) return -EMSGSIZE; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABEL) && + nla_put(skb, OVS_KEY_ATTR_CT_LABEL, sizeof(key->ct.label), + &key->ct.label)) + return -EMSGSIZE; + return 0; } @@ -168,6 +200,40 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, return 0; } +static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key, + const struct ovs_key_ct_label *label, + const struct ovs_key_ct_label *mask) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn_labels *cl; + struct nf_conn *ct; + int err; + + if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) + return -ENOTSUPP; + + /* The connection could be invalid, in which case set_label is no-op.*/ + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return 0; + + cl = nf_ct_labels_find(ct); + if (!cl) { + nf_ct_labels_ext_add(ct); + cl = nf_ct_labels_find(ct); + } + if (!cl || cl->words * sizeof(long) < OVS_CT_LABEL_LEN) + return -ENOSPC; + + err = nf_connlabels_replace(ct, (u32 *)label, (u32 *)mask, + OVS_CT_LABEL_LEN / sizeof(u32)); + if (err) + return err; + + ovs_ct_get_label(ct, &key->ct.label); + return 0; +} + static int handle_fragments(struct net *net, struct sw_flow_key *key, u16 zone, struct sk_buff *skb) { @@ -327,6 +393,17 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, return 0; } +static bool label_nonzero(const struct ovs_key_ct_label *label) +{ + size_t i; + + for (i = 0; i < sizeof(*label); i++) + if (label->ct_label[i]) + return true; + + return false; +} + int ovs_ct_execute(struct net *net, struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_conntrack_info *info) @@ -351,9 +428,15 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, if (err) goto err; - if (info->mark.mask) + if (info->mark.mask) { err = ovs_ct_set_mark(skb, key, info->mark.value, info->mark.mask); + if (err) + goto err; + } + if (label_nonzero(&info->label.mask)) + err = ovs_ct_set_label(skb, key, &info->label.value, + &info->label.mask); err: skb_push(skb, nh_ofs); return err; @@ -366,6 +449,8 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { .maxlen = sizeof(u16) }, [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), .maxlen = sizeof(struct md_mark) }, + [OVS_CT_ATTR_LABEL] = { .minlen = sizeof(struct md_label), + .maxlen = sizeof(struct md_label) }, }; static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, @@ -408,6 +493,14 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, info->mark = *mark; break; } +#endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + case OVS_CT_ATTR_LABEL: { + struct md_label *label = nla_data(a); + + info->label = *label; + break; + } #endif default: OVS_NLERR(log, "Unknown conntrack attr (%d)", @@ -424,7 +517,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, return 0; } -bool ovs_ct_verify(enum ovs_key_attr attr) +bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) { if (attr == OVS_KEY_ATTR_CT_STATE) return true; @@ -434,6 +527,12 @@ bool ovs_ct_verify(enum ovs_key_attr attr) if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && attr == OVS_KEY_ATTR_CT_MARK) return true; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && + attr == OVS_KEY_ATTR_CT_LABEL) { + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + return ovs_net->xt_label; + } return false; } @@ -500,6 +599,10 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark), &ct_info->mark)) return -EMSGSIZE; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && + nla_put(skb, OVS_CT_ATTR_LABEL, sizeof(ct_info->label), + &ct_info->label)) + return -EMSGSIZE; nla_nest_end(skb, start); @@ -513,3 +616,24 @@ void ovs_ct_free_action(const struct nlattr *a) if (ct_info->ct) nf_ct_put(ct_info->ct); } + +void ovs_ct_init(struct net *net) +{ + unsigned int n_bits = sizeof(struct ovs_key_ct_label) * BITS_PER_BYTE; + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + if (nf_connlabels_get(net, n_bits)) { + ovs_net->xt_label = false; + OVS_NLERR(true, "Failed to set connlabel length"); + } else { + ovs_net->xt_label = true; + } +} + +void ovs_ct_exit(struct net *net) +{ + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + if (ovs_net->xt_label) + nf_connlabels_put(net); +} diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index 87b289c58978..3cb30667a7dc 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -20,7 +20,9 @@ struct ovs_conntrack_info; enum ovs_key_attr; #if defined(CONFIG_OPENVSWITCH_CONNTRACK) -bool ovs_ct_verify(enum ovs_key_attr attr); +void ovs_ct_init(struct net *); +void ovs_ct_exit(struct net *); +bool ovs_ct_verify(struct net *, enum ovs_key_attr attr); int ovs_ct_copy_action(struct net *, const struct nlattr *, const struct sw_flow_key *, struct sw_flow_actions **, bool log); @@ -35,7 +37,11 @@ void ovs_ct_free_action(const struct nlattr *a); #else #include -static inline bool ovs_ct_verify(int attr) +static inline void ovs_ct_init(struct net *net) { } + +static inline void ovs_ct_exit(struct net *net) { } + +static inline bool ovs_ct_verify(struct net *net, int attr) { return false; } @@ -66,6 +72,7 @@ static inline void ovs_ct_fill_key(const struct sk_buff *skb, key->ct.state = 0; key->ct.zone = 0; key->ct.mark = 0; + memset(&key->ct.label, 0, sizeof(key->ct.label)); } static inline int ovs_ct_put_key(const struct sw_flow_key *key, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 72e63726efa0..ec0f8d9cee73 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -599,8 +599,8 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(flow)) goto err_kfree_skb; - err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet, - &flow->key, log); + err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY], + packet, &flow->key, log); if (err) goto err_flow_free; @@ -947,7 +947,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) /* Extract key. */ ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], + error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; @@ -1118,7 +1118,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], + error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto error; @@ -1208,6 +1208,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; + struct net *net = sock_net(skb->sk); struct sw_flow_key key; struct sk_buff *reply; struct sw_flow *flow; @@ -1222,7 +1223,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, + err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL, log); } else if (!ufid_present) { OVS_NLERR(log, @@ -1266,6 +1267,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; + struct net *net = sock_net(skb->sk); struct sw_flow_key key; struct sk_buff *reply; struct sw_flow *flow = NULL; @@ -1280,8 +1282,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, - log); + err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], + NULL, log); if (unlikely(err)) return err; } @@ -2237,6 +2239,7 @@ static int __net_init ovs_init_net(struct net *net) INIT_LIST_HEAD(&ovs_net->dps); INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq); + ovs_ct_init(net); return 0; } @@ -2271,6 +2274,7 @@ static void __net_exit ovs_exit_net(struct net *dnet) struct net *net; LIST_HEAD(head); + ovs_ct_exit(dnet); ovs_lock(); list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) __dp_destroy(dp); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index d24ba98024be..4e785ab88973 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -138,6 +138,9 @@ struct ovs_net { struct list_head dps; struct work_struct dp_notify_work; struct vport_net vport_net; + + /* Module reference for configuring conntrack. */ + bool xt_label; }; extern int ovs_net_id; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 376ca8738fd4..5a3195e538ce 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -715,7 +715,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, return key_extract(skb, key); } -int ovs_flow_key_extract_userspace(const struct nlattr *attr, +int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, struct sk_buff *skb, struct sw_flow_key *key, bool log) { @@ -724,7 +724,7 @@ int ovs_flow_key_extract_userspace(const struct nlattr *attr, memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE); /* Extract metadata from netlink attributes. */ - err = ovs_nla_get_flow_metadata(attr, key, log); + err = ovs_nla_get_flow_metadata(net, attr, key, log); if (err) return err; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index e05e69711ce1..fe527d2dd4b7 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -116,6 +116,7 @@ struct sw_flow_key { u16 zone; u32 mark; u8 state; + struct ovs_key_ct_label label; } ct; } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ @@ -220,7 +221,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key); /* Extract key from packet coming from userspace. */ -int ovs_flow_key_extract_userspace(const struct nlattr *attr, +int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, struct sk_buff *skb, struct sw_flow_key *key, bool log); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index b17a4ec348f9..e22c5bfe8575 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -281,7 +281,7 @@ size_t ovs_key_attr_size(void) /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 25); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -293,6 +293,7 @@ size_t ovs_key_attr_size(void) + nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */ + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ + + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABEL */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -345,6 +346,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) }, [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_CT_LABEL] = { .len = sizeof(struct ovs_key_ct_label) }, }; static bool is_all_zero(const u8 *fp, size_t size) @@ -721,9 +723,9 @@ int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, egress_tun_info->options_len); } -static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, - const struct nlattr **a, bool is_mask, - bool log) +static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, + u64 *attrs, const struct nlattr **a, + bool is_mask, bool log) { if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); @@ -776,36 +778,45 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) && - ovs_ct_verify(OVS_KEY_ATTR_CT_STATE)) { + ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]); SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); } if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) && - ovs_ct_verify(OVS_KEY_ATTR_CT_ZONE)) { + ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) { u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]); SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE); } if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) && - ovs_ct_verify(OVS_KEY_ATTR_CT_MARK)) { + ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) { u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]); SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK); } + if (*attrs & (1 << OVS_KEY_ATTR_CT_LABEL) && + ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABEL)) { + const struct ovs_key_ct_label *cl; + + cl = nla_data(a[OVS_KEY_ATTR_CT_LABEL]); + SW_FLOW_KEY_MEMCPY(match, ct.label, cl->ct_label, + sizeof(*cl), is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABEL); + } return 0; } -static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, - const struct nlattr **a, bool is_mask, - bool log) +static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, + u64 attrs, const struct nlattr **a, + bool is_mask, bool log) { int err; - err = metadata_from_nlattrs(match, &attrs, a, is_mask, log); + err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log); if (err) return err; @@ -1057,6 +1068,7 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val) * mask. In case the 'mask' is NULL, the flow is treated as exact match * flow. Otherwise, it is treated as a wildcarded flow, except the mask * does not include any don't care bit. + * @net: Used to determine per-namespace field support. * @match: receives the extracted flow match information. * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. The fields should of the packet that triggered the creation @@ -1067,7 +1079,7 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val) * probing for feature compatibility this should be passed in as false to * suppress unnecessary error logging. */ -int ovs_nla_get_match(struct sw_flow_match *match, +int ovs_nla_get_match(struct net *net, struct sw_flow_match *match, const struct nlattr *nla_key, const struct nlattr *nla_mask, bool log) @@ -1117,7 +1129,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, key_attrs, a, false, log); + err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log); if (err) return err; @@ -1197,7 +1209,8 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log); + err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true, + log); if (err) goto free_newmask; } @@ -1278,7 +1291,7 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) * extracted from the packet itself. */ -int ovs_nla_get_flow_metadata(const struct nlattr *attr, +int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr, struct sw_flow_key *key, bool log) { @@ -1297,7 +1310,7 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr, memset(&key->ct, 0, sizeof(key->ct)); key->phy.in_port = DP_MAX_PORTS; - return metadata_from_nlattrs(&match, &attrs, a, false, log); + return metadata_from_nlattrs(net, &match, &attrs, a, false, log); } static int __ovs_nla_put_key(const struct sw_flow_key *swkey, @@ -1929,6 +1942,7 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_CT_MARK: + case OVS_KEY_ATTR_CT_LABEL: case OVS_KEY_ATTR_ETHERNET: break; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index c0b484b237c9..07878e22e783 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -45,15 +45,16 @@ void ovs_match_init(struct sw_flow_match *match, int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, int attr, bool is_mask, struct sk_buff *); -int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *, - bool log); +int ovs_nla_get_flow_metadata(struct net *, const struct nlattr *, + struct sw_flow_key *, bool log); int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb); -int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, - const struct nlattr *mask, bool log); +int ovs_nla_get_match(struct net *, struct sw_flow_match *, + const struct nlattr *key, const struct nlattr *mask, + bool log); int ovs_nla_put_egress_tunnel_key(struct sk_buff *, const struct ip_tunnel_info *); -- cgit v1.2.3 From 7b85b4dff2caa5e52726093fd058f87bbc14f156 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 27 Aug 2015 15:25:46 -0700 Subject: openvswitch: Include ip6_fib.h. kbuild test robot reports that certain configurations will not automatically pick up on the "struct rt6_info" definition, so explicitly include the header for this structure. Fixes: 7f8a436 "openvswitch: Add conntrack action" Signed-off-by: Joe Stringer Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/openvswitch/actions.c') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 736a113a75c3..4487543806bb 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 3eedb41fb43461b5fde3f72fd00a7706f0b90103 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sat, 29 Aug 2015 17:44:06 -0700 Subject: openvswitch: Remove egress_tun_info. tun info is passed using skb-dst pointer. Now we have converted all vports to netdev based implementation so Now we can remove redundant pointer to tun-info from OVS_CB. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 5 ----- net/openvswitch/datapath.c | 1 - net/openvswitch/datapath.h | 3 --- net/openvswitch/vport-geneve.c | 3 +-- net/openvswitch/vport-gre.c | 3 +-- net/openvswitch/vport-vxlan.c | 3 +-- net/openvswitch/vport.c | 6 +++--- net/openvswitch/vport.h | 3 +-- 8 files changed, 7 insertions(+), 20 deletions(-) (limited to 'net/openvswitch/actions.c') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 4487543806bb..090d9e3a460c 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -896,10 +896,6 @@ static int execute_set_action(struct sk_buff *skb, skb_dst_drop(skb); dst_hold((struct dst_entry *)tun->tun_dst); skb_dst_set(skb, (struct dst_entry *)tun->tun_dst); - - /* FIXME: Remove when all vports have been converted */ - OVS_CB(skb)->egress_tun_info = &tun->tun_dst->u.tun_info; - return 0; } @@ -1159,7 +1155,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, int err; this_cpu_inc(exec_actions_level); - OVS_CB(skb)->egress_tun_info = NULL; err = do_execute_actions(dp, skb, key, acts->actions, acts->actions_len); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index ec0f8d9cee73..60c2ab8e6bc3 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -610,7 +610,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; rcu_assign_pointer(flow->sf_acts, acts); - OVS_CB(packet)->egress_tun_info = NULL; packet->priority = flow->key.phy.priority; packet->mark = flow->key.phy.skb_mark; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 4e785ab88973..da15fd3f3c34 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -94,15 +94,12 @@ struct datapath { /** * struct ovs_skb_cb - OVS data in skb CB - * @egress_tun_key: Tunnel information about this packet on egress path. - * NULL if the packet is not being tunneled. * @input_vport: The original vport packet came in on. This value is cached * when a packet is received by OVS. * @mru: The maximum received fragement size; 0 if the packet is not * fragmented. */ struct ovs_skb_cb { - struct ip_tunnel_info *egress_tun_info; struct vport *input_vport; u16 mru; }; diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index fa37c95f7339..24c56e56fedd 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -62,8 +62,7 @@ static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, return ovs_tunnel_get_egress_info(egress_tun_info, ovs_dp_get_net(vport->dp), - OVS_CB(skb)->egress_tun_info, - IPPROTO_UDP, skb->mark, sport, dport); + skb, IPPROTO_UDP, sport, dport); } static struct vport *geneve_tnl_create(const struct vport_parms *parms) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 871801d2ac23..36c39843607e 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -89,8 +89,7 @@ static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, { return ovs_tunnel_get_egress_info(egress_tun_info, ovs_dp_get_net(vport->dp), - OVS_CB(skb)->egress_tun_info, - IPPROTO_GRE, skb->mark, 0, 0); + skb, IPPROTO_GRE, 0, 0); } static struct vport_ops ovs_gre_vport_ops = { diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 1e8b00a23a23..ed7b23f443ec 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -160,8 +160,7 @@ static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, src_port = udp_flow_src_port(net, skb, 0, 0, true); return ovs_tunnel_get_egress_info(egress_tun_info, net, - OVS_CB(skb)->egress_tun_info, - IPPROTO_UDP, skb->mark, + skb, IPPROTO_UDP, src_port, dst_port); } diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 40164037928e..e658439a0bdd 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -483,7 +483,6 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, u64_stats_update_end(&stats->syncp); OVS_CB(skb)->input_vport = vport; - OVS_CB(skb)->egress_tun_info = NULL; OVS_CB(skb)->mru = 0; /* Extract flow from 'skb' into 'key'. */ error = ovs_flow_key_extract(tun_info, skb, &key); @@ -575,13 +574,14 @@ EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); int ovs_tunnel_get_egress_info(struct ip_tunnel_info *egress_tun_info, struct net *net, - const struct ip_tunnel_info *tun_info, + struct sk_buff *skb, u8 ipproto, - u32 skb_mark, __be16 tp_src, __be16 tp_dst) { + const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb); const struct ip_tunnel_key *tun_key; + u32 skb_mark = skb->mark; struct rtable *rt; struct flowi4 fl; diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 2f352446b6a8..a36aef22af7f 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -61,9 +61,8 @@ int ovs_vport_send(struct vport *, struct sk_buff *); int ovs_tunnel_get_egress_info(struct ip_tunnel_info *egress_tun_info, struct net *net, - const struct ip_tunnel_info *tun_info, + struct sk_buff *, u8 ipproto, - u32 skb_mark, __be16 tp_src, __be16 tp_dst); int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, -- cgit v1.2.3 From 4c22279848c531fc7f555d463daf3d0df963bd41 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sun, 30 Aug 2015 18:09:38 -0700 Subject: ip-tunnel: Use API to access tunnel metadata options. Currently tun-info options pointer is used in few cases to pass options around. But tunnel options can be accessed using ip_tunnel_info_opts() API without using the pointer. Following patch removes the redundant pointer and consistently make use of API. Signed-off-by: Pravin B Shelar Acked-by: Thomas Graf Reviewed-by: Jesse Gross Signed-off-by: David S. Miller --- drivers/net/geneve.c | 9 ++---- drivers/net/vxlan.c | 4 +-- include/net/dst_metadata.h | 31 +++++++++---------- include/net/ip_tunnels.h | 67 +++++++++++++++++++----------------------- net/ipv4/ip_tunnel_core.c | 2 -- net/openvswitch/actions.c | 8 +++-- net/openvswitch/datapath.c | 3 +- net/openvswitch/datapath.h | 3 +- net/openvswitch/flow.c | 7 +++-- net/openvswitch/flow_netlink.c | 27 +++++++---------- net/openvswitch/flow_netlink.h | 3 +- net/openvswitch/vport-geneve.c | 5 ++-- net/openvswitch/vport-gre.c | 5 ++-- net/openvswitch/vport-vxlan.c | 4 +-- net/openvswitch/vport.c | 27 +++++++++-------- net/openvswitch/vport.h | 7 +++-- 16 files changed, 100 insertions(+), 112 deletions(-) (limited to 'net/openvswitch/actions.c') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 68b0f0325fc7..da3259ce7c8d 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -143,7 +143,6 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) if (ip_tunnel_collect_metadata() || gs->collect_md) { __be16 flags; - void *opts; flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | (gnvh->oam ? TUNNEL_OAM : 0) | @@ -154,11 +153,9 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) gnvh->opt_len * 4); if (!tun_dst) goto drop; - /* Update tunnel dst according to Geneve options. */ - opts = ip_tunnel_info_opts(&tun_dst->u.tun_info, - gnvh->opt_len * 4); - memcpy(opts, gnvh->options, gnvh->opt_len * 4); + ip_tunnel_info_opts_set(&tun_dst->u.tun_info, + gnvh->options, gnvh->opt_len * 4); } else { /* Drop packets w/ critical options, * since we don't support any... @@ -663,7 +660,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) tunnel_id_to_vni(key->tun_id, vni); if (key->tun_flags & TUNNEL_GENEVE_OPT) - opts = ip_tunnel_info_opts(info, info->options_len); + opts = ip_tunnel_info_opts(info); udp_csum = !!(key->tun_flags & TUNNEL_CSUM); err = geneve_build_skb(rt, skb, key->tun_flags, vni, diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6c5269aea544..ce988fd01b34 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1271,7 +1271,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto drop; info = &tun_dst->u.tun_info; - md = ip_tunnel_info_opts(info, sizeof(*md)); + md = ip_tunnel_info_opts(info); } else { memset(md, 0, sizeof(*md)); } @@ -1948,7 +1948,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, tos = info->key.tos; if (info->options_len) - md = ip_tunnel_info_opts(info, sizeof(*md)); + md = ip_tunnel_info_opts(info); } else { md->gbp = skb->mark; } diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index d32f49cc621d..547ab8241593 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -48,21 +48,16 @@ static inline bool skb_valid_dst(const struct sk_buff *skb) struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags); struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags); -static inline struct metadata_dst *tun_rx_dst(__be16 flags, - __be64 tunnel_id, int md_size) +static inline struct metadata_dst *tun_rx_dst(int md_size) { struct metadata_dst *tun_dst; - struct ip_tunnel_info *info; tun_dst = metadata_dst_alloc(md_size, GFP_ATOMIC); if (!tun_dst) return NULL; - info = &tun_dst->u.tun_info; - info->key.tun_flags = flags; - info->key.tun_id = tunnel_id; - info->key.tp_src = 0; - info->key.tp_dst = 0; + tun_dst->u.tun_info.options_len = 0; + tun_dst->u.tun_info.mode = 0; return tun_dst; } @@ -73,17 +68,14 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, { const struct iphdr *iph = ip_hdr(skb); struct metadata_dst *tun_dst; - struct ip_tunnel_info *info; - tun_dst = tun_rx_dst(flags, tunnel_id, md_size); + tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; - info = &tun_dst->u.tun_info; - info->key.u.ipv4.src = iph->saddr; - info->key.u.ipv4.dst = iph->daddr; - info->key.tos = iph->tos; - info->key.ttl = iph->ttl; + ip_tunnel_key_init(&tun_dst->u.tun_info.key, + iph->saddr, iph->daddr, iph->tos, iph->ttl, + 0, 0, tunnel_id, flags); return tun_dst; } @@ -96,16 +88,21 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, struct metadata_dst *tun_dst; struct ip_tunnel_info *info; - tun_dst = tun_rx_dst(flags, tunnel_id, md_size); + tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; info = &tun_dst->u.tun_info; + info->mode = IP_TUNNEL_INFO_IPV6; + info->key.tun_flags = flags; + info->key.tun_id = tunnel_id; + info->key.tp_src = 0; + info->key.tp_dst = 0; + info->key.u.ipv6.src = ip6h->saddr; info->key.u.ipv6.dst = ip6h->daddr; info->key.tos = ipv6_get_dsfield(ip6h); info->key.ttl = ip6h->hop_limit; - info->mode = IP_TUNNEL_INFO_IPV6; return tun_dst; } diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 2b4fa06e91bd..9a6a3ba888e8 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -57,7 +57,6 @@ struct ip_tunnel_key { struct ip_tunnel_info { struct ip_tunnel_key key; - const void *options; u8 options_len; u8 mode; }; @@ -180,49 +179,32 @@ int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op, int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, unsigned int num); -static inline void __ip_tunnel_info_init(struct ip_tunnel_info *tun_info, - __be32 saddr, __be32 daddr, - u8 tos, u8 ttl, - __be16 tp_src, __be16 tp_dst, - __be64 tun_id, __be16 tun_flags, - const void *opts, u8 opts_len) +static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, + __be32 saddr, __be32 daddr, + u8 tos, u8 ttl, + __be16 tp_src, __be16 tp_dst, + __be64 tun_id, __be16 tun_flags) { - tun_info->key.tun_id = tun_id; - tun_info->key.u.ipv4.src = saddr; - tun_info->key.u.ipv4.dst = daddr; - memset((unsigned char *)&tun_info->key + IP_TUNNEL_KEY_IPV4_PAD, + key->tun_id = tun_id; + key->u.ipv4.src = saddr; + key->u.ipv4.dst = daddr; + memset((unsigned char *)key + IP_TUNNEL_KEY_IPV4_PAD, 0, IP_TUNNEL_KEY_IPV4_PAD_LEN); - tun_info->key.tos = tos; - tun_info->key.ttl = ttl; - tun_info->key.tun_flags = tun_flags; + key->tos = tos; + key->ttl = ttl; + key->tun_flags = tun_flags; /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of * the upper tunnel are used. * E.g: GRE over IPSEC, the tp_src and tp_port are zero. */ - tun_info->key.tp_src = tp_src; - tun_info->key.tp_dst = tp_dst; + key->tp_src = tp_src; + key->tp_dst = tp_dst; /* Clear struct padding. */ - if (sizeof(tun_info->key) != IP_TUNNEL_KEY_SIZE) - memset((unsigned char *)&tun_info->key + IP_TUNNEL_KEY_SIZE, - 0, sizeof(tun_info->key) - IP_TUNNEL_KEY_SIZE); - - tun_info->options = opts; - tun_info->options_len = opts_len; - - tun_info->mode = 0; -} - -static inline void ip_tunnel_info_init(struct ip_tunnel_info *tun_info, - const struct iphdr *iph, - __be16 tp_src, __be16 tp_dst, - __be64 tun_id, __be16 tun_flags, - const void *opts, u8 opts_len) -{ - __ip_tunnel_info_init(tun_info, iph->saddr, iph->daddr, - iph->tos, iph->ttl, tp_src, tp_dst, - tun_id, tun_flags, opts, opts_len); + if (sizeof(*key) != IP_TUNNEL_KEY_SIZE) + memset((unsigned char *)key + IP_TUNNEL_KEY_SIZE, + 0, sizeof(*key) - IP_TUNNEL_KEY_SIZE); } static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info @@ -317,11 +299,24 @@ static inline void iptunnel_xmit_stats(int err, } } -static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info, size_t n) +static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info) { return info + 1; } +static inline void ip_tunnel_info_opts_get(void *to, + const struct ip_tunnel_info *info) +{ + memcpy(to, info + 1, info->options_len); +} + +static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, + const void *from, int len) +{ + memcpy(ip_tunnel_info_opts(info), from, len); + info->options_len = len; +} + static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) { return (struct ip_tunnel_info *)lwtstate->data; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 0c756ade1cf7..29ed6c5a5185 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -249,7 +249,6 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP_FLAGS]); tun_info->mode = IP_TUNNEL_INFO_TX; - tun_info->options = NULL; tun_info->options_len = 0; *ts = new_state; @@ -357,7 +356,6 @@ static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP6_FLAGS]); tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6; - tun_info->options = NULL; tun_info->options_len = 0; *ts = new_state; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 090d9e3a460c..315f5330b6e5 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -793,11 +793,13 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, if (vport) { int err; + upcall.egress_tun_info = &info; err = ovs_vport_get_egress_tun_info(vport, skb, - &info); - if (!err) - upcall.egress_tun_info = &info; + &upcall); + if (err) + upcall.egress_tun_info = NULL; } + break; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 60c2ab8e6bc3..6fbd2decb19e 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -491,7 +491,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (upcall_info->egress_tun_info) { nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); err = ovs_nla_put_egress_tunnel_key(user_skb, - upcall_info->egress_tun_info); + upcall_info->egress_tun_info, + upcall_info->egress_tun_opts); BUG_ON(err); nla_nest_end(user_skb, nla); } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index c05b7d9e7bf2..f88038a99f44 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -116,7 +116,8 @@ struct ovs_skb_cb { * @mru: If not zero, Maximum received IP fragment size. */ struct dp_upcall_info { - const struct ip_tunnel_info *egress_tun_info; + struct ip_tunnel_info *egress_tun_info; + const void *egress_tun_opts; const struct nlattr *userdata; const struct nlattr *actions; int actions_len; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index bed8d09230cd..c8db44ab2ee7 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -702,12 +702,13 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, return -EINVAL; memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key)); - if (tun_info->options) { + if (tun_info->options_len) { BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * 8)) - 1 > sizeof(key->tun_opts)); - memcpy(TUN_METADATA_OPTS(key, tun_info->options_len), - tun_info->options, tun_info->options_len); + + ip_tunnel_info_opts_get(TUN_METADATA_OPTS(key, tun_info->options_len), + tun_info); key->tun_opts_len = tun_info->options_len; } else { key->tun_opts_len = 0; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index e22c5bfe8575..c92d6a262bc5 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -716,10 +716,11 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, } int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, - const struct ip_tunnel_info *egress_tun_info) + const struct ip_tunnel_info *egress_tun_info, + const void *egress_tun_opts) { return __ipv4_tun_to_nlattr(skb, &egress_tun_info->key, - egress_tun_info->options, + egress_tun_opts, egress_tun_info->options_len); } @@ -1876,20 +1877,14 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, tun_info = &tun_dst->u.tun_info; tun_info->mode = IP_TUNNEL_INFO_TX; tun_info->key = key.tun_key; - tun_info->options_len = key.tun_opts_len; - - if (tun_info->options_len) { - /* We need to store the options in the action itself since - * everything else will go away after flow setup. We can append - * it to tun_info and then point there. - */ - memcpy((tun_info + 1), - TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len); - tun_info->options = (tun_info + 1); - } else { - tun_info->options = NULL; - } + /* We need to store the options in the action itself since + * everything else will go away after flow setup. We can append + * it to tun_info and then point there. + */ + ip_tunnel_info_opts_set(tun_info, + TUN_METADATA_OPTS(&key, key.tun_opts_len), + key.tun_opts_len); add_nested_action_end(*sfa, start); return err; @@ -2345,7 +2340,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) err = ipv4_tun_to_nlattr(skb, &tun_info->key, tun_info->options_len ? - tun_info->options : NULL, + ip_tunnel_info_opts(tun_info) : NULL, tun_info->options_len); if (err) return err; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 07878e22e783..6ca3f0baf449 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -56,7 +56,8 @@ int ovs_nla_get_match(struct net *, struct sw_flow_match *, const struct nlattr *key, const struct nlattr *mask, bool log); int ovs_nla_put_egress_tunnel_key(struct sk_buff *, - const struct ip_tunnel_info *); + const struct ip_tunnel_info *, + const void *egress_tun_opts); bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log); int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 24c56e56fedd..2735e9c4a3b8 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -53,15 +53,14 @@ static int geneve_get_options(const struct vport *vport, } static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct ip_tunnel_info *egress_tun_info) + struct dp_upcall_info *upcall) { struct geneve_port *geneve_port = geneve_vport(vport); struct net *net = ovs_dp_get_net(vport->dp); __be16 dport = htons(geneve_port->port_no); __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); - return ovs_tunnel_get_egress_info(egress_tun_info, - ovs_dp_get_net(vport->dp), + return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp), skb, IPPROTO_UDP, sport, dport); } diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 36c39843607e..4d24481669c9 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -85,10 +85,9 @@ static struct vport *gre_create(const struct vport_parms *parms) } static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct ip_tunnel_info *egress_tun_info) + struct dp_upcall_info *upcall) { - return ovs_tunnel_get_egress_info(egress_tun_info, - ovs_dp_get_net(vport->dp), + return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp), skb, IPPROTO_GRE, 0, 0); } diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index ed7b23f443ec..c11413d5075f 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -147,7 +147,7 @@ static struct vport *vxlan_create(const struct vport_parms *parms) } static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct ip_tunnel_info *egress_tun_info) + struct dp_upcall_info *upcall) { struct vxlan_dev *vxlan = netdev_priv(vport->dev); struct net *net = ovs_dp_get_net(vport->dp); @@ -159,7 +159,7 @@ static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, inet_get_local_port_range(net, &port_min, &port_max); src_port = udp_flow_src_port(net, skb, 0, 0, true); - return ovs_tunnel_get_egress_info(egress_tun_info, net, + return ovs_tunnel_get_egress_info(upcall, net, skb, IPPROTO_UDP, src_port, dst_port); } diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 1679dea7c6bc..dc81dc619aa2 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -487,13 +487,14 @@ void ovs_vport_deferred_free(struct vport *vport) } EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); -int ovs_tunnel_get_egress_info(struct ip_tunnel_info *egress_tun_info, +int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, struct net *net, struct sk_buff *skb, u8 ipproto, __be16 tp_src, __be16 tp_dst) { + struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info; const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb); const struct ip_tunnel_key *tun_key; u32 skb_mark = skb->mark; @@ -520,26 +521,26 @@ int ovs_tunnel_get_egress_info(struct ip_tunnel_info *egress_tun_info, /* Generate egress_tun_info based on tun_info, * saddr, tp_src and tp_dst */ - __ip_tunnel_info_init(egress_tun_info, - fl.saddr, tun_key->u.ipv4.dst, - tun_key->tos, - tun_key->ttl, - tp_src, tp_dst, - tun_key->tun_id, - tun_key->tun_flags, - tun_info->options, - tun_info->options_len); - + ip_tunnel_key_init(&egress_tun_info->key, + fl.saddr, tun_key->u.ipv4.dst, + tun_key->tos, + tun_key->ttl, + tp_src, tp_dst, + tun_key->tun_id, + tun_key->tun_flags); + egress_tun_info->options_len = tun_info->options_len; + egress_tun_info->mode = tun_info->mode; + upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info); return 0; } EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info); int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct ip_tunnel_info *info) + struct dp_upcall_info *upcall) { /* get_egress_tun_info() is only implemented on tunnel ports. */ if (unlikely(!vport->ops->get_egress_tun_info)) return -EINVAL; - return vport->ops->get_egress_tun_info(vport, skb, info); + return vport->ops->get_egress_tun_info(vport, skb, upcall); } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 4b6f4a5296c3..a413f3ae6a7b 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -53,14 +53,15 @@ int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids); int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *); u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); -int ovs_tunnel_get_egress_info(struct ip_tunnel_info *egress_tun_info, +int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, struct net *net, struct sk_buff *, u8 ipproto, __be16 tp_src, __be16 tp_dst); + int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct ip_tunnel_info *info); + struct dp_upcall_info *upcall); /** * struct vport_portids - array of netlink portids of a vport. @@ -154,7 +155,7 @@ struct vport_ops { void (*send)(struct vport *, struct sk_buff *); int (*get_egress_tun_info)(struct vport *, struct sk_buff *, - struct ip_tunnel_info *); + struct dp_upcall_info *upcall); struct module *owner; struct list_head list; -- cgit v1.2.3