From 798c166173ffb50128993641fcf791df51bed48e Mon Sep 17 00:00:00 2001 From: andy zhou Date: Mon, 20 Mar 2017 16:32:29 -0700 Subject: openvswitch: Optimize sample action for the clone use cases With the introduction of the OpenFlow 'clone' action, the OVS user space can now translate the 'clone' action into the kernel datapath 'sample' action, with 100% probability, to ensure that the clone semantics, which is that the packet seen by the clone action is the same as the packet seen by the action after clone, is faithfully carried out in the datapath. While the sample action in the datapath has the matching semantics, its implementation is only optimized for its original use. Specifically, there are two limitations: First, there is a nesting restriction of 3 levels, enforced at the flow downloading time. This limit turns out to be too restrictive for the 'clone' use case. Second, the implementation avoids a recursive call only if the sample action list has a single userspace action. The main optimization implemented in this series removes the static nesting limit check and instead implements a run-time recursion limit check and recursion avoidance similar to that of the 'recirc' action. This optimization solves both issues #1 and #2 above. One related optimization attempts to avoid copying the flow key as long as the enclosed actions do not change the flow key. The detection is performed only once at the flow downloading time. Another related optimization is to rewrite the action list at flow downloading time in order to save the fast path from parsing the sample action list in its original form repeatedly. Signed-off-by: Andy Zhou Acked-by: Pravin B Shelar Signed-off-by: David S.
Miller --- include/uapi/linux/openvswitch.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 7f41f7d0000f..66d1c3ccfd8e 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -578,10 +578,25 @@ enum ovs_sample_attr { OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */ OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ __OVS_SAMPLE_ATTR_MAX, + +#ifdef __KERNEL__ + OVS_SAMPLE_ATTR_ARG /* struct sample_arg */ +#endif }; #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1) +#ifdef __KERNEL__ +struct sample_arg { + bool exec; /* When true, actions in sample will not + * change flow keys. False otherwise. + */ + u32 probability; /* Same value as + * 'OVS_SAMPLE_ATTR_PROBABILITY'. + */ +}; +#endif + /** * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION -- cgit v1.2.3 From 120645513f55a4ac5543120d9e79925d30a0156f Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 21 Apr 2017 16:48:06 -0700 Subject: openvswitch: Add eventmask support to CT action. Add a new optional conntrack action attribute OVS_CT_ATTR_EVENTMASK, which can be used in conjunction with the commit flag (OVS_CT_ATTR_COMMIT) to set the mask of bits specifying which conntrack events (IPCT_*) should be delivered via the Netfilter netlink multicast groups. Default behavior depends on the system configuration, but typically a lot of events are delivered. This can be very chatty for the NFNLGRP_CONNTRACK_UPDATE group, even if only some types of events are of interest. Netfilter core init_conntrack() adds the event cache extension, so we only need to set the ctmask value. However, if the system is configured without support for events, the setting will be skipped due to extension not being found. 
Signed-off-by: Jarno Rajahalme Reviewed-by: Greg Rose Acked-by: Joe Stringer Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 12 ++++++++++++ net/openvswitch/conntrack.c | 27 +++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 66d1c3ccfd8e..61b7d36dfe34 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -693,6 +693,17 @@ struct ovs_action_hash { * nothing if the connection is already committed will check that the current * packet is in conntrack entry's original direction. If directionality does * not match, will delete the existing conntrack entry and commit a new one. + * @OVS_CT_ATTR_EVENTMASK: Mask of bits indicating which conntrack event types + * (enum ip_conntrack_events IPCT_*) should be reported. For any bit set to + * zero, the corresponding event type is not generated. Default behavior + * depends on system configuration, but typically all event types are + * generated, hence listening on NFNLGRP_CONNTRACK_UPDATE events may get a lot + * of events. Explicitly passing this attribute allows limiting the updates + * received to the events of interest. The bit 1 << IPCT_NEW, 1 << + * IPCT_RELATED, and 1 << IPCT_DESTROY must be set to ones for those events to + * be received on NFNLGRP_CONNTRACK_NEW and NFNLGRP_CONNTRACK_DESTROY groups, + * respectively. Remaining bits control the changes for which an event is + * delivered on the NFNLGRP_CONNTRACK_UPDATE group. */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, @@ -704,6 +715,7 @@ enum ovs_ct_attr { related connections. */ OVS_CT_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */ OVS_CT_ATTR_FORCE_COMMIT, /* No argument */ + OVS_CT_ATTR_EVENTMASK, /* u32 mask of IPCT_* events. 
*/ __OVS_CT_ATTR_MAX }; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 58de4c2da673..4f7c3b5c080b 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -66,7 +66,9 @@ struct ovs_conntrack_info { u8 commit : 1; u8 nat : 3; /* enum ovs_ct_nat */ u8 force : 1; + u8 have_eventmask : 1; u16 family; + u32 eventmask; /* Mask of 1 << IPCT_*. */ struct md_mark mark; struct md_labels labels; #ifdef CONFIG_NF_NAT_NEEDED @@ -1007,6 +1009,20 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, if (!ct) return 0; + /* Set the conntrack event mask if given. NEW and DELETE events have + * their own groups, but the NFNLGRP_CONNTRACK_UPDATE group listener + * typically would receive many kinds of updates. Setting the event + * mask allows those events to be filtered. The set event mask will + * remain in effect for the lifetime of the connection unless changed + * by a further CT action with both the commit flag and the eventmask + * option. */ + if (info->have_eventmask) { + struct nf_conntrack_ecache *cache = nf_ct_ecache_find(ct); + + if (cache) + cache->ctmask = info->eventmask; + } + /* Apply changes before confirming the connection so that the initial * conntrack NEW netlink event carries the values given in the CT * action. @@ -1238,6 +1254,8 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { /* NAT length is checked when parsing the nested attributes. 
*/ [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX }, #endif + [OVS_CT_ATTR_EVENTMASK] = { .minlen = sizeof(u32), + .maxlen = sizeof(u32) }, }; static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, @@ -1316,6 +1334,11 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, break; } #endif + case OVS_CT_ATTR_EVENTMASK: + info->have_eventmask = true; + info->eventmask = nla_get_u32(a); + break; + default: OVS_NLERR(log, "Unknown conntrack attr (%d)", type); @@ -1515,6 +1538,10 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, ct_info->helper->name)) return -EMSGSIZE; } + if (ct_info->have_eventmask && + nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask)) + return -EMSGSIZE; + #ifdef CONFIG_NF_NAT_NEEDED if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb)) return -EMSGSIZE; -- cgit v1.2.3