| author | Thadeu Lima de Souza Cascardo <cascardo@redhat.com> | 2016-09-16 01:11:53 +0300 | 
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2016-09-19 05:14:01 +0300 | 
| commit | db74a3335e0f645e3139c80bcfc90feb01d8e304 (patch) | |
| tree | 5adec60da4faa80b188cc0b45639224c0a197db4 /net/openvswitch/flow.c | |
| parent | 40773966ccf1985a1b2bb570a03cbeaf1cbd4e00 (diff) | |
| download | linux-db74a3335e0f645e3139c80bcfc90feb01d8e304.tar.xz | |
openvswitch: use percpu flow stats
Instead of keeping flow stats per NUMA node, keep them per CPU. When using
megaflows, the stats lock can become a scalability bottleneck.
On an E5-2690 12-core system, usual throughput went from ~4Mpps to
~15Mpps when forwarding between two 40GbE ports with a single flow
configured on the datapath.
This has been tested on a system with possible CPUs 0-7,16-23. After
module removal, there was no corruption in the slab cache.
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Cc: pravin shelar <pshelar@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
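
The patch amounts to a classic stats-striping pattern: one stats slot per possible CPU, each guarded by its own lock, with slot 0 pre-allocated and readers summing across all slots. The sketch below is a minimal userspace analogue of that pattern, not the kernel code: the names are hypothetical, pthread mutexes stand in for kernel spinlocks, an explicit slot argument stands in for smp_processor_id(), and the patch's lazy per-CPU slot allocation (with fallback to slot 0) is omitted.

```c
/* Minimal userspace analogue of per-CPU stats striping (hypothetical names,
 * not the kernel's struct flow_stats).  Each slot has its own lock, so
 * writers using different slots never contend; readers sum every slot.
 * Build with: cc stats_sketch.c -o stats_sketch -pthread
 */
#include <inttypes.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define NSLOTS 4	/* stands in for nr_cpu_ids */

struct slot_stats {
	pthread_mutex_t lock;	/* per-slot lock (spin_lock in the patch) */
	uint64_t packet_count;
	uint64_t byte_count;
};

static struct slot_stats stats[NSLOTS];

/* Analogue of ovs_flow_stats_update(): touch only the caller's own slot,
 * i.e. flow->stats[smp_processor_id()] in the patch. */
static void stats_update(int slot, uint64_t len)
{
	struct slot_stats *s = &stats[slot];

	pthread_mutex_lock(&s->lock);
	s->packet_count++;
	s->byte_count += len;
	pthread_mutex_unlock(&s->lock);
}

/* Analogue of ovs_flow_stats_get(): readers walk every slot and sum. */
static void stats_get(uint64_t *packets, uint64_t *bytes)
{
	*packets = 0;
	*bytes = 0;
	for (int i = 0; i < NSLOTS; i++) {
		pthread_mutex_lock(&stats[i].lock);
		*packets += stats[i].packet_count;
		*bytes += stats[i].byte_count;
		pthread_mutex_unlock(&stats[i].lock);
	}
}

int main(void)
{
	uint64_t packets, bytes;

	for (int i = 0; i < NSLOTS; i++)
		pthread_mutex_init(&stats[i].lock, NULL);

	stats_update(0, 64);	/* "CPU 0" accounts a 64-byte packet */
	stats_update(2, 1500);	/* "CPU 2" accounts a 1500-byte packet */

	stats_get(&packets, &bytes);
	printf("packets=%" PRIu64 " bytes=%" PRIu64 "\n", packets, bytes);
	return 0;
}
```

The scalability gain quoted above comes from the update path: once each CPU writes to its own slot under its own lock, updaters of the same megaflow on different CPUs no longer serialise on a shared per-node lock.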
Diffstat (limited to 'net/openvswitch/flow.c')
| -rw-r--r-- | net/openvswitch/flow.c | 42 | 
1 file changed, 22 insertions, 20 deletions
```diff
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 5b80612df182..0fa45439def1 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -29,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/in.h>
 #include <linux/rcupdate.h>
+#include <linux/cpumask.h>
 #include <linux/if_arp.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
@@ -72,32 +73,33 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 {
 	struct flow_stats *stats;
 	int node = numa_node_id();
+	int cpu = smp_processor_id();
 	int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
 
-	stats = rcu_dereference(flow->stats[node]);
+	stats = rcu_dereference(flow->stats[cpu]);
 
-	/* Check if already have node-specific stats. */
+	/* Check if already have CPU-specific stats. */
 	if (likely(stats)) {
 		spin_lock(&stats->lock);
 		/* Mark if we write on the pre-allocated stats. */
-		if (node == 0 && unlikely(flow->stats_last_writer != node))
-			flow->stats_last_writer = node;
+		if (cpu == 0 && unlikely(flow->stats_last_writer != cpu))
+			flow->stats_last_writer = cpu;
 	} else {
 		stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
 		spin_lock(&stats->lock);
 
-		/* If the current NUMA-node is the only writer on the
+		/* If the current CPU is the only writer on the
 		 * pre-allocated stats keep using them.
 		 */
-		if (unlikely(flow->stats_last_writer != node)) {
+		if (unlikely(flow->stats_last_writer != cpu)) {
 			/* A previous locker may have already allocated the
-			 * stats, so we need to check again.  If node-specific
+			 * stats, so we need to check again.  If CPU-specific
 			 * stats were already allocated, we update the pre-
 			 * allocated stats as we have already locked them.
 			 */
-			if (likely(flow->stats_last_writer != NUMA_NO_NODE)
-			    && likely(!rcu_access_pointer(flow->stats[node]))) {
-				/* Try to allocate node-specific stats. */
+			if (likely(flow->stats_last_writer != -1) &&
+			    likely(!rcu_access_pointer(flow->stats[cpu]))) {
+				/* Try to allocate CPU-specific stats. */
 				struct flow_stats *new_stats;
 
 				new_stats =
@@ -114,12 +116,12 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 					new_stats->tcp_flags = tcp_flags;
 					spin_lock_init(&new_stats->lock);
 
-					rcu_assign_pointer(flow->stats[node],
+					rcu_assign_pointer(flow->stats[cpu],
 							   new_stats);
 					goto unlock;
 				}
 			}
-			flow->stats_last_writer = node;
+			flow->stats_last_writer = cpu;
 		}
 	}
 
@@ -136,15 +138,15 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
 			struct ovs_flow_stats *ovs_stats,
 			unsigned long *used, __be16 *tcp_flags)
 {
-	int node;
+	int cpu;
 
 	*used = 0;
 	*tcp_flags = 0;
 	memset(ovs_stats, 0, sizeof(*ovs_stats));
 
-	/* We open code this to make sure node 0 is always considered */
-	for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) {
-		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]);
+	/* We open code this to make sure cpu 0 is always considered */
+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
 
 		if (stats) {
 			/* Local CPU may write on non-local stats, so we must
@@ -164,11 +166,11 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
 /* Called with ovs_mutex. */
 void ovs_flow_stats_clear(struct sw_flow *flow)
 {
-	int node;
+	int cpu;
 
-	/* We open code this to make sure node 0 is always considered */
-	for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) {
-		struct flow_stats *stats = ovsl_dereference(flow->stats[node]);
+	/* We open code this to make sure cpu 0 is always considered */
+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+		struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
 
 		if (stats) {
 			spin_lock_bh(&stats->lock);
```
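
A detail worth noting in the ovs_flow_stats_get() and ovs_flow_stats_clear() hunks above is the shape of the loop: it starts at 0 unconditionally and only then steps with cpumask_next() over cpu_possible_mask, so the pre-allocated slot 0 (which other CPUs may have written to) is always considered, while holes in a sparse possible mask, such as the 0-7,16-23 layout mentioned in the commit message, are skipped. Below is a small standalone sketch of that iteration shape; next_possible() and the possible[] array are hypothetical stand-ins for cpumask_next() and cpu_possible_mask, not kernel code.

```c
/* Sketch of the open-coded walk used by ovs_flow_stats_get()/_clear():
 * visit index 0 unconditionally, then jump to the next "possible" index.
 * next_possible() is a hypothetical stand-in for cpumask_next(). */
#include <stdio.h>

#define NR_IDS 24	/* stands in for nr_cpu_ids */

static int next_possible(int id, const int *possible)
{
	for (id++; id < NR_IDS; id++)
		if (possible[id])
			return id;
	return NR_IDS;	/* no further possible id: terminates the loop */
}

int main(void)
{
	int possible[NR_IDS] = { 0 };
	int cpu;

	/* Mimic the tested system from the commit message: 0-7,16-23. */
	for (int i = 0; i <= 7; i++)
		possible[i] = 1;
	for (int i = 16; i <= 23; i++)
		possible[i] = 1;

	/* Open coded like the patch: slot 0 is always considered, and the
	 * hole in the possible mask (8-15 here) is skipped. */
	for (cpu = 0; cpu < NR_IDS; cpu = next_possible(cpu, possible))
		printf("visit stats slot %d\n", cpu);

	return 0;
}
```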
