From db74a3335e0f645e3139c80bcfc90feb01d8e304 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 15 Sep 2016 19:11:53 -0300 Subject: openvswitch: use percpu flow stats Instead of using flow stats per NUMA node, use it per CPU. When using megaflows, the stats lock can be a bottleneck in scalability. On a E5-2690 12-core system, usual throughput went from ~4Mpps to ~15Mpps when forwarding between two 40GbE ports with a single flow configured on the datapath. This has been tested on a system with possible CPUs 0-7,16-23. After module removal, there were no corruption on the slab cache. Signed-off-by: Thadeu Lima de Souza Cascardo Cc: pravin shelar Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_table.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) (limited to 'net/openvswitch/flow_table.c') diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 957a3c31dbb0..ea7a8073fa02 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -79,17 +80,12 @@ struct sw_flow *ovs_flow_alloc(void) { struct sw_flow *flow; struct flow_stats *stats; - int node; - flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); + flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL); if (!flow) return ERR_PTR(-ENOMEM); - flow->sf_acts = NULL; - flow->mask = NULL; - flow->id.unmasked_key = NULL; - flow->id.ufid_len = 0; - flow->stats_last_writer = NUMA_NO_NODE; + flow->stats_last_writer = -1; /* Initialize the default stat node. */ stats = kmem_cache_alloc_node(flow_stats_cache, @@ -102,10 +98,6 @@ struct sw_flow *ovs_flow_alloc(void) RCU_INIT_POINTER(flow->stats[0], stats); - for_each_node(node) - if (node != 0) - RCU_INIT_POINTER(flow->stats[node], NULL); - return flow; err: kmem_cache_free(flow_cache, flow); @@ -142,17 +134,17 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) static void flow_free(struct sw_flow *flow) { - int node; + int cpu; if (ovs_identifier_is_key(&flow->id)) kfree(flow->id.unmasked_key); if (flow->sf_acts) ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts); - /* We open code this to make sure node 0 is always considered */ - for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) - if (node != 0 && flow->stats[node]) + /* We open code this to make sure cpu 0 is always considered */ + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) + if (flow->stats[cpu]) kmem_cache_free(flow_stats_cache, - (struct flow_stats __force *)flow->stats[node]); + (struct flow_stats __force *)flow->stats[cpu]); kmem_cache_free(flow_cache, flow); } @@ -757,7 +749,7 @@ int ovs_flow_init(void) BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow) - + (nr_node_ids + + (nr_cpu_ids * sizeof(struct flow_stats *)), 0, 0, NULL); if (flow_cache == NULL) -- cgit v1.2.3