Diffstat (limited to 'net/openvswitch')
-rw-r--r--  net/openvswitch/conntrack.c     |  38
-rw-r--r--  net/openvswitch/datapath.c      |  45
-rw-r--r--  net/openvswitch/datapath.h      |   9
-rw-r--r--  net/openvswitch/flow.c          |   1
-rw-r--r--  net/openvswitch/flow_netlink.c  |   6
-rw-r--r--  net/openvswitch/flow_table.c    | 287
-rw-r--r--  net/openvswitch/flow_table.h    |  24
-rw-r--r--  net/openvswitch/vport.c         |   3
8 files changed, 366 insertions, 47 deletions
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 4340f25fe390..98d393e70de3 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -276,10 +276,6 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) ovs_ct_update_key(skb, NULL, key, false, false); } -#define IN6_ADDR_INITIALIZER(ADDR) \ - { (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \ - (ADDR).s6_addr32[2], (ADDR).s6_addr32[3] } - int ovs_ct_put_key(const struct sw_flow_key *swkey, const struct sw_flow_key *output, struct sk_buff *skb) { @@ -301,24 +297,30 @@ int ovs_ct_put_key(const struct sw_flow_key *swkey, if (swkey->ct_orig_proto) { if (swkey->eth.type == htons(ETH_P_IP)) { - struct ovs_key_ct_tuple_ipv4 orig = { - output->ipv4.ct_orig.src, - output->ipv4.ct_orig.dst, - output->ct.orig_tp.src, - output->ct.orig_tp.dst, - output->ct_orig_proto, - }; + struct ovs_key_ct_tuple_ipv4 orig; + + memset(&orig, 0, sizeof(orig)); + orig.ipv4_src = output->ipv4.ct_orig.src; + orig.ipv4_dst = output->ipv4.ct_orig.dst; + orig.src_port = output->ct.orig_tp.src; + orig.dst_port = output->ct.orig_tp.dst; + orig.ipv4_proto = output->ct_orig_proto; + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, sizeof(orig), &orig)) return -EMSGSIZE; } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - struct ovs_key_ct_tuple_ipv6 orig = { - IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src), - IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst), - output->ct.orig_tp.src, - output->ct.orig_tp.dst, - output->ct_orig_proto, - }; + struct ovs_key_ct_tuple_ipv6 orig; + + memset(&orig, 0, sizeof(orig)); + memcpy(orig.ipv6_src, output->ipv6.ct_orig.src.s6_addr32, + sizeof(orig.ipv6_src)); + memcpy(orig.ipv6_dst, output->ipv6.ct_orig.dst.s6_addr32, + sizeof(orig.ipv6_dst)); + orig.src_port = output->ct.orig_tp.src; + orig.dst_port = output->ct.orig_tp.dst; + orig.ipv6_proto = output->ct_orig_proto; + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, sizeof(orig), &orig)) return -EMSGSIZE; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 94b024534987..42f8cc70bb2c 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -130,6 +130,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, const struct dp_upcall_info *, uint32_t cutlen); +static void ovs_dp_masks_rebalance(struct work_struct *work); + /* Must be called with rcu_read_lock or ovs_mutex. */ const char *ovs_dp_name(const struct datapath *dp) { @@ -223,13 +225,14 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) struct dp_stats_percpu *stats; u64 *stats_counter; u32 n_mask_hit; + u32 n_cache_hit; int error; stats = this_cpu_ptr(dp->stats_percpu); /* Look up flow. 
*/ flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb), - &n_mask_hit); + &n_mask_hit, &n_cache_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; @@ -260,6 +263,7 @@ out: u64_stats_update_begin(&stats->syncp); (*stats_counter)++; stats->n_mask_hit += n_mask_hit; + stats->n_cache_hit += n_cache_hit; u64_stats_update_end(&stats->syncp); } @@ -697,6 +701,7 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, stats->n_missed += local_stats.n_missed; stats->n_lost += local_stats.n_lost; mega_stats->n_mask_hit += local_stats.n_mask_hit; + mega_stats->n_cache_hit += local_stats.n_cache_hit; } } @@ -1493,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void) msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats)); msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats)); msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */ + msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */ return msgsize; } @@ -1530,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) goto nla_put_failure; + if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE, + ovs_flow_tbl_masks_cache_size(&dp->table))) + goto nla_put_failure; + genlmsg_end(skb, ovs_header); return 0; @@ -1594,6 +1604,16 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) #endif } + if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) { + int err; + u32 cache_size; + + cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]); + err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size); + if (err) + return err; + } + dp->user_features = user_features; if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) @@ -1882,6 +1902,8 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, + [OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0, + PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)), }; static const struct genl_ops dp_datapath_genl_ops[] = { @@ -2338,6 +2360,23 @@ out: return skb->len; } +static void ovs_dp_masks_rebalance(struct work_struct *work) +{ + struct ovs_net *ovs_net = container_of(work, struct ovs_net, + masks_rebalance.work); + struct datapath *dp; + + ovs_lock(); + + list_for_each_entry(dp, &ovs_net->dps, list_node) + ovs_flow_masks_rebalance(&dp->table); + + ovs_unlock(); + + schedule_delayed_work(&ovs_net->masks_rebalance, + msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); +} + static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, @@ -2432,6 +2471,9 @@ static int __net_init ovs_init_net(struct net *net) INIT_LIST_HEAD(&ovs_net->dps); INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq); + INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance); + schedule_delayed_work(&ovs_net->masks_rebalance, + msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); return ovs_ct_init(net); } @@ -2486,6 +2528,7 @@ static void __net_exit ovs_exit_net(struct net *dnet) ovs_unlock(); + cancel_delayed_work_sync(&ovs_net->masks_rebalance); cancel_work_sync(&ovs_net->dp_notify_work); } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 2016dd107939..38f7d3e66ca6 100644 
--- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -20,8 +20,9 @@ #include "meter.h" #include "vport-internal_dev.h" -#define DP_MAX_PORTS USHRT_MAX -#define DP_VPORT_HASH_BUCKETS 1024 +#define DP_MAX_PORTS USHRT_MAX +#define DP_VPORT_HASH_BUCKETS 1024 +#define DP_MASKS_REBALANCE_INTERVAL 4000 /** * struct dp_stats_percpu - per-cpu packet processing statistics for a given @@ -37,12 +38,15 @@ * @n_mask_hit: Number of masks looked up for flow match. * @n_mask_hit / (@n_hit + @n_missed) will be the average masks looked * up per packet. + * @n_cache_hit: The number of received packets that had their mask found using + * the mask cache. */ struct dp_stats_percpu { u64 n_hit; u64 n_missed; u64 n_lost; u64 n_mask_hit; + u64 n_cache_hit; struct u64_stats_sync syncp; }; @@ -131,6 +135,7 @@ struct dp_upcall_info { struct ovs_net { struct list_head dps; struct work_struct dp_notify_work; + struct delayed_work masks_rebalance; #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) struct ovs_ct_limit_info *ct_limit_info; #endif diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9d375e74b607..03942c30d83e 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -890,6 +890,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, if (static_branch_unlikely(&tc_recirc_sharing_support)) { tc_ext = skb_ext_find(skb, TC_SKB_EXT); key->recirc_id = tc_ext ? tc_ext->chain : 0; + OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0; } else { key->recirc_id = 0; } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 79252d4887ff..9d3e50c4d29f 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1763,11 +1763,11 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val) * does not include any don't care bit. * @net: Used to determine per-namespace field support. * @match: receives the extracted flow match information. - * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute + * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. The fields should of the packet that triggered the creation * of this flow. - * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink - * attribute specifies the mask field of the wildcarded flow. + * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* + * Netlink attribute specifies the mask field of the wildcarded flow. * @log: Boolean to allow kernel error logging. Normally true, but when * probing for feature compatibility this should be passed in as false to * suppress unnecessary error logging. 
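The datapath.c/datapath.h hunks above add a per-namespace, self-rearming delayed work: ovs_dp_masks_rebalance() walks every datapath under ovs_lock(), calls ovs_flow_masks_rebalance(), and then reschedules itself every DP_MASKS_REBALANCE_INTERVAL (4000 ms), while ovs_exit_net() tears it down with cancel_delayed_work_sync(). The sketch below shows only that generic pattern; demo_ctx, demo_rebalance_fn and REBALANCE_INTERVAL_MS are invented names for illustration and are not part of the patch.

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

#define REBALANCE_INTERVAL_MS 4000        /* mirrors DP_MASKS_REBALANCE_INTERVAL */

struct demo_ctx {
        struct delayed_work rebalance;    /* embedded like ovs_net->masks_rebalance */
};

static struct demo_ctx demo;

static void demo_rebalance_fn(struct work_struct *work)
{
        struct demo_ctx *ctx = container_of(work, struct demo_ctx,
                                            rebalance.work);

        /* ... periodic maintenance would run here ... */

        /* Re-arm so the work keeps running until it is cancelled. */
        schedule_delayed_work(&ctx->rebalance,
                              msecs_to_jiffies(REBALANCE_INTERVAL_MS));
}

static int __init demo_init(void)
{
        INIT_DELAYED_WORK(&demo.rebalance, demo_rebalance_fn);
        schedule_delayed_work(&demo.rebalance,
                              msecs_to_jiffies(REBALANCE_INTERVAL_MS));
        return 0;
}

static void __exit demo_exit(void)
{
        /* As in ovs_exit_net(): ensure no run is in flight before teardown. */
        cancel_delayed_work_sync(&demo.rebalance);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
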
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 2398d7238300..8c12675cbb67 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -29,6 +29,7 @@ #include <linux/icmp.h> #include <linux/icmpv6.h> #include <linux/rculist.h> +#include <linux/sort.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ndisc.h> @@ -37,8 +38,8 @@ #define MASK_ARRAY_SIZE_MIN 16 #define REHASH_INTERVAL (10 * 60 * HZ) +#define MC_DEFAULT_HASH_ENTRIES 256 #define MC_HASH_SHIFT 8 -#define MC_HASH_ENTRIES (1u << MC_HASH_SHIFT) #define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT) static struct kmem_cache *flow_cache; @@ -169,16 +170,70 @@ static struct table_instance *table_instance_alloc(int new_size) return ti; } +static void __mask_array_destroy(struct mask_array *ma) +{ + free_percpu(ma->masks_usage_cntr); + kfree(ma); +} + +static void mask_array_rcu_cb(struct rcu_head *rcu) +{ + struct mask_array *ma = container_of(rcu, struct mask_array, rcu); + + __mask_array_destroy(ma); +} + +static void tbl_mask_array_reset_counters(struct mask_array *ma) +{ + int i, cpu; + + /* As the per CPU counters are not atomic we can not go ahead and + * reset them from another CPU. To be able to still have an approximate + * zero based counter we store the value at reset, and subtract it + * later when processing. + */ + for (i = 0; i < ma->max; i++) { + ma->masks_usage_zero_cntr[i] = 0; + + for_each_possible_cpu(cpu) { + u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr, + cpu); + unsigned int start; + u64 counter; + + do { + start = u64_stats_fetch_begin_irq(&ma->syncp); + counter = usage_counters[i]; + } while (u64_stats_fetch_retry_irq(&ma->syncp, start)); + + ma->masks_usage_zero_cntr[i] += counter; + } + } +} + static struct mask_array *tbl_mask_array_alloc(int size) { struct mask_array *new; size = max(MASK_ARRAY_SIZE_MIN, size); new = kzalloc(sizeof(struct mask_array) + - sizeof(struct sw_flow_mask *) * size, GFP_KERNEL); + sizeof(struct sw_flow_mask *) * size + + sizeof(u64) * size, GFP_KERNEL); if (!new) return NULL; + new->masks_usage_zero_cntr = (u64 *)((u8 *)new + + sizeof(struct mask_array) + + sizeof(struct sw_flow_mask *) * + size); + + new->masks_usage_cntr = __alloc_percpu(sizeof(u64) * size, + __alignof__(u64)); + if (!new->masks_usage_cntr) { + kfree(new); + return NULL; + } + new->count = 0; new->max = size; @@ -202,10 +257,10 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size) if (ovsl_dereference(old->masks[i])) new->masks[new->count++] = old->masks[i]; } + call_rcu(&old->rcu, mask_array_rcu_cb); } rcu_assign_pointer(tbl->mask_array, new); - kfree_rcu(old, rcu); return 0; } @@ -223,6 +278,11 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl, return err; ma = ovsl_dereference(tbl->mask_array); + } else { + /* On every add or delete we need to reset the counters so + * every new mask gets a fair chance of being prioritized. + */ + tbl_mask_array_reset_counters(ma); } BUG_ON(ovsl_dereference(ma->masks[ma_count])); @@ -260,6 +320,9 @@ found: if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && ma_count <= (ma->max / 3)) tbl_mask_array_realloc(tbl, ma->max / 2); + else + tbl_mask_array_reset_counters(ma); + } /* Remove 'mask' from the mask list, if it is not needed any more. 
*/ @@ -278,15 +341,79 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) } } +static void __mask_cache_destroy(struct mask_cache *mc) +{ + free_percpu(mc->mask_cache); + kfree(mc); +} + +static void mask_cache_rcu_cb(struct rcu_head *rcu) +{ + struct mask_cache *mc = container_of(rcu, struct mask_cache, rcu); + + __mask_cache_destroy(mc); +} + +static struct mask_cache *tbl_mask_cache_alloc(u32 size) +{ + struct mask_cache_entry __percpu *cache = NULL; + struct mask_cache *new; + + /* Only allow size to be 0, or a power of 2, and does not exceed + * percpu allocation size. + */ + if ((!is_power_of_2(size) && size != 0) || + (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE) + return NULL; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + + new->cache_size = size; + if (new->cache_size > 0) { + cache = __alloc_percpu(array_size(sizeof(struct mask_cache_entry), + new->cache_size), + __alignof__(struct mask_cache_entry)); + if (!cache) { + kfree(new); + return NULL; + } + } + + new->mask_cache = cache; + return new; +} +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size) +{ + struct mask_cache *mc = rcu_dereference(table->mask_cache); + struct mask_cache *new; + + if (size == mc->cache_size) + return 0; + + if ((!is_power_of_2(size) && size != 0) || + (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE) + return -EINVAL; + + new = tbl_mask_cache_alloc(size); + if (!new) + return -ENOMEM; + + rcu_assign_pointer(table->mask_cache, new); + call_rcu(&mc->rcu, mask_cache_rcu_cb); + + return 0; +} + int ovs_flow_tbl_init(struct flow_table *table) { struct table_instance *ti, *ufid_ti; + struct mask_cache *mc; struct mask_array *ma; - table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) * - MC_HASH_ENTRIES, - __alignof__(struct mask_cache_entry)); - if (!table->mask_cache) + mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES); + if (!mc) return -ENOMEM; ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN); @@ -304,6 +431,7 @@ int ovs_flow_tbl_init(struct flow_table *table) rcu_assign_pointer(table->ti, ti); rcu_assign_pointer(table->ufid_ti, ufid_ti); rcu_assign_pointer(table->mask_array, ma); + rcu_assign_pointer(table->mask_cache, mc); table->last_rehash = jiffies; table->count = 0; table->ufid_count = 0; @@ -312,9 +440,9 @@ int ovs_flow_tbl_init(struct flow_table *table) free_ti: __table_instance_destroy(ti); free_mask_array: - kfree(ma); + __mask_array_destroy(ma); free_mask_cache: - free_percpu(table->mask_cache); + __mask_cache_destroy(mc); return -ENOMEM; } @@ -390,9 +518,11 @@ void ovs_flow_tbl_destroy(struct flow_table *table) { struct table_instance *ti = rcu_dereference_raw(table->ti); struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); + struct mask_cache *mc = rcu_dereference_raw(table->mask_cache); + struct mask_array *ma = rcu_dereference_raw(table->mask_array); - free_percpu(table->mask_cache); - kfree_rcu(rcu_dereference_raw(table->mask_array), rcu); + call_rcu(&mc->rcu, mask_cache_rcu_cb); + call_rcu(&ma->rcu, mask_array_rcu_cb); table_instance_destroy(table, ti, ufid_ti, false); } @@ -604,8 +734,10 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, struct mask_array *ma, const struct sw_flow_key *key, u32 *n_mask_hit, + u32 *n_cache_hit, u32 *index) { + u64 *usage_counters = this_cpu_ptr(ma->masks_usage_cntr); struct sw_flow *flow; struct sw_flow_mask *mask; int i; @@ -614,8 +746,13 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, mask 
= rcu_dereference_ovsl(ma->masks[*index]); if (mask) { flow = masked_flow_lookup(ti, key, mask, n_mask_hit); - if (flow) + if (flow) { + u64_stats_update_begin(&ma->syncp); + usage_counters[*index]++; + u64_stats_update_end(&ma->syncp); + (*n_cache_hit)++; return flow; + } } } @@ -631,6 +768,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, flow = masked_flow_lookup(ti, key, mask, n_mask_hit); if (flow) { /* Found */ *index = i; + u64_stats_update_begin(&ma->syncp); + usage_counters[*index]++; + u64_stats_update_end(&ma->syncp); return flow; } } @@ -648,8 +788,10 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, const struct sw_flow_key *key, u32 skb_hash, - u32 *n_mask_hit) + u32 *n_mask_hit, + u32 *n_cache_hit) { + struct mask_cache *mc = rcu_dereference(tbl->mask_cache); struct mask_array *ma = rcu_dereference(tbl->mask_array); struct table_instance *ti = rcu_dereference(tbl->ti); struct mask_cache_entry *entries, *ce; @@ -658,10 +800,13 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, int seg; *n_mask_hit = 0; - if (unlikely(!skb_hash)) { + *n_cache_hit = 0; + if (unlikely(!skb_hash || mc->cache_size == 0)) { u32 mask_index = 0; + u32 cache = 0; - return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index); + return flow_lookup(tbl, ti, ma, key, n_mask_hit, &cache, + &mask_index); } /* Pre and post recirulation flows usually have the same skb_hash @@ -672,17 +817,17 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, ce = NULL; hash = skb_hash; - entries = this_cpu_ptr(tbl->mask_cache); + entries = this_cpu_ptr(mc->mask_cache); /* Find the cache entry 'ce' to operate on. */ for (seg = 0; seg < MC_HASH_SEGS; seg++) { - int index = hash & (MC_HASH_ENTRIES - 1); + int index = hash & (mc->cache_size - 1); struct mask_cache_entry *e; e = &entries[index]; if (e->skb_hash == skb_hash) { flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, - &e->mask_index); + n_cache_hit, &e->mask_index); if (!flow) e->skb_hash = 0; return flow; @@ -695,10 +840,12 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, } /* Cache miss, do full lookup. 
*/ - flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index); + flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, n_cache_hit, + &ce->mask_index); if (flow) ce->skb_hash = skb_hash; + *n_cache_hit = 0; return flow; } @@ -708,9 +855,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array); u32 __always_unused n_mask_hit; + u32 __always_unused n_cache_hit; u32 index = 0; - return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index); + return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index); } struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, @@ -787,6 +935,13 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table) return READ_ONCE(ma->count); } +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table) +{ + struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache); + + return READ_ONCE(mc->cache_size); +} + static struct table_instance *table_instance_expand(struct table_instance *ti, bool ufid) { @@ -934,6 +1089,98 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, return 0; } +static int compare_mask_and_count(const void *a, const void *b) +{ + const struct mask_count *mc_a = a; + const struct mask_count *mc_b = b; + + return (s64)mc_b->counter - (s64)mc_a->counter; +} + +/* Must be called with OVS mutex held. */ +void ovs_flow_masks_rebalance(struct flow_table *table) +{ + struct mask_array *ma = rcu_dereference_ovsl(table->mask_array); + struct mask_count *masks_and_count; + struct mask_array *new; + int masks_entries = 0; + int i; + + /* Build array of all current entries with use counters. */ + masks_and_count = kmalloc_array(ma->max, sizeof(*masks_and_count), + GFP_KERNEL); + if (!masks_and_count) + return; + + for (i = 0; i < ma->max; i++) { + struct sw_flow_mask *mask; + unsigned int start; + int cpu; + + mask = rcu_dereference_ovsl(ma->masks[i]); + if (unlikely(!mask)) + break; + + masks_and_count[i].index = i; + masks_and_count[i].counter = 0; + + for_each_possible_cpu(cpu) { + u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr, + cpu); + u64 counter; + + do { + start = u64_stats_fetch_begin_irq(&ma->syncp); + counter = usage_counters[i]; + } while (u64_stats_fetch_retry_irq(&ma->syncp, start)); + + masks_and_count[i].counter += counter; + } + + /* Subtract the zero count value. */ + masks_and_count[i].counter -= ma->masks_usage_zero_cntr[i]; + + /* Rather than calling tbl_mask_array_reset_counters() + * below when no change is needed, do it inline here. + */ + ma->masks_usage_zero_cntr[i] += masks_and_count[i].counter; + } + + if (i == 0) + goto free_mask_entries; + + /* Sort the entries */ + masks_entries = i; + sort(masks_and_count, masks_entries, sizeof(*masks_and_count), + compare_mask_and_count, NULL); + + /* If the order is the same, nothing to do... */ + for (i = 0; i < masks_entries; i++) { + if (i != masks_and_count[i].index) + break; + } + if (i == masks_entries) + goto free_mask_entries; + + /* Rebuilt the new list in order of usage. */ + new = tbl_mask_array_alloc(ma->max); + if (!new) + goto free_mask_entries; + + for (i = 0; i < masks_entries; i++) { + int index = masks_and_count[i].index; + + if (ovsl_dereference(ma->masks[index])) + new->masks[new->count++] = ma->masks[index]; + } + + rcu_assign_pointer(table->mask_array, new); + call_rcu(&ma->rcu, mask_array_rcu_cb); + +free_mask_entries: + kfree(masks_and_count); +} + /* Initializes the flow module. 
* Returns zero if successful or a negative error code. */ int ovs_flow_init(void) diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 8a5cea6ae111..74ce48fecba9 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -27,9 +27,23 @@ struct mask_cache_entry { u32 mask_index; }; +struct mask_cache { + struct rcu_head rcu; + u32 cache_size; /* Must be ^2 value. */ + struct mask_cache_entry __percpu *mask_cache; +}; + +struct mask_count { + int index; + u64 counter; +}; + struct mask_array { struct rcu_head rcu; int count, max; + u64 __percpu *masks_usage_cntr; + u64 *masks_usage_zero_cntr; + struct u64_stats_sync syncp; struct sw_flow_mask __rcu *masks[]; }; @@ -45,7 +59,7 @@ struct table_instance { struct flow_table { struct table_instance __rcu *ti; struct table_instance __rcu *ufid_ti; - struct mask_cache_entry __percpu *mask_cache; + struct mask_cache __rcu *mask_cache; struct mask_array __rcu *mask_array; unsigned long last_rehash; unsigned int count; @@ -69,12 +83,15 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, const struct sw_flow_mask *mask); void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); int ovs_flow_tbl_num_masks(const struct flow_table *table); +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table); +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size); struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, u32 *bucket, u32 *idx); struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, const struct sw_flow_key *, u32 skb_hash, - u32 *n_mask_hit); + u32 *n_mask_hit, + u32 *n_cache_hit); struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, @@ -86,4 +103,7 @@ bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, bool full, const struct sw_flow_mask *mask); + +void ovs_flow_masks_rebalance(struct flow_table *table); + #endif /* flow_table.h */ diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 47febb4504f0..0d44c5c013fa 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -87,6 +87,7 @@ EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister); /** * ovs_vport_locate - find a port that has already been created * + * @net: network namespace * @name: name of port to find * * Must be called with ovs or RCU read lock. @@ -418,7 +419,7 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb) * * @vport: vport that received the packet * @skb: skb that was received - * @tun_key: tunnel (if any) that carried packet + * @tun_info: tunnel (if any) that carried packet * * Must be called with rcu_read_lock. The packet cannot be shared and * skb->data should point to the Ethernet header. |
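In the flow_table.c hunks above, ovs_flow_masks_rebalance() snapshots the per-CPU mask usage counters (read under u64_stats_fetch_begin_irq()/u64_stats_fetch_retry_irq()), sorts (index, counter) pairs most-used-first with sort() from <linux/sort.h>, and only builds and publishes a new mask_array via rcu_assign_pointer()/call_rcu() when the resulting order differs from the current one. The fragment below is a sketch of just that sort-and-compare step; usage_entry, usage_cmp and usage_order_changed are invented names, and the comparator uses explicit comparisons instead of the patch's (s64) subtraction.

#include <linux/types.h>
#include <linux/sort.h>

struct usage_entry {
        int index;       /* position of the mask in the current array */
        u64 counter;     /* aggregated per-CPU hit count */
};

/* Sort most-used first. */
static int usage_cmp(const void *a, const void *b)
{
        const struct usage_entry *ea = a, *eb = b;

        if (ea->counter > eb->counter)
                return -1;
        if (ea->counter < eb->counter)
                return 1;
        return 0;
}

/* Returns true if sorting by usage changes the order, i.e. rebuilding the
 * array in usage order would actually move entries around.
 */
static bool usage_order_changed(struct usage_entry *entries, int n)
{
        int i;

        sort(entries, n, sizeof(*entries), usage_cmp, NULL);

        for (i = 0; i < n; i++)
                if (entries[i].index != i)
                        return true;
        return false;
}

Only when this check reports a change does the patch allocate a replacement mask_array, copy the masks over in the new order, and retire the old array with call_rcu(), so concurrent lookups never see a half-built array.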