diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/fib_frontend.c | 13 | ||||
-rw-r--r-- | net/ipv4/fib_rules.c | 3 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 92 | ||||
-rw-r--r-- | net/switchdev/switchdev.c | 161 |
4 files changed, 268 insertions, 1 deletion
/*
 * Patch commentary (not part of any hunk; patch tools skip leading text
 * that precedes the first "diff --git" header).
 *
 * What the hunks below do, as far as the lines shown establish:
 *
 *  - net/ipv4/fib_frontend.c: adds fib_flush_external(), which iterates
 *    over all FIB_TABLE_HASHSZ buckets of net->ipv4.fib_table_hash and
 *    calls fib_table_flush_external() on every table found.
 *
 *  - net/ipv4/fib_rules.c: fib4_rule_configure() and fib4_rule_delete()
 *    call fib_flush_external() right after setting
 *    net->ipv4.fib_has_custom_rules.
 *
 *  - net/ipv4/fib_trie.c: fib_table_insert() calls
 *    netdev_switch_fib_ipv4_add() before replacing or inserting an alias;
 *    on error it calls netdev_switch_fib_ipv4_abort(), frees new_fa and
 *    returns the error.  If fib_insert_alias() fails afterwards, the new
 *    out_sw_fib_del label calls netdev_switch_fib_ipv4_del().
 *    fib_table_delete() and fib_table_flush() gain calls to
 *    netdev_switch_fib_ipv4_del().  The new fib_table_flush_external()
 *    walks the trie and calls netdev_switch_fib_ipv4_del() for every alias
 *    whose fib_info has RTNH_F_EXTERNAL set; it does not unlink or free
 *    the aliases.
 *
 *  - net/switchdev/switchdev.c: adds two static helpers that locate the
 *    switch port device for a route (all nexthops must report the same
 *    parent id), plus the exported netdev_switch_fib_ipv4_add(),
 *    netdev_switch_fib_ipv4_del() and netdev_switch_fib_ipv4_abort().
 *    add sets RTNH_F_EXTERNAL in fi->fib_flags when the driver op returns
 *    0; del clears it when the driver op returns 0; abort calls
 *    fib_flush_external() and then sets ipv4.fib_offload_disabled.
 *
 * NOTE(review): in fib_table_flush_external() the locals 'slen' and
 * 'found' are declared but never referenced in the function as shown.
 * NOTE(review): netdev_switch_fib_ipv4_add() combines
 * fib_has_custom_rules and fib_offload_disabled with bitwise '|'; both
 * are assigned 'true' elsewhere in this patch, so a logical '||' was
 * presumably intended -- confirm against the field declarations.
 * NOTE(review): this page was captured with its line breaks collapsed;
 * tabs and blank lines below are reconstructed from the hunk line counts
 * and kernel indentation conventions -- verify against the upstream
 * commit before applying.
 */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 220c4b4af4cf..e067770235bf 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -144,6 +144,19 @@ static void fib_flush(struct net *net)
 		rt_cache_flush(net);
 }
 
+void fib_flush_external(struct net *net)
+{
+	struct fib_table *tb;
+	struct hlist_head *head;
+	unsigned int h;
+
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		head = &net->ipv4.fib_table_hash[h];
+		hlist_for_each_entry(tb, head, tb_hlist)
+			fib_table_flush_external(tb);
+	}
+}
+
 /*
  * Find address type as if only "dev" was present in the system. If
  * on_dev is NULL then all interfaces are taken into consideration.
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index d3db718be51d..190d0d00d744 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -209,6 +209,8 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	rule4->tos = frh->tos;
 
 	net->ipv4.fib_has_custom_rules = true;
+	fib_flush_external(rule->fr_net);
+
 	err = 0;
 errout:
 	return err;
@@ -224,6 +226,7 @@ static void fib4_rule_delete(struct fib_rule *rule)
 		net->ipv4.fib_num_tclassid_users--;
 #endif
 	net->ipv4.fib_has_custom_rules = true;
+	fib_flush_external(rule->fr_net);
 }
 
 static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index fae34ad4bb1a..6544f1a0cfa1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -79,6 +79,7 @@
 #include <net/tcp.h>
 #include <net/sock.h>
 #include <net/ip_fib.h>
+#include <net/switchdev.h>
 #include "fib_lookup.h"
 
 #define MAX_STAT_DEPTH 32
@@ -1135,7 +1136,18 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 			new_fa->fa_state = state & ~FA_S_ACCESSED;
 			new_fa->fa_slen = fa->fa_slen;
 
+			err = netdev_switch_fib_ipv4_add(key, plen, fi,
+							 new_fa->fa_tos,
+							 cfg->fc_type,
+							 tb->tb_id);
+			if (err) {
+				netdev_switch_fib_ipv4_abort(fi);
+				kmem_cache_free(fn_alias_kmem, new_fa);
+				goto out;
+			}
+
 			hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
+
 			alias_free_mem_rcu(fa);
 
 			fib_release_info(fi_drop);
@@ -1171,10 +1183,18 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	new_fa->fa_state = 0;
 	new_fa->fa_slen = slen;
 
+	/* (Optionally) offload fib entry to switch hardware. */
+	err = netdev_switch_fib_ipv4_add(key, plen, fi, tos,
+					 cfg->fc_type, tb->tb_id);
+	if (err) {
+		netdev_switch_fib_ipv4_abort(fi);
+		goto out_free_new_fa;
+	}
+
 	/* Insert new entry to the list. */
 	err = fib_insert_alias(t, tp, l, new_fa, fa, key);
 	if (err)
-		goto out_free_new_fa;
+		goto out_sw_fib_del;
 
 	if (!plen)
 		tb->tb_num_default++;
@@ -1185,6 +1205,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 succeeded:
 	return 0;
 
+out_sw_fib_del:
+	netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
 out_free_new_fa:
 	kmem_cache_free(fn_alias_kmem, new_fa);
 out:
@@ -1456,6 +1478,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	if (!fa_to_delete)
 		return -ESRCH;
 
+	netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
+				   cfg->fc_type, tb->tb_id);
+
 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
 
@@ -1536,6 +1561,67 @@ found:
 	return n;
 }
 
+/* Caller must hold RTNL */
+void fib_table_flush_external(struct fib_table *tb)
+{
+	struct trie *t = (struct trie *)tb->tb_data;
+	struct fib_alias *fa;
+	struct tnode *n, *pn;
+	unsigned long cindex;
+	unsigned char slen;
+	int found = 0;
+
+	n = rcu_dereference(t->trie);
+	if (!n)
+		return;
+
+	pn = NULL;
+	cindex = 0;
+
+	while (IS_TNODE(n)) {
+		/* record pn and cindex for leaf walking */
+		pn = n;
+		cindex = 1ul << n->bits;
+backtrace:
+		/* walk trie in reverse order */
+		do {
+			while (!(cindex--)) {
+				t_key pkey = pn->key;
+
+				n = pn;
+				pn = node_parent(n);
+
+				/* resize completed node */
+				resize(t, n);
+
+				/* if we got the root we are done */
+				if (!pn)
+					return;
+
+				cindex = get_index(pkey, pn);
+			}
+
+			/* grab the next available node */
+			n = tnode_get_child(pn, cindex);
+		} while (!n);
+	}
+
+	hlist_for_each_entry(fa, &n->leaf, fa_list) {
+		struct fib_info *fi = fa->fa_info;
+
+		if (fi && (fi->fib_flags & RTNH_F_EXTERNAL)) {
+			netdev_switch_fib_ipv4_del(n->key,
+						   KEYLENGTH - fa->fa_slen,
+						   fi, fa->fa_tos,
+						   fa->fa_type, tb->tb_id);
+		}
+	}
+
+	/* if trie is leaf only loop is completed */
+	if (pn)
+		goto backtrace;
+}
+
 /* Caller must hold RTNL. */
 int fib_table_flush(struct fib_table *tb)
 {
@@ -1589,6 +1675,10 @@ backtrace:
 		struct fib_info *fi = fa->fa_info;
 
 		if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
+			netdev_switch_fib_ipv4_del(n->key,
+						   KEYLENGTH - fa->fa_slen,
+						   fi, fa->fa_tos,
+						   fa->fa_type, tb->tb_id);
 			hlist_del_rcu(&fa->fa_list);
 			fib_release_info(fa->fa_info);
 			alias_free_mem_rcu(fa);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 8c1e558db118..f4fd575aa2a3 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -14,6 +14,7 @@
 #include <linux/mutex.h>
 #include <linux/notifier.h>
 #include <linux/netdevice.h>
+#include <net/ip_fib.h>
 #include <net/switchdev.h>
 
 /**
@@ -225,3 +226,163 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev,
 	return ret;
 }
 EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink);
+
+static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct net_device *lower_dev;
+	struct net_device *port_dev;
+	struct list_head *iter;
+
+	/* Recursively search down until we find a sw port dev.
+	 * (A sw port dev supports ndo_switch_parent_id_get).
+	 */
+
+	if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD &&
+	    ops->ndo_switch_parent_id_get)
+		return dev;
+
+	netdev_for_each_lower_dev(dev, lower_dev, iter) {
+		port_dev = netdev_switch_get_lowest_dev(lower_dev);
+		if (port_dev)
+			return port_dev;
+	}
+
+	return NULL;
+}
+
+static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi)
+{
+	struct netdev_phys_item_id psid;
+	struct netdev_phys_item_id prev_psid;
+	struct net_device *dev = NULL;
+	int nhsel;
+
+	/* For this route, all nexthop devs must be on the same switch. */
+
+	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
+		const struct fib_nh *nh = &fi->fib_nh[nhsel];
+
+		if (!nh->nh_dev)
+			return NULL;
+
+		dev = netdev_switch_get_lowest_dev(nh->nh_dev);
+		if (!dev)
+			return NULL;
+
+		if (netdev_switch_parent_id_get(dev, &psid))
+			return NULL;
+
+		if (nhsel > 0) {
+			if (prev_psid.id_len != psid.id_len)
+				return NULL;
+			if (memcmp(prev_psid.id, psid.id, psid.id_len))
+				return NULL;
+		}
+
+		prev_psid = psid;
+	}
+
+	return dev;
+}
+
+/**
+ *	netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch
+ *
+ *	@dst: route's IPv4 destination address
+ *	@dst_len: destination address length (prefix length)
+ *	@fi: route FIB info structure
+ *	@tos: route TOS
+ *	@type: route type
+ *	@tb_id: route table ID
+ *
+ *	Add IPv4 route entry to switch device.
+ */
+int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
+			       u8 tos, u8 type, u32 tb_id)
+{
+	struct net_device *dev;
+	const struct net_device_ops *ops;
+	int err = 0;
+
+	/* Don't offload route if using custom ip rules or if
+	 * IPv4 FIB offloading has been disabled completely.
+	 */
+
+	if (fi->fib_net->ipv4.fib_has_custom_rules |
+	    fi->fib_net->ipv4.fib_offload_disabled)
+		return 0;
+
+	dev = netdev_switch_get_dev_by_nhs(fi);
+	if (!dev)
+		return 0;
+	ops = dev->netdev_ops;
+
+	if (ops->ndo_switch_fib_ipv4_add) {
+		err = ops->ndo_switch_fib_ipv4_add(dev, htonl(dst), dst_len,
+						   fi, tos, type, tb_id);
+		if (!err)
+			fi->fib_flags |= RTNH_F_EXTERNAL;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(netdev_switch_fib_ipv4_add);
+
+/**
+ *	netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch
+ *
+ *	@dst: route's IPv4 destination address
+ *	@dst_len: destination address length (prefix length)
+ *	@fi: route FIB info structure
+ *	@tos: route TOS
+ *	@type: route type
+ *	@tb_id: route table ID
+ *
+ *	Delete IPv4 route entry from switch device.
+ */
+int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
+			       u8 tos, u8 type, u32 tb_id)
+{
+	struct net_device *dev;
+	const struct net_device_ops *ops;
+	int err = 0;
+
+	if (!(fi->fib_flags & RTNH_F_EXTERNAL))
+		return 0;
+
+	dev = netdev_switch_get_dev_by_nhs(fi);
+	if (!dev)
+		return 0;
+	ops = dev->netdev_ops;
+
+	if (ops->ndo_switch_fib_ipv4_del) {
+		err = ops->ndo_switch_fib_ipv4_del(dev, htonl(dst), dst_len,
+						   fi, tos, type, tb_id);
+		if (!err)
+			fi->fib_flags &= ~RTNH_F_EXTERNAL;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(netdev_switch_fib_ipv4_del);
+
+/**
+ *	netdev_switch_fib_ipv4_abort - Abort an IPv4 FIB operation
+ *
+ *	@fi: route FIB info structure
+ */
+void netdev_switch_fib_ipv4_abort(struct fib_info *fi)
+{
+	/* There was a problem installing this route to the offload
+	 * device.  For now, until we come up with more refined
+	 * policy handling, abruptly end IPv4 fib offloading for
+	 * the entire net by flushing offload device(s) of all
+	 * IPv4 routes, and mark IPv4 fib offloading broken from
+	 * this point forward.
+	 */
+
+	fib_flush_external(fi->fib_net);
+	fi->fib_net->ipv4.fib_offload_disabled = true;
+}
+EXPORT_SYMBOL(netdev_switch_fib_ipv4_abort);
|