summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/rocker/rocker.c483
-rw-r--r--include/linux/netdevice.h22
-rw-r--r--include/net/ip_fib.h2
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/switchdev.h24
-rw-r--r--include/uapi/linux/rtnetlink.h1
-rw-r--r--net/ipv4/fib_frontend.c13
-rw-r--r--net/ipv4/fib_rules.c3
-rw-r--r--net/ipv4/fib_trie.c92
-rw-r--r--net/switchdev/switchdev.c161
10 files changed, 754 insertions, 48 deletions
diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
index a5d1e6ea7d58..d04d3b374e31 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -32,6 +32,9 @@
#include <linux/bitops.h>
#include <net/switchdev.h>
#include <net/rtnetlink.h>
+#include <linux/err.h>
+#include <net/ip_fib.h>
+#include <net/netevent.h>
+#include <net/arp.h>
#include <asm-generic/io-64-nonatomic-lo-hi.h>
#include <generated/utsrelease.h>
@@ -111,9 +114,10 @@ struct rocker_flow_tbl_key {
struct rocker_flow_tbl_entry {
struct hlist_node entry;
- u32 ref_count;
+ u32 cmd;
u64 cookie;
struct rocker_flow_tbl_key key;
+ size_t key_len;
u32 key_crc32; /* key */
};
@@ -161,6 +165,16 @@ struct rocker_internal_vlan_tbl_entry {
__be16 vlan_id;
};
+/* One IPv4 neighbor tracked by the driver, hashed by ip_addr in
+ * rocker->neigh_tbl.
+ */
+struct rocker_neigh_tbl_entry {
+ struct hlist_node entry;
+ __be32 ip_addr; /* key */
+ struct net_device *dev;
+ u32 ref_count; /* entry is unhashed and freed when this drops to 0 */
+ u32 index; /* from rocker->neigh_tbl_next_index; names the L3 unicast group */
+ u8 eth_dst[ETH_ALEN]; /* neighbor MAC; all-zero until resolved */
+ bool ttl_check;
+};
+
struct rocker_desc_info {
char *data; /* mapped */
size_t data_size;
@@ -234,6 +248,9 @@ struct rocker {
unsigned long internal_vlan_bitmap[ROCKER_INTERNAL_VLAN_BITMAP_LEN];
DECLARE_HASHTABLE(internal_vlan_tbl, 8);
spinlock_t internal_vlan_tbl_lock;
+ DECLARE_HASHTABLE(neigh_tbl, 16);
+ spinlock_t neigh_tbl_lock;
+ u32 neigh_tbl_next_index;
};
static const u8 zero_mac[ETH_ALEN] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
@@ -256,7 +273,6 @@ enum {
ROCKER_PRIORITY_VLAN = 1,
ROCKER_PRIORITY_TERM_MAC_UCAST = 0,
ROCKER_PRIORITY_TERM_MAC_MCAST = 1,
- ROCKER_PRIORITY_UNICAST_ROUTING = 1,
ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_EXACT = 1,
ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_WILD = 2,
ROCKER_PRIORITY_BRIDGING_VLAN = 3,
@@ -1940,8 +1956,7 @@ static int rocker_cmd_flow_tbl_add(struct rocker *rocker,
struct rocker_tlv *cmd_info;
int err = 0;
- if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
- ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD))
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd))
return -EMSGSIZE;
cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
if (!cmd_info)
@@ -1998,8 +2013,7 @@ static int rocker_cmd_flow_tbl_del(struct rocker *rocker,
const struct rocker_flow_tbl_entry *entry = priv;
struct rocker_tlv *cmd_info;
- if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
- ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL))
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd))
return -EMSGSIZE;
cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
if (!cmd_info)
@@ -2168,9 +2182,9 @@ static int rocker_cmd_group_tbl_del(struct rocker *rocker,
return 0;
}
-/*****************************************
- * Flow, group, FDB, internal VLAN tables
- *****************************************/
+/***************************************************
+ * Flow, group, FDB, internal VLAN and neigh tables
+ ***************************************************/
static int rocker_init_tbls(struct rocker *rocker)
{
@@ -2186,6 +2200,9 @@ static int rocker_init_tbls(struct rocker *rocker)
hash_init(rocker->internal_vlan_tbl);
spin_lock_init(&rocker->internal_vlan_tbl_lock);
+ hash_init(rocker->neigh_tbl);
+ spin_lock_init(&rocker->neigh_tbl_lock);
+
return 0;
}
@@ -2196,6 +2213,7 @@ static void rocker_free_tbls(struct rocker *rocker)
struct rocker_group_tbl_entry *group_entry;
struct rocker_fdb_tbl_entry *fdb_entry;
struct rocker_internal_vlan_tbl_entry *internal_vlan_entry;
+ struct rocker_neigh_tbl_entry *neigh_entry;
struct hlist_node *tmp;
int bkt;
@@ -2219,16 +2237,22 @@ static void rocker_free_tbls(struct rocker *rocker)
tmp, internal_vlan_entry, entry)
hash_del(&internal_vlan_entry->entry);
spin_unlock_irqrestore(&rocker->internal_vlan_tbl_lock, flags);
+
+ spin_lock_irqsave(&rocker->neigh_tbl_lock, flags);
+ hash_for_each_safe(rocker->neigh_tbl, bkt, tmp, neigh_entry, entry)
+ hash_del(&neigh_entry->entry);
+ spin_unlock_irqrestore(&rocker->neigh_tbl_lock, flags);
}
static struct rocker_flow_tbl_entry *
rocker_flow_tbl_find(struct rocker *rocker, struct rocker_flow_tbl_entry *match)
{
struct rocker_flow_tbl_entry *found;
+ size_t key_len = match->key_len ? match->key_len : sizeof(found->key);
hash_for_each_possible(rocker->flow_tbl, found,
entry, match->key_crc32) {
- if (memcmp(&found->key, &match->key, sizeof(found->key)) == 0)
+ if (memcmp(&found->key, &match->key, key_len) == 0)
return found;
}
@@ -2241,42 +2265,34 @@ static int rocker_flow_tbl_add(struct rocker_port *rocker_port,
{
struct rocker *rocker = rocker_port->rocker;
struct rocker_flow_tbl_entry *found;
+ size_t key_len = match->key_len ? match->key_len : sizeof(found->key);
unsigned long flags;
- bool add_to_hw = false;
- int err = 0;
- match->key_crc32 = crc32(~0, &match->key, sizeof(match->key));
+ match->key_crc32 = crc32(~0, &match->key, key_len);
spin_lock_irqsave(&rocker->flow_tbl_lock, flags);
found = rocker_flow_tbl_find(rocker, match);
if (found) {
- kfree(match);
+ match->cookie = found->cookie;
+ hash_del(&found->entry);
+ kfree(found);
+ found = match;
+ found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD;
} else {
found = match;
found->cookie = rocker->flow_tbl_next_cookie++;
- hash_add(rocker->flow_tbl, &found->entry, found->key_crc32);
- add_to_hw = true;
+ found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD;
}
- found->ref_count++;
+ hash_add(rocker->flow_tbl, &found->entry, found->key_crc32);
spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags);
- if (add_to_hw) {
- err = rocker_cmd_exec(rocker, rocker_port,
- rocker_cmd_flow_tbl_add,
- found, NULL, NULL, nowait);
- if (err) {
- spin_lock_irqsave(&rocker->flow_tbl_lock, flags);
- hash_del(&found->entry);
- spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags);
- kfree(found);
- }
- }
-
- return err;
+ return rocker_cmd_exec(rocker, rocker_port,
+ rocker_cmd_flow_tbl_add,
+ found, NULL, NULL, nowait);
}
static int rocker_flow_tbl_del(struct rocker_port *rocker_port,
@@ -2285,29 +2301,26 @@ static int rocker_flow_tbl_del(struct rocker_port *rocker_port,
{
struct rocker *rocker = rocker_port->rocker;
struct rocker_flow_tbl_entry *found;
+ size_t key_len = match->key_len ? match->key_len : sizeof(found->key);
unsigned long flags;
- bool del_from_hw = false;
int err = 0;
- match->key_crc32 = crc32(~0, &match->key, sizeof(match->key));
+ match->key_crc32 = crc32(~0, &match->key, key_len);
spin_lock_irqsave(&rocker->flow_tbl_lock, flags);
found = rocker_flow_tbl_find(rocker, match);
if (found) {
- found->ref_count--;
- if (found->ref_count == 0) {
- hash_del(&found->entry);
- del_from_hw = true;
- }
+ hash_del(&found->entry);
+ found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL;
}
spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags);
kfree(match);
- if (del_from_hw) {
+ if (found) {
err = rocker_cmd_exec(rocker, rocker_port,
rocker_cmd_flow_tbl_del,
found, NULL, NULL, nowait);
@@ -2467,6 +2480,31 @@ static int rocker_flow_tbl_bridge(struct rocker_port *rocker_port,
return rocker_flow_tbl_do(rocker_port, flags, entry);
}
+/* Build a unicast-routing flow table entry for an IPv4 prefix and hand it
+ * to rocker_flow_tbl_do(), which adds or removes it according to @flags.
+ *
+ * Returns -ENOMEM if the entry cannot be allocated, otherwise whatever
+ * rocker_flow_tbl_do() returns.
+ */
+static int rocker_flow_tbl_ucast4_routing(struct rocker_port *rocker_port,
+ __be16 eth_type, __be32 dst,
+ __be32 dst_mask, u32 priority,
+ enum rocker_of_dpa_table_id goto_tbl,
+ u32 group_id, int flags)
+{
+ struct rocker_flow_tbl_entry *entry;
+
+ entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+ if (!entry)
+ return -ENOMEM;
+
+ entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING;
+ entry->key.priority = priority;
+ entry->key.ucast_routing.eth_type = eth_type;
+ entry->key.ucast_routing.dst4 = dst;
+ entry->key.ucast_routing.dst4_mask = dst_mask;
+ entry->key.ucast_routing.goto_tbl = goto_tbl;
+ entry->key.ucast_routing.group_id = group_id;
+ /* The compared/hashed key stops just before group_id, so the same
+ * route with a different group matches the existing entry.
+ */
+ entry->key_len = offsetof(struct rocker_flow_tbl_key,
+ ucast_routing.group_id);
+
+ return rocker_flow_tbl_do(rocker_port, flags, entry);
+}
+
static int rocker_flow_tbl_acl(struct rocker_port *rocker_port,
int flags, u32 in_pport,
u32 in_pport_mask,
@@ -2554,7 +2592,6 @@ static int rocker_group_tbl_add(struct rocker_port *rocker_port,
struct rocker *rocker = rocker_port->rocker;
struct rocker_group_tbl_entry *found;
unsigned long flags;
- int err = 0;
spin_lock_irqsave(&rocker->group_tbl_lock, flags);
@@ -2574,12 +2611,9 @@ static int rocker_group_tbl_add(struct rocker_port *rocker_port,
spin_unlock_irqrestore(&rocker->group_tbl_lock, flags);
- if (found->cmd)
- err = rocker_cmd_exec(rocker, rocker_port,
- rocker_cmd_group_tbl_add,
- found, NULL, NULL, nowait);
-
- return err;
+ return rocker_cmd_exec(rocker, rocker_port,
+ rocker_cmd_group_tbl_add,
+ found, NULL, NULL, nowait);
}
static int rocker_group_tbl_del(struct rocker_port *rocker_port,
@@ -2675,6 +2709,244 @@ static int rocker_group_l2_flood(struct rocker_port *rocker_port,
group_id);
}
+/* Build an L3 unicast group entry numbered @index and hand it to
+ * rocker_group_tbl_do(), which adds or removes it according to @flags.
+ * The group chains to the L2 interface group for (@vlan_id, @pport).
+ * @src_mac and @dst_mac may be NULL, in which case the corresponding
+ * address stays zeroed.
+ *
+ * Returns -ENOMEM if the entry cannot be allocated, otherwise whatever
+ * rocker_group_tbl_do() returns.
+ */
+static int rocker_group_l3_unicast(struct rocker_port *rocker_port,
+ int flags, u32 index, u8 *src_mac,
+ u8 *dst_mac, __be16 vlan_id,
+ bool ttl_check, u32 pport)
+{
+ struct rocker_group_tbl_entry *entry;
+
+ entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+ if (!entry)
+ return -ENOMEM;
+
+ entry->group_id = ROCKER_GROUP_L3_UNICAST(index);
+ if (src_mac)
+ ether_addr_copy(entry->l3_unicast.eth_src, src_mac);
+ if (dst_mac)
+ ether_addr_copy(entry->l3_unicast.eth_dst, dst_mac);
+ entry->l3_unicast.vlan_id = vlan_id;
+ entry->l3_unicast.ttl_check = ttl_check;
+ entry->l3_unicast.group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, pport);
+
+ return rocker_group_tbl_do(rocker_port, flags, entry);
+}
+
+/* Look up the neighbor entry for @ip_addr in rocker->neigh_tbl.
+ * Returns the entry, or NULL if there is none. The callers in this file
+ * invoke it with rocker->neigh_tbl_lock held.
+ */
+static struct rocker_neigh_tbl_entry *
+ rocker_neigh_tbl_find(struct rocker *rocker, __be32 ip_addr)
+{
+ struct rocker_neigh_tbl_entry *found;
+
+ hash_for_each_possible(rocker->neigh_tbl, found, entry, ip_addr)
+ if (found->ip_addr == ip_addr)
+ return found;
+
+ return NULL;
+}
+
+/* Assign the next free index to @entry, take a reference on it and insert
+ * it into rocker->neigh_tbl keyed by its IPv4 address.
+ */
+static void _rocker_neigh_add(struct rocker *rocker,
+ struct rocker_neigh_tbl_entry *entry)
+{
+ entry->index = rocker->neigh_tbl_next_index++;
+ entry->ref_count++;
+ hash_add(rocker->neigh_tbl, &entry->entry, entry->ip_addr);
+}
+
+/* Drop one reference on @entry; on the last reference the entry is
+ * removed from the hash table and freed, so the caller must not touch
+ * it afterwards.
+ */
+static void _rocker_neigh_del(struct rocker *rocker,
+ struct rocker_neigh_tbl_entry *entry)
+{
+ if (--entry->ref_count == 0) {
+ hash_del(&entry->entry);
+ kfree(entry);
+ }
+}
+
+/* Update an existing neighbor entry. With a non-NULL @eth_dst the MAC
+ * and @ttl_check are refreshed; with a NULL @eth_dst the call instead
+ * takes an additional reference on the entry.
+ */
+static void _rocker_neigh_update(struct rocker *rocker,
+ struct rocker_neigh_tbl_entry *entry,
+ u8 *eth_dst, bool ttl_check)
+{
+ if (eth_dst) {
+ ether_addr_copy(entry->eth_dst, eth_dst);
+ entry->ttl_check = ttl_check;
+ } else {
+ entry->ref_count++;
+ }
+}
+
+/* Install, refresh or remove the hardware state for one IPv4 neighbor.
+ *
+ * Without ROCKER_OP_FLAG_REMOVE in @flags the neighbor is added to
+ * rocker->neigh_tbl, or has its MAC refreshed if it is already there.
+ * With the flag set, a known neighbor drops one reference and an unknown
+ * one yields -ENOENT. The L3 unicast group is programmed in every
+ * successful case; the /32 route only when adding or removing.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port,
+ int flags, __be32 ip_addr, u8 *eth_dst)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ struct rocker_neigh_tbl_entry *entry;
+ struct rocker_neigh_tbl_entry *found;
+ unsigned long lock_flags;
+ __be16 eth_type = htons(ETH_P_IP);
+ enum rocker_of_dpa_table_id goto_tbl =
+ ROCKER_OF_DPA_TABLE_ID_ACL_POLICY;
+ u32 group_id;
+ u32 priority = 0;
+ bool adding = !(flags & ROCKER_OP_FLAG_REMOVE);
+ bool updating;
+ bool removing;
+ int err = 0;
+
+ /* Allocated before the lock is taken: becomes the table entry when
+ * adding, otherwise serves as a private snapshot of the found entry.
+ */
+ entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+ if (!entry)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&rocker->neigh_tbl_lock, lock_flags);
+
+ found = rocker_neigh_tbl_find(rocker, ip_addr);
+
+ updating = found && adding;
+ removing = found && !adding;
+ adding = !found && adding;
+
+ if (adding) {
+ entry->ip_addr = ip_addr;
+ entry->dev = rocker_port->dev;
+ ether_addr_copy(entry->eth_dst, eth_dst);
+ entry->ttl_check = true;
+ _rocker_neigh_add(rocker, entry);
+ } else if (removing) {
+ /* Snapshot first: _rocker_neigh_del() may free found. */
+ memcpy(entry, found, sizeof(*entry));
+ _rocker_neigh_del(rocker, found);
+ } else if (updating) {
+ _rocker_neigh_update(rocker, found, eth_dst, true);
+ memcpy(entry, found, sizeof(*entry));
+ } else {
+ err = -ENOENT;
+ }
+
+ spin_unlock_irqrestore(&rocker->neigh_tbl_lock, lock_flags);
+
+ if (err)
+ goto err_out;
+
+ /* For each active neighbor, we have an L3 unicast group and
+ * a /32 route to the neighbor, which uses the L3 unicast
+ * group. The L3 unicast group can also be referred to by
+ * other routes' nexthops.
+ */
+
+ err = rocker_group_l3_unicast(rocker_port, flags,
+ entry->index,
+ rocker_port->dev->dev_addr,
+ entry->eth_dst,
+ rocker_port->internal_vlan_id,
+ entry->ttl_check,
+ rocker_port->pport);
+ if (err) {
+ netdev_err(rocker_port->dev,
+ "Error (%d) L3 unicast group index %d\n",
+ err, entry->index);
+ goto err_out;
+ }
+
+ if (adding || removing) {
+ group_id = ROCKER_GROUP_L3_UNICAST(entry->index);
+ err = rocker_flow_tbl_ucast4_routing(rocker_port,
+ eth_type, ip_addr,
+ inet_make_mask(32),
+ priority, goto_tbl,
+ group_id, flags);
+
+ if (err)
+ netdev_err(rocker_port->dev,
+ "Error (%d) /32 unicast route %pI4 group 0x%08x\n",
+ err, &entry->ip_addr, group_id);
+ }
+
+err_out:
+ /* When adding, entry is now owned by neigh_tbl; in every other case
+ * it was only a local copy.
+ */
+ if (!adding)
+ kfree(entry);
+
+ return err;
+}
+
+/* Make sure the kernel has a neighbour entry for @ip_addr on this port.
+ * If that neighbour is already valid its MAC is installed right away via
+ * rocker_port_ipv4_neigh(); otherwise resolution is kicked off.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int rocker_port_ipv4_resolve(struct rocker_port *rocker_port,
+ __be32 ip_addr)
+{
+ struct net_device *dev = rocker_port->dev;
+ struct neighbour *n = __ipv4_neigh_lookup(dev, ip_addr);
+ int err = 0;
+
+ if (!n) {
+ /* neigh_create() reports failure as ERR_PTR(), never NULL. */
+ n = neigh_create(&arp_tbl, &ip_addr, dev);
+ if (IS_ERR(n))
+ return PTR_ERR(n);
+ }
+
+ /* If the neigh is already resolved, then go ahead and
+ * install the entry, otherwise start the ARP process to
+ * resolve the neigh.
+ */
+
+ if (n->nud_state & NUD_VALID)
+ err = rocker_port_ipv4_neigh(rocker_port, 0, ip_addr, n->ha);
+ else
+ neigh_event_send(n, NULL);
+
+ /* Lookup and create both return a held reference; drop it. */
+ neigh_release(n);
+
+ return err;
+}
+
+/* Take or drop a route nexthop's reference on the neighbor entry for
+ * @ip_addr and report that entry's index through @index.
+ *
+ * Without ROCKER_OP_FLAG_REMOVE in @flags: an unknown neighbor gets a new
+ * entry with a zero MAC and resolution is started; a known one gains a
+ * reference. With the flag set a known neighbor drops a reference and an
+ * unknown one yields -ENOENT, in which case @index is left unwritten.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, int flags,
+ __be32 ip_addr, u32 *index)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ struct rocker_neigh_tbl_entry *entry;
+ struct rocker_neigh_tbl_entry *found;
+ unsigned long lock_flags;
+ bool adding = !(flags & ROCKER_OP_FLAG_REMOVE);
+ bool updating;
+ bool removing;
+ bool resolved = true;
+ int err = 0;
+
+ /* Allocated before the lock is taken; only kept when adding. */
+ entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+ if (!entry)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&rocker->neigh_tbl_lock, lock_flags);
+
+ found = rocker_neigh_tbl_find(rocker, ip_addr);
+ if (found)
+ *index = found->index;
+
+ updating = found && adding;
+ removing = found && !adding;
+ adding = !found && adding;
+
+ if (adding) {
+ entry->ip_addr = ip_addr;
+ entry->dev = rocker_port->dev;
+ _rocker_neigh_add(rocker, entry);
+ *index = entry->index;
+ resolved = false;
+ } else if (removing) {
+ _rocker_neigh_del(rocker, found);
+ } else if (updating) {
+ /* NULL MAC: only bumps the reference count. */
+ _rocker_neigh_update(rocker, found, NULL, false);
+ resolved = !is_zero_ether_addr(found->eth_dst);
+ } else {
+ err = -ENOENT;
+ }
+
+ spin_unlock_irqrestore(&rocker->neigh_tbl_lock, lock_flags);
+
+ if (!adding)
+ kfree(entry);
+
+ if (err)
+ return err;
+
+ /* Resolved means neigh ip_addr is resolved to neigh mac. */
+
+ if (!resolved)
+ err = rocker_port_ipv4_resolve(rocker_port, ip_addr);
+
+ return err;
+}
+
static int rocker_port_vlan_flood_group(struct rocker_port *rocker_port,
int flags, __be16 vlan_id)
{
@@ -3429,6 +3701,51 @@ not_found:
spin_unlock_irqrestore(&rocker->internal_vlan_tbl_lock, lock_flags);
}
+/* Add or remove (ROCKER_OP_FLAG_REMOVE in @flags) the unicast routing
+ * flow for the IPv4 prefix @dst/@dst_len.
+ *
+ * Only the first nexthop of @fi is looked at. When it has a gateway and
+ * the route's device is this port, the flow points at the gateway's L3
+ * unicast group; otherwise it points at the L2 interface group for the
+ * port's internal VLAN with pport 0. @tb_id is currently not used.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int rocker_port_fib_ipv4(struct rocker_port *rocker_port, __be32 dst,
+ int dst_len, struct fib_info *fi, u32 tb_id,
+ int flags)
+{
+ struct fib_nh *nh;
+ __be16 eth_type = htons(ETH_P_IP);
+ __be32 dst_mask = inet_make_mask(dst_len);
+ __be16 internal_vlan_id = rocker_port->internal_vlan_id;
+ u32 priority = fi->fib_priority;
+ enum rocker_of_dpa_table_id goto_tbl =
+ ROCKER_OF_DPA_TABLE_ID_ACL_POLICY;
+ u32 group_id;
+ bool nh_on_port;
+ bool has_gw;
+ u32 index;
+ int err;
+
+ /* XXX support ECMP */
+
+ nh = fi->fib_nh;
+ nh_on_port = (fi->fib_dev == rocker_port->dev);
+ has_gw = !!nh->nh_gw;
+
+ if (has_gw && nh_on_port) {
+ err = rocker_port_ipv4_nh(rocker_port, flags,
+ nh->nh_gw, &index);
+ if (err)
+ return err;
+
+ group_id = ROCKER_GROUP_L3_UNICAST(index);
+ } else {
+ /* Send to CPU for processing */
+ group_id = ROCKER_GROUP_L2_INTERFACE(internal_vlan_id, 0);
+ }
+
+ err = rocker_flow_tbl_ucast4_routing(rocker_port, eth_type, dst,
+ dst_mask, priority, goto_tbl,
+ group_id, flags);
+ if (err)
+ netdev_err(rocker_port->dev, "Error (%d) IPv4 route %pI4\n",
+ err, &dst);
+
+ return err;
+}
+
/*****************
* Net device ops
*****************/
@@ -3830,6 +4147,30 @@ static int rocker_port_switch_port_stp_update(struct net_device *dev, u8 state)
return rocker_port_stp_update(rocker_port, state);
}
+/* ndo_switch_fib_ipv4_add hook: offload an IPv4 route to the port.
+ * @tos and @type are accepted but not forwarded to rocker_port_fib_ipv4().
+ */
+static int rocker_port_switch_fib_ipv4_add(struct net_device *dev,
+ __be32 dst, int dst_len,
+ struct fib_info *fi,
+ u8 tos, u8 type, u32 tb_id)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+ int flags = 0;
+
+ return rocker_port_fib_ipv4(rocker_port, dst, dst_len,
+ fi, tb_id, flags);
+}
+
+/* ndo_switch_fib_ipv4_del hook: same path as the add hook, but with
+ * ROCKER_OP_FLAG_REMOVE set so the route is taken out of the port.
+ * @tos and @type are accepted but not forwarded.
+ */
+static int rocker_port_switch_fib_ipv4_del(struct net_device *dev,
+ __be32 dst, int dst_len,
+ struct fib_info *fi,
+ u8 tos, u8 type, u32 tb_id)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+ int flags = ROCKER_OP_FLAG_REMOVE;
+
+ return rocker_port_fib_ipv4(rocker_port, dst, dst_len,
+ fi, tb_id, flags);
+}
+
static const struct net_device_ops rocker_port_netdev_ops = {
.ndo_open = rocker_port_open,
.ndo_stop = rocker_port_stop,
@@ -3844,6 +4185,8 @@ static const struct net_device_ops rocker_port_netdev_ops = {
.ndo_bridge_getlink = rocker_port_bridge_getlink,
.ndo_switch_parent_id_get = rocker_port_switch_parent_id_get,
.ndo_switch_port_stp_update = rocker_port_switch_port_stp_update,
+ .ndo_switch_fib_ipv4_add = rocker_port_switch_fib_ipv4_add,
+ .ndo_switch_fib_ipv4_del = rocker_port_switch_fib_ipv4_del,
};
/********************
@@ -4204,8 +4547,9 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
NAPI_POLL_WEIGHT);
rocker_carrier_init(rocker_port);
- dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
- NETIF_F_HW_SWITCH_OFFLOAD;
+ dev->features |= NETIF_F_NETNS_LOCAL |
+ NETIF_F_HW_VLAN_CTAG_FILTER |
+ NETIF_F_HW_SWITCH_OFFLOAD;
err = register_netdev(dev);
if (err) {
@@ -4546,6 +4890,48 @@ static struct notifier_block rocker_netdevice_nb __read_mostly = {
.notifier_call = rocker_netdevice_event,
};
+/************************************
+ * Net event notifier event handler
+ ************************************/
+
+/* Translate a neighbour state change into a driver neighbor operation:
+ * a NUD_VALID neighbour is added/refreshed, anything else is removed.
+ * The neighbour key is read as an IPv4 address.
+ */
+static int rocker_neigh_update(struct net_device *dev, struct neighbour *n)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+ int flags = (n->nud_state & NUD_VALID) ? 0 : ROCKER_OP_FLAG_REMOVE;
+ __be32 ip_addr = *(__be32 *)n->primary_key;
+
+ return rocker_port_ipv4_neigh(rocker_port, flags, ip_addr, n->ha);
+}
+
+/* Netevent notifier callback. Only NETEVENT_NEIGH_UPDATE for arp_tbl
+ * neighbours on rocker port devices is acted upon. Failures are logged
+ * and the callback always returns NOTIFY_DONE.
+ */
+static int rocker_netevent_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev;
+ struct neighbour *n = ptr;
+ int err;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ if (n->tbl != &arp_tbl)
+ return NOTIFY_DONE;
+ dev = n->dev;
+ if (!rocker_port_dev_check(dev))
+ return NOTIFY_DONE;
+ err = rocker_neigh_update(dev, n);
+ if (err)
+ netdev_warn(dev,
+ "failed to handle neigh update (err %d)\n",
+ err);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/* Registered with register_netevent_notifier() at module init. */
+static struct notifier_block rocker_netevent_nb __read_mostly = {
+ .notifier_call = rocker_netevent_event,
+};
+
/***********************
* Module init and exit
***********************/
@@ -4555,18 +4941,21 @@ static int __init rocker_module_init(void)
int err;
register_netdevice_notifier(&rocker_netdevice_nb);
+ register_netevent_notifier(&rocker_netevent_nb);
err = pci_register_driver(&rocker_pci_driver);
if (err)
goto err_pci_register_driver;
return 0;
err_pci_register_driver:
+ unregister_netevent_notifier(&rocker_netevent_nb); /* undo register_netevent_notifier() */
unregister_netdevice_notifier(&rocker_netdevice_nb);
return err;
}
static void __exit rocker_module_exit(void)
{
+ unregister_netevent_notifier(&rocker_netevent_nb);
unregister_netdevice_notifier(&rocker_netdevice_nb);
pci_unregister_driver(&rocker_pci_driver);
}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 625c8d71511b..45413784a3b1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -768,6 +768,8 @@ struct netdev_phys_item_id {
typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
struct sk_buff *skb);
+struct fib_info;
+
/*
* This structure defines the management hooks for network devices.
* The following hooks can be defined; unless noted otherwise, they are
@@ -1031,6 +1033,14 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
* int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state);
* Called to notify switch device port of bridge port STP
* state change.
+ * int (*ndo_switch_fib_ipv4_add)(struct net_device *dev, __be32 dst,
+ * int dst_len, struct fib_info *fi,
+ * u8 tos, u8 type, u32 tb_id);
+ * Called to add/modify IPv4 route to switch device.
+ * int (*ndo_switch_fib_ipv4_del)(struct net_device *dev, __be32 dst,
+ * int dst_len, struct fib_info *fi,
+ * u8 tos, u8 type, u32 tb_id);
+ * Called to delete IPv4 route from switch device.
*/
struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1192,6 +1202,18 @@ struct net_device_ops {
struct netdev_phys_item_id *psid);
int (*ndo_switch_port_stp_update)(struct net_device *dev,
u8 state);
+ int (*ndo_switch_fib_ipv4_add)(struct net_device *dev,
+ __be32 dst,
+ int dst_len,
+ struct fib_info *fi,
+ u8 tos, u8 type,
+ u32 tb_id);
+ int (*ndo_switch_fib_ipv4_del)(struct net_device *dev,
+ __be32 dst,
+ int dst_len,
+ struct fib_info *fi,
+ u8 tos, u8 type,
+ u32 tb_id);
#endif
};
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 825cb2800908..1657604c5dd3 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -196,6 +196,7 @@ int fib_table_delete(struct fib_table *, struct fib_config *);
int fib_table_dump(struct fib_table *table, struct sk_buff *skb,
struct netlink_callback *cb);
int fib_table_flush(struct fib_table *table);
+void fib_table_flush_external(struct fib_table *table);
void fib_free_table(struct fib_table *tb);
@@ -308,6 +309,7 @@ static inline int fib_num_tclassid_users(struct net *net)
return 0;
}
#endif
+void fib_flush_external(struct net *net);
/* Exported by fib_semantics.c */
int ip_fib_check_default(__be32 gw, struct net_device *dev);
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index db1db158a00e..1085e12f940f 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -47,6 +47,7 @@ struct netns_ipv4 {
int fib_num_tclassid_users;
#endif
struct hlist_head *fib_table_hash;
+ bool fib_offload_disabled;
struct sock *fibnl;
struct sock * __percpu *icmp_sk;
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index cfcdac2e5d25..dc0a5cc7c2c5 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -51,6 +51,12 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev,
struct nlmsghdr *nlh, u16 flags);
int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev,
struct nlmsghdr *nlh, u16 flags);
+int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
+ u8 tos, u8 type, u32 tb_id);
+int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
+ u8 tos, u8 type, u32 tb_id);
+void netdev_switch_fib_ipv4_abort(struct fib_info *fi);
+
#else
static inline int netdev_switch_parent_id_get(struct net_device *dev,
@@ -109,6 +115,24 @@ static inline int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *
return 0;
}
+/* Stub for the #else branch of this header: nothing is offloaded and
+ * success is reported.
+ */
+static inline int netdev_switch_fib_ipv4_add(u32 dst, int dst_len,
+ struct fib_info *fi,
+ u8 tos, u8 type, u32 tb_id)
+{
+ return 0;
+}
+
+/* Stub for the #else branch of this header: there is nothing to remove,
+ * so success is reported.
+ */
+static inline int netdev_switch_fib_ipv4_del(u32 dst, int dst_len,
+ struct fib_info *fi,
+ u8 tos, u8 type, u32 tb_id)
+{
+ return 0;
+}
+
+/* Stub for the #else branch of this header. It must be static inline
+ * like the other stubs here: a plain definition in a header is emitted
+ * by every file that includes it.
+ */
+static inline void netdev_switch_fib_ipv4_abort(struct fib_info *fi)
+{
+}
+
#endif
#endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 06f75a407f74..c3722b024e73 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -334,6 +334,7 @@ struct rtnexthop {
#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
+#define RTNH_F_EXTERNAL 8 /* Route installed externally */
/* Macros to handle hexthops */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 220c4b4af4cf..e067770235bf 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -144,6 +144,19 @@ static void fib_flush(struct net *net)
rt_cache_flush(net);
}
+/* Run fib_table_flush_external() on every FIB table hashed in @net. */
+void fib_flush_external(struct net *net)
+{
+ struct fib_table *tb;
+ struct hlist_head *head;
+ unsigned int h;
+
+ for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+ head = &net->ipv4.fib_table_hash[h];
+ hlist_for_each_entry(tb, head, tb_hlist)
+ fib_table_flush_external(tb);
+ }
+}
+
/*
* Find address type as if only "dev" was present in the system. If
* on_dev is NULL then all interfaces are taken into consideration.
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index d3db718be51d..190d0d00d744 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -209,6 +209,8 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
+ fib_flush_external(rule->fr_net);
+
err = 0;
errout:
return err;
@@ -224,6 +226,7 @@ static void fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
+ fib_flush_external(rule->fr_net);
}
static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index fae34ad4bb1a..6544f1a0cfa1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -79,6 +79,7 @@
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/switchdev.h>
#include "fib_lookup.h"
#define MAX_STAT_DEPTH 32
@@ -1135,7 +1136,18 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
new_fa->fa_state = state & ~FA_S_ACCESSED;
new_fa->fa_slen = fa->fa_slen;
+ err = netdev_switch_fib_ipv4_add(key, plen, fi,
+ new_fa->fa_tos,
+ cfg->fc_type,
+ tb->tb_id);
+ if (err) {
+ netdev_switch_fib_ipv4_abort(fi);
+ kmem_cache_free(fn_alias_kmem, new_fa);
+ goto out;
+ }
+
hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
+
alias_free_mem_rcu(fa);
fib_release_info(fi_drop);
@@ -1171,10 +1183,18 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
new_fa->fa_state = 0;
new_fa->fa_slen = slen;
+ /* (Optionally) offload fib entry to switch hardware. */
+ err = netdev_switch_fib_ipv4_add(key, plen, fi, tos,
+ cfg->fc_type, tb->tb_id);
+ if (err) {
+ netdev_switch_fib_ipv4_abort(fi);
+ goto out_free_new_fa;
+ }
+
/* Insert new entry to the list. */
err = fib_insert_alias(t, tp, l, new_fa, fa, key);
if (err)
- goto out_free_new_fa;
+ goto out_sw_fib_del;
if (!plen)
tb->tb_num_default++;
@@ -1185,6 +1205,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
succeeded:
return 0;
+out_sw_fib_del:
+ netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
out_free_new_fa:
kmem_cache_free(fn_alias_kmem, new_fa);
out:
@@ -1456,6 +1478,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
if (!fa_to_delete)
return -ESRCH;
+ netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
+ cfg->fc_type, tb->tb_id);
+
rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
@@ -1536,6 +1561,67 @@ found:
return n;
}
+/* Walk every leaf of @tb's trie and ask the switch layer to delete each
+ * route whose fib_info carries RTNH_F_EXTERNAL. The aliases themselves
+ * are left in the trie; only netdev_switch_fib_ipv4_del() is called.
+ *
+ * Caller must hold RTNL
+ */
+void fib_table_flush_external(struct fib_table *tb)
+{
+ struct trie *t = (struct trie *)tb->tb_data;
+ struct fib_alias *fa;
+ struct tnode *n, *pn;
+ unsigned long cindex;
+
+ n = rcu_dereference(t->trie);
+ if (!n)
+ return;
+
+ pn = NULL;
+ cindex = 0;
+
+ while (IS_TNODE(n)) {
+ /* record pn and cindex for leaf walking */
+ pn = n;
+ cindex = 1ul << n->bits;
+backtrace:
+ /* walk trie in reverse order */
+ do {
+ while (!(cindex--)) {
+ t_key pkey = pn->key;
+
+ n = pn;
+ pn = node_parent(n);
+
+ /* resize completed node */
+ resize(t, n);
+
+ /* if we got the root we are done */
+ if (!pn)
+ return;
+
+ cindex = get_index(pkey, pn);
+ }
+
+ /* grab the next available node */
+ n = tnode_get_child(pn, cindex);
+ } while (!n);
+ }
+
+ hlist_for_each_entry(fa, &n->leaf, fa_list) {
+ struct fib_info *fi = fa->fa_info;
+
+ if (fi && (fi->fib_flags & RTNH_F_EXTERNAL)) {
+ netdev_switch_fib_ipv4_del(n->key,
+ KEYLENGTH - fa->fa_slen,
+ fi, fa->fa_tos,
+ fa->fa_type, tb->tb_id);
+ }
+ }
+
+ /* if trie is leaf only loop is completed */
+ if (pn)
+ goto backtrace;
+}
+
/* Caller must hold RTNL. */
int fib_table_flush(struct fib_table *tb)
{
@@ -1589,6 +1675,10 @@ backtrace:
struct fib_info *fi = fa->fa_info;
if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
+ netdev_switch_fib_ipv4_del(n->key,
+ KEYLENGTH - fa->fa_slen,
+ fi, fa->fa_tos,
+ fa->fa_type, tb->tb_id);
hlist_del_rcu(&fa->fa_list);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 8c1e558db118..f4fd575aa2a3 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -14,6 +14,7 @@
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
+#include <net/ip_fib.h>
#include <net/switchdev.h>
/**
@@ -225,3 +226,163 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev,
return ret;
}
EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink);
+
+/* Return @dev itself if it is a switch port device, otherwise the first
+ * switch port device found among its lower devices (depth first), or
+ * NULL when there is none.
+ */
+static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct net_device *lower_dev;
+ struct net_device *port_dev;
+ struct list_head *iter;
+
+ /* Recursively search down until we find a sw port dev.
+ * (A sw port dev supports ndo_switch_parent_id_get).
+ */
+
+ if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD &&
+ ops->ndo_switch_parent_id_get)
+ return dev;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ port_dev = netdev_switch_get_lowest_dev(lower_dev);
+ if (port_dev)
+ return port_dev;
+ }
+
+ return NULL;
+}
+
+/* Find the switch port device that should handle route @fi.
+ *
+ * Every nexthop must have a device that resolves to a switch port, and
+ * all of those ports must report the same parent ID. Returns the port
+ * device found for the last nexthop, or NULL if any check fails or the
+ * route has no nexthops.
+ */
+static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi)
+{
+ struct netdev_phys_item_id psid;
+ struct netdev_phys_item_id prev_psid;
+ struct net_device *dev = NULL;
+ int nhsel;
+
+ /* For this route, all nexthop devs must be on the same switch. */
+
+ for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
+ const struct fib_nh *nh = &fi->fib_nh[nhsel];
+
+ if (!nh->nh_dev)
+ return NULL;
+
+ dev = netdev_switch_get_lowest_dev(nh->nh_dev);
+ if (!dev)
+ return NULL;
+
+ if (netdev_switch_parent_id_get(dev, &psid))
+ return NULL;
+
+ /* prev_psid is only read from the second nexthop on, after
+ * the first iteration has stored into it.
+ */
+ if (nhsel > 0) {
+ if (prev_psid.id_len != psid.id_len)
+ return NULL;
+ if (memcmp(prev_psid.id, psid.id, psid.id_len))
+ return NULL;
+ }
+
+ prev_psid = psid;
+ }
+
+ return dev;
+}
+
+/**
+ * netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch
+ *
+ * @dst: route's IPv4 destination address
+ * @dst_len: destination address length (prefix length)
+ * @fi: route FIB info structure
+ * @tos: route TOS
+ * @type: route type
+ * @tb_id: route table ID
+ *
+ * Add IPv4 route entry to switch device. On success the route's
+ * fib_info is marked with RTNH_F_EXTERNAL.
+ *
+ * Return: 0 if the route was offloaded or offloading was skipped,
+ * otherwise the error returned by the device's ndo_switch_fib_ipv4_add.
+ */
+int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
+ u8 tos, u8 type, u32 tb_id)
+{
+ struct net_device *dev;
+ const struct net_device_ops *ops;
+ int err = 0;
+
+ /* Don't offload route if using custom ip rules or if
+ * IPv4 FIB offloading has been disabled completely.
+ */
+
+ if (fi->fib_net->ipv4.fib_has_custom_rules ||
+ fi->fib_net->ipv4.fib_offload_disabled)
+ return 0;
+
+ dev = netdev_switch_get_dev_by_nhs(fi);
+ if (!dev)
+ return 0;
+ ops = dev->netdev_ops;
+
+ if (ops->ndo_switch_fib_ipv4_add) {
+ /* @dst arrives in host byte order; the ndo takes __be32. */
+ err = ops->ndo_switch_fib_ipv4_add(dev, htonl(dst), dst_len,
+ fi, tos, type, tb_id);
+ if (!err)
+ fi->fib_flags |= RTNH_F_EXTERNAL;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(netdev_switch_fib_ipv4_add);
+
+/**
+ * netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch
+ *
+ * @dst: route's IPv4 destination address
+ * @dst_len: destination address length (prefix length)
+ * @fi: route FIB info structure
+ * @tos: route TOS
+ * @type: route type
+ * @tb_id: route table ID
+ *
+ * Delete IPv4 route entry from switch device. On success the
+ * RTNH_F_EXTERNAL mark is cleared from the route's fib_info.
+ *
+ * Return: 0 if the route was removed or was not offloaded,
+ * otherwise the error returned by the device's ndo_switch_fib_ipv4_del.
+ */
+int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
+ u8 tos, u8 type, u32 tb_id)
+{
+ struct net_device *dev;
+ const struct net_device_ops *ops;
+ int err = 0;
+
+ /* Only routes marked by the add path need to be removed. */
+ if (!(fi->fib_flags & RTNH_F_EXTERNAL))
+ return 0;
+
+ dev = netdev_switch_get_dev_by_nhs(fi);
+ if (!dev)
+ return 0;
+ ops = dev->netdev_ops;
+
+ if (ops->ndo_switch_fib_ipv4_del) {
+ err = ops->ndo_switch_fib_ipv4_del(dev, htonl(dst), dst_len,
+ fi, tos, type, tb_id);
+ if (!err)
+ fi->fib_flags &= ~RTNH_F_EXTERNAL;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(netdev_switch_fib_ipv4_del);
+
+/**
+ * netdev_switch_fib_ipv4_abort - Abort an IPv4 FIB operation
+ *
+ * @fi: route FIB info structure
+ *
+ * Flushes all externally installed IPv4 routes of @fi's net namespace
+ * and sets fib_offload_disabled for that namespace.
+ */
+void netdev_switch_fib_ipv4_abort(struct fib_info *fi)
+{
+ /* There was a problem installing this route to the offload
+ * device. For now, until we come up with more refined
+ * policy handling, abruptly end IPv4 fib offloading for
+ * the entire net by flushing offload device(s) of all
+ * IPv4 routes, and mark IPv4 fib offloading broken from
+ * this point forward.
+ */
+
+ fib_flush_external(fi->fib_net);
+ fi->fib_net->ipv4.fib_offload_disabled = true;
+}
+EXPORT_SYMBOL(netdev_switch_fib_ipv4_abort);