diff options
author | Sathya Perla <sathya.perla@broadcom.com> | 2017-10-26 18:51:29 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-10-27 18:02:45 +0300 |
commit | 8c95f773b4a367f7b9bcca7ab5f85675cfc812e9 (patch) | |
tree | 9bbb7846fc8e1e50c31e2a2746f902376c88ff50 | |
parent | f8503969d27b2b26ff0adbce4b7d7cf4ba5e43c2 (diff) | |
download | linux-8c95f773b4a367f7b9bcca7ab5f85675cfc812e9.tar.xz |
bnxt_en: add support for Flower based vxlan encap/decap offload
This patch adds IPv4 vxlan encap/decap action support to TC-flower
offload.
For vxlan encap, the driver maintains a tunnel encap hash-table.
When a new flow with a tunnel encap action arrives, this table
is looked up; if an encap entry exists, the driver uses the already
programmed encap_record_handle as the tunnel_handle in the
hwrm_cfa_flow_alloc cmd. Otherwise, a new encap node is added and the
L2 header fields are queried via a route lookup. The
hwrm_cfa_encap_record_alloc cmd is then used to create a new encap
record, and the resulting encap_record_handle is used as the
tunnel_handle while adding the flow.
For vxlan decap, the driver maintains a tunnel decap hash-table.
When a new flow with a tunnel decap action arrives, this table
is looked up; if a decap entry exists, the driver uses the already
programmed decap_filter_handle as the tunnel_handle in the
hwrm_cfa_flow_alloc cmd. Otherwise, a new decap node is added and
a decap_filter_handle is allocated via the hwrm_cfa_decap_filter_alloc
cmd. This handle is used as the tunnel_handle while adding the flow.
The code to issue the HWRM FW cmds is introduced in a follow-up patch.
Signed-off-by: Sathya Perla <sathya.perla@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/broadcom/bnxt/bnxt.h | 9 | ||||
-rw-r--r-- | drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 566 | ||||
-rw-r--r-- | drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h | 66 |
4 files changed, 631 insertions, 12 deletions
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 2188f1606209..d88d864db7d4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -965,6 +965,15 @@ struct bnxt_tc_info { /* hash table to store L2 keys of TC flows */ struct rhashtable l2_table; struct rhashtable_params l2_ht_params; + /* hash table to store L2 keys for TC tunnel decap */ + struct rhashtable decap_l2_table; + struct rhashtable_params decap_l2_ht_params; + /* hash table to store tunnel decap entries */ + struct rhashtable decap_table; + struct rhashtable_params decap_ht_params; + /* hash table to store tunnel encap entries */ + struct rhashtable encap_table; + struct rhashtable_params encap_ht_params; /* lock to atomically add/del an l2 node when a flow is * added or deleted. diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index f3f6aa868d6c..402fa32f7a88 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -29,7 +29,7 @@ int bnxt_dl_register(struct bnxt *bp) if (!pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV)) return 0; - if (bp->hwrm_spec_code < 0x10800) { + if (bp->hwrm_spec_code < 0x10803) { netdev_warn(bp->dev, "Firmware does not support SR-IOV E-Switch SWITCHDEV mode.\n"); return -ENOTSUPP; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index a9cb653b4d29..f14edc9c1412 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -16,6 +16,7 @@ #include <net/tc_act/tc_skbedit.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> +#include <net/tc_act/tc_tunnel_key.h> #include "bnxt_hsi.h" #include "bnxt.h" @@ -89,6 +90,23 @@ static void bnxt_tc_parse_vlan(struct bnxt *bp, } } +static int bnxt_tc_parse_tunnel_set(struct bnxt 
*bp, + struct bnxt_tc_actions *actions, + const struct tc_action *tc_act) +{ + struct ip_tunnel_info *tun_info = tcf_tunnel_info(tc_act); + struct ip_tunnel_key *tun_key = &tun_info->key; + + if (ip_tunnel_info_af(tun_info) != AF_INET) { + netdev_info(bp->dev, "only IPv4 tunnel-encap is supported"); + return -EOPNOTSUPP; + } + + actions->tun_encap_key = *tun_key; + actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP; + return 0; +} + static int bnxt_tc_parse_actions(struct bnxt *bp, struct bnxt_tc_actions *actions, struct tcf_exts *tc_exts) @@ -123,9 +141,35 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, bnxt_tc_parse_vlan(bp, actions, tc_act); continue; } + + /* Tunnel encap */ + if (is_tcf_tunnel_set(tc_act)) { + rc = bnxt_tc_parse_tunnel_set(bp, actions, tc_act); + if (rc) + return rc; + continue; + } + + /* Tunnel decap */ + if (is_tcf_tunnel_release(tc_act)) { + actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP; + continue; + } } - return 0; + if (rc) + return rc; + + /* Tunnel encap/decap action must be accompanied by a redirect action */ + if ((actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP || + actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) && + !(actions->flags & BNXT_TC_ACTION_FLAG_FWD)) { + netdev_info(bp->dev, + "error: no redir action along with encap/decap"); + return -EINVAL; + } + + return rc; } #define GET_KEY(flow_cmd, key_type) \ @@ -252,6 +296,54 @@ static int bnxt_tc_parse_flow(struct bnxt *bp, flow->l4_mask.icmp.code = mask->code; } + if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_dissector_key_control *key = + GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_CONTROL); + + addr_type = key->addr_type; + } + + if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_dissector_key_ipv4_addrs *key = + GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS); + struct flow_dissector_key_ipv4_addrs *mask = + GET_MASK(tc_flow_cmd, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS); + + 
flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS; + flow->tun_key.u.ipv4.dst = key->dst; + flow->tun_mask.u.ipv4.dst = mask->dst; + flow->tun_key.u.ipv4.src = key->src; + flow->tun_mask.u.ipv4.src = mask->src; + } else if (dissector_uses_key(dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + return -EOPNOTSUPP; + } + + if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *key = + GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_KEYID); + struct flow_dissector_key_keyid *mask = + GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_KEYID); + + flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID; + flow->tun_key.tun_id = key32_to_tunnel_id(key->keyid); + flow->tun_mask.tun_id = key32_to_tunnel_id(mask->keyid); + } + + if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + struct flow_dissector_key_ports *key = + GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_PORTS); + struct flow_dissector_key_ports *mask = + GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_PORTS); + + flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS; + flow->tun_key.tp_dst = key->dst; + flow->tun_mask.tp_dst = mask->dst; + flow->tun_key.tp_src = key->src; + flow->tun_mask.tp_src = mask->src; + } + return bnxt_tc_parse_actions(bp, &flow->actions, tc_flow_cmd->exts); } @@ -293,7 +385,8 @@ static bool is_wildcard(void *mask, int len) } static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, - __le16 ref_flow_handle, __le16 *flow_handle) + __le16 ref_flow_handle, + __le32 tunnel_handle, __le16 *flow_handle) { struct hwrm_cfa_flow_alloc_output *resp = bp->hwrm_cmd_resp_addr; struct bnxt_tc_actions *actions = &flow->actions; @@ -307,6 +400,14 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, req.src_fid = cpu_to_le16(flow->src_fid); req.ref_flow_handle = ref_flow_handle; + + if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP || + actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) { + req.tunnel_handle = 
tunnel_handle; + flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL; + action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL; + } + req.ethertype = flow->l2_key.ether_type; req.ip_proto = flow->l4_key.ip_proto; @@ -478,6 +579,35 @@ static int bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, return rc; } +static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp, + struct bnxt_tc_flow *flow, + struct bnxt_tc_l2_key *l2_info, + __le32 ref_decap_handle, + __le32 *decap_filter_handle) +{ + return 0; +} + +static int hwrm_cfa_decap_filter_free(struct bnxt *bp, + __le32 decap_filter_handle) +{ + return 0; +} + +static int hwrm_cfa_encap_record_alloc(struct bnxt *bp, + struct ip_tunnel_key *encap_key, + struct bnxt_tc_l2_key *l2_info, + __le32 *encap_record_handle) +{ + return 0; +} + +static int hwrm_cfa_encap_record_free(struct bnxt *bp, + __le32 encap_record_handle) +{ + return 0; +} + static int bnxt_tc_put_l2_node(struct bnxt *bp, struct bnxt_tc_flow_node *flow_node) { @@ -519,7 +649,7 @@ bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table, rc = rhashtable_insert_fast(l2_table, &l2_node->node, ht_params); if (rc) { - kfree(l2_node); + kfree_rcu(l2_node, rcu); netdev_err(bp->dev, "Error: %s: rhashtable_insert_fast: %d", __func__, rc); @@ -588,6 +718,376 @@ static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow) return true; } +/* Returns the final refcount of the node on success + * or a -ve error code on failure + */ +static int bnxt_tc_put_tunnel_node(struct bnxt *bp, + struct rhashtable *tunnel_table, + struct rhashtable_params *ht_params, + struct bnxt_tc_tunnel_node *tunnel_node) +{ + int rc; + + if (--tunnel_node->refcount == 0) { + rc = rhashtable_remove_fast(tunnel_table, &tunnel_node->node, + *ht_params); + if (rc) { + netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc); + rc = -1; + } + kfree_rcu(tunnel_node, rcu); + return rc; + } else { + return tunnel_node->refcount; + } +} + +/* Get (or add) either encap or decap tunnel node 
from/to the supplied + * hash table. + */ +static struct bnxt_tc_tunnel_node * +bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table, + struct rhashtable_params *ht_params, + struct ip_tunnel_key *tun_key) +{ + struct bnxt_tc_tunnel_node *tunnel_node; + int rc; + + tunnel_node = rhashtable_lookup_fast(tunnel_table, tun_key, *ht_params); + if (!tunnel_node) { + tunnel_node = kzalloc(sizeof(*tunnel_node), GFP_KERNEL); + if (!tunnel_node) { + rc = -ENOMEM; + goto err; + } + + tunnel_node->key = *tun_key; + tunnel_node->tunnel_handle = INVALID_TUNNEL_HANDLE; + rc = rhashtable_insert_fast(tunnel_table, &tunnel_node->node, + *ht_params); + if (rc) { + kfree_rcu(tunnel_node, rcu); + goto err; + } + } + tunnel_node->refcount++; + return tunnel_node; +err: + netdev_info(bp->dev, "error rc=%d", rc); + return NULL; +} + +static int bnxt_tc_get_ref_decap_handle(struct bnxt *bp, + struct bnxt_tc_flow *flow, + struct bnxt_tc_l2_key *l2_key, + struct bnxt_tc_flow_node *flow_node, + __le32 *ref_decap_handle) +{ + struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_flow_node *ref_flow_node; + struct bnxt_tc_l2_node *decap_l2_node; + + decap_l2_node = bnxt_tc_get_l2_node(bp, &tc_info->decap_l2_table, + tc_info->decap_l2_ht_params, + l2_key); + if (!decap_l2_node) + return -1; + + /* If any other flow is using this decap_l2_node, use it's decap_handle + * as the ref_decap_handle + */ + if (decap_l2_node->refcount > 0) { + ref_flow_node = + list_first_entry(&decap_l2_node->common_l2_flows, + struct bnxt_tc_flow_node, + decap_l2_list_node); + *ref_decap_handle = ref_flow_node->decap_node->tunnel_handle; + } else { + *ref_decap_handle = INVALID_TUNNEL_HANDLE; + } + + /* Insert the l2_node into the flow_node so that subsequent flows + * with a matching decap l2 key can use the decap_filter_handle of + * this flow as their ref_decap_handle + */ + flow_node->decap_l2_node = decap_l2_node; + list_add(&flow_node->decap_l2_list_node, + 
&decap_l2_node->common_l2_flows); + decap_l2_node->refcount++; + return 0; +} + +static void bnxt_tc_put_decap_l2_node(struct bnxt *bp, + struct bnxt_tc_flow_node *flow_node) +{ + struct bnxt_tc_l2_node *decap_l2_node = flow_node->decap_l2_node; + struct bnxt_tc_info *tc_info = &bp->tc_info; + int rc; + + /* remove flow_node from the decap L2 sharing flow list */ + list_del(&flow_node->decap_l2_list_node); + if (--decap_l2_node->refcount == 0) { + rc = rhashtable_remove_fast(&tc_info->decap_l2_table, + &decap_l2_node->node, + tc_info->decap_l2_ht_params); + if (rc) + netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc); + kfree_rcu(decap_l2_node, rcu); + } +} + +static void bnxt_tc_put_decap_handle(struct bnxt *bp, + struct bnxt_tc_flow_node *flow_node) +{ + __le32 decap_handle = flow_node->decap_node->tunnel_handle; + struct bnxt_tc_info *tc_info = &bp->tc_info; + int rc; + + if (flow_node->decap_l2_node) + bnxt_tc_put_decap_l2_node(bp, flow_node); + + rc = bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table, + &tc_info->decap_ht_params, + flow_node->decap_node); + if (!rc && decap_handle != INVALID_TUNNEL_HANDLE) + hwrm_cfa_decap_filter_free(bp, decap_handle); +} + +static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, + struct ip_tunnel_key *tun_key, + struct bnxt_tc_l2_key *l2_info, + struct net_device *real_dst_dev) +{ + struct flowi4 flow = { {0} }; + struct net_device *dst_dev; + struct neighbour *nbr; + struct rtable *rt; + int rc; + + flow.flowi4_proto = IPPROTO_UDP; + flow.fl4_dport = tun_key->tp_dst; + flow.daddr = tun_key->u.ipv4.dst; + + rt = ip_route_output_key(dev_net(real_dst_dev), &flow); + if (IS_ERR(rt)) { + netdev_info(bp->dev, "no route to %pI4b", &flow.daddr); + return -EOPNOTSUPP; + } + + /* The route must either point to the real_dst_dev or a dst_dev that + * uses the real_dst_dev. 
+ */ + dst_dev = rt->dst.dev; + if (is_vlan_dev(dst_dev)) { + struct vlan_dev_priv *vlan = vlan_dev_priv(dst_dev); + + if (vlan->real_dev != real_dst_dev) { + netdev_info(bp->dev, + "dst_dev(%s) doesn't use PF-if(%s)", + netdev_name(dst_dev), + netdev_name(real_dst_dev)); + rc = -EOPNOTSUPP; + goto put_rt; + } + l2_info->inner_vlan_tci = htons(vlan->vlan_id); + l2_info->inner_vlan_tpid = vlan->vlan_proto; + l2_info->num_vlans = 1; + } else if (dst_dev != real_dst_dev) { + netdev_info(bp->dev, + "dst_dev(%s) for %pI4b is not PF-if(%s)", + netdev_name(dst_dev), &flow.daddr, + netdev_name(real_dst_dev)); + rc = -EOPNOTSUPP; + goto put_rt; + } + + nbr = dst_neigh_lookup(&rt->dst, &flow.daddr); + if (!nbr) { + netdev_info(bp->dev, "can't lookup neighbor for %pI4b", + &flow.daddr); + rc = -EOPNOTSUPP; + goto put_rt; + } + + tun_key->u.ipv4.src = flow.saddr; + tun_key->ttl = ip4_dst_hoplimit(&rt->dst); + neigh_ha_snapshot(l2_info->dmac, nbr, dst_dev); + ether_addr_copy(l2_info->smac, dst_dev->dev_addr); + neigh_release(nbr); + ip_rt_put(rt); + + return 0; +put_rt: + ip_rt_put(rt); + return rc; +} + +static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, + struct bnxt_tc_flow_node *flow_node, + __le32 *decap_filter_handle) +{ + struct ip_tunnel_key *decap_key = &flow->tun_key; + struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_l2_key l2_info = { {0} }; + struct bnxt_tc_tunnel_node *decap_node; + struct ip_tunnel_key tun_key = { 0 }; + struct bnxt_tc_l2_key *decap_l2_info; + __le32 ref_decap_handle; + int rc; + + /* Check if there's another flow using the same tunnel decap. 
+ * If not, add this tunnel to the table and resolve the other + * tunnel header fileds + */ + decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table, + &tc_info->decap_ht_params, + decap_key); + if (!decap_node) + return -ENOMEM; + + flow_node->decap_node = decap_node; + + if (decap_node->tunnel_handle != INVALID_TUNNEL_HANDLE) + goto done; + + /* Resolve the L2 fields for tunnel decap + * Resolve the route for remote vtep (saddr) of the decap key + * Find it's next-hop mac addrs + */ + tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src; + tun_key.tp_dst = flow->tun_key.tp_dst; + rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info, bp->dev); + if (rc) + goto put_decap; + + decap_key->ttl = tun_key.ttl; + decap_l2_info = &decap_node->l2_info; + ether_addr_copy(decap_l2_info->dmac, l2_info.smac); + ether_addr_copy(decap_l2_info->smac, l2_info.dmac); + if (l2_info.num_vlans) { + decap_l2_info->num_vlans = l2_info.num_vlans; + decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid; + decap_l2_info->inner_vlan_tci = l2_info.inner_vlan_tci; + } + flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS; + + /* For getting a decap_filter_handle we first need to check if + * there are any other decap flows that share the same tunnel L2 + * key and if so, pass that flow's decap_filter_handle as the + * ref_decap_handle for this flow. 
+ */ + rc = bnxt_tc_get_ref_decap_handle(bp, flow, decap_l2_info, flow_node, + &ref_decap_handle); + if (rc) + goto put_decap; + + /* Issue the hwrm cmd to allocate a decap filter handle */ + rc = hwrm_cfa_decap_filter_alloc(bp, flow, decap_l2_info, + ref_decap_handle, + &decap_node->tunnel_handle); + if (rc) + goto put_decap_l2; + +done: + *decap_filter_handle = decap_node->tunnel_handle; + return 0; + +put_decap_l2: + bnxt_tc_put_decap_l2_node(bp, flow_node); +put_decap: + bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table, + &tc_info->decap_ht_params, + flow_node->decap_node); + return rc; +} + +static void bnxt_tc_put_encap_handle(struct bnxt *bp, + struct bnxt_tc_tunnel_node *encap_node) +{ + __le32 encap_handle = encap_node->tunnel_handle; + struct bnxt_tc_info *tc_info = &bp->tc_info; + int rc; + + rc = bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table, + &tc_info->encap_ht_params, encap_node); + if (!rc && encap_handle != INVALID_TUNNEL_HANDLE) + hwrm_cfa_encap_record_free(bp, encap_handle); +} + +/* Lookup the tunnel encap table and check if there's an encap_handle + * alloc'd already. + * If not, query L2 info via a route lookup and issue an encap_record_alloc + * cmd to FW. + */ +static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, + struct bnxt_tc_flow_node *flow_node, + __le32 *encap_handle) +{ + struct ip_tunnel_key *encap_key = &flow->actions.tun_encap_key; + struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_tunnel_node *encap_node; + int rc; + + /* Check if there's another flow using the same tunnel encap. 
+ * If not, add this tunnel to the table and resolve the other + * tunnel header fileds + */ + encap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->encap_table, + &tc_info->encap_ht_params, + encap_key); + if (!encap_node) + return -ENOMEM; + + flow_node->encap_node = encap_node; + + if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE) + goto done; + + rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info, + flow->actions.dst_dev); + if (rc) + goto put_encap; + + /* Allocate a new tunnel encap record */ + rc = hwrm_cfa_encap_record_alloc(bp, encap_key, &encap_node->l2_info, + &encap_node->tunnel_handle); + if (rc) + goto put_encap; + +done: + *encap_handle = encap_node->tunnel_handle; + return 0; + +put_encap: + bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table, + &tc_info->encap_ht_params, encap_node); + return rc; +} + +static void bnxt_tc_put_tunnel_handle(struct bnxt *bp, + struct bnxt_tc_flow *flow, + struct bnxt_tc_flow_node *flow_node) +{ + if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) + bnxt_tc_put_decap_handle(bp, flow_node); + else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) + bnxt_tc_put_encap_handle(bp, flow_node->encap_node); +} + +static int bnxt_tc_get_tunnel_handle(struct bnxt *bp, + struct bnxt_tc_flow *flow, + struct bnxt_tc_flow_node *flow_node, + __le32 *tunnel_handle) +{ + if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) + return bnxt_tc_get_decap_handle(bp, flow, flow_node, + tunnel_handle); + else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) + return bnxt_tc_get_encap_handle(bp, flow, flow_node, + tunnel_handle); + else + return 0; +} static int __bnxt_tc_del_flow(struct bnxt *bp, struct bnxt_tc_flow_node *flow_node) { @@ -599,6 +1099,9 @@ static int __bnxt_tc_del_flow(struct bnxt *bp, mutex_lock(&tc_info->lock); + /* release references to any tunnel encap/decap nodes */ + bnxt_tc_put_tunnel_handle(bp, &flow_node->flow, flow_node); + /* release reference to l2 node */ 
bnxt_tc_put_l2_node(bp, flow_node); @@ -633,6 +1136,7 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, struct bnxt_tc_flow_node *new_node, *old_node; struct bnxt_tc_info *tc_info = &bp->tc_info; struct bnxt_tc_flow *flow; + __le32 tunnel_handle = 0; __le16 ref_flow_handle; int rc; @@ -670,11 +1174,16 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, if (rc) goto unlock; + /* If the flow involves tunnel encap/decap, get tunnel_handle */ + rc = bnxt_tc_get_tunnel_handle(bp, flow, new_node, &tunnel_handle); + if (rc) + goto put_l2; + /* send HWRM cmd to alloc the flow */ rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle, - &new_node->flow_handle); + tunnel_handle, &new_node->flow_handle); if (rc) - goto put_l2; + goto put_tunnel; /* add new flow to flow-table */ rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node, @@ -687,12 +1196,14 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, hwrm_flow_free: bnxt_hwrm_cfa_flow_free(bp, new_node->flow_handle); +put_tunnel: + bnxt_tc_put_tunnel_handle(bp, flow, new_node); put_l2: bnxt_tc_put_l2_node(bp, new_node); unlock: mutex_unlock(&tc_info->lock); free_node: - kfree(new_node); + kfree_rcu(new_node, rcu); done: netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d", __func__, tc_flow_cmd->cookie, rc); @@ -781,6 +1292,20 @@ static const struct rhashtable_params bnxt_tc_l2_ht_params = { .automatic_shrinking = true }; +static const struct rhashtable_params bnxt_tc_decap_l2_ht_params = { + .head_offset = offsetof(struct bnxt_tc_l2_node, node), + .key_offset = offsetof(struct bnxt_tc_l2_node, key), + .key_len = BNXT_TC_L2_KEY_LEN, + .automatic_shrinking = true +}; + +static const struct rhashtable_params bnxt_tc_tunnel_ht_params = { + .head_offset = offsetof(struct bnxt_tc_tunnel_node, node), + .key_offset = offsetof(struct bnxt_tc_tunnel_node, key), + .key_len = sizeof(struct ip_tunnel_key), + .automatic_shrinking = true +}; + /* convert counter width in bits to a mask */ 
#define mask(width) ((u64)~0 >> (64 - (width))) @@ -789,7 +1314,7 @@ int bnxt_init_tc(struct bnxt *bp) struct bnxt_tc_info *tc_info = &bp->tc_info; int rc; - if (bp->hwrm_spec_code < 0x10800) { + if (bp->hwrm_spec_code < 0x10803) { netdev_warn(bp->dev, "Firmware does not support TC flower offload.\n"); return -ENOTSUPP; @@ -810,11 +1335,35 @@ int bnxt_init_tc(struct bnxt *bp) if (rc) goto destroy_flow_table; + tc_info->decap_l2_ht_params = bnxt_tc_decap_l2_ht_params; + rc = rhashtable_init(&tc_info->decap_l2_table, + &tc_info->decap_l2_ht_params); + if (rc) + goto destroy_l2_table; + + tc_info->decap_ht_params = bnxt_tc_tunnel_ht_params; + rc = rhashtable_init(&tc_info->decap_table, + &tc_info->decap_ht_params); + if (rc) + goto destroy_decap_l2_table; + + tc_info->encap_ht_params = bnxt_tc_tunnel_ht_params; + rc = rhashtable_init(&tc_info->encap_table, + &tc_info->encap_ht_params); + if (rc) + goto destroy_decap_table; + tc_info->enabled = true; bp->dev->hw_features |= NETIF_F_HW_TC; bp->dev->features |= NETIF_F_HW_TC; return 0; +destroy_decap_table: + rhashtable_destroy(&tc_info->decap_table); +destroy_decap_l2_table: + rhashtable_destroy(&tc_info->decap_l2_table); +destroy_l2_table: + rhashtable_destroy(&tc_info->l2_table); destroy_flow_table: rhashtable_destroy(&tc_info->flow_table); return rc; @@ -829,4 +1378,7 @@ void bnxt_shutdown_tc(struct bnxt *bp) rhashtable_destroy(&tc_info->flow_table); rhashtable_destroy(&tc_info->l2_table); + rhashtable_destroy(&tc_info->decap_l2_table); + rhashtable_destroy(&tc_info->decap_table); + rhashtable_destroy(&tc_info->encap_table); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h index 6c4c1ed279ef..2beccd41c886 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h @@ -12,6 +12,8 @@ #ifdef CONFIG_BNXT_FLOWER_OFFLOAD +#include <net/ip_tunnels.h> + /* Structs used for storing the filter/actions of the TC cmd. 
*/ struct bnxt_tc_l2_key { @@ -50,6 +52,13 @@ struct bnxt_tc_l4_key { }; }; +struct bnxt_tc_tunnel_key { + struct bnxt_tc_l2_key l2; + struct bnxt_tc_l3_key l3; + struct bnxt_tc_l4_key l4; + __be32 id; +}; + struct bnxt_tc_actions { u32 flags; #define BNXT_TC_ACTION_FLAG_FWD BIT(0) @@ -57,11 +66,16 @@ struct bnxt_tc_actions { #define BNXT_TC_ACTION_FLAG_PUSH_VLAN BIT(3) #define BNXT_TC_ACTION_FLAG_POP_VLAN BIT(4) #define BNXT_TC_ACTION_FLAG_DROP BIT(5) +#define BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP BIT(6) +#define BNXT_TC_ACTION_FLAG_TUNNEL_DECAP BIT(7) u16 dst_fid; struct net_device *dst_dev; __be16 push_vlan_tpid; __be16 push_vlan_tci; + + /* tunnel encap */ + struct ip_tunnel_key tun_encap_key; }; struct bnxt_tc_flow_stats { @@ -76,6 +90,16 @@ struct bnxt_tc_flow { #define BNXT_TC_FLOW_FLAGS_IPV6_ADDRS BIT(3) #define BNXT_TC_FLOW_FLAGS_PORTS BIT(4) #define BNXT_TC_FLOW_FLAGS_ICMP BIT(5) +#define BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS BIT(6) +#define BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS BIT(7) +#define BNXT_TC_FLOW_FLAGS_TUNL_IPV6_ADDRS BIT(8) +#define BNXT_TC_FLOW_FLAGS_TUNL_PORTS BIT(9) +#define BNXT_TC_FLOW_FLAGS_TUNL_ID BIT(10) +#define BNXT_TC_FLOW_FLAGS_TUNNEL (BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS | \ + BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS | \ + BNXT_TC_FLOW_FLAGS_TUNL_IPV6_ADDRS |\ + BNXT_TC_FLOW_FLAGS_TUNL_PORTS |\ + BNXT_TC_FLOW_FLAGS_TUNL_ID) /* flow applicable to pkts ingressing on this fid */ u16 src_fid; @@ -85,6 +109,8 @@ struct bnxt_tc_flow { struct bnxt_tc_l3_key l3_mask; struct bnxt_tc_l4_key l4_key; struct bnxt_tc_l4_key l4_mask; + struct ip_tunnel_key tun_key; + struct ip_tunnel_key tun_mask; struct bnxt_tc_actions actions; @@ -95,11 +121,33 @@ struct bnxt_tc_flow { unsigned long lastused; /* jiffies */ }; +/* Tunnel encap/decap hash table + * This table is used to maintain a list of flows that use + * the same tunnel encap/decap params (ip_daddrs, vni, udp_dport) + * and the FW returned handle. 
+ * A separate table is maintained for encap and decap + */ +struct bnxt_tc_tunnel_node { + struct ip_tunnel_key key; + struct rhash_head node; + + /* tunnel l2 info */ + struct bnxt_tc_l2_key l2_info; + +#define INVALID_TUNNEL_HANDLE cpu_to_le32(0xffffffff) + /* tunnel handle returned by FW */ + __le32 tunnel_handle; + + u32 refcount; + struct rcu_head rcu; +}; + /* L2 hash table - * This data-struct is used for L2-flow table. - * The L2 part of a flow is stored in a hash table. + * The same data-struct is used for L2-flow table and L2-tunnel table. + * The L2 part of a flow or tunnel is stored in a hash table. * A flow that shares the same L2 key/mask with an - * already existing flow must refer to it's flow handle. + * already existing flow/tunnel must refer to it's flow handle or + * decap_filter_id respectively. */ struct bnxt_tc_l2_node { /* hash key: first 16b of key */ @@ -110,7 +158,7 @@ struct bnxt_tc_l2_node { /* a linked list of flows that share the same l2 key */ struct list_head common_l2_flows; - /* number of flows sharing the l2 key */ + /* number of flows/tunnels sharing the l2 key */ u16 refcount; struct rcu_head rcu; @@ -130,6 +178,16 @@ struct bnxt_tc_flow_node { /* for the shared_flows list maintained in l2_node */ struct list_head l2_list_node; + /* tunnel encap related */ + struct bnxt_tc_tunnel_node *encap_node; + + /* tunnel decap related */ + struct bnxt_tc_tunnel_node *decap_node; + /* L2 node in tunnel-l2 hashtable that shares flow's tunnel l2 key */ + struct bnxt_tc_l2_node *decap_l2_node; + /* for the shared_flows list maintained in tunnel decap l2_node */ + struct list_head decap_l2_list_node; + struct rcu_head rcu; }; |