diff options
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 225 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 39 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 282 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 23 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 14 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 2 | ||||
-rw-r--r-- | include/linux/skbuff.h | 6 | ||||
-rw-r--r-- | include/net/act_api.h | 2 | ||||
-rw-r--r-- | include/net/flow_offload.h | 5 | ||||
-rw-r--r-- | include/net/pkt_cls.h | 34 | ||||
-rw-r--r-- | include/net/sch_generic.h | 2 | ||||
-rw-r--r-- | net/openvswitch/flow.c | 3 | ||||
-rw-r--r-- | net/sched/act_api.c | 28 | ||||
-rw-r--r-- | net/sched/cls_api.c | 243 | ||||
-rw-r--r-- | net/sched/cls_flower.c | 73 |
20 files changed, 636 insertions, 359 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 26685fd0fdaa..bb1d7b039a7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -85,7 +85,7 @@ config MLX5_BRIDGE config MLX5_CLS_ACT bool "MLX5 TC classifier action support" - depends on MLX5_ESWITCH && NET_CLS_ACT + depends on MLX5_ESWITCH && NET_CLS_ACT && NET_TC_SKB_EXT default y help mlx5 ConnectX offloads support for TC classifier action (NET_CLS_ACT), @@ -100,7 +100,7 @@ config MLX5_CLS_ACT config MLX5_TC_CT bool "MLX5 TC connection tracking offload support" - depends on MLX5_CLS_ACT && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT + depends on MLX5_CLS_ACT && NF_FLOW_TABLE && NET_ACT_CT default y help Say Y here if you want to support offloading connection tracking rules diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 3b590cfe33b8..e24b46953542 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2020 Mellanox Technologies. */ -#include <net/dst_metadata.h> #include <linux/netdevice.h> #include <linux/if_macvlan.h> #include <linux/list.h> @@ -665,232 +664,54 @@ void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) mlx5e_rep_indr_block_unbind); } -static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, - struct mlx5e_tc_update_priv *tc_priv, - u32 tunnel_id) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct tunnel_match_enc_opts enc_opts = {}; - struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *uplink_rpriv; - struct metadata_dst *tun_dst; - struct tunnel_match_key key; - u32 tun_id, enc_opts_id; - struct net_device *dev; - int err; - - enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; - tun_id = tunnel_id >> ENC_OPTS_BITS; - - if (!tun_id) - return true; - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - uplink_priv = &uplink_rpriv->uplink_priv; - - err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); - if (err) { - netdev_dbg(priv->netdev, - "Couldn't find tunnel for tun_id: %d, err: %d\n", - tun_id, err); - return false; - } - - if (enc_opts_id) { - err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, - enc_opts_id, &enc_opts); - if (err) { - netdev_dbg(priv->netdev, - "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", - enc_opts_id, err); - return false; - } - } - - if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst, - key.enc_ip.tos, key.enc_ip.ttl, - key.enc_tp.dst, TUNNEL_KEY, - key32_to_tunnel_id(key.enc_key_id.keyid), - enc_opts.key.len); - } else if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst, - key.enc_ip.tos, key.enc_ip.ttl, - key.enc_tp.dst, 0, TUNNEL_KEY, - key32_to_tunnel_id(key.enc_key_id.keyid), - enc_opts.key.len); - } else { - netdev_dbg(priv->netdev, - "Couldn't restore tunnel, unsupported addr_type: %d\n", - key.enc_control.addr_type); - return false; - } - - if (!tun_dst) { - netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n"); - return false; - } - - tun_dst->u.tun_info.key.tp_src = key.enc_tp.src; - - if (enc_opts.key.len) - ip_tunnel_info_opts_set(&tun_dst->u.tun_info, - enc_opts.key.data, - enc_opts.key.len, - enc_opts.key.dst_opt_type); - - skb_dst_set(skb, (struct dst_entry *)tun_dst); - dev = dev_get_by_index(&init_net, key.filter_ifindex); - if (!dev) { - netdev_dbg(priv->netdev, - "Couldn't find tunnel device with ifindex: %d\n", - key.filter_ifindex); - return false; - } - - /* Set fwd_dev so we do dev_put() after datapath */ - tc_priv->fwd_dev = dev; - - skb->dev = dev; - - return true; -} - -static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1, - struct mlx5e_tc_update_priv *tc_priv) -{ - struct mlx5e_priv *priv = netdev_priv(skb->dev); - u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK; - -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - if (chain) { - struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *uplink_rpriv; - struct tc_skb_ext *tc_skb_ext; - struct mlx5_eswitch *esw; - u32 zone_restore_id; - - tc_skb_ext = tc_skb_ext_alloc(skb); - if (!tc_skb_ext) { - WARN_ON(1); - return false; - } - tc_skb_ext->chain = chain; - zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK; - esw = priv->mdev->priv.eswitch; - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - uplink_priv = &uplink_rpriv->uplink_priv; - if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb, - zone_restore_id)) - return false; - } -#endif /* CONFIG_NET_TC_SKB_EXT */ - - return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); -} - -static void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) -{ - if (tc_priv->fwd_dev) - dev_put(tc_priv->fwd_dev); -} - -static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb, - struct mlx5_mapped_obj *mapped_obj, - struct mlx5e_tc_update_priv *tc_priv) -{ - if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) { - netdev_dbg(priv->netdev, - "Failed to restore tunnel info for sampled packet\n"); - return; - } - mlx5e_tc_sample_skb(skb, mapped_obj); - mlx5_rep_tc_post_napi_receive(tc_priv); -} - -static bool mlx5e_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb, - struct mlx5_mapped_obj *mapped_obj, - struct mlx5e_tc_update_priv *tc_priv, - bool *forward_tx, - u32 reg_c1) -{ - u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *uplink_rpriv; - - /* Tunnel restore takes precedence over int port restore */ - if (tunnel_id) - return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - uplink_priv = &uplink_rpriv->uplink_priv; - - if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb, - mapped_obj->int_port_metadata, forward_tx)) { - /* Set fwd_dev for future dev_put */ - tc_priv->fwd_dev = skb->dev; - - return true; - } - - return false; -} - void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, struct sk_buff *skb) { - u32 reg_c1 = be32_to_cpu(cqe->ft_metadata); + u32 reg_c0, reg_c1, zone_restore_id, tunnel_id; struct mlx5e_tc_update_priv tc_priv = {}; - struct mlx5_mapped_obj mapped_obj; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5_tc_ct_priv *ct_priv; + struct mapping_ctx *mapping_ctx; struct mlx5_eswitch *esw; - bool forward_tx = false; struct mlx5e_priv *priv; - u32 reg_c0; - int err; reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) goto forward; - /* If reg_c0 is not equal to the default flow tag then skb->mark + /* If mapped_obj_id is not equal to the default flow tag then skb->mark * is not supported and must be reset back to 0. */ skb->mark = 0; priv = netdev_priv(skb->dev); esw = priv->mdev->priv.eswitch; - err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj); - if (err) { - netdev_dbg(priv->netdev, - "Couldn't find mapped object for reg_c0: %d, err: %d\n", - reg_c0, err); - goto free_skb; - } + mapping_ctx = esw->offloads.reg_c0_obj_pool; + reg_c1 = be32_to_cpu(cqe->ft_metadata); + zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK; + tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK; - if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) { - if (!mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, &tc_priv) && - !mlx5_ipsec_is_rx_flow(cqe)) - goto free_skb; - } else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) { - mlx5e_restore_skb_sample(priv, skb, &mapped_obj, &tc_priv); - goto free_skb; - } else if (mapped_obj.type == MLX5_MAPPED_OBJ_INT_PORT_METADATA) { - if (!mlx5e_restore_skb_int_port(priv, skb, &mapped_obj, &tc_priv, - &forward_tx, reg_c1)) - goto free_skb; - } else { - netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + ct_priv = uplink_priv->ct_priv; + + if (!mlx5_ipsec_is_rx_flow(cqe) && + !mlx5e_tc_update_skb(cqe, skb, mapping_ctx, reg_c0, ct_priv, zone_restore_id, tunnel_id, + &tc_priv)) goto free_skb; - } forward: - if (forward_tx) + if (tc_priv.skb_done) + goto free_skb; + + if (tc_priv.forward_tx) dev_queue_xmit(skb); else napi_gro_receive(rq->cq.napi, skb); - mlx5_rep_tc_post_napi_receive(&tc_priv); + if (tc_priv.fwd_dev) + dev_put(tc_priv.fwd_dev); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c index f2c2c752bd1c..558a776359af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -237,7 +237,7 @@ sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id, int err; err = mlx5e_tc_match_to_reg_set(mdev, mod_acts, MLX5_FLOW_NAMESPACE_FDB, - CHAIN_TO_REG, obj_id); + MAPPED_OBJ_TO_REG, obj_id); if (err) goto err_set_regc0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 193562c14c44..5c58ec279b10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -59,6 +59,7 @@ struct mlx5_tc_ct_debugfs { struct mlx5_tc_ct_priv { struct mlx5_core_dev *dev; + struct mlx5e_priv *priv; const struct net_device *netdev; struct mod_hdr_tbl *mod_hdr_tbl; struct xarray tuple_ids; @@ -85,7 +86,6 @@ struct mlx5_ct_flow { struct mlx5_flow_attr *pre_ct_attr; struct mlx5_flow_handle *pre_ct_rule; struct mlx5_ct_ft *ft; - u32 chain_mapping; }; struct mlx5_ct_zone_rule { @@ -1445,6 +1445,7 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, attr->ct_attr.zone = act->ct.zone; attr->ct_attr.ct_action = act->ct.action; attr->ct_attr.nf_ft = act->ct.flow_table; + attr->ct_attr.act_miss_cookie = act->miss_cookie; return 0; } @@ -1782,7 +1783,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * + ft prio (tc chain) + * + original match + * +---------------------+ - * | set chain miss mapping + * | set act_miss_cookie mapping * | set fte_id * | set tunnel_id * | do decap @@ -1827,7 +1828,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_flow_attr *pre_ct_attr; struct mlx5_modify_hdr *mod_hdr; struct mlx5_ct_flow *ct_flow; - int chain_mapping = 0, err; + int act_miss_mapping = 0, err; struct mlx5_ct_ft *ft; u16 zone; @@ -1862,22 +1863,18 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - /* Write chain miss tag for miss in ct table as we - * don't go though all prios of this chain as normal tc rules - * miss. - */ - err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain, - &chain_mapping); + err = mlx5e_tc_action_miss_mapping_get(ct_priv->priv, attr, attr->ct_attr.act_miss_cookie, + &act_miss_mapping); if (err) { - ct_dbg("Failed to get chain register mapping for chain"); - goto err_get_chain; + ct_dbg("Failed to get register mapping for act miss"); + goto err_get_act_miss; } - ct_flow->chain_mapping = chain_mapping; + attr->ct_attr.act_miss_mapping = act_miss_mapping; err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type, - CHAIN_TO_REG, chain_mapping); + MAPPED_OBJ_TO_REG, act_miss_mapping); if (err) { - ct_dbg("Failed to set chain register mapping"); + ct_dbg("Failed to set act miss register mapping"); goto err_mapping; } @@ -1941,8 +1938,8 @@ err_insert_orig: mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); err_mapping: mlx5e_mod_hdr_dealloc(pre_mod_acts); - mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); -err_get_chain: + mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, act_miss_mapping); +err_get_act_miss: kfree(ct_flow->pre_ct_attr); err_alloc_pre: mlx5_tc_ct_del_ft_cb(ct_priv, ft); @@ -1981,7 +1978,7 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr); mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); - mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); + mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, attr->ct_attr.act_miss_mapping); mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); kfree(ct_flow->pre_ct_attr); @@ -2078,13 +2075,6 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, const char *err_msg = NULL; int err = 0; -#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - /* cannot restore chain ID on HW miss */ - - err_msg = "tc skb extension missing"; - err = -EOPNOTSUPP; - goto out_err; -#endif if (IS_ERR_OR_NULL(post_act)) { /* Ignore_flow_level support isn't supported by default for VFs and so post_act * won't be supported. Skip showing error msg. @@ -2161,6 +2151,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, } spin_lock_init(&ct_priv->ht_lock); + ct_priv->priv = priv; ct_priv->ns_type = ns_type; ct_priv->chains = chains; ct_priv->netdev = priv->netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 5bbd6b92840f..5c5ddaa83055 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -28,6 +28,8 @@ struct mlx5_ct_attr { struct mlx5_ct_flow *ct_flow; struct nf_flowtable *nf_ft; u32 ct_labels_id; + u32 act_miss_mapping; + u64 act_miss_cookie; }; #define zone_to_reg_ct {\ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index b2c7ec4692f0..2e3445d74539 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1792,7 +1792,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); if (mlx5e_cqe_regb_chain(cqe)) - if (!mlx5e_tc_update_skb(cqe, skb)) { + if (!mlx5e_tc_update_skb_nic(cqe, skb)) { dev_kfree_skb_any(skb); goto free_wqe; } @@ -2259,7 +2259,7 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); if (mlx5e_cqe_regb_chain(cqe)) - if (!mlx5e_tc_update_skb(cqe, skb)) { + if (!mlx5e_tc_update_skb_nic(cqe, skb)) { dev_kfree_skb_any(skb); goto mpwrq_cqe_out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9bbd31e304be..e34d9b5fb504 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -43,6 +43,7 @@ #include <net/ipv6_stubs.h> #include <net/bareudp.h> #include <net/bonding.h> +#include <net/dst_metadata.h> #include "en.h" #include "en/tc/post_act.h" #include "en/tc/act_stats.h" @@ -108,7 +109,7 @@ struct mlx5e_tc_table { }; struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { - [CHAIN_TO_REG] = { + [MAPPED_OBJ_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, .moffset = 0, .mlen = 16, @@ -135,7 +136,7 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { * into reg_b that is passed to SW since we don't * jump between steering domains. */ - [NIC_CHAIN_TO_REG] = { + [NIC_MAPPED_OBJ_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B, .moffset = 0, .mlen = 16, @@ -1604,7 +1605,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, goto err_get_chain; err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, - CHAIN_TO_REG, chain_mapping); + MAPPED_OBJ_TO_REG, chain_mapping); if (err) goto err_reg_set; @@ -3815,6 +3816,7 @@ mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr, attr2->parse_attr = parse_attr; attr2->dest_chain = 0; attr2->dest_ft = NULL; + attr2->act_id_restore_rule = NULL; if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { attr2->esw_attr->out_count = 0; @@ -4176,7 +4178,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, parse_state->actions |= attr->action; if (!tc_act->stats_action) - attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->act_cookie; + attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie; /* Split attr for multi table act if not the last act. */ if (jump_state.jump_target || @@ -5604,48 +5606,268 @@ int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, - struct sk_buff *skb) +static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv, + u32 tunnel_id) { -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - u32 chain = 0, chain_tag, reg_b, zone_restore_id; - struct mlx5e_priv *priv = netdev_priv(skb->dev); - struct mlx5_mapped_obj mapped_obj; - struct tc_skb_ext *tc_skb_ext; - struct mlx5e_tc_table *tc; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct tunnel_match_enc_opts enc_opts = {}; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct metadata_dst *tun_dst; + struct tunnel_match_key key; + u32 tun_id, enc_opts_id; + struct net_device *dev; int err; - reg_b = be32_to_cpu(cqe->ft_metadata); - tc = mlx5e_fs_get_tc(priv->fs); - chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; + enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; + tun_id = tunnel_id >> ENC_OPTS_BITS; + + if (!tun_id) + return true; - err = mapping_find(tc->mapping, chain_tag, &mapped_obj); + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); if (err) { netdev_dbg(priv->netdev, - "Couldn't find chain for chain tag: %d, err: %d\n", - chain_tag, err); + "Couldn't find tunnel for tun_id: %d, err: %d\n", + tun_id, err); return false; } - if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) { - chain = mapped_obj.chain; - tc_skb_ext = tc_skb_ext_alloc(skb); - if (WARN_ON(!tc_skb_ext)) + if (enc_opts_id) { + err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id, &enc_opts); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", + enc_opts_id, err); return false; + } + } + + switch (key.enc_control.addr_type) { + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst, + key.enc_ip.tos, key.enc_ip.ttl, + key.enc_tp.dst, TUNNEL_KEY, + key32_to_tunnel_id(key.enc_key_id.keyid), + enc_opts.key.len); + break; + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: + tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst, + key.enc_ip.tos, key.enc_ip.ttl, + key.enc_tp.dst, 0, TUNNEL_KEY, + key32_to_tunnel_id(key.enc_key_id.keyid), + enc_opts.key.len); + break; + default: + netdev_dbg(priv->netdev, + "Couldn't restore tunnel, unsupported addr_type: %d\n", + key.enc_control.addr_type); + return false; + } + + if (!tun_dst) { + netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n"); + return false; + } + + tun_dst->u.tun_info.key.tp_src = key.enc_tp.src; + + if (enc_opts.key.len) + ip_tunnel_info_opts_set(&tun_dst->u.tun_info, + enc_opts.key.data, + enc_opts.key.len, + enc_opts.key.dst_opt_type); - tc_skb_ext->chain = chain; + skb_dst_set(skb, (struct dst_entry *)tun_dst); + dev = dev_get_by_index(&init_net, key.filter_ifindex); + if (!dev) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel device with ifindex: %d\n", + key.filter_ifindex); + return false; + } + + /* Set fwd_dev so we do dev_put() after datapath */ + tc_priv->fwd_dev = dev; - zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) & - ESW_ZONE_ID_MASK; + skb->dev = dev; - if (!mlx5e_tc_ct_restore_flow(tc->ct, skb, - zone_restore_id)) + return true; +} + +static bool mlx5e_tc_restore_skb_tc_meta(struct sk_buff *skb, struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_mapped_obj *mapped_obj, u32 zone_restore_id, + u32 tunnel_id, struct mlx5e_tc_update_priv *tc_priv) +{ + struct mlx5e_priv *priv = netdev_priv(skb->dev); + struct tc_skb_ext *tc_skb_ext; + u64 act_miss_cookie; + u32 chain; + + chain = mapped_obj->type == MLX5_MAPPED_OBJ_CHAIN ? mapped_obj->chain : 0; + act_miss_cookie = mapped_obj->type == MLX5_MAPPED_OBJ_ACT_MISS ? + mapped_obj->act_miss_cookie : 0; + if (chain || act_miss_cookie) { + if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id)) return false; - } else { + + tc_skb_ext = tc_skb_ext_alloc(skb); + if (!tc_skb_ext) { + WARN_ON(1); + return false; + } + + if (act_miss_cookie) { + tc_skb_ext->act_miss_cookie = act_miss_cookie; + tc_skb_ext->act_miss = 1; + } else { + tc_skb_ext->chain = chain; + } + } + + if (tc_priv) + return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id); + + return true; +} + +static void mlx5e_tc_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_mapped_obj *mapped_obj, + struct mlx5e_tc_update_priv *tc_priv) +{ + if (!mlx5e_tc_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) { + netdev_dbg(priv->netdev, + "Failed to restore tunnel info for sampled packet\n"); + return; + } + mlx5e_tc_sample_skb(skb, mapped_obj); +} + +static bool mlx5e_tc_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_mapped_obj *mapped_obj, + struct mlx5e_tc_update_priv *tc_priv, + u32 tunnel_id) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + bool forward_tx = false; + + /* Tunnel restore takes precedence over int port restore */ + if (tunnel_id) + return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id); + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb, + mapped_obj->int_port_metadata, &forward_tx)) { + /* Set fwd_dev for future dev_put */ + tc_priv->fwd_dev = skb->dev; + tc_priv->forward_tx = forward_tx; + + return true; + } + + return false; +} + +bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, + struct mapping_ctx *mapping_ctx, u32 mapped_obj_id, + struct mlx5_tc_ct_priv *ct_priv, + u32 zone_restore_id, u32 tunnel_id, + struct mlx5e_tc_update_priv *tc_priv) +{ + struct mlx5e_priv *priv = netdev_priv(skb->dev); + struct mlx5_mapped_obj mapped_obj; + int err; + + err = mapping_find(mapping_ctx, mapped_obj_id, &mapped_obj); + if (err) { + netdev_dbg(skb->dev, + "Couldn't find mapped object for mapped_obj_id: %d, err: %d\n", + mapped_obj_id, err); + return false; + } + + switch (mapped_obj.type) { + case MLX5_MAPPED_OBJ_CHAIN: + case MLX5_MAPPED_OBJ_ACT_MISS: + return mlx5e_tc_restore_skb_tc_meta(skb, ct_priv, &mapped_obj, zone_restore_id, + tunnel_id, tc_priv); + case MLX5_MAPPED_OBJ_SAMPLE: + mlx5e_tc_restore_skb_sample(priv, skb, &mapped_obj, tc_priv); + tc_priv->skb_done = true; + return true; + case MLX5_MAPPED_OBJ_INT_PORT_METADATA: + return mlx5e_tc_restore_skb_int_port(priv, skb, &mapped_obj, tc_priv, tunnel_id); + default: netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); return false; } -#endif /* CONFIG_NET_TC_SKB_EXT */ - return true; + return false; +} + +bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb) +{ + struct mlx5e_priv *priv = netdev_priv(skb->dev); + u32 mapped_obj_id, reg_b, zone_restore_id; + struct mlx5_tc_ct_priv *ct_priv; + struct mapping_ctx *mapping_ctx; + struct mlx5e_tc_table *tc; + + reg_b = be32_to_cpu(cqe->ft_metadata); + tc = mlx5e_fs_get_tc(priv->fs); + mapped_obj_id = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; + zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) & + ESW_ZONE_ID_MASK; + ct_priv = tc->ct; + mapping_ctx = tc->mapping; + + return mlx5e_tc_update_skb(cqe, skb, mapping_ctx, mapped_obj_id, ct_priv, zone_restore_id, + 0, NULL); +} + +int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, + u64 act_miss_cookie, u32 *act_miss_mapping) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_mapped_obj mapped_obj = {}; + struct mapping_ctx *ctx; + int err; + + ctx = esw->offloads.reg_c0_obj_pool; + + mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS; + mapped_obj.act_miss_cookie = act_miss_cookie; + err = mapping_add(ctx, &mapped_obj, act_miss_mapping); + if (err) + return err; + + attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping); + if (IS_ERR(attr->act_id_restore_rule)) + goto err_rule; + + return 0; + +err_rule: + mapping_remove(ctx, *act_miss_mapping); + return err; +} + +void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, + u32 act_miss_mapping) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mapping_ctx *ctx; + + ctx = esw->offloads.reg_c0_obj_pool; + mlx5_del_flow_rules(attr->act_id_restore_rule); + mapping_remove(ctx, act_miss_mapping); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index f6b10bd3368b..adb39e30f90f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -59,6 +59,8 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags); struct mlx5e_tc_update_priv { struct net_device *fwd_dev; + bool skb_done; + bool forward_tx; }; struct mlx5_nic_flow_attr { @@ -95,6 +97,7 @@ struct mlx5_flow_attr { struct mlx5_flow_attr *branch_true; struct mlx5_flow_attr *branch_false; struct mlx5_flow_attr *jumping_attr; + struct mlx5_flow_handle *act_id_restore_rule; /* keep this union last */ union { DECLARE_FLEX_ARRAY(struct mlx5_esw_flow_attr, esw_attr); @@ -225,7 +228,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); void mlx5e_tc_reoffload_flows_work(struct work_struct *work); enum mlx5e_tc_attr_to_reg { - CHAIN_TO_REG, + MAPPED_OBJ_TO_REG, VPORT_TO_REG, TUNNEL_TO_REG, CTSTATE_TO_REG, @@ -234,7 +237,7 @@ enum mlx5e_tc_attr_to_reg { MARK_TO_REG, LABELS_TO_REG, FTEID_TO_REG, - NIC_CHAIN_TO_REG, + NIC_MAPPED_OBJ_TO_REG, NIC_ZONE_RESTORE_TO_REG, PACKET_COLOR_TO_REG, }; @@ -368,7 +371,6 @@ struct mlx5e_tc_table *mlx5e_tc_table_alloc(void); void mlx5e_tc_table_free(struct mlx5e_tc_table *tc); static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) { -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) u32 chain, reg_b; reg_b = be32_to_cpu(cqe->ft_metadata); @@ -379,20 +381,29 @@ static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; if (chain) return true; -#endif return false; } -bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb); +bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb); +bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, + struct mapping_ctx *mapping_ctx, u32 mapped_obj_id, + struct mlx5_tc_ct_priv *ct_priv, + u32 zone_restore_id, u32 tunnel_id, + struct mlx5e_tc_update_priv *tc_priv); #else /* CONFIG_MLX5_CLS_ACT */ static inline struct mlx5e_tc_table *mlx5e_tc_table_alloc(void) { return NULL; } static inline void mlx5e_tc_table_free(struct mlx5e_tc_table *tc) {} static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) { return false; } static inline bool -mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb) +mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb) { return true; } #endif +int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, + u64 act_miss_cookie, u32 *act_miss_mapping); +void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, + u32 act_miss_mapping); + #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index fd03f076551b..19e9a77c4633 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -52,12 +52,14 @@ enum mlx5_mapped_obj_type { MLX5_MAPPED_OBJ_CHAIN, MLX5_MAPPED_OBJ_SAMPLE, MLX5_MAPPED_OBJ_INT_PORT_METADATA, + MLX5_MAPPED_OBJ_ACT_MISS, }; struct mlx5_mapped_obj { enum mlx5_mapped_obj_type type; union { u32 chain; + u64 act_miss_cookie; struct { u32 group_id; u32 rate; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index df58cba37930..81ed91fee59b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -214,7 +214,7 @@ create_chain_restore(struct fs_chain *chain) struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch; u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; struct mlx5_fs_chains *chains = chain->chains; - enum mlx5e_tc_attr_to_reg chain_to_reg; + enum mlx5e_tc_attr_to_reg mapped_obj_to_reg; struct mlx5_modify_hdr *mod_hdr; u32 index; int err; @@ -242,7 +242,7 @@ create_chain_restore(struct fs_chain *chain) chain->id = index; if (chains->ns == MLX5_FLOW_NAMESPACE_FDB) { - chain_to_reg = CHAIN_TO_REG; + mapped_obj_to_reg = MAPPED_OBJ_TO_REG; chain->restore_rule = esw_add_restore_rule(esw, chain->id); if (IS_ERR(chain->restore_rule)) { err = PTR_ERR(chain->restore_rule); @@ -253,7 +253,7 @@ create_chain_restore(struct fs_chain *chain) * since we write the metadata to reg_b * that is passed to SW directly. */ - chain_to_reg = NIC_CHAIN_TO_REG; + mapped_obj_to_reg = NIC_MAPPED_OBJ_TO_REG; } else { err = -EINVAL; goto err_rule; @@ -261,12 +261,12 @@ create_chain_restore(struct fs_chain *chain) MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, modact, field, - mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mfield); + mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mfield); MLX5_SET(set_action_in, modact, offset, - mlx5e_tc_attr_to_reg_mappings[chain_to_reg].moffset); + mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].moffset); MLX5_SET(set_action_in, modact, length, - mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen == 32 ? - 0 : mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen); + mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mlen == 32 ? + 0 : mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mlen); MLX5_SET(set_action_in, modact, data, chain->id); mod_hdr = mlx5_modify_header_alloc(chains->dev, chains->ns, 1, modact); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index e91fb205e0b4..594cdcb90b3d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -103,7 +103,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, } ingress = mlxsw_sp_flow_block_is_ingress_bound(block); err = mlxsw_sp_acl_rulei_act_drop(rulei, ingress, - act->cookie, extack); + act->user_cookie, extack); if (err) { NL_SET_ERR_MSG_MOD(extack, "Cannot append drop action"); return err; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d5602b15c714..ff7ad331fb82 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -319,12 +319,16 @@ struct nf_bridge_info { * and read by ovs to recirc_id. */ struct tc_skb_ext { - __u32 chain; + union { + u64 act_miss_cookie; + __u32 chain; + }; __u16 mru; __u16 zone; u8 post_ct:1; u8 post_ct_snat:1; u8 post_ct_dnat:1; + u8 act_miss:1; /* Set if act_miss_cookie is used */ }; #endif diff --git a/include/net/act_api.h b/include/net/act_api.h index 2a6f443f0ef6..4ae0580b63ca 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -39,7 +39,7 @@ struct tc_action { struct gnet_stats_basic_sync __percpu *cpu_bstats; struct gnet_stats_basic_sync __percpu *cpu_bstats_hw; struct gnet_stats_queue __percpu *cpu_qstats; - struct tc_cookie __rcu *act_cookie; + struct tc_cookie __rcu *user_cookie; struct tcf_chain __rcu *goto_chain; u32 tcfa_flags; u8 hw_stats; diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 8c05455b1e34..118082eae48c 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -228,7 +228,8 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie); struct flow_action_entry { enum flow_action_id id; u32 hw_index; - unsigned long act_cookie; + unsigned long cookie; + u64 miss_cookie; enum flow_action_hw_stats hw_stats; action_destr destructor; void *destructor_priv; @@ -321,7 +322,7 @@ struct flow_action_entry { u16 sid; } pppoe; }; - struct flow_action_cookie *cookie; /* user defined action cookie */ + struct flow_action_cookie *user_cookie; /* user defined action cookie */ }; struct flow_action { diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ace437c6754b..b3b5b0b62f16 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -59,6 +59,8 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, void tcf_block_put(struct tcf_block *block); void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei); +int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action, + int police, struct tcf_proto *tp, u32 handle, bool used_action_miss); static inline bool tcf_block_shared(struct tcf_block *block) { @@ -229,6 +231,7 @@ struct tcf_exts { struct tc_action **actions; struct net *net; netns_tracker ns_tracker; + struct tcf_exts_miss_cookie_node *miss_cookie_node; #endif /* Map to export classifier specific extension TLV types to the * generic extensions API. Unsupported extensions must be set to 0. @@ -240,21 +243,11 @@ struct tcf_exts { static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net, int action, int police) { -#ifdef CONFIG_NET_CLS_ACT - exts->type = 0; - exts->nr_actions = 0; - /* Note: we do not own yet a reference on net. - * This reference might be taken later from tcf_exts_get_net(). - */ - exts->net = net; - exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), - GFP_KERNEL); - if (!exts->actions) - return -ENOMEM; +#ifdef CONFIG_NET_CLS + return tcf_exts_init_ex(exts, net, action, police, NULL, 0, false); +#else + return -EOPNOTSUPP; #endif - exts->action = action; - exts->police = police; - return 0; } /* Return false if the netns is being destroyed in cleanup_net(). Callers @@ -360,6 +353,18 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, return TC_ACT_OK; } +static inline int +tcf_exts_exec_ex(struct sk_buff *skb, struct tcf_exts *exts, int act_index, + struct tcf_result *res) +{ +#ifdef CONFIG_NET_CLS_ACT + return tcf_action_exec(skb, exts->actions + act_index, + exts->nr_actions - act_index, res); +#else + return TC_ACT_OK; +#endif +} + int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, u32 flags, @@ -584,6 +589,7 @@ int tc_setup_offload_action(struct flow_action *flow_action, void tc_cleanup_offload_action(struct flow_action *flow_action); int tc_setup_action(struct flow_action *flow_action, struct tc_action *actions[], + u32 miss_cookie_base, struct netlink_ext_ack *extack); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index af4aa66aaa4e..fab5ba3e61b7 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -369,6 +369,8 @@ struct tcf_proto_ops { struct nlattr **tca, struct netlink_ext_ack *extack); void (*tmplt_destroy)(void *tmplt_priv); + struct tcf_exts * (*get_exts)(const struct tcf_proto *tp, + u32 handle); /* rtnetlink specific */ int (*dump)(struct net*, struct tcf_proto*, void *, diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 416976f70322..33b21a0c0548 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -1041,7 +1041,8 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) if (tc_skb_ext_tc_enabled()) { tc_ext = skb_ext_find(skb, TC_SKB_EXT); - key->recirc_id = tc_ext ? tc_ext->chain : 0; + key->recirc_id = tc_ext && !tc_ext->act_miss ? + tc_ext->chain : 0; OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0; post_ct = tc_ext ? tc_ext->post_ct : false; post_ct_snat = post_ct ? tc_ext->post_ct_snat : false; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index eda58b78da13..fce522886099 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -125,7 +125,7 @@ static void free_tcf(struct tc_action *p) free_percpu(p->cpu_bstats_hw); free_percpu(p->cpu_qstats); - tcf_set_action_cookie(&p->act_cookie, NULL); + tcf_set_action_cookie(&p->user_cookie, NULL); if (chain) tcf_chain_put_by_act(chain); @@ -268,7 +268,7 @@ static int tcf_action_offload_add_ex(struct tc_action *action, if (err) goto fl_err; - err = tc_setup_action(&fl_action->action, actions, extack); + err = tc_setup_action(&fl_action->action, actions, 0, extack); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to setup tc actions for offload"); @@ -431,14 +431,14 @@ EXPORT_SYMBOL(tcf_idr_release); static size_t tcf_action_shared_attrs_size(const struct tc_action *act) { - struct tc_cookie *act_cookie; + struct tc_cookie *user_cookie; u32 cookie_len = 0; rcu_read_lock(); - act_cookie = rcu_dereference(act->act_cookie); + user_cookie = rcu_dereference(act->user_cookie); - if (act_cookie) - cookie_len = nla_total_size(act_cookie->len); + if (user_cookie) + cookie_len = nla_total_size(user_cookie->len); rcu_read_unlock(); return nla_total_size(0) /* action number nested */ @@ -488,7 +488,7 @@ tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a, bool from_act) goto nla_put_failure; rcu_read_lock(); - cookie = rcu_dereference(a->act_cookie); + cookie = rcu_dereference(a->user_cookie); if (cookie) { if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) { rcu_read_unlock(); @@ -1362,9 +1362,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, { bool police = flags & TCA_ACT_FLAGS_POLICE; struct nla_bitfield32 userflags = { 0, 0 }; + struct tc_cookie *user_cookie = NULL; u8 hw_stats = TCA_ACT_HW_STATS_ANY; struct nlattr *tb[TCA_ACT_MAX + 1]; - struct tc_cookie *cookie = NULL; struct tc_action *a; int err; @@ -1375,8 +1375,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (err < 0) return ERR_PTR(err); if (tb[TCA_ACT_COOKIE]) { - cookie = nla_memdup_cookie(tb); - if (!cookie) { + user_cookie = nla_memdup_cookie(tb); + if (!user_cookie) { NL_SET_ERR_MSG(extack, "No memory to generate TC cookie"); err = -ENOMEM; goto err_out; @@ -1402,7 +1402,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, *init_res = err; if (!police && tb[TCA_ACT_COOKIE]) - tcf_set_action_cookie(&a->act_cookie, cookie); + tcf_set_action_cookie(&a->user_cookie, user_cookie); if (!police) a->hw_stats = hw_stats; @@ -1410,9 +1410,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, return a; err_out: - if (cookie) { - kfree(cookie->data); - kfree(cookie); + if (user_cookie) { + kfree(user_cookie->data); + kfree(user_cookie); } return ERR_PTR(err); } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index bfabc9c95fa9..3569e2c3660c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -22,6 +22,7 @@ #include <linux/idr.h> #include <linux/jhash.h> #include <linux/rculist.h> +#include <linux/rhashtable.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/netlink.h> @@ -50,6 +51,109 @@ static LIST_HEAD(tcf_proto_base); /* Protects list of registered TC modules. It is pure SMP lock. */ static DEFINE_RWLOCK(cls_mod_lock); +static struct xarray tcf_exts_miss_cookies_xa; +struct tcf_exts_miss_cookie_node { + const struct tcf_chain *chain; + const struct tcf_proto *tp; + const struct tcf_exts *exts; + u32 chain_index; + u32 tp_prio; + u32 handle; + u32 miss_cookie_base; + struct rcu_head rcu; +}; + +/* Each tc action entry cookie will be comprised of 32bit miss_cookie_base + + * action index in the exts tc actions array. + */ +union tcf_exts_miss_cookie { + struct { + u32 miss_cookie_base; + u32 act_index; + }; + u64 miss_cookie; +}; + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static int +tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp, + u32 handle) +{ + struct tcf_exts_miss_cookie_node *n; + static u32 next; + int err; + + if (WARN_ON(!handle || !tp->ops->get_exts)) + return -EINVAL; + + n = kzalloc(sizeof(*n), GFP_KERNEL); + if (!n) + return -ENOMEM; + + n->chain_index = tp->chain->index; + n->chain = tp->chain; + n->tp_prio = tp->prio; + n->tp = tp; + n->exts = exts; + n->handle = handle; + + err = xa_alloc_cyclic(&tcf_exts_miss_cookies_xa, &n->miss_cookie_base, + n, xa_limit_32b, &next, GFP_KERNEL); + if (err) + goto err_xa_alloc; + + exts->miss_cookie_node = n; + return 0; + +err_xa_alloc: + kfree(n); + return err; +} + +static void tcf_exts_miss_cookie_base_destroy(struct tcf_exts *exts) +{ + struct tcf_exts_miss_cookie_node *n; + + if (!exts->miss_cookie_node) + return; + + n = exts->miss_cookie_node; + xa_erase(&tcf_exts_miss_cookies_xa, n->miss_cookie_base); + kfree_rcu(n, rcu); +} + +static struct tcf_exts_miss_cookie_node * +tcf_exts_miss_cookie_lookup(u64 miss_cookie, int *act_index) +{ + union tcf_exts_miss_cookie mc = { .miss_cookie = miss_cookie, }; + + *act_index = mc.act_index; + return xa_load(&tcf_exts_miss_cookies_xa, mc.miss_cookie_base); +} +#else /* IS_ENABLED(CONFIG_NET_TC_SKB_EXT) */ +static int +tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp, + u32 handle) +{ + return 0; +} + +static void tcf_exts_miss_cookie_base_destroy(struct tcf_exts *exts) +{ +} +#endif /* IS_ENABLED(CONFIG_NET_TC_SKB_EXT) */ + +static u64 tcf_exts_miss_cookie_get(u32 miss_cookie_base, int act_index) +{ + union tcf_exts_miss_cookie mc = { .act_index = act_index, }; + + if (!miss_cookie_base) + return 0; + + mc.miss_cookie_base = miss_cookie_base; + return mc.miss_cookie; +} + #ifdef CONFIG_NET_CLS_ACT DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc); EXPORT_SYMBOL(tc_skb_ext_tc); @@ -1549,6 +1653,8 @@ static inline int __tcf_classify(struct sk_buff *skb, const struct tcf_proto *orig_tp, struct tcf_result *res, bool compat_mode, + struct tcf_exts_miss_cookie_node *n, + int act_index, u32 *last_executed_chain) { #ifdef CONFIG_NET_CLS_ACT @@ -1560,13 +1666,36 @@ reclassify: #endif for (; tp; tp = rcu_dereference_bh(tp->next)) { __be16 protocol = skb_protocol(skb, false); - int err; + int err = 0; - if (tp->protocol != protocol && - tp->protocol != htons(ETH_P_ALL)) - continue; + if (n) { + struct tcf_exts *exts; + + if (n->tp_prio != tp->prio) + continue; + + /* We re-lookup the tp and chain based on index instead + * of having hard refs and locks to them, so do a sanity + * check if any of tp,chain,exts was replaced by the + * time we got here with a cookie from hardware. + */ + if (unlikely(n->tp != tp || n->tp->chain != n->chain || + !tp->ops->get_exts)) + return TC_ACT_SHOT; + + exts = tp->ops->get_exts(tp, n->handle); + if (unlikely(!exts || n->exts != exts)) + return TC_ACT_SHOT; - err = tc_classify(skb, tp, res); + n = NULL; + err = tcf_exts_exec_ex(skb, exts, act_index, res); + } else { + if (tp->protocol != protocol && + tp->protocol != htons(ETH_P_ALL)) + continue; + + err = tc_classify(skb, tp, res); + } #ifdef CONFIG_NET_CLS_ACT if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) { first_tp = orig_tp; @@ -1582,6 +1711,9 @@ reclassify: return err; } + if (unlikely(n)) + return TC_ACT_SHOT; + return TC_ACT_UNSPEC; /* signal: continue lookup */ #ifdef CONFIG_NET_CLS_ACT reset: @@ -1606,21 +1738,35 @@ int tcf_classify(struct sk_buff *skb, #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) u32 last_executed_chain = 0; - return __tcf_classify(skb, tp, tp, res, compat_mode, + return __tcf_classify(skb, tp, tp, res, compat_mode, NULL, 0, &last_executed_chain); #else u32 last_executed_chain = tp ? tp->chain->index : 0; + struct tcf_exts_miss_cookie_node *n = NULL; const struct tcf_proto *orig_tp = tp; struct tc_skb_ext *ext; + int act_index = 0; int ret; if (block) { ext = skb_ext_find(skb, TC_SKB_EXT); - if (ext && ext->chain) { + if (ext && (ext->chain || ext->act_miss)) { struct tcf_chain *fchain; + u32 chain; + + if (ext->act_miss) { + n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie, + &act_index); + if (!n) + return TC_ACT_SHOT; - fchain = tcf_chain_lookup_rcu(block, ext->chain); + chain = n->chain_index; + } else { + chain = ext->chain; + } + + fchain = tcf_chain_lookup_rcu(block, chain); if (!fchain) return TC_ACT_SHOT; @@ -1632,7 +1778,7 @@ int tcf_classify(struct sk_buff *skb, } } - ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, + ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, n, act_index, &last_executed_chain); if (tc_skb_ext_tc_enabled()) { @@ -3056,9 +3202,48 @@ out: return skb->len; } +int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action, + int police, struct tcf_proto *tp, u32 handle, + bool use_action_miss) +{ + int err = 0; + +#ifdef CONFIG_NET_CLS_ACT + exts->type = 0; + exts->nr_actions = 0; + /* Note: we do not own yet a reference on net. + * This reference might be taken later from tcf_exts_get_net(). + */ + exts->net = net; + exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), + GFP_KERNEL); + if (!exts->actions) + return -ENOMEM; +#endif + + exts->action = action; + exts->police = police; + + if (!use_action_miss) + return 0; + + err = tcf_exts_miss_cookie_base_alloc(exts, tp, handle); + if (err) + goto err_miss_alloc; + + return 0; + +err_miss_alloc: + tcf_exts_destroy(exts); + return err; +} +EXPORT_SYMBOL(tcf_exts_init_ex); + void tcf_exts_destroy(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT + tcf_exts_miss_cookie_base_destroy(exts); + if (exts->actions) { tcf_action_destroy(exts->actions, TCA_ACT_UNBIND); kfree(exts->actions); @@ -3490,28 +3675,28 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, } EXPORT_SYMBOL(tc_setup_cb_reoffload); -static int tcf_act_get_cookie(struct flow_action_entry *entry, - const struct tc_action *act) +static int tcf_act_get_user_cookie(struct flow_action_entry *entry, + const struct tc_action *act) { - struct tc_cookie *cookie; + struct tc_cookie *user_cookie; int err = 0; rcu_read_lock(); - cookie = rcu_dereference(act->act_cookie); - if (cookie) { - entry->cookie = flow_action_cookie_create(cookie->data, - cookie->len, - GFP_ATOMIC); - if (!entry->cookie) + user_cookie = rcu_dereference(act->user_cookie); + if (user_cookie) { + entry->user_cookie = flow_action_cookie_create(user_cookie->data, + user_cookie->len, + GFP_ATOMIC); + if (!entry->user_cookie) err = -ENOMEM; } rcu_read_unlock(); return err; } -static void tcf_act_put_cookie(struct flow_action_entry *entry) +static void tcf_act_put_user_cookie(struct flow_action_entry *entry) { - flow_action_cookie_destroy(entry->cookie); + flow_action_cookie_destroy(entry->user_cookie); } void tc_cleanup_offload_action(struct flow_action *flow_action) @@ -3520,7 +3705,7 @@ void tc_cleanup_offload_action(struct flow_action *flow_action) int i; flow_action_for_each(i, entry, flow_action) { - tcf_act_put_cookie(entry); + tcf_act_put_user_cookie(entry); if (entry->destructor) entry->destructor(entry->destructor_priv); } @@ -3547,6 +3732,7 @@ static int tc_setup_offload_act(struct tc_action *act, int tc_setup_action(struct flow_action *flow_action, struct tc_action *actions[], + u32 miss_cookie_base, struct netlink_ext_ack *extack) { int i, j, k, index, err = 0; @@ -3565,7 +3751,7 @@ int tc_setup_action(struct flow_action *flow_action, entry = &flow_action->entries[j]; spin_lock_bh(&act->tcfa_lock); - err = tcf_act_get_cookie(entry, act); + err = tcf_act_get_user_cookie(entry, act); if (err) goto err_out_locked; @@ -3577,7 +3763,9 @@ int tc_setup_action(struct flow_action *flow_action, for (k = 0; k < index ; k++) { entry[k].hw_stats = tc_act_hw_stats(act->hw_stats); entry[k].hw_index = act->tcfa_index; - entry[k].act_cookie = (unsigned long)act; + entry[k].cookie = (unsigned long)act; + entry[k].miss_cookie = + tcf_exts_miss_cookie_get(miss_cookie_base, i); } j += index; @@ -3600,10 +3788,15 @@ int tc_setup_offload_action(struct flow_action *flow_action, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT + u32 miss_cookie_base; + if (!exts) return 0; - return tc_setup_action(flow_action, exts->actions, extack); + miss_cookie_base = exts->miss_cookie_node ? + exts->miss_cookie_node->miss_cookie_base : 0; + return tc_setup_action(flow_action, exts->actions, miss_cookie_base, + extack); #else return 0; #endif @@ -3771,6 +3964,8 @@ static int __init tc_filter_init(void) if (err) goto err_register_pernet_subsys; + xa_init_flags(&tcf_exts_miss_cookies_xa, XA_FLAGS_ALLOC1); + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 885c95191ccf..e960a46b0520 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -529,6 +529,15 @@ static struct cls_fl_filter *__fl_get(struct cls_fl_head *head, u32 handle) return f; } +static struct tcf_exts *fl_get_exts(const struct tcf_proto *tp, u32 handle) +{ + struct cls_fl_head *head = rcu_dereference_bh(tp->root); + struct cls_fl_filter *f; + + f = idr_find(&head->handle_idr, handle); + return f ? &f->exts : NULL; +} + static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) @@ -2187,10 +2196,6 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, INIT_LIST_HEAD(&fnew->hw_list); refcount_set(&fnew->refcnt, 1); - err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0); - if (err < 0) - goto errout; - if (tb[TCA_FLOWER_FLAGS]) { fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); @@ -2200,15 +2205,46 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } } + if (!fold) { + spin_lock(&tp->lock); + if (!handle) { + handle = 1; + err = idr_alloc_u32(&head->handle_idr, fnew, &handle, + INT_MAX, GFP_ATOMIC); + } else { + err = idr_alloc_u32(&head->handle_idr, fnew, &handle, + handle, GFP_ATOMIC); + + /* Filter with specified handle was concurrently + * inserted after initial check in cls_api. This is not + * necessarily an error if NLM_F_EXCL is not set in + * message flags. Returning EAGAIN will cause cls_api to + * try to update concurrently inserted rule. + */ + if (err == -ENOSPC) + err = -EAGAIN; + } + spin_unlock(&tp->lock); + + if (err) + goto errout; + } + fnew->handle = handle; + + err = tcf_exts_init_ex(&fnew->exts, net, TCA_FLOWER_ACT, 0, tp, handle, + !tc_skip_hw(fnew->flags)); + if (err < 0) + goto errout_idr; + err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], tp->chain->tmplt_priv, flags, fnew->flags, extack); if (err) - goto errout; + goto errout_idr; err = fl_check_assign_mask(head, fnew, fold, mask); if (err) - goto errout; + goto errout_idr; err = fl_ht_insert_unique(fnew, fold, &in_ht); if (err) @@ -2274,29 +2310,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, refcount_dec(&fold->refcnt); __fl_put(fold); } else { - if (handle) { - /* user specifies a handle and it doesn't exist */ - err = idr_alloc_u32(&head->handle_idr, fnew, &handle, - handle, GFP_ATOMIC); - - /* Filter with specified handle was concurrently - * inserted after initial check in cls_api. This is not - * necessarily an error if NLM_F_EXCL is not set in - * message flags. Returning EAGAIN will cause cls_api to - * try to update concurrently inserted rule. - */ - if (err == -ENOSPC) - err = -EAGAIN; - } else { - handle = 1; - err = idr_alloc_u32(&head->handle_idr, fnew, &handle, - INT_MAX, GFP_ATOMIC); - } - if (err) - goto errout_hw; + idr_replace(&head->handle_idr, fnew, fnew->handle); refcount_inc(&fnew->refcnt); - fnew->handle = handle; list_add_tail_rcu(&fnew->list, &fnew->mask->filters); spin_unlock(&tp->lock); } @@ -2319,6 +2335,8 @@ errout_hw: fnew->mask->filter_ht_params); errout_mask: fl_mask_put(head, fnew->mask); +errout_idr: + idr_remove(&head->handle_idr, fnew->handle); errout: __fl_put(fnew); errout_tb: @@ -3436,6 +3454,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .tmplt_create = fl_tmplt_create, .tmplt_destroy = fl_tmplt_destroy, .tmplt_dump = fl_tmplt_dump, + .get_exts = fl_get_exts, .owner = THIS_MODULE, .flags = TCF_PROTO_OPS_DOIT_UNLOCKED, }; |