diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 9 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en.h | 9 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 7 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 17 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 44 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 8 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 242 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 97 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 20 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 13 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 7 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/vport.c | 17 |
12 files changed, 372 insertions, 118 deletions
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8727116a4cab..9d8535385bb8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -53,6 +53,7 @@ #include <linux/in.h> #include <linux/etherdevice.h> #include <linux/mlx5/fs.h> +#include <linux/mlx5/vport.h> #include "mlx5_ib.h" #define DRIVER_NAME "mlx5_ib" @@ -1202,6 +1203,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.response_length += sizeof(resp.cmds_supp_uhw); } + if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) { + if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { + mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline); + resp.eth_min_inline++; + } + resp.response_length += sizeof(resp.eth_min_inline); + } + /* * We don't want to expose information from the PCI bar that is located * after 4096 bytes, so if the arch only supports larger pages, let's diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index d603a30ad639..46f728de9e76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -101,6 +101,7 @@ #define MLX5E_LOG_INDIR_RQT_SIZE 0x7 #define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) +#define MLX5E_MIN_NUM_CHANNELS 0x1 #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 @@ -786,7 +787,7 @@ void mlx5e_pps_event_handler(struct mlx5e_priv *priv, struct ptp_clock_event *event); int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr); int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr); -void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val); +void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); @@ -847,12 +848,6 @@ static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) return wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); } -static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) -{ - return min_t(int, mdev->priv.eq_table.num_comp_vectors, - MLX5E_MAX_NUM_CHANNELS); -} - extern const struct ethtool_ops mlx5e_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 349dc72cd2b6..37e66eef6fb5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -106,11 +106,12 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) return -ERANGE; } + mutex_lock(&priv->state_lock); /* RX HW timestamp */ switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: /* Reset CQE compression to Admin default */ - mlx5e_modify_rx_cqe_compression(priv, priv->params.rx_cqe_compress_def); + mlx5e_modify_rx_cqe_compression_locked(priv, priv->params.rx_cqe_compress_def); break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -128,14 +129,16 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: /* Disable CQE compression */ netdev_warn(dev, "Disabling cqe compression"); - mlx5e_modify_rx_cqe_compression(priv, false); + mlx5e_modify_rx_cqe_compression_locked(priv, false); config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: + mutex_unlock(&priv->state_lock); return -ERANGE; } memcpy(&priv->tstamp.hwtstamp_config, &config, sizeof(config)); + mutex_unlock(&priv->state_lock); return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 4021863e3840..6c1a5cb43f8c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -552,7 +552,7 @@ static void mlx5e_get_channels(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); - ch->max_combined = mlx5e_get_max_num_channels(priv->mdev); + ch->max_combined = priv->profile->max_nch(priv->mdev); ch->combined_count = priv->params.num_channels; } @@ -560,7 +560,7 @@ static int mlx5e_set_channels(struct net_device *dev, struct ethtool_channels *ch) { struct mlx5e_priv *priv = netdev_priv(dev); - int ncv = mlx5e_get_max_num_channels(priv->mdev); + int ncv = priv->profile->max_nch(priv->mdev); unsigned int count = ch->combined_count; bool arfs_enabled; bool was_opened; @@ -1476,8 +1476,6 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - int err = 0; - bool reset; if (!MLX5_CAP_GEN(mdev, cqe_compression)) return -ENOTSUPP; @@ -1487,17 +1485,10 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, return -EINVAL; } - reset = test_bit(MLX5E_STATE_OPENED, &priv->state); - - if (reset) - mlx5e_close_locked(netdev); - - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, enable); + mlx5e_modify_rx_cqe_compression_locked(priv, enable); priv->params.rx_cqe_compress_def = enable; - if (reset) - err = mlx5e_open_locked(netdev); - return err; + return 0; } static int mlx5e_handle_pflag(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3a06c81ef85e..e829143efc14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -31,6 +31,7 @@ */ #include <net/tc_act/tc_gact.h> +#include <linux/crash_dump.h> #include <net/pkt_cls.h> #include <linux/mlx5/fs.h> #include <net/vxlan.h> @@ -83,7 +84,9 @@ static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) priv->params.rq_wq_type = rq_type; switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; + priv->params.log_rq_size = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW : + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; priv->params.mpwqe_log_stride_sz = MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) ? MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : @@ -92,7 +95,9 @@ static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) priv->params.mpwqe_log_stride_sz; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + priv->params.log_rq_size = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; } priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, BIT(priv->params.log_rq_size)); @@ -1508,6 +1513,14 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } +static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) +{ + return is_kdump_kernel() ? + MLX5E_MIN_NUM_CHANNELS : + min_t(int, mdev->priv.eq_table.num_comp_vectors, + MLX5E_MAX_NUM_CHANNELS); +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel_param *cparam, struct mlx5e_channel **cp) @@ -3021,11 +3034,8 @@ static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; - if (min_tx_rate) - return -EOPNOTSUPP; - return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1, - max_tx_rate); + max_tx_rate, min_tx_rate); } static int mlx5_vport_link2ifla(u8 esw_link) @@ -3461,22 +3471,6 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; } -static void mlx5e_query_min_inline(struct mlx5_core_dev *mdev, - u8 *min_inline_mode) -{ - switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { - case MLX5_CAP_INLINE_MODE_L2: - *min_inline_mode = MLX5_INLINE_MODE_L2; - break; - case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: - mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode); - break; - case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: - *min_inline_mode = MLX5_INLINE_MODE_NONE; - break; - } -} - u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) { int i; @@ -3510,7 +3504,9 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->params.lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); - priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + priv->params.log_sq_size = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; /* set CQE compression */ priv->params.rx_cqe_compress_def = false; @@ -3536,7 +3532,7 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->params.tx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); - mlx5e_query_min_inline(mdev, &priv->params.tx_min_inline_mode); + mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode); priv->params.num_tc = 1; priv->params.rss_hfunc = ETH_RSS_HASH_XOR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 20f116f8c457..ba50583ea3ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -155,17 +155,15 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1; } -void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val) +void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val) { bool was_opened; if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) return; - mutex_lock(&priv->state_lock); - if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) == val) - goto unlock; + return; was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); if (was_opened) @@ -176,8 +174,6 @@ void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val) if (was_opened) mlx5e_open_locked(priv->netdev); -unlock: - mutex_unlock(&priv->state_lock); } #define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index d4af5507679f..640f10f2e994 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -298,6 +298,32 @@ vxlan_match_offload_err: MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); + } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + f->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + f->mask); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6); } /* Enforce DMAC when offloading incoming tunneled flows. @@ -358,12 +384,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, f->key); switch (key->addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: if (parse_tunnel_attr(priv, spec, f)) return -EOPNOTSUPP; break; - case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - netdev_warn(priv->netdev, - "IPv6 tunnel decap offload isn't supported\n"); default: return -EOPNOTSUPP; } @@ -644,15 +668,15 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return 0; } -static inline int cmp_encap_info(struct mlx5_encap_info *a, - struct mlx5_encap_info *b) +static inline int cmp_encap_info(struct ip_tunnel_key *a, + struct ip_tunnel_key *b) { return memcmp(a, b, sizeof(*a)); } -static inline int hash_encap_info(struct mlx5_encap_info *info) +static inline int hash_encap_info(struct ip_tunnel_key *key) { - return jhash(info, sizeof(*info), 0); + return jhash(key, sizeof(*key), 0); } static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, @@ -660,12 +684,10 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, struct net_device **out_dev, struct flowi4 *fl4, struct neighbour **out_n, - __be32 *saddr, int *out_ttl) { struct rtable *rt; struct neighbour *n = NULL; - int ttl; #if IS_ENABLED(CONFIG_INET) int ret; @@ -684,20 +706,59 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - ttl = ip4_dst_hoplimit(&rt->dst); + *out_ttl = ip4_dst_hoplimit(&rt->dst); n = dst_neigh_lookup(&rt->dst, &fl4->daddr); ip_rt_put(rt); if (!n) return -ENOMEM; *out_n = n; - *saddr = fl4->saddr; - *out_ttl = ttl; *out_dev = rt->dst.dev; return 0; } +static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct flowi6 *fl6, + struct neighbour **out_n, + int *out_ttl) +{ + struct neighbour *n = NULL; + struct dst_entry *dst; + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int ret; + + dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6); + if (dst->error) { + ret = dst->error; + dst_release(dst); + return ret; + } + + *out_ttl = ip6_dst_hoplimit(dst); + + /* if the egress device isn't on the same HW e-switch, we use the uplink */ + if (!switchdev_port_same_parent_id(priv->netdev, dst->dev)) + *out_dev = mlx5_eswitch_get_uplink_netdev(esw); + else + *out_dev = dst->dev; +#else + return -EOPNOTSUPP; +#endif + + n = dst_neigh_lookup(dst, &fl6->daddr); + dst_release(dst); + if (!n) + return -ENOMEM; + + *out_n = n; + return 0; +} + static int gen_vxlan_header_ipv4(struct net_device *out_dev, char buf[], unsigned char h_dest[ETH_ALEN], @@ -734,19 +795,52 @@ static int gen_vxlan_header_ipv4(struct net_device *out_dev, return encap_size; } +static int gen_vxlan_header_ipv6(struct net_device *out_dev, + char buf[], + unsigned char h_dest[ETH_ALEN], + int ttl, + struct in6_addr *daddr, + struct in6_addr *saddr, + __be16 udp_dst_port, + __be32 vx_vni) +{ + int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN; + struct ethhdr *eth = (struct ethhdr *)buf; + struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); + struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr)); + struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + + memset(buf, 0, encap_size); + + ether_addr_copy(eth->h_dest, h_dest); + ether_addr_copy(eth->h_source, out_dev->dev_addr); + eth->h_proto = htons(ETH_P_IPV6); + + ip6_flow_hdr(ip6h, 0, 0); + /* the HW fills up ipv6 payload len */ + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = ttl; + ip6h->daddr = *daddr; + ip6h->saddr = *saddr; + + udp->dest = udp_dst_port; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(vx_vni); + + return encap_size; +} + static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5_encap_entry *e, struct net_device **out_dev) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct ip_tunnel_key *tun_key = &e->tun_info.key; + int encap_size, ttl, err; struct neighbour *n = NULL; struct flowi4 fl4 = {}; char *encap_header; - int encap_size; - __be32 saddr; - int ttl; - int err; encap_header = kzalloc(max_encap_size, GFP_KERNEL); if (!encap_header) @@ -755,37 +849,108 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: fl4.flowi4_proto = IPPROTO_UDP; - fl4.fl4_dport = e->tun_info.tp_dst; + fl4.fl4_dport = tun_key->tp_dst; break; default: err = -EOPNOTSUPP; goto out; } - fl4.daddr = e->tun_info.daddr; + fl4.flowi4_tos = tun_key->tos; + fl4.daddr = tun_key->u.ipv4.dst; + fl4.saddr = tun_key->u.ipv4.src; err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev, - &fl4, &n, &saddr, &ttl); + &fl4, &n, &ttl); if (err) goto out; + if (!(n->nud_state & NUD_VALID)) { + pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr); + err = -EOPNOTSUPP; + goto out; + } + e->n = n; e->out_dev = *out_dev; + neigh_ha_snapshot(e->h_dest, n, *out_dev); + + switch (e->tunnel_type) { + case MLX5_HEADER_TYPE_VXLAN: + encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header, + e->h_dest, ttl, + fl4.daddr, + fl4.saddr, tun_key->tp_dst, + tunnel_id_to_key32(tun_key->tun_id)); + break; + default: + err = -EOPNOTSUPP; + goto out; + } + + err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, + encap_size, encap_header, &e->encap_id); +out: + if (err && n) + neigh_release(n); + kfree(encap_header); + return err; +} + +static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5_encap_entry *e, + struct net_device **out_dev) + +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct ip_tunnel_key *tun_key = &e->tun_info.key; + int encap_size, err, ttl = 0; + struct neighbour *n = NULL; + struct flowi6 fl6 = {}; + char *encap_header; + + encap_header = kzalloc(max_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; + + switch (e->tunnel_type) { + case MLX5_HEADER_TYPE_VXLAN: + fl6.flowi6_proto = IPPROTO_UDP; + fl6.fl6_dport = tun_key->tp_dst; + break; + default: + err = -EOPNOTSUPP; + goto out; + } + + fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); + fl6.daddr = tun_key->u.ipv6.dst; + fl6.saddr = tun_key->u.ipv6.src; + + err = mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev, + &fl6, &n, &ttl); + if (err) + goto out; + if (!(n->nud_state & NUD_VALID)) { - pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr); + pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr); err = -EOPNOTSUPP; goto out; } + e->n = n; + e->out_dev = *out_dev; + neigh_ha_snapshot(e->h_dest, n, *out_dev); switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header, + encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header, e->h_dest, ttl, - e->tun_info.daddr, - saddr, e->tun_info.tp_dst, - e->tun_info.tun_id); + &fl6.daddr, + &fl6.saddr, tun_key->tp_dst, + tunnel_id_to_key32(tun_key->tun_id)); break; default: err = -EOPNOTSUPP; @@ -809,13 +974,11 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; unsigned short family = ip_tunnel_info_af(tun_info); struct ip_tunnel_key *key = &tun_info->key; - struct mlx5_encap_info info; struct mlx5_encap_entry *e; struct net_device *out_dev; + int tunnel_type, err = -EOPNOTSUPP; uintptr_t hash_key; bool found = false; - int tunnel_type; - int err; /* udp dst port must be set */ if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst))) @@ -831,8 +994,6 @@ vxlan_encap_offload_err: if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { - info.tp_dst = key->tp_dst; - info.tun_id = tunnel_id_to_key32(key->tun_id); tunnel_type = MLX5_HEADER_TYPE_VXLAN; } else { netdev_warn(priv->netdev, @@ -840,22 +1001,11 @@ vxlan_encap_offload_err: return -EOPNOTSUPP; } - switch (family) { - case AF_INET: - info.daddr = key->u.ipv4.dst; - break; - case AF_INET6: - netdev_warn(priv->netdev, - "IPv6 tunnel encap offload isn't supported\n"); - default: - return -EOPNOTSUPP; - } - - hash_key = hash_encap_info(&info); + hash_key = hash_encap_info(key); hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { - if (!cmp_encap_info(&e->tun_info, &info)) { + if (!cmp_encap_info(&e->tun_info.key, key)) { found = true; break; } @@ -870,11 +1020,15 @@ vxlan_encap_offload_err: if (!e) return -ENOMEM; - e->tun_info = info; + e->tun_info = *tun_info; e->tunnel_type = tunnel_type; INIT_LIST_HEAD(&e->flows); - err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev); + if (family == AF_INET) + err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev); + else if (family == AF_INET6) + err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev); + if (err) goto out_err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 4b3b60be319d..efa1a7a76d8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1415,7 +1415,7 @@ static void esw_destroy_tsar(struct mlx5_eswitch *esw) } static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, - u32 initial_max_rate) + u32 initial_max_rate, u32 initial_bw_share) { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; struct mlx5_vport *vport = &esw->vports[vport_num]; @@ -1439,6 +1439,7 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, esw->qos.root_tsar_id); MLX5_SET(scheduling_context, &sched_ctx, max_average_bw, initial_max_rate); + MLX5_SET(scheduling_context, &sched_ctx, bw_share, initial_bw_share); err = mlx5_create_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, @@ -1473,7 +1474,7 @@ static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) } static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, - u32 max_rate) + u32 max_rate, u32 bw_share) { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; struct mlx5_vport *vport = &esw->vports[vport_num]; @@ -1497,7 +1498,9 @@ static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, esw->qos.root_tsar_id); MLX5_SET(scheduling_context, &sched_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, &sched_ctx, bw_share, bw_share); bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; err = mlx5_modify_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, @@ -1563,7 +1566,8 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, esw_apply_vport_conf(esw, vport); /* Attach vport to the eswitch rate limiter */ - if (esw_vport_enable_qos(esw, vport_num, vport->info.max_rate)) + if (esw_vport_enable_qos(esw, vport_num, vport->info.max_rate, + vport->qos.bw_share)) esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num); /* Sync with current vport context */ @@ -1952,6 +1956,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, ivi->qos = evport->info.qos; ivi->spoofchk = evport->info.spoofchk; ivi->trusted = evport->info.trusted; + ivi->min_tx_rate = evport->info.min_rate; ivi->max_tx_rate = evport->info.max_rate; mutex_unlock(&esw->state_lock); @@ -2046,23 +2051,103 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, return 0; } -int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, - int vport, u32 max_rate) +static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) { + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); struct mlx5_vport *evport; + u32 max_guarantee = 0; + int i; + + for (i = 0; i <= esw->total_vports; i++) { + evport = &esw->vports[i]; + if (!evport->enabled || evport->info.min_rate < max_guarantee) + continue; + max_guarantee = evport->info.min_rate; + } + + return max_t(u32, max_guarantee / fw_max_bw_share, 1); +} + +static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_vport *evport; + u32 vport_max_rate; + u32 vport_min_rate; + u32 bw_share; + int err; + int i; + + for (i = 0; i <= esw->total_vports; i++) { + evport = &esw->vports[i]; + if (!evport->enabled) + continue; + vport_min_rate = evport->info.min_rate; + vport_max_rate = evport->info.max_rate; + bw_share = MLX5_MIN_BW_SHARE; + + if (vport_min_rate) + bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate, + divider, + fw_max_bw_share); + + if (bw_share == evport->qos.bw_share) + continue; + + err = esw_vport_qos_config(esw, i, vport_max_rate, + bw_share); + if (!err) + evport->qos.bw_share = bw_share; + else + return err; + } + + return 0; +} + +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, + u32 max_rate, u32 min_rate) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + bool min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && + fw_max_bw_share >= MLX5_MIN_BW_SHARE; + bool max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); + struct mlx5_vport *evport; + u32 previous_min_rate; + u32 divider; int err = 0; if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; + if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported)) + return -EOPNOTSUPP; mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - err = esw_vport_qos_config(esw, vport, max_rate); + + if (min_rate == evport->info.min_rate) + goto set_max_rate; + + previous_min_rate = evport->info.min_rate; + evport->info.min_rate = min_rate; + divider = calculate_vports_min_rate_divider(esw); + err = normalize_vports_min_rate(esw, divider); + if (err) { + evport->info.min_rate = previous_min_rate; + goto unlock; + } + +set_max_rate: + if (max_rate == evport->info.max_rate) + goto unlock; + + err = esw_vport_qos_config(esw, vport, max_rate, evport->qos.bw_share); if (!err) evport->info.max_rate = max_rate; +unlock: mutex_unlock(&esw->state_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 8661dd3f542c..5b78883d5654 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -36,6 +36,7 @@ #include <linux/if_ether.h> #include <linux/if_link.h> #include <net/devlink.h> +#include <net/ip_tunnels.h> #include <linux/mlx5/device.h> #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -49,6 +50,11 @@ #define FDB_UPLINK_VPORT 0xffff +#define MLX5_MIN_BW_SHARE 1 + +#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ + min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit) + /* L2 -mac address based- hash helpers */ struct l2addr_node { struct hlist_node hlist; @@ -115,6 +121,7 @@ struct mlx5_vport_info { u8 qos; u64 node_guid; int link_state; + u32 min_rate; u32 max_rate; bool spoofchk; bool trusted; @@ -137,6 +144,7 @@ struct mlx5_vport { struct { bool enabled; u32 esw_tsar_ix; + u32 bw_share; } qos; bool enabled; @@ -248,8 +256,8 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, int vport, bool spoofchk); int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, int vport_num, bool setting); -int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, - int vport, u32 max_rate); +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, + u32 max_rate, u32 min_rate); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi); int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, @@ -274,18 +282,12 @@ enum { #define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x40 #define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80 -struct mlx5_encap_info { - __be32 daddr; - __be32 tun_id; - __be16 tp_dst; -}; - struct mlx5_encap_entry { struct hlist_node encap_hlist; struct list_head flows; u32 encap_id; struct neighbour *n; - struct mlx5_encap_info tun_info; + struct ip_tunnel_info tun_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 03293ed1cc22..348182599294 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -402,19 +402,18 @@ out: } #define MAX_PF_SQ 256 -#define ESW_OFFLOADS_NUM_ENTRIES (1 << 13) /* 8K */ #define ESW_OFFLOADS_NUM_GROUPS 4 static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int table_size, ix, esw_size, err = 0; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; struct mlx5_flow_group *g; u32 *flow_group_in; void *match_criteria; - int table_size, ix, err = 0; u32 flags = 0; flow_group_in = mlx5_vzalloc(inlen); @@ -427,15 +426,19 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) goto ns_err; } - esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n", - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), + MLX5_CAP_GEN(dev, max_flow_counter), ESW_OFFLOADS_NUM_GROUPS); + + esw_size = min_t(int, MLX5_CAP_GEN(dev, max_flow_counter) * ESW_OFFLOADS_NUM_GROUPS, + 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN; fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, - ESW_OFFLOADS_NUM_ENTRIES, + esw_size, ESW_OFFLOADS_NUM_GROUPS, 0, flags); if (IS_ERR(fdb)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index c4478ecd8056..b5253b59e8fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -473,10 +473,13 @@ int mlx5_encap_alloc(struct mlx5_core_dev *dev, int err; u32 *in; - if (size > MLX5_CAP_ESW(dev, max_encap_header_size)) + if (size > max_encap_size) { + mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n", + size, max_encap_size); return -EINVAL; + } - in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + max_encap_size, + in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size, GFP_KERNEL); if (!in) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 269e4401c342..b49cfc895c10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -127,6 +127,23 @@ int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline); +void mlx5_query_min_inline(struct mlx5_core_dev *mdev, + u8 *min_inline_mode) +{ + switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_L2: + *min_inline_mode = MLX5_INLINE_MODE_L2; + break; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode); + break; + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + *min_inline_mode = MLX5_INLINE_MODE_NONE; + break; + } +} +EXPORT_SYMBOL_GPL(mlx5_query_min_inline); + int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, u16 vport, u8 min_inline) { |