diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox')
48 files changed, 9966 insertions, 1967 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index eb8a4988de63..af975a2b74c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -155,13 +155,11 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq; cq->mcq.event = mlx4_en_cq_event; - if (cq->is_tx) { - netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, - NAPI_POLL_WEIGHT); - } else { + if (cq->is_tx) + netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, + NAPI_POLL_WEIGHT); + else netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); - napi_hash_add(&cq->napi); - } napi_enable(&cq->napi); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index ddb5541882f5..dd84cabb2a51 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -337,11 +337,7 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset) case ETH_SS_STATS: return bitmap_iterator_count(&it) + (priv->tx_ring_num * 2) + -#ifdef CONFIG_NET_RX_BUSY_POLL - (priv->rx_ring_num * 5); -#else (priv->rx_ring_num * 2); -#endif case ETH_SS_TEST: return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2; @@ -408,11 +404,6 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, for (i = 0; i < priv->rx_ring_num; i++) { data[index++] = priv->rx_ring[i]->packets; data[index++] = priv->rx_ring[i]->bytes; -#ifdef CONFIG_NET_RX_BUSY_POLL - data[index++] = priv->rx_ring[i]->yields; - data[index++] = priv->rx_ring[i]->misses; - data[index++] = priv->rx_ring[i]->cleaned; -#endif } spin_unlock_bh(&priv->stats_lock); @@ -486,14 +477,6 @@ static void mlx4_en_get_strings(struct net_device *dev, "rx%d_packets", i); sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_bytes", i); -#ifdef CONFIG_NET_RX_BUSY_POLL - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_napi_yield", i); - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_misses", i); - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_cleaned", i); -#endif } break; case ETH_SS_PRIV_FLAGS: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 7869f97de5da..0c7e3f69a73b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -69,34 +69,6 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } -#ifdef CONFIG_NET_RX_BUSY_POLL -/* must be called with local_bh_disable()d */ -static int mlx4_en_low_latency_recv(struct napi_struct *napi) -{ - struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); - struct net_device *dev = cq->dev; - struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; - int done; - - if (!priv->port_up) - return LL_FLUSH_FAILED; - - if (!mlx4_en_cq_lock_poll(cq)) - return LL_FLUSH_BUSY; - - done = mlx4_en_process_rx_cq(dev, cq, 4); - if (likely(done)) - rx_ring->cleaned += done; - else - rx_ring->misses++; - - mlx4_en_cq_unlock_poll(cq); - - return done; -} -#endif /* CONFIG_NET_RX_BUSY_POLL */ - #ifdef CONFIG_RFS_ACCEL struct mlx4_en_filter { @@ -1561,8 +1533,6 @@ int mlx4_en_start_port(struct net_device *dev) for (i = 0; i < priv->rx_ring_num; i++) { cq = priv->rx_cq[i]; - mlx4_en_cq_init_lock(cq); - err = mlx4_en_init_affinity_hint(priv, i); if (err) { en_err(priv, "Failed preparing IRQ affinity hint\n"); @@ -1859,13 +1829,6 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) for (i = 0; i < priv->rx_ring_num; i++) { struct mlx4_en_cq *cq = priv->rx_cq[i]; - local_bh_disable(); - while (!mlx4_en_cq_lock_napi(cq)) { - pr_info("CQ %d locked\n", i); - mdelay(1); - } - local_bh_enable(); - napi_synchronize(&cq->napi); mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, cq); @@ -2507,9 +2470,6 @@ static const struct net_device_ops mlx4_netdev_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif -#ifdef CONFIG_NET_RX_BUSY_POLL - .ndo_busy_poll = mlx4_en_low_latency_recv, -#endif .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, #ifdef CONFIG_MLX4_EN_VXLAN .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index e7a5000aa12c..41440b2b20a3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -873,10 +873,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud * - TCP/IP (v4) * - without IP options * - not an IP fragment - * - no LLS polling in progress */ - if (!mlx4_en_cq_busy_polling(cq) && - (dev->features & NETIF_F_GRO)) { + if (dev->features & NETIF_F_GRO) { struct sk_buff *gro_skb = napi_get_frags(&cq->napi); if (!gro_skb) goto next; @@ -927,7 +925,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud PKT_HASH_TYPE_L3); skb_record_rx_queue(gro_skb, cq->ring); - skb_mark_napi_id(gro_skb, &cq->napi); if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { timestamp = mlx4_en_get_cqe_ts(cqe); @@ -990,13 +987,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud timestamp); } - skb_mark_napi_id(skb, &cq->napi); - - if (!mlx4_en_cq_busy_polling(cq)) - napi_gro_receive(&cq->napi, skb); - else - netif_receive_skb(skb); - + napi_gro_receive(&cq->napi, skb); next: for (nr = 0; nr < priv->num_frags; nr++) mlx4_en_free_frag(priv, frags, nr); @@ -1038,13 +1029,8 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) struct mlx4_en_priv *priv = netdev_priv(dev); int done; - if (!mlx4_en_cq_lock_napi(cq)) - return budget; - done = mlx4_en_process_rx_cq(dev, cq, budget); - mlx4_en_cq_unlock_napi(cq); - /* If we used up all the quota - we're probably not done yet... */ if (done == budget) { const struct cpumask *aff; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 603d1c3d3b2e..4696053165f8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -151,6 +151,17 @@ void mlx4_gen_slave_eqe(struct work_struct *work) eqe = next_slave_event_eqe(slave_eq)) { slave = eqe->slave_id; + if (eqe->type == MLX4_EVENT_TYPE_PORT_CHANGE && + eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN && + mlx4_is_bonded(dev)) { + struct mlx4_port_cap port_cap; + + if (!mlx4_QUERY_PORT(dev, 1, &port_cap) && port_cap.link_state) + goto consume; + + if (!mlx4_QUERY_PORT(dev, 2, &port_cap) && port_cap.link_state) + goto consume; + } /* All active slaves need to receive the event */ if (slave == ALL_SLAVES) { for (i = 0; i <= dev->persist->num_vfs; i++) { @@ -174,6 +185,7 @@ void mlx4_gen_slave_eqe(struct work_struct *work) mlx4_warn(dev, "Failed to generate event for slave %d\n", slave); } +consume: ++slave_eq->cons; } } @@ -594,7 +606,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) break; for (i = 0; i < dev->persist->num_vfs + 1; i++) { - if (!test_bit(i, slaves_port.slaves)) + int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port); + + if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev)) continue; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { if (i == mlx4_master_func_num(dev)) @@ -606,7 +620,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) eqe->event.port_change.port = cpu_to_be32( (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF) - | (mlx4_phys_to_slave_port(dev, i, port) << 28)); + | (reported_port << 28)); mlx4_slave_event(dev, i, eqe); } } else { /* IB port */ @@ -636,7 +650,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) for (i = 0; i < dev->persist->num_vfs + 1; i++) { - if (!test_bit(i, slaves_port.slaves)) + int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port); + + if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev)) continue; if (i == mlx4_master_func_num(dev)) continue; @@ -645,7 +661,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) eqe->event.port_change.port = cpu_to_be32( (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF) - | (mlx4_phys_to_slave_port(dev, i, port) << 28)); + | (reported_port << 28)); mlx4_slave_event(dev, i, eqe); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 90db94e83fde..2c2baab9d880 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1104,6 +1104,7 @@ int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_c goto out; MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET); + port_cap->link_state = (field & 0x80) >> 7; port_cap->supported_port_types = field & 3; port_cap->suggested_type = (field >> 3) & 1; port_cap->default_sense = (field >> 4) & 1; @@ -1310,6 +1311,15 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, port_type |= MLX4_PORT_LINK_UP_MASK; else if (IFLA_VF_LINK_STATE_DISABLE == admin_link_state) port_type &= ~MLX4_PORT_LINK_UP_MASK; + else if (IFLA_VF_LINK_STATE_AUTO == admin_link_state && mlx4_is_bonded(dev)) { + int other_port = (port == 1) ? 2 : 1; + struct mlx4_port_cap port_cap; + + err = mlx4_QUERY_PORT(dev, other_port, &port_cap); + if (err) + goto out; + port_type |= (port_cap.link_state << 7); + } MLX4_PUT(outbox->buf, port_type, QUERY_PORT_SUPPORTED_TYPE_OFFSET); @@ -1325,7 +1335,7 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, short_field, QUERY_PORT_CUR_MAX_PKEY_OFFSET); } - +out: return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 08de5555c2f4..7ea258af636a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -44,6 +44,7 @@ struct mlx4_mod_stat_cfg { }; struct mlx4_port_cap { + u8 link_state; u8 supported_port_types; u8 suggested_type; u8 default_sense; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 31c491e02e69..f1b6d219e445 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1221,6 +1221,76 @@ err_set_port: return err ? err : count; } +/* bond for multi-function device */ +#define MAX_MF_BOND_ALLOWED_SLAVES 63 +static int mlx4_mf_bond(struct mlx4_dev *dev) +{ + int err = 0; + struct mlx4_slaves_pport slaves_port1; + struct mlx4_slaves_pport slaves_port2; + DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX); + + slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1); + slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2); + bitmap_and(slaves_port_1_2, + slaves_port1.slaves, slaves_port2.slaves, + dev->persist->num_vfs + 1); + + /* only single port vfs are allowed */ + if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) { + mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n"); + return -EINVAL; + } + + /* limit on maximum allowed VFs */ + if ((bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) + + bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1)) > + MAX_MF_BOND_ALLOWED_SLAVES) + return -EINVAL; + + if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) { + mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n"); + return -EINVAL; + } + + err = mlx4_bond_mac_table(dev); + if (err) + return err; + err = mlx4_bond_vlan_table(dev); + if (err) + goto err1; + err = mlx4_bond_fs_rules(dev); + if (err) + goto err2; + + return 0; +err2: + (void)mlx4_unbond_vlan_table(dev); +err1: + (void)mlx4_unbond_mac_table(dev); + return err; +} + +static int mlx4_mf_unbond(struct mlx4_dev *dev) +{ + int ret, ret1; + + ret = mlx4_unbond_fs_rules(dev); + if (ret) + mlx4_warn(dev, "multifunction unbond for flow rules failedi (%d)\n", ret); + ret1 = mlx4_unbond_mac_table(dev); + if (ret1) { + mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1); + ret = ret1; + } + ret1 = mlx4_unbond_vlan_table(dev); + if (ret1) { + mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", ret1); + ret = ret1; + } + return ret; +} + int mlx4_bond(struct mlx4_dev *dev) { int ret = 0; @@ -1228,16 +1298,23 @@ int mlx4_bond(struct mlx4_dev *dev) mutex_lock(&priv->bond_mutex); - if (!mlx4_is_bonded(dev)) + if (!mlx4_is_bonded(dev)) { ret = mlx4_do_bond(dev, true); - else - ret = 0; + if (ret) + mlx4_err(dev, "Failed to bond device: %d\n", ret); + if (!ret && mlx4_is_master(dev)) { + ret = mlx4_mf_bond(dev); + if (ret) { + mlx4_err(dev, "bond for multifunction failed\n"); + mlx4_do_bond(dev, false); + } + } + } mutex_unlock(&priv->bond_mutex); - if (ret) - mlx4_err(dev, "Failed to bond device: %d\n", ret); - else + if (!ret) mlx4_dbg(dev, "Device is bonded\n"); + return ret; } EXPORT_SYMBOL_GPL(mlx4_bond); @@ -1249,14 +1326,24 @@ int mlx4_unbond(struct mlx4_dev *dev) mutex_lock(&priv->bond_mutex); - if (mlx4_is_bonded(dev)) + if (mlx4_is_bonded(dev)) { + int ret2 = 0; + ret = mlx4_do_bond(dev, false); + if (ret) + mlx4_err(dev, "Failed to unbond device: %d\n", ret); + if (mlx4_is_master(dev)) + ret2 = mlx4_mf_unbond(dev); + if (ret2) { + mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2); + ret = ret2; + } + } mutex_unlock(&priv->bond_mutex); - if (ret) - mlx4_err(dev, "Failed to unbond device: %d\n", ret); - else + if (!ret) mlx4_dbg(dev, "Device is unbonded\n"); + return ret; } EXPORT_SYMBOL_GPL(mlx4_unbond); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index e1cf9036af22..2404c22ad2b2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -736,6 +736,7 @@ struct mlx4_catas_err { struct mlx4_mac_table { __be64 entries[MLX4_MAX_MAC_NUM]; int refs[MLX4_MAX_MAC_NUM]; + bool is_dup[MLX4_MAX_MAC_NUM]; struct mutex mutex; int total; int max; @@ -758,6 +759,7 @@ struct mlx4_roce_gid_table { struct mlx4_vlan_table { __be32 entries[MLX4_MAX_VLAN_NUM]; int refs[MLX4_MAX_VLAN_NUM]; + int is_dup[MLX4_MAX_VLAN_NUM]; struct mutex mutex; int total; int max; @@ -1225,6 +1227,10 @@ void mlx4_init_roce_gid_table(struct mlx4_dev *dev, struct mlx4_roce_gid_table *table); void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); +int mlx4_bond_vlan_table(struct mlx4_dev *dev); +int mlx4_unbond_vlan_table(struct mlx4_dev *dev); +int mlx4_bond_mac_table(struct mlx4_dev *dev); +int mlx4_unbond_mac_table(struct mlx4_dev *dev); int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz); /* resource tracker functions*/ @@ -1385,6 +1391,8 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); int mlx4_config_mad_demux(struct mlx4_dev *dev); int mlx4_do_bond(struct mlx4_dev *dev, bool enable); +int mlx4_bond_fs_rules(struct mlx4_dev *dev); +int mlx4_unbond_fs_rules(struct mlx4_dev *dev); enum mlx4_zone_flags { MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO = 1UL << 0, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index c41f15102ae0..35de7d2e6b34 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -320,11 +320,6 @@ struct mlx4_en_rx_ring { void *rx_info; unsigned long bytes; unsigned long packets; -#ifdef CONFIG_NET_RX_BUSY_POLL - unsigned long yields; - unsigned long misses; - unsigned long cleaned; -#endif unsigned long csum_ok; unsigned long csum_none; unsigned long csum_complete; @@ -347,18 +342,6 @@ struct mlx4_en_cq { struct mlx4_cqe *buf; #define MLX4_EN_OPCODE_ERROR 0x1e -#ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int state; -#define MLX4_EN_CQ_STATE_IDLE 0 -#define MLX4_EN_CQ_STATE_NAPI 1 /* NAPI owns this CQ */ -#define MLX4_EN_CQ_STATE_POLL 2 /* poll owns this CQ */ -#define MLX4_CQ_LOCKED (MLX4_EN_CQ_STATE_NAPI | MLX4_EN_CQ_STATE_POLL) -#define MLX4_EN_CQ_STATE_NAPI_YIELD 4 /* NAPI yielded this CQ */ -#define MLX4_EN_CQ_STATE_POLL_YIELD 8 /* poll yielded this CQ */ -#define CQ_YIELD (MLX4_EN_CQ_STATE_NAPI_YIELD | MLX4_EN_CQ_STATE_POLL_YIELD) -#define CQ_USER_PEND (MLX4_EN_CQ_STATE_POLL | MLX4_EN_CQ_STATE_POLL_YIELD) - spinlock_t poll_lock; /* protects from LLS/napi conflicts */ -#endif /* CONFIG_NET_RX_BUSY_POLL */ struct irq_desc *irq_desc; }; @@ -622,115 +605,6 @@ static inline struct mlx4_cqe *mlx4_en_get_cqe(void *buf, int idx, int cqe_sz) return buf + idx * cqe_sz; } -#ifdef CONFIG_NET_RX_BUSY_POLL -static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) -{ - spin_lock_init(&cq->poll_lock); - cq->state = MLX4_EN_CQ_STATE_IDLE; -} - -/* called from the device poll rutine to get ownership of a cq */ -static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) -{ - int rc = true; - spin_lock(&cq->poll_lock); - if (cq->state & MLX4_CQ_LOCKED) { - WARN_ON(cq->state & MLX4_EN_CQ_STATE_NAPI); - cq->state |= MLX4_EN_CQ_STATE_NAPI_YIELD; - rc = false; - } else - /* we don't care if someone yielded */ - cq->state = MLX4_EN_CQ_STATE_NAPI; - spin_unlock(&cq->poll_lock); - return rc; -} - -/* returns true is someone tried to get the cq while napi had it */ -static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) -{ - int rc = false; - spin_lock(&cq->poll_lock); - WARN_ON(cq->state & (MLX4_EN_CQ_STATE_POLL | - MLX4_EN_CQ_STATE_NAPI_YIELD)); - - if (cq->state & MLX4_EN_CQ_STATE_POLL_YIELD) - rc = true; - cq->state = MLX4_EN_CQ_STATE_IDLE; - spin_unlock(&cq->poll_lock); - return rc; -} - -/* called from mlx4_en_low_latency_poll() */ -static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) -{ - int rc = true; - spin_lock_bh(&cq->poll_lock); - if ((cq->state & MLX4_CQ_LOCKED)) { - struct net_device *dev = cq->dev; - struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; - - cq->state |= MLX4_EN_CQ_STATE_POLL_YIELD; - rc = false; - rx_ring->yields++; - } else - /* preserve yield marks */ - cq->state |= MLX4_EN_CQ_STATE_POLL; - spin_unlock_bh(&cq->poll_lock); - return rc; -} - -/* returns true if someone tried to get the cq while it was locked */ -static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) -{ - int rc = false; - spin_lock_bh(&cq->poll_lock); - WARN_ON(cq->state & (MLX4_EN_CQ_STATE_NAPI)); - - if (cq->state & MLX4_EN_CQ_STATE_POLL_YIELD) - rc = true; - cq->state = MLX4_EN_CQ_STATE_IDLE; - spin_unlock_bh(&cq->poll_lock); - return rc; -} - -/* true if a socket is polling, even if it did not get the lock */ -static inline bool mlx4_en_cq_busy_polling(struct mlx4_en_cq *cq) -{ - WARN_ON(!(cq->state & MLX4_CQ_LOCKED)); - return cq->state & CQ_USER_PEND; -} -#else -static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) -{ -} - -static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) -{ - return true; -} - -static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) -{ - return false; -} - -static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) -{ - return false; -} - -static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) -{ - return false; -} - -static inline bool mlx4_en_cq_busy_polling(struct mlx4_en_cq *cq) -{ - return false; -} -#endif /* CONFIG_NET_RX_BUSY_POLL */ - #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63) void mlx4_en_update_loopback_state(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index c2b21313dba7..f2550425c251 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -61,6 +61,7 @@ void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { table->entries[i] = 0; table->refs[i] = 0; + table->is_dup[i] = false; } table->max = 1 << dev->caps.log_num_macs; table->total = 0; @@ -74,6 +75,7 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table) for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { table->entries[i] = 0; table->refs[i] = 0; + table->is_dup[i] = false; } table->max = (1 << dev->caps.log_num_vlans) - MLX4_VLAN_REGULAR; table->total = 0; @@ -159,21 +161,94 @@ int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx) } EXPORT_SYMBOL_GPL(mlx4_find_cached_mac); +static bool mlx4_need_mf_bond(struct mlx4_dev *dev) +{ + int i, num_eth_ports = 0; + + if (!mlx4_is_mfunc(dev)) + return false; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) + ++num_eth_ports; + + return (num_eth_ports == 2) ? true : false; +} + int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) { struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; struct mlx4_mac_table *table = &info->mac_table; int i, err = 0; int free = -1; + int free_for_dup = -1; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; + bool need_mf_bond = mlx4_need_mf_bond(dev); + bool can_mf_bond = true; + + mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d %s duplicate\n", + (unsigned long long)mac, port, + dup ? "with" : "without"); + + if (need_mf_bond) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } + + if (need_mf_bond) { + int index_at_port = -1; + int index_at_dup_port = -1; - mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d\n", - (unsigned long long) mac, port); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))) + index_at_port = i; + if (((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(dup_table->entries[i])))) + index_at_dup_port = i; + } + + /* check that same mac is not in the tables at different indices */ + if ((index_at_port != index_at_dup_port) && + (index_at_port >= 0) && + (index_at_dup_port >= 0)) + can_mf_bond = false; + + /* If the mac is already in the primary table, the slot must be + * available in the duplicate table as well. + */ + if (index_at_port >= 0 && index_at_dup_port < 0 && + dup_table->refs[index_at_port]) { + can_mf_bond = false; + } + /* If the mac is already in the duplicate table, check that the + * corresponding index is not occupied in the primary table, or + * the primary table already contains the mac at the same index. + * Otherwise, you cannot bond (primary contains a different mac + * at that index). + */ + if (index_at_dup_port >= 0) { + if (!table->refs[index_at_dup_port] || + ((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(table->entries[index_at_dup_port])))) + free_for_dup = index_at_dup_port; + else + can_mf_bond = false; + } + } - mutex_lock(&table->mutex); for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { if (!table->refs[i]) { if (free < 0) free = i; + if (free_for_dup < 0 && need_mf_bond && can_mf_bond) { + if (!dup_table->refs[i]) + free_for_dup = i; + } continue; } @@ -182,10 +257,30 @@ int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) /* MAC already registered, increment ref count */ err = i; ++table->refs[i]; + if (dup) { + u64 dup_mac = MLX4_MAC_MASK & be64_to_cpu(dup_table->entries[i]); + + if (dup_mac != mac || !dup_table->is_dup[i]) { + mlx4_warn(dev, "register mac: expect duplicate mac 0x%llx on port %d index %d\n", + mac, dup_port, i); + } + } goto out; } } + if (need_mf_bond && (free_for_dup < 0)) { + if (dup) { + mlx4_warn(dev, "Fail to allocate duplicate MAC table entry\n"); + mlx4_warn(dev, "High Availability for virtual functions may not work as expected\n"); + dup = false; + } + can_mf_bond = false; + } + + if (need_mf_bond && can_mf_bond) + free = free_for_dup; + mlx4_dbg(dev, "Free MAC index is %d\n", free); if (table->total == table->max) { @@ -205,10 +300,35 @@ int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) goto out; } table->refs[free] = 1; - err = free; + table->is_dup[free] = false; ++table->total; + if (dup) { + dup_table->refs[free] = 0; + dup_table->is_dup[free] = true; + dup_table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID); + + err = mlx4_set_port_mac_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_warn(dev, "Failed adding duplicate mac: 0x%llx\n", mac); + dup_table->is_dup[free] = false; + dup_table->entries[free] = 0; + goto out; + } + ++dup_table->total; + } + err = free; out: - mutex_unlock(&table->mutex); + if (need_mf_bond) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } return err; } EXPORT_SYMBOL_GPL(__mlx4_register_mac); @@ -255,6 +375,9 @@ void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) struct mlx4_port_info *info; struct mlx4_mac_table *table; int index; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; if (port < 1 || port > dev->caps.num_ports) { mlx4_warn(dev, "invalid port number (%d), aborting...\n", port); @@ -262,22 +385,59 @@ void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) } info = &mlx4_priv(dev)->port[port]; table = &info->mac_table; - mutex_lock(&table->mutex); + + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } + index = find_index(dev, table, mac); if (validate_index(dev, table, index)) goto out; - if (--table->refs[index]) { + + if (--table->refs[index] || table->is_dup[index]) { mlx4_dbg(dev, "Have more references for index %d, no need to modify mac table\n", index); + if (!table->refs[index]) + dup_table->is_dup[index] = false; goto out; } table->entries[index] = 0; - mlx4_set_port_mac_table(dev, port, table->entries); + if (mlx4_set_port_mac_table(dev, port, table->entries)) + mlx4_warn(dev, "Fail to set mac in port %d during unregister\n", port); --table->total; + + if (dup) { + dup_table->is_dup[index] = false; + if (dup_table->refs[index]) + goto out; + dup_table->entries[index] = 0; + if (mlx4_set_port_mac_table(dev, dup_port, dup_table->entries)) + mlx4_warn(dev, "Fail to set mac in duplicate port %d during unregister\n", dup_port); + + --table->total; + } out: - mutex_unlock(&table->mutex); + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } } EXPORT_SYMBOL_GPL(__mlx4_unregister_mac); @@ -311,9 +471,22 @@ int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) struct mlx4_mac_table *table = &info->mac_table; int index = qpn - info->base_qpn; int err = 0; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; /* CX1 doesn't support multi-functions */ - mutex_lock(&table->mutex); + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } err = validate_index(dev, table, index); if (err) @@ -326,9 +499,30 @@ int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) mlx4_err(dev, "Failed adding MAC: 0x%llx\n", (unsigned long long) new_mac); table->entries[index] = 0; + } else { + if (dup) { + dup_table->entries[index] = cpu_to_be64(new_mac | MLX4_MAC_VALID); + + err = mlx4_set_port_mac_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_err(dev, "Failed adding duplicate MAC: 0x%llx\n", + (unsigned long long)new_mac); + dup_table->entries[index] = 0; + } + } } out: - mutex_unlock(&table->mutex); + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } return err; } EXPORT_SYMBOL_GPL(__mlx4_replace_mac); @@ -380,8 +574,28 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; int i, err = 0; int free = -1; - - mutex_lock(&table->mutex); + int free_for_dup = -1; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_vlan_table *dup_table = &mlx4_priv(dev)->port[dup_port].vlan_table; + bool need_mf_bond = mlx4_need_mf_bond(dev); + bool can_mf_bond = true; + + mlx4_dbg(dev, "Registering VLAN: %d for port %d %s duplicate\n", + vlan, port, + dup ? "with" : "without"); + + if (need_mf_bond) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } if (table->total == table->max) { /* No free vlan entries */ @@ -389,22 +603,85 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, goto out; } + if (need_mf_bond) { + int index_at_port = -1; + int index_at_dup_port = -1; + + for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) { + if ((vlan == (MLX4_VLAN_MASK & be32_to_cpu(table->entries[i])))) + index_at_port = i; + if ((vlan == (MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[i])))) + index_at_dup_port = i; + } + /* check that same vlan is not in the tables at different indices */ + if ((index_at_port != index_at_dup_port) && + (index_at_port >= 0) && + (index_at_dup_port >= 0)) + can_mf_bond = false; + + /* If the vlan is already in the primary table, the slot must be + * available in the duplicate table as well. + */ + if (index_at_port >= 0 && index_at_dup_port < 0 && + dup_table->refs[index_at_port]) { + can_mf_bond = false; + } + /* If the vlan is already in the duplicate table, check that the + * corresponding index is not occupied in the primary table, or + * the primary table already contains the vlan at the same index. + * Otherwise, you cannot bond (primary contains a different vlan + * at that index). + */ + if (index_at_dup_port >= 0) { + if (!table->refs[index_at_dup_port] || + (vlan == (MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[index_at_dup_port])))) + free_for_dup = index_at_dup_port; + else + can_mf_bond = false; + } + } + for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) { - if (free < 0 && (table->refs[i] == 0)) { - free = i; - continue; + if (!table->refs[i]) { + if (free < 0) + free = i; + if (free_for_dup < 0 && need_mf_bond && can_mf_bond) { + if (!dup_table->refs[i]) + free_for_dup = i; + } } - if (table->refs[i] && + if ((table->refs[i] || table->is_dup[i]) && (vlan == (MLX4_VLAN_MASK & be32_to_cpu(table->entries[i])))) { /* Vlan already registered, increase references count */ + mlx4_dbg(dev, "vlan %u is already registered.\n", vlan); *index = i; ++table->refs[i]; + if (dup) { + u16 dup_vlan = MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[i]); + + if (dup_vlan != vlan || !dup_table->is_dup[i]) { + mlx4_warn(dev, "register vlan: expected duplicate vlan %u on port %d index %d\n", + vlan, dup_port, i); + } + } goto out; } } + if (need_mf_bond && (free_for_dup < 0)) { + if (dup) { + mlx4_warn(dev, "Fail to allocate duplicate VLAN table entry\n"); + mlx4_warn(dev, "High Availability for virtual functions may not work as expected\n"); + dup = false; + } + can_mf_bond = false; + } + + if (need_mf_bond && can_mf_bond) + free = free_for_dup; + if (free < 0) { err = -ENOMEM; goto out; @@ -412,6 +689,7 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, /* Register new VLAN */ table->refs[free] = 1; + table->is_dup[free] = false; table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID); err = mlx4_set_port_vlan_table(dev, port, table->entries); @@ -421,11 +699,35 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, table->entries[free] = 0; goto out; } + ++table->total; + if (dup) { + dup_table->refs[free] = 0; + dup_table->is_dup[free] = true; + dup_table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID); + + err = mlx4_set_port_vlan_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_warn(dev, "Failed adding duplicate vlan: %u\n", vlan); + dup_table->is_dup[free] = false; + dup_table->entries[free] = 0; + goto out; + } + ++dup_table->total; + } *index = free; - ++table->total; out: - mutex_unlock(&table->mutex); + if (need_mf_bond) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } return err; } @@ -455,8 +757,22 @@ void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) { struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; int index; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_vlan_table *dup_table = &mlx4_priv(dev)->port[dup_port].vlan_table; + + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } - mutex_lock(&table->mutex); if (mlx4_find_cached_vlan(dev, port, vlan, &index)) { mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan); goto out; @@ -467,16 +783,38 @@ void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) goto out; } - if (--table->refs[index]) { + if (--table->refs[index] || table->is_dup[index]) { mlx4_dbg(dev, "Have %d more references for index %d, no need to modify vlan table\n", table->refs[index], index); + if (!table->refs[index]) + dup_table->is_dup[index] = false; goto out; } table->entries[index] = 0; - mlx4_set_port_vlan_table(dev, port, table->entries); + if (mlx4_set_port_vlan_table(dev, port, table->entries)) + mlx4_warn(dev, "Fail to set vlan in port %d during unregister\n", port); --table->total; + if (dup) { + dup_table->is_dup[index] = false; + if (dup_table->refs[index]) + goto out; + dup_table->entries[index] = 0; + if (mlx4_set_port_vlan_table(dev, dup_port, dup_table->entries)) + mlx4_warn(dev, "Fail to set vlan in duplicate port %d during unregister\n", dup_port); + --dup_table->total; + } out: - mutex_unlock(&table->mutex); + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } } void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) @@ -495,6 +833,220 @@ void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) } EXPORT_SYMBOL_GPL(mlx4_unregister_vlan); +int mlx4_bond_mac_table(struct mlx4_dev *dev) +{ + struct mlx4_mac_table *t1 = &mlx4_priv(dev)->port[1].mac_table; + struct mlx4_mac_table *t2 = &mlx4_priv(dev)->port[2].mac_table; + int ret = 0; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if ((t1->entries[i] != t2->entries[i]) && + t1->entries[i] && t2->entries[i]) { + mlx4_warn(dev, "can't duplicate entry %d in mac table\n", i); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (t1->entries[i] && !t2->entries[i]) { + t2->entries[i] = t1->entries[i]; + t2->is_dup[i] = true; + update2 = true; + } else if (!t1->entries[i] && t2->entries[i]) { + t1->entries[i] = t2->entries[i]; + t1->is_dup[i] = true; + update1 = true; + } else if (t1->entries[i] && t2->entries[i]) { + t1->is_dup[i] = true; + t2->is_dup[i] = true; + } + } + + if (update1) { + ret = mlx4_set_port_mac_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to set MAC table for port 1 (%d)\n", ret); + } + if (!ret && update2) { + ret = mlx4_set_port_mac_table(dev, 2, t2->entries); + if (ret) + mlx4_warn(dev, "failed to set MAC table for port 2 (%d)\n", ret); + } + + if (ret) + mlx4_warn(dev, "failed to create mirror MAC tables\n"); +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_unbond_mac_table(struct mlx4_dev *dev) +{ + struct mlx4_mac_table *t1 = &mlx4_priv(dev)->port[1].mac_table; + struct mlx4_mac_table *t2 = &mlx4_priv(dev)->port[2].mac_table; + int ret = 0; + int ret1; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (t1->entries[i] != t2->entries[i]) { + mlx4_warn(dev, "mac table is in an unexpected state when trying to unbond\n"); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (!t1->entries[i]) + continue; + t1->is_dup[i] = false; + if (!t1->refs[i]) { + t1->entries[i] = 0; + update1 = true; + } + t2->is_dup[i] = false; + if (!t2->refs[i]) { + t2->entries[i] = 0; + update2 = true; + } + } + + if (update1) { + ret = mlx4_set_port_mac_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to unmirror MAC tables for port 1(%d)\n", ret); + } + if (update2) { + ret1 = mlx4_set_port_mac_table(dev, 2, t2->entries); + if (ret1) { + mlx4_warn(dev, "failed to unmirror MAC tables for port 2(%d)\n", ret1); + ret = ret1; + } + } +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_bond_vlan_table(struct mlx4_dev *dev) +{ + struct mlx4_vlan_table *t1 = &mlx4_priv(dev)->port[1].vlan_table; + struct mlx4_vlan_table *t2 = &mlx4_priv(dev)->port[2].vlan_table; + int ret = 0; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if ((t1->entries[i] != t2->entries[i]) && + t1->entries[i] && t2->entries[i]) { + mlx4_warn(dev, "can't duplicate entry %d in vlan table\n", i); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (t1->entries[i] && !t2->entries[i]) { + t2->entries[i] = t1->entries[i]; + t2->is_dup[i] = true; + update2 = true; + } else if (!t1->entries[i] && t2->entries[i]) { + t1->entries[i] = t2->entries[i]; + t1->is_dup[i] = true; + update1 = true; + } else if (t1->entries[i] && t2->entries[i]) { + t1->is_dup[i] = true; + t2->is_dup[i] = true; + } + } + + if (update1) { + ret = mlx4_set_port_vlan_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to set VLAN table for port 1 (%d)\n", ret); + } + if (!ret && update2) { + ret = mlx4_set_port_vlan_table(dev, 2, t2->entries); + if (ret) + mlx4_warn(dev, "failed to set VLAN table for port 2 (%d)\n", ret); + } + + if (ret) + mlx4_warn(dev, "failed to create mirror VLAN tables\n"); +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_unbond_vlan_table(struct mlx4_dev *dev) +{ + struct mlx4_vlan_table *t1 = &mlx4_priv(dev)->port[1].vlan_table; + struct mlx4_vlan_table *t2 = &mlx4_priv(dev)->port[2].vlan_table; + int ret = 0; + int ret1; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (t1->entries[i] != t2->entries[i]) { + mlx4_warn(dev, "vlan table is in an unexpected state when trying to unbond\n"); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (!t1->entries[i]) + continue; + t1->is_dup[i] = false; + if (!t1->refs[i]) { + t1->entries[i] = 0; + update1 = true; + } + t2->is_dup[i] = false; + if (!t2->refs[i]) { + t2->entries[i] = 0; + update2 = true; + } + } + + if (update1) { + ret = mlx4_set_port_vlan_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to unmirror VLAN tables for port 1(%d)\n", ret); + } + if (update2) { + ret1 = mlx4_set_port_vlan_table(dev, 2, t2->entries); + if (ret1) { + mlx4_warn(dev, "failed to unmirror VLAN tables for port 2(%d)\n", ret1); + ret = ret1; + } + } +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps) { struct mlx4_cmd_mailbox *inmailbox, *outmailbox; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index cad6c44df91c..b46dbe29ef6c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -222,6 +222,13 @@ enum res_fs_rule_states { struct res_fs_rule { struct res_common com; int qpn; + /* VF DMFS mbox with port flipped */ + void *mirr_mbox; + /* > 0 --> apply mirror when getting into HA mode */ + /* = 0 --> un-apply mirror when getting out of HA mode */ + u32 mirr_mbox_size; + struct list_head mirr_list; + u64 mirr_rule_id; }; static void *res_tracker_lookup(struct rb_root *root, u64 res_id) @@ -4284,6 +4291,22 @@ err_mac: return err; } +static u32 qp_attach_mbox_size(void *mbox) +{ + u32 size = sizeof(struct mlx4_net_trans_rule_hw_ctrl); + struct _rule_hw *rule_header; + + rule_header = (struct _rule_hw *)(mbox + size); + + while (rule_header->size) { + size += rule_header->size * sizeof(u32); + rule_header += 1; + } + return size; +} + +static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule); + int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -4300,6 +4323,8 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_net_trans_rule_hw_ctrl *ctrl; struct _rule_hw *rule_header; int header_id; + struct res_fs_rule *rrule; + u32 mbox_size; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) @@ -4329,7 +4354,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, case MLX4_NET_TRANS_RULE_ID_ETH: if (validate_eth_header_mac(slave, rule_header, rlist)) { err = -EINVAL; - goto err_put; + goto err_put_qp; } break; case MLX4_NET_TRANS_RULE_ID_IB: @@ -4340,7 +4365,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, pr_warn("Can't attach FS rule without L2 headers, adding L2 header\n"); if (add_eth_header(dev, slave, inbox, rlist, header_id)) { err = -EINVAL; - goto err_put; + goto err_put_qp; } vhcr->in_modifier += sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2; @@ -4348,7 +4373,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, default: pr_err("Corrupted mailbox\n"); err = -EINVAL; - goto err_put; + goto err_put_qp; } execute: @@ -4357,23 +4382,69 @@ execute: MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) - goto err_put; + goto err_put_qp; + err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, qpn); if (err) { mlx4_err(dev, "Fail to add flow steering resources\n"); - /* detach rule*/ + goto err_detach; + } + + err = get_res(dev, slave, vhcr->out_param, RES_FS_RULE, &rrule); + if (err) + goto err_detach; + + mbox_size = qp_attach_mbox_size(inbox->buf); + rrule->mirr_mbox = kmalloc(mbox_size, GFP_KERNEL); + if (!rrule->mirr_mbox) { + err = -ENOMEM; + goto err_put_rule; + } + rrule->mirr_mbox_size = mbox_size; + rrule->mirr_rule_id = 0; + memcpy(rrule->mirr_mbox, inbox->buf, mbox_size); + + /* set different port */ + ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)rrule->mirr_mbox; + if (ctrl->port == 1) + ctrl->port = 2; + else + ctrl->port = 1; + + if (mlx4_is_bonded(dev)) + mlx4_do_mirror_rule(dev, rrule); + + atomic_inc(&rqp->ref_count); + +err_put_rule: + put_res(dev, slave, vhcr->out_param, RES_FS_RULE); +err_detach: + /* detach rule on error */ + if (err) mlx4_cmd(dev, vhcr->out_param, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); - goto err_put; - } - atomic_inc(&rqp->ref_count); -err_put: +err_put_qp: put_res(dev, slave, qpn, RES_QP); return err; } +static int mlx4_undo_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule) +{ + int err; + + err = rem_res_range(dev, fs_rule->com.owner, fs_rule->com.res_id, 1, RES_FS_RULE, 0); + if (err) { + mlx4_err(dev, "Fail to remove flow steering resources\n"); + return err; + } + + mlx4_cmd(dev, fs_rule->com.res_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + return 0; +} + int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -4383,6 +4454,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, int err; struct res_qp *rqp; struct res_fs_rule *rrule; + u64 mirr_reg_id; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) @@ -4391,12 +4463,30 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, err = get_res(dev, slave, vhcr->in_param, RES_FS_RULE, &rrule); if (err) return err; + + if (!rrule->mirr_mbox) { + mlx4_err(dev, "Mirror rules cannot be removed explicitly\n"); + put_res(dev, slave, vhcr->in_param, RES_FS_RULE); + return -EINVAL; + } + mirr_reg_id = rrule->mirr_rule_id; + kfree(rrule->mirr_mbox); + /* Release the rule form busy state before removal */ put_res(dev, slave, vhcr->in_param, RES_FS_RULE); err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp); if (err) return err; + if (mirr_reg_id && mlx4_is_bonded(dev)) { + err = get_res(dev, slave, mirr_reg_id, RES_FS_RULE, &rrule); + if (err) { + mlx4_err(dev, "Fail to get resource of mirror rule\n"); + } else { + put_res(dev, slave, mirr_reg_id, RES_FS_RULE); + mlx4_undo_mirror_rule(dev, rrule); + } + } err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0); if (err) { mlx4_err(dev, "Fail to remove flow steering resources\n"); @@ -4834,6 +4924,91 @@ static void rem_slave_mtts(struct mlx4_dev *dev, int slave) spin_unlock_irq(mlx4_tlock(dev)); } +static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + struct res_fs_rule *mirr_rule; + u64 reg_id; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + if (!fs_rule->mirr_mbox) { + mlx4_err(dev, "rule mirroring mailbox is null\n"); + return -EINVAL; + } + memcpy(mailbox->buf, fs_rule->mirr_mbox, fs_rule->mirr_mbox_size); + err = mlx4_cmd_imm(dev, mailbox->dma, ®_id, fs_rule->mirr_mbox_size >> 2, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + mlx4_free_cmd_mailbox(dev, mailbox); + + if (err) + goto err; + + err = add_res_range(dev, fs_rule->com.owner, reg_id, 1, RES_FS_RULE, fs_rule->qpn); + if (err) + goto err_detach; + + err = get_res(dev, fs_rule->com.owner, reg_id, RES_FS_RULE, &mirr_rule); + if (err) + goto err_rem; + + fs_rule->mirr_rule_id = reg_id; + mirr_rule->mirr_rule_id = 0; + mirr_rule->mirr_mbox_size = 0; + mirr_rule->mirr_mbox = NULL; + put_res(dev, fs_rule->com.owner, reg_id, RES_FS_RULE); + + return 0; +err_rem: + rem_res_range(dev, fs_rule->com.owner, reg_id, 1, RES_FS_RULE, 0); +err_detach: + mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); +err: + return err; +} + +static int mlx4_mirror_fs_rules(struct mlx4_dev *dev, bool bond) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = + &priv->mfunc.master.res_tracker; + struct rb_root *root = &tracker->res_tree[RES_FS_RULE]; + struct rb_node *p; + struct res_fs_rule *fs_rule; + int err = 0; + LIST_HEAD(mirr_list); + + for (p = rb_first(root); p; p = rb_next(p)) { + fs_rule = rb_entry(p, struct res_fs_rule, com.node); + if ((bond && fs_rule->mirr_mbox_size) || + (!bond && !fs_rule->mirr_mbox_size)) + list_add_tail(&fs_rule->mirr_list, &mirr_list); + } + + list_for_each_entry(fs_rule, &mirr_list, mirr_list) { + if (bond) + err += mlx4_do_mirror_rule(dev, fs_rule); + else + err += mlx4_undo_mirror_rule(dev, fs_rule); + } + return err; +} + +int mlx4_bond_fs_rules(struct mlx4_dev *dev) +{ + return mlx4_mirror_fs_rules(dev, true); +} + +int mlx4_unbond_fs_rules(struct mlx4_dev *dev) +{ + return mlx4_mirror_fs_rules(dev, false); +} + static void rem_slave_fs_rule(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 158c88c69ef9..c503ea05e742 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -13,6 +13,7 @@ config MLX5_CORE config MLX5_CORE_EN bool "Mellanox Technologies ConnectX-4 Ethernet support" depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE + select PTP_1588_CLOCK default n ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 26a68b8af2c5..01c0256effb8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o \ - en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ - en_txrx.o + mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ + en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ + en_txrx.o en_clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 22e72bf1ae48..9ea49a893323 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -32,6 +32,9 @@ #include <linux/if_vlan.h> #include <linux/etherdevice.h> +#include <linux/timecounter.h> +#include <linux/net_tstamp.h> +#include <linux/ptp_clock_kernel.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/qp.h> #include <linux/mlx5/cq.h> @@ -64,6 +67,8 @@ #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ #define MLX5E_SQ_BF_BUDGET 16 +#define MLX5E_NUM_MAIN_GROUPS 9 + static const char vport_strings[][ETH_GSTRING_LEN] = { /* vport statistics */ "rx_packets", @@ -282,6 +287,19 @@ struct mlx5e_params { u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; }; +struct mlx5e_tstamp { + rwlock_t lock; + struct cyclecounter cycles; + struct timecounter clock; + struct hwtstamp_config hwtstamp_config; + u32 nominal_c_mult; + unsigned long overflow_period; + struct delayed_work overflow_work; + struct mlx5_core_dev *mdev; + struct ptp_clock *ptp; + struct ptp_clock_info ptp_info; +}; + enum { MLX5E_RQ_STATE_POST_WQES_ENABLE, }; @@ -313,6 +331,7 @@ struct mlx5e_rq { struct device *pdev; struct net_device *netdev; + struct mlx5e_tstamp *tstamp; struct mlx5e_rq_stats stats; struct mlx5e_cq cq; @@ -326,14 +345,12 @@ struct mlx5e_rq { struct mlx5e_priv *priv; } ____cacheline_aligned_in_smp; -struct mlx5e_tx_skb_cb { +struct mlx5e_tx_wqe_info { u32 num_bytes; u8 num_wqebbs; u8 num_dma; }; -#define MLX5E_TX_SKB_CB(__skb) ((struct mlx5e_tx_skb_cb *)__skb->cb) - enum mlx5e_dma_map_type { MLX5E_DMA_MAP_SINGLE, MLX5E_DMA_MAP_PAGE @@ -369,6 +386,7 @@ struct mlx5e_sq { /* pointers to per packet info: write@xmit, read@completion */ struct sk_buff **skb; struct mlx5e_sq_dma *dma_fifo; + struct mlx5e_tx_wqe_info *wqe_info; /* read only */ struct mlx5_wq_cyc wq; @@ -381,6 +399,7 @@ struct mlx5e_sq { u16 max_inline; u16 edge; struct device *pdev; + struct mlx5e_tstamp *tstamp; __be32 mkey_be; unsigned long state; @@ -442,7 +461,7 @@ enum mlx5e_rqt_ix { struct mlx5e_eth_addr_info { u8 addr[ETH_ALEN + 2]; u32 tt_vec; - u32 ft_ix[MLX5E_NUM_TT]; /* flow table index per traffic type */ + struct mlx5_flow_rule *ft_rule[MLX5E_NUM_TT]; }; #define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE) @@ -465,15 +484,23 @@ enum { }; struct mlx5e_vlan_db { - u32 active_vlans_ft_ix[VLAN_N_VID]; - u32 untagged_rule_ft_ix; - u32 any_vlan_rule_ft_ix; + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + struct mlx5_flow_rule *active_vlans_rule[VLAN_N_VID]; + struct mlx5_flow_rule *untagged_rule; + struct mlx5_flow_rule *any_vlan_rule; bool filter_disabled; }; struct mlx5e_flow_table { - void *vlan; - void *main; + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; +}; + +struct mlx5e_flow_tables { + struct mlx5_flow_namespace *ns; + struct mlx5e_flow_table vlan; + struct mlx5e_flow_table main; }; struct mlx5e_priv { @@ -496,7 +523,7 @@ struct mlx5e_priv { u32 rqtn[MLX5E_NUM_RQT]; u32 tirn[MLX5E_NUM_TT]; - struct mlx5e_flow_table ft; + struct mlx5e_flow_tables fts; struct mlx5e_eth_addr_db eth_addr; struct mlx5e_vlan_db vlan; @@ -509,6 +536,7 @@ struct mlx5e_priv { struct mlx5_core_dev *mdev; struct net_device *netdev; struct mlx5e_stats stats; + struct mlx5e_tstamp tstamp; }; #define MLX5E_NET_IP_ALIGN 2 @@ -564,7 +592,7 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq); -bool mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); +int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); @@ -575,6 +603,13 @@ void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv); void mlx5e_init_eth_addr(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_work(struct work_struct *work); +void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp, + struct skb_shared_hwtstamps *hwts); +void mlx5e_timestamp_init(struct mlx5e_priv *priv); +void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv); +int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr); +int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr); + int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c new file mode 100644 index 000000000000..be6543570b2b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/clocksource.h> +#include "en.h" + +enum { + MLX5E_CYCLES_SHIFT = 23 +}; + +void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp, + struct skb_shared_hwtstamps *hwts) +{ + u64 nsec; + + read_lock(&tstamp->lock); + nsec = timecounter_cyc2time(&tstamp->clock, timestamp); + read_unlock(&tstamp->lock); + + hwts->hwtstamp = ns_to_ktime(nsec); +} + +static cycle_t mlx5e_read_internal_timer(const struct cyclecounter *cc) +{ + struct mlx5e_tstamp *tstamp = container_of(cc, struct mlx5e_tstamp, + cycles); + + return mlx5_read_internal_timer(tstamp->mdev) & cc->mask; +} + +static void mlx5e_timestamp_overflow(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp, + overflow_work); + + write_lock(&tstamp->lock); + timecounter_read(&tstamp->clock); + write_unlock(&tstamp->lock); + schedule_delayed_work(&tstamp->overflow_work, tstamp->overflow_period); +} + +int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct hwtstamp_config config; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) + return -EOPNOTSUPP; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* TX HW timestamp */ + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + /* RX HW timestamp */ + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + return -ERANGE; + } + + memcpy(&priv->tstamp.hwtstamp_config, &config, sizeof(config)); + + return copy_to_user(ifr->ifr_data, &config, + sizeof(config)) ? -EFAULT : 0; +} + +int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct hwtstamp_config *cfg = &priv->tstamp.hwtstamp_config; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) + return -EOPNOTSUPP; + + return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0; +} + +static int mlx5e_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, + ptp_info); + u64 ns = timespec64_to_ns(ts); + + write_lock(&tstamp->lock); + timecounter_init(&tstamp->clock, &tstamp->cycles, ns); + write_unlock(&tstamp->lock); + + return 0; +} + +static int mlx5e_ptp_gettime(struct ptp_clock_info *ptp, + struct timespec64 *ts) +{ + struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, + ptp_info); + u64 ns; + + write_lock(&tstamp->lock); + ns = timecounter_read(&tstamp->clock); + write_unlock(&tstamp->lock); + + *ts = ns_to_timespec64(ns); + + return 0; +} + +static int mlx5e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, + ptp_info); + + write_lock(&tstamp->lock); + timecounter_adjtime(&tstamp->clock, delta); + write_unlock(&tstamp->lock); + + return 0; +} + +static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) +{ + u64 adj; + u32 diff; + int neg_adj = 0; + struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, + ptp_info); + + if (delta < 0) { + neg_adj = 1; + delta = -delta; + } + + adj = tstamp->nominal_c_mult; + adj *= delta; + diff = div_u64(adj, 1000000000ULL); + + write_lock(&tstamp->lock); + timecounter_read(&tstamp->clock); + tstamp->cycles.mult = neg_adj ? tstamp->nominal_c_mult - diff : + tstamp->nominal_c_mult + diff; + write_unlock(&tstamp->lock); + + return 0; +} + +static const struct ptp_clock_info mlx5e_ptp_clock_info = { + .owner = THIS_MODULE, + .max_adj = 100000000, + .n_alarm = 0, + .n_ext_ts = 0, + .n_per_out = 0, + .n_pins = 0, + .pps = 0, + .adjfreq = mlx5e_ptp_adjfreq, + .adjtime = mlx5e_ptp_adjtime, + .gettime64 = mlx5e_ptp_gettime, + .settime64 = mlx5e_ptp_settime, + .enable = NULL, +}; + +static void mlx5e_timestamp_init_config(struct mlx5e_tstamp *tstamp) +{ + tstamp->hwtstamp_config.tx_type = HWTSTAMP_TX_OFF; + tstamp->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; +} + +void mlx5e_timestamp_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tstamp *tstamp = &priv->tstamp; + u64 ns; + u64 frac = 0; + u32 dev_freq; + + mlx5e_timestamp_init_config(tstamp); + dev_freq = MLX5_CAP_GEN(priv->mdev, device_frequency_khz); + if (!dev_freq) { + mlx5_core_warn(priv->mdev, "invalid device_frequency_khz, aborting HW clock init\n"); + return; + } + rwlock_init(&tstamp->lock); + tstamp->cycles.read = mlx5e_read_internal_timer; + tstamp->cycles.shift = MLX5E_CYCLES_SHIFT; + tstamp->cycles.mult = clocksource_khz2mult(dev_freq, + tstamp->cycles.shift); + tstamp->nominal_c_mult = tstamp->cycles.mult; + tstamp->cycles.mask = CLOCKSOURCE_MASK(41); + tstamp->mdev = priv->mdev; + + timecounter_init(&tstamp->clock, &tstamp->cycles, + ktime_to_ns(ktime_get_real())); + + /* Calculate period in seconds to call the overflow watchdog - to make + * sure counter is checked at least once every wrap around. + */ + ns = cyclecounter_cyc2ns(&tstamp->cycles, tstamp->cycles.mask, + frac, &frac); + do_div(ns, NSEC_PER_SEC / 2 / HZ); + tstamp->overflow_period = ns; + + INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow); + if (tstamp->overflow_period) + schedule_delayed_work(&tstamp->overflow_work, 0); + else + mlx5_core_warn(priv->mdev, "invalid overflow period, overflow_work is not scheduled\n"); + + /* Configure the PHC */ + tstamp->ptp_info = mlx5e_ptp_clock_info; + snprintf(tstamp->ptp_info.name, 16, "mlx5 ptp"); + + tstamp->ptp = ptp_clock_register(&tstamp->ptp_info, + &priv->mdev->pdev->dev); + if (IS_ERR_OR_NULL(tstamp->ptp)) { + mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n", + PTR_ERR(tstamp->ptp)); + tstamp->ptp = NULL; + } +} + +void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_tstamp *tstamp = &priv->tstamp; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) + return; + + if (priv->tstamp.ptp) { + ptp_clock_unregister(priv->tstamp.ptp); + priv->tstamp.ptp = NULL; + } + + cancel_delayed_work_sync(&tstamp->overflow_work); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 2e022e900939..65624ac65b4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -855,6 +855,35 @@ static int mlx5e_set_pauseparam(struct net_device *netdev, return err; } +static int mlx5e_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int ret; + + ret = ethtool_op_get_ts_info(dev, info); + if (ret) + return ret; + + info->phc_index = priv->tstamp.ptp ? + ptp_clock_index(priv->tstamp.ptp) : -1; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) + return 0; + + info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = (BIT(1) << HWTSTAMP_TX_OFF) | + (BIT(1) << HWTSTAMP_TX_ON); + + info->rx_filters = (BIT(1) << HWTSTAMP_FILTER_NONE) | + (BIT(1) << HWTSTAMP_FILTER_ALL); + + return 0; +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -878,4 +907,5 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, .set_pauseparam = mlx5e_set_pauseparam, + .get_ts_info = mlx5e_get_ts_info, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c deleted file mode 100644 index 22d603f78273..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c +++ /dev/null @@ -1,907 +0,0 @@ -/* - * Copyright (c) 2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/list.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <linux/tcp.h> -#include <linux/mlx5/flow_table.h> -#include "en.h" - -enum { - MLX5E_FULLMATCH = 0, - MLX5E_ALLMULTI = 1, - MLX5E_PROMISC = 2, -}; - -enum { - MLX5E_UC = 0, - MLX5E_MC_IPV4 = 1, - MLX5E_MC_IPV6 = 2, - MLX5E_MC_OTHER = 3, -}; - -enum { - MLX5E_ACTION_NONE = 0, - MLX5E_ACTION_ADD = 1, - MLX5E_ACTION_DEL = 2, -}; - -struct mlx5e_eth_addr_hash_node { - struct hlist_node hlist; - u8 action; - struct mlx5e_eth_addr_info ai; -}; - -static inline int mlx5e_hash_eth_addr(u8 *addr) -{ - return addr[5]; -} - -static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr) -{ - struct mlx5e_eth_addr_hash_node *hn; - int ix = mlx5e_hash_eth_addr(addr); - int found = 0; - - hlist_for_each_entry(hn, &hash[ix], hlist) - if (ether_addr_equal_64bits(hn->ai.addr, addr)) { - found = 1; - break; - } - - if (found) { - hn->action = MLX5E_ACTION_NONE; - return; - } - - hn = kzalloc(sizeof(*hn), GFP_ATOMIC); - if (!hn) - return; - - ether_addr_copy(hn->ai.addr, addr); - hn->action = MLX5E_ACTION_ADD; - - hlist_add_head(&hn->hlist, &hash[ix]); -} - -static void mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn) -{ - hlist_del(&hn->hlist); - kfree(hn); -} - -static void mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai) -{ - void *ft = priv->ft.main; - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) - mlx5_del_flow_table_entry(ft, - ai->ft_ix[MLX5E_TT_IPV6_IPSEC_ESP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) - mlx5_del_flow_table_entry(ft, - ai->ft_ix[MLX5E_TT_IPV4_IPSEC_ESP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) - mlx5_del_flow_table_entry(ft, - ai->ft_ix[MLX5E_TT_IPV6_IPSEC_AH]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) - mlx5_del_flow_table_entry(ft, - ai->ft_ix[MLX5E_TT_IPV4_IPSEC_AH]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_TCP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_TCP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_TCP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_TCP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_UDP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_UDP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_UDP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_UDP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4]); - - if (ai->tt_vec & BIT(MLX5E_TT_ANY)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_ANY]); -} - -static int mlx5e_get_eth_addr_type(u8 *addr) -{ - if (is_unicast_ether_addr(addr)) - return MLX5E_UC; - - if ((addr[0] == 0x01) && - (addr[1] == 0x00) && - (addr[2] == 0x5e) && - !(addr[3] & 0x80)) - return MLX5E_MC_IPV4; - - if ((addr[0] == 0x33) && - (addr[1] == 0x33)) - return MLX5E_MC_IPV6; - - return MLX5E_MC_OTHER; -} - -static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type) -{ - int eth_addr_type; - u32 ret; - - switch (type) { - case MLX5E_FULLMATCH: - eth_addr_type = mlx5e_get_eth_addr_type(ai->addr); - switch (eth_addr_type) { - case MLX5E_UC: - ret = - BIT(MLX5E_TT_IPV4_TCP) | - BIT(MLX5E_TT_IPV6_TCP) | - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4_IPSEC_AH) | - BIT(MLX5E_TT_IPV6_IPSEC_AH) | - BIT(MLX5E_TT_IPV4_IPSEC_ESP) | - BIT(MLX5E_TT_IPV6_IPSEC_ESP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - - case MLX5E_MC_IPV4: - ret = - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV4) | - 0; - break; - - case MLX5E_MC_IPV6: - ret = - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV6) | - 0; - break; - - case MLX5E_MC_OTHER: - ret = - BIT(MLX5E_TT_ANY) | - 0; - break; - } - - break; - - case MLX5E_ALLMULTI: - ret = - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - - default: /* MLX5E_PROMISC */ - ret = - BIT(MLX5E_TT_IPV4_TCP) | - BIT(MLX5E_TT_IPV6_TCP) | - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4_IPSEC_AH) | - BIT(MLX5E_TT_IPV6_IPSEC_AH) | - BIT(MLX5E_TT_IPV4_IPSEC_ESP) | - BIT(MLX5E_TT_IPV6_IPSEC_ESP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - } - - return ret; -} - -static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai, int type, - void *flow_context, void *match_criteria) -{ - u8 match_criteria_enable = 0; - void *match_value; - void *dest; - u8 *dmac; - u8 *match_criteria_dmac; - void *ft = priv->ft.main; - u32 *tirn = priv->tirn; - u32 *ft_ix; - u32 tt_vec; - int err; - - match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); - dmac = MLX5_ADDR_OF(fte_match_param, match_value, - outer_headers.dmac_47_16); - match_criteria_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, - outer_headers.dmac_47_16); - dest = MLX5_ADDR_OF(flow_context, flow_context, destination); - - MLX5_SET(flow_context, flow_context, action, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); - MLX5_SET(flow_context, flow_context, destination_list_size, 1); - MLX5_SET(dest_format_struct, dest, destination_type, - MLX5_FLOW_CONTEXT_DEST_TYPE_TIR); - - switch (type) { - case MLX5E_FULLMATCH: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - memset(match_criteria_dmac, 0xff, ETH_ALEN); - ether_addr_copy(dmac, ai->addr); - break; - - case MLX5E_ALLMULTI: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - match_criteria_dmac[0] = 0x01; - dmac[0] = 0x01; - break; - - case MLX5E_PROMISC: - break; - } - - tt_vec = mlx5e_get_tt_vec(ai, type); - - ft_ix = &ai->ft_ix[MLX5E_TT_ANY]; - if (tt_vec & BIT(MLX5E_TT_ANY)) { - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_ANY]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_ANY); - } - - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.ethertype); - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4]; - if (tt_vec & BIT(MLX5E_TT_IPV4)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV4); - } - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6]; - if (tt_vec & BIT(MLX5E_TT_IPV6)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV6); - } - - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.ip_protocol); - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, - IPPROTO_UDP); - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_UDP]; - if (tt_vec & BIT(MLX5E_TT_IPV4_UDP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4_UDP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV4_UDP); - } - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_UDP]; - if (tt_vec & BIT(MLX5E_TT_IPV6_UDP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6_UDP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV6_UDP); - } - - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, - IPPROTO_TCP); - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_TCP]; - if (tt_vec & BIT(MLX5E_TT_IPV4_TCP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4_TCP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV4_TCP); - } - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_TCP]; - if (tt_vec & BIT(MLX5E_TT_IPV6_TCP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6_TCP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP); - } - - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, - IPPROTO_AH); - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_IPSEC_AH]; - if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4_IPSEC_AH]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH); - } - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_IPSEC_AH]; - if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6_IPSEC_AH]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_AH); - } - - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, - IPPROTO_ESP); - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_IPSEC_ESP]; - if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4_IPSEC_ESP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_ESP); - } - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_IPSEC_ESP]; - if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6_IPSEC_ESP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_ESP); - } - - return 0; - -err_del_ai: - mlx5e_del_eth_addr_from_flow_table(priv, ai); - - return err; -} - -static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai, int type) -{ - u32 *flow_context; - u32 *match_criteria; - int err; - - flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + - MLX5_ST_SZ_BYTES(dest_format_struct)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!flow_context || !match_criteria) { - netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto add_eth_addr_rule_out; - } - - err = __mlx5e_add_eth_addr_rule(priv, ai, type, flow_context, - match_criteria); - if (err) - netdev_err(priv->netdev, "%s: failed\n", __func__); - -add_eth_addr_rule_out: - kvfree(match_criteria); - kvfree(flow_context); - return err; -} - -enum mlx5e_vlan_rule_type { - MLX5E_VLAN_RULE_TYPE_UNTAGGED, - MLX5E_VLAN_RULE_TYPE_ANY_VID, - MLX5E_VLAN_RULE_TYPE_MATCH_VID, -}; - -static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, - enum mlx5e_vlan_rule_type rule_type, u16 vid) -{ - u8 match_criteria_enable = 0; - u32 *flow_context; - void *match_value; - void *dest; - u32 *match_criteria; - u32 *ft_ix; - int err; - - flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + - MLX5_ST_SZ_BYTES(dest_format_struct)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!flow_context || !match_criteria) { - netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto add_vlan_rule_out; - } - match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); - dest = MLX5_ADDR_OF(flow_context, flow_context, destination); - - MLX5_SET(flow_context, flow_context, action, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); - MLX5_SET(flow_context, flow_context, destination_list_size, 1); - MLX5_SET(dest_format_struct, dest, destination_type, - MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE); - MLX5_SET(dest_format_struct, dest, destination_id, - mlx5_get_flow_table_id(priv->ft.main)); - - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.vlan_tag); - - switch (rule_type) { - case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - ft_ix = &priv->vlan.untagged_rule_ft_ix; - break; - case MLX5E_VLAN_RULE_TYPE_ANY_VID: - ft_ix = &priv->vlan.any_vlan_rule_ft_ix; - MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, - 1); - break; - default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ - ft_ix = &priv->vlan.active_vlans_ft_ix[vid]; - MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, - 1); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.first_vid); - MLX5_SET(fte_match_param, match_value, outer_headers.first_vid, - vid); - break; - } - - err = mlx5_add_flow_table_entry(priv->ft.vlan, match_criteria_enable, - match_criteria, flow_context, ft_ix); - if (err) - netdev_err(priv->netdev, "%s: failed\n", __func__); - -add_vlan_rule_out: - kvfree(match_criteria); - kvfree(flow_context); - return err; -} - -static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, - enum mlx5e_vlan_rule_type rule_type, u16 vid) -{ - switch (rule_type) { - case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - mlx5_del_flow_table_entry(priv->ft.vlan, - priv->vlan.untagged_rule_ft_ix); - break; - case MLX5E_VLAN_RULE_TYPE_ANY_VID: - mlx5_del_flow_table_entry(priv->ft.vlan, - priv->vlan.any_vlan_rule_ft_ix); - break; - case MLX5E_VLAN_RULE_TYPE_MATCH_VID: - mlx5_del_flow_table_entry(priv->ft.vlan, - priv->vlan.active_vlans_ft_ix[vid]); - break; - } -} - -void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) -{ - if (!priv->vlan.filter_disabled) - return; - - priv->vlan.filter_disabled = false; - if (priv->netdev->flags & IFF_PROMISC) - return; - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); -} - -void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) -{ - if (priv->vlan.filter_disabled) - return; - - priv->vlan.filter_disabled = true; - if (priv->netdev->flags & IFF_PROMISC) - return; - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); -} - -int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, - u16 vid) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - - return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); -} - -int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, - u16 vid) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); - - return 0; -} - -#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ - for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \ - hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) - -static void mlx5e_execute_action(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_hash_node *hn) -{ - switch (hn->action) { - case MLX5E_ACTION_ADD: - mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH); - hn->action = MLX5E_ACTION_NONE; - break; - - case MLX5E_ACTION_DEL: - mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai); - mlx5e_del_eth_addr_from_hash(hn); - break; - } -} - -static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) -{ - struct net_device *netdev = priv->netdev; - struct netdev_hw_addr *ha; - - netif_addr_lock_bh(netdev); - - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, - priv->netdev->dev_addr); - - netdev_for_each_uc_addr(ha, netdev) - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, ha->addr); - - netdev_for_each_mc_addr(ha, netdev) - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_mc, ha->addr); - - netif_addr_unlock_bh(netdev); -} - -static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) -{ - struct mlx5e_eth_addr_hash_node *hn; - struct hlist_node *tmp; - int i; - - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) - mlx5e_execute_action(priv, hn); - - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) - mlx5e_execute_action(priv, hn); -} - -static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv) -{ - struct mlx5e_eth_addr_hash_node *hn; - struct hlist_node *tmp; - int i; - - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) - hn->action = MLX5E_ACTION_DEL; - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) - hn->action = MLX5E_ACTION_DEL; - - if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state)) - mlx5e_sync_netdev_addr(priv); - - mlx5e_apply_netdev_addr(priv); -} - -void mlx5e_set_rx_mode_work(struct work_struct *work) -{ - struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, - set_rx_mode_work); - - struct mlx5e_eth_addr_db *ea = &priv->eth_addr; - struct net_device *ndev = priv->netdev; - - bool rx_mode_enable = !test_bit(MLX5E_STATE_DESTROYING, &priv->state); - bool promisc_enabled = rx_mode_enable && (ndev->flags & IFF_PROMISC); - bool allmulti_enabled = rx_mode_enable && (ndev->flags & IFF_ALLMULTI); - bool broadcast_enabled = rx_mode_enable; - - bool enable_promisc = !ea->promisc_enabled && promisc_enabled; - bool disable_promisc = ea->promisc_enabled && !promisc_enabled; - bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled; - bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled; - bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled; - bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; - - if (enable_promisc) { - mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC); - if (!priv->vlan.filter_disabled) - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, - 0); - } - if (enable_allmulti) - mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); - if (enable_broadcast) - mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); - - mlx5e_handle_netdev_addr(priv); - - if (disable_broadcast) - mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast); - if (disable_allmulti) - mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti); - if (disable_promisc) { - if (!priv->vlan.filter_disabled) - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, - 0); - mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc); - } - - ea->promisc_enabled = promisc_enabled; - ea->allmulti_enabled = allmulti_enabled; - ea->broadcast_enabled = broadcast_enabled; -} - -void mlx5e_init_eth_addr(struct mlx5e_priv *priv) -{ - ether_addr_copy(priv->eth_addr.broadcast.addr, priv->netdev->broadcast); -} - -static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv) -{ - struct mlx5_flow_table_group *g; - u8 *dmac; - - g = kcalloc(9, sizeof(*g), GFP_KERNEL); - if (!g) - return -ENOMEM; - - g[0].log_sz = 3; - g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.ip_protocol); - - g[1].log_sz = 1; - g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, - outer_headers.ethertype); - - g[2].log_sz = 0; - - g[3].log_sz = 14; - g[3].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[3].match_criteria, - outer_headers.dmac_47_16); - memset(dmac, 0xff, ETH_ALEN); - MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, - outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, - outer_headers.ip_protocol); - - g[4].log_sz = 13; - g[4].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[4].match_criteria, - outer_headers.dmac_47_16); - memset(dmac, 0xff, ETH_ALEN); - MLX5_SET_TO_ONES(fte_match_param, g[4].match_criteria, - outer_headers.ethertype); - - g[5].log_sz = 11; - g[5].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[5].match_criteria, - outer_headers.dmac_47_16); - memset(dmac, 0xff, ETH_ALEN); - - g[6].log_sz = 2; - g[6].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[6].match_criteria, - outer_headers.dmac_47_16); - dmac[0] = 0x01; - MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, - outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, - outer_headers.ip_protocol); - - g[7].log_sz = 1; - g[7].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[7].match_criteria, - outer_headers.dmac_47_16); - dmac[0] = 0x01; - MLX5_SET_TO_ONES(fte_match_param, g[7].match_criteria, - outer_headers.ethertype); - - g[8].log_sz = 0; - g[8].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[8].match_criteria, - outer_headers.dmac_47_16); - dmac[0] = 0x01; - priv->ft.main = mlx5_create_flow_table(priv->mdev, 1, - MLX5_FLOW_TABLE_TYPE_NIC_RCV, - 9, g); - kfree(g); - - return priv->ft.main ? 0 : -ENOMEM; -} - -static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) -{ - mlx5_destroy_flow_table(priv->ft.main); -} - -static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) -{ - struct mlx5_flow_table_group *g; - - g = kcalloc(2, sizeof(*g), GFP_KERNEL); - if (!g) - return -ENOMEM; - - g[0].log_sz = 12; - g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.vlan_tag); - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.first_vid); - - /* untagged + any vlan id */ - g[1].log_sz = 1; - g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, - outer_headers.vlan_tag); - - priv->ft.vlan = mlx5_create_flow_table(priv->mdev, 0, - MLX5_FLOW_TABLE_TYPE_NIC_RCV, - 2, g); - - kfree(g); - return priv->ft.vlan ? 0 : -ENOMEM; -} - -static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv) -{ - mlx5_destroy_flow_table(priv->ft.vlan); -} - -int mlx5e_create_flow_tables(struct mlx5e_priv *priv) -{ - int err; - - err = mlx5e_create_main_flow_table(priv); - if (err) - return err; - - err = mlx5e_create_vlan_flow_table(priv); - if (err) - goto err_destroy_main_flow_table; - - err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - if (err) - goto err_destroy_vlan_flow_table; - - return 0; - -err_destroy_vlan_flow_table: - mlx5e_destroy_vlan_flow_table(priv); - -err_destroy_main_flow_table: - mlx5e_destroy_main_flow_table(priv); - - return err; -} - -void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv) -{ - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - mlx5e_destroy_vlan_flow_table(priv); - mlx5e_destroy_main_flow_table(priv); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c new file mode 100644 index 000000000000..80d81abc4820 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -0,0 +1,1224 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/list.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/mlx5/fs.h> +#include "en.h" + +#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) + +enum { + MLX5E_FULLMATCH = 0, + MLX5E_ALLMULTI = 1, + MLX5E_PROMISC = 2, +}; + +enum { + MLX5E_UC = 0, + MLX5E_MC_IPV4 = 1, + MLX5E_MC_IPV6 = 2, + MLX5E_MC_OTHER = 3, +}; + +enum { + MLX5E_ACTION_NONE = 0, + MLX5E_ACTION_ADD = 1, + MLX5E_ACTION_DEL = 2, +}; + +struct mlx5e_eth_addr_hash_node { + struct hlist_node hlist; + u8 action; + struct mlx5e_eth_addr_info ai; +}; + +static inline int mlx5e_hash_eth_addr(u8 *addr) +{ + return addr[5]; +} + +static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr) +{ + struct mlx5e_eth_addr_hash_node *hn; + int ix = mlx5e_hash_eth_addr(addr); + int found = 0; + + hlist_for_each_entry(hn, &hash[ix], hlist) + if (ether_addr_equal_64bits(hn->ai.addr, addr)) { + found = 1; + break; + } + + if (found) { + hn->action = MLX5E_ACTION_NONE; + return; + } + + hn = kzalloc(sizeof(*hn), GFP_ATOMIC); + if (!hn) + return; + + ether_addr_copy(hn->ai.addr, addr); + hn->action = MLX5E_ACTION_ADD; + + hlist_add_head(&hn->hlist, &hash[ix]); +} + +static void mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn) +{ + hlist_del(&hn->hlist); + kfree(hn); +} + +static void mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai) +{ + if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV6_TCP)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_TCP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4_TCP)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_TCP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV6_UDP)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_UDP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4_UDP)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_UDP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV6)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4]); + + if (ai->tt_vec & BIT(MLX5E_TT_ANY)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_ANY]); +} + +static int mlx5e_get_eth_addr_type(u8 *addr) +{ + if (is_unicast_ether_addr(addr)) + return MLX5E_UC; + + if ((addr[0] == 0x01) && + (addr[1] == 0x00) && + (addr[2] == 0x5e) && + !(addr[3] & 0x80)) + return MLX5E_MC_IPV4; + + if ((addr[0] == 0x33) && + (addr[1] == 0x33)) + return MLX5E_MC_IPV6; + + return MLX5E_MC_OTHER; +} + +static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type) +{ + int eth_addr_type; + u32 ret; + + switch (type) { + case MLX5E_FULLMATCH: + eth_addr_type = mlx5e_get_eth_addr_type(ai->addr); + switch (eth_addr_type) { + case MLX5E_UC: + ret = + BIT(MLX5E_TT_IPV4_TCP) | + BIT(MLX5E_TT_IPV6_TCP) | + BIT(MLX5E_TT_IPV4_UDP) | + BIT(MLX5E_TT_IPV6_UDP) | + BIT(MLX5E_TT_IPV4_IPSEC_AH) | + BIT(MLX5E_TT_IPV6_IPSEC_AH) | + BIT(MLX5E_TT_IPV4_IPSEC_ESP) | + BIT(MLX5E_TT_IPV6_IPSEC_ESP) | + BIT(MLX5E_TT_IPV4) | + BIT(MLX5E_TT_IPV6) | + BIT(MLX5E_TT_ANY) | + 0; + break; + + case MLX5E_MC_IPV4: + ret = + BIT(MLX5E_TT_IPV4_UDP) | + BIT(MLX5E_TT_IPV4) | + 0; + break; + + case MLX5E_MC_IPV6: + ret = + BIT(MLX5E_TT_IPV6_UDP) | + BIT(MLX5E_TT_IPV6) | + 0; + break; + + case MLX5E_MC_OTHER: + ret = + BIT(MLX5E_TT_ANY) | + 0; + break; + } + + break; + + case MLX5E_ALLMULTI: + ret = + BIT(MLX5E_TT_IPV4_UDP) | + BIT(MLX5E_TT_IPV6_UDP) | + BIT(MLX5E_TT_IPV4) | + BIT(MLX5E_TT_IPV6) | + BIT(MLX5E_TT_ANY) | + 0; + break; + + default: /* MLX5E_PROMISC */ + ret = + BIT(MLX5E_TT_IPV4_TCP) | + BIT(MLX5E_TT_IPV6_TCP) | + BIT(MLX5E_TT_IPV4_UDP) | + BIT(MLX5E_TT_IPV6_UDP) | + BIT(MLX5E_TT_IPV4_IPSEC_AH) | + BIT(MLX5E_TT_IPV6_IPSEC_AH) | + BIT(MLX5E_TT_IPV4_IPSEC_ESP) | + BIT(MLX5E_TT_IPV6_IPSEC_ESP) | + BIT(MLX5E_TT_IPV4) | + BIT(MLX5E_TT_IPV6) | + BIT(MLX5E_TT_ANY) | + 0; + break; + } + + return ret; +} + +static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai, + int type, u32 *mc, u32 *mv) +{ + struct mlx5_flow_destination dest; + u8 match_criteria_enable = 0; + struct mlx5_flow_rule **rule_p; + struct mlx5_flow_table *ft = priv->fts.main.t; + u8 *mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers.dmac_47_16); + u8 *mv_dmac = MLX5_ADDR_OF(fte_match_param, mv, + outer_headers.dmac_47_16); + u32 *tirn = priv->tirn; + u32 tt_vec; + int err = 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + + switch (type) { + case MLX5E_FULLMATCH: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + eth_broadcast_addr(mc_dmac); + ether_addr_copy(mv_dmac, ai->addr); + break; + + case MLX5E_ALLMULTI: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + mc_dmac[0] = 0x01; + mv_dmac[0] = 0x01; + break; + + case MLX5E_PROMISC: + break; + } + + tt_vec = mlx5e_get_tt_vec(ai, type); + + if (tt_vec & BIT(MLX5E_TT_ANY)) { + rule_p = &ai->ft_rule[MLX5E_TT_ANY]; + dest.tir_num = tirn[MLX5E_TT_ANY]; + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_ANY); + } + + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + if (tt_vec & BIT(MLX5E_TT_IPV4)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4]; + dest.tir_num = tirn[MLX5E_TT_IPV4]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IP); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV4); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6]; + dest.tir_num = tirn[MLX5E_TT_IPV6]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IPV6); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV6); + } + + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_UDP); + + if (tt_vec & BIT(MLX5E_TT_IPV4_UDP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_UDP]; + dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IP); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV4_UDP); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6_UDP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_UDP]; + dest.tir_num = tirn[MLX5E_TT_IPV6_UDP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IPV6); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV6_UDP); + } + + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_TCP); + + if (tt_vec & BIT(MLX5E_TT_IPV4_TCP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_TCP]; + dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IP); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV4_TCP); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6_TCP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_TCP]; + dest.tir_num = tirn[MLX5E_TT_IPV6_TCP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IPV6); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + + ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP); + } + + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_AH); + + if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]; + dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_AH]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IP); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]; + dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_AH]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IPV6); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_AH); + } + + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_ESP); + + if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]; + dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_ESP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IP); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_ESP); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]; + dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_ESP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IPV6); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_ESP); + } + + return 0; + +err_del_ai: + err = PTR_ERR(*rule_p); + *rule_p = NULL; + mlx5e_del_eth_addr_from_flow_table(priv, ai); + + return err; +} + +static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai, int type) +{ + u32 *match_criteria; + u32 *match_value; + int err = 0; + + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_eth_addr_rule_out; + } + + err = __mlx5e_add_eth_addr_rule(priv, ai, type, match_criteria, + match_value); + +add_eth_addr_rule_out: + kvfree(match_criteria); + kvfree(match_value); + + return err; +} + +static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) +{ + struct net_device *ndev = priv->netdev; + int max_list_size; + int list_size; + u16 *vlans; + int vlan; + int err; + int i; + + list_size = 0; + for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) + list_size++; + + max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); + + if (list_size > max_list_size) { + netdev_warn(ndev, + "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n", + list_size, max_list_size); + list_size = max_list_size; + } + + vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL); + if (!vlans) + return -ENOMEM; + + i = 0; + for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) { + if (i >= list_size) + break; + vlans[i++] = vlan; + } + + err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size); + if (err) + netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n", + err); + + kfree(vlans); + return err; +} + +enum mlx5e_vlan_rule_type { + MLX5E_VLAN_RULE_TYPE_UNTAGGED, + MLX5E_VLAN_RULE_TYPE_ANY_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_VID, +}; + +static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, + u16 vid, u32 *mc, u32 *mv) +{ + struct mlx5_flow_table *ft = priv->fts.vlan.t; + struct mlx5_flow_destination dest; + u8 match_criteria_enable = 0; + struct mlx5_flow_rule **rule_p; + int err = 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fts.main.t; + + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + rule_p = &priv->vlan.untagged_rule; + break; + case MLX5E_VLAN_RULE_TYPE_ANY_VID: + rule_p = &priv->vlan.any_vlan_rule; + MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); + break; + default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ + rule_p = &priv->vlan.active_vlans_rule[vid]; + MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); + MLX5_SET(fte_match_param, mv, outer_headers.first_vid, vid); + break; + } + + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + &dest); + + if (IS_ERR(*rule_p)) { + err = PTR_ERR(*rule_p); + *rule_p = NULL; + netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + } + + return err; +} + +static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + u32 *match_criteria; + u32 *match_value; + int err = 0; + + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_vlan_rule_out; + } + + if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_VID) + mlx5e_vport_context_update_vlans(priv); + + err = __mlx5e_add_vlan_rule(priv, rule_type, vid, match_criteria, + match_value); + +add_vlan_rule_out: + kvfree(match_criteria); + kvfree(match_value); + + return err; +} + +static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + if (priv->vlan.untagged_rule) { + mlx5_del_flow_rule(priv->vlan.untagged_rule); + priv->vlan.untagged_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_ANY_VID: + if (priv->vlan.any_vlan_rule) { + mlx5_del_flow_rule(priv->vlan.any_vlan_rule); + priv->vlan.any_vlan_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_VID: + mlx5e_vport_context_update_vlans(priv); + if (priv->vlan.active_vlans_rule[vid]) { + mlx5_del_flow_rule(priv->vlan.active_vlans_rule[vid]); + priv->vlan.active_vlans_rule[vid] = NULL; + } + mlx5e_vport_context_update_vlans(priv); + break; + } +} + +void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) +{ + if (!priv->vlan.filter_disabled) + return; + + priv->vlan.filter_disabled = false; + if (priv->netdev->flags & IFF_PROMISC) + return; + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); +} + +void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) +{ + if (priv->vlan.filter_disabled) + return; + + priv->vlan.filter_disabled = true; + if (priv->netdev->flags & IFF_PROMISC) + return; + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); +} + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + set_bit(vid, priv->vlan.active_vlans); + + return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); +} + +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + clear_bit(vid, priv->vlan.active_vlans); + + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); + + return 0; +} + +#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) + +static void mlx5e_execute_action(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_hash_node *hn) +{ + switch (hn->action) { + case MLX5E_ACTION_ADD: + mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH); + hn->action = MLX5E_ACTION_NONE; + break; + + case MLX5E_ACTION_DEL: + mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai); + mlx5e_del_eth_addr_from_hash(hn); + break; + } +} + +static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(netdev); + + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, + priv->netdev->dev_addr); + + netdev_for_each_uc_addr(ha, netdev) + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, ha->addr); + + netdev_for_each_mc_addr(ha, netdev) + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_mc, ha->addr); + + netif_addr_unlock_bh(netdev); +} + +static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, + u8 addr_array[][ETH_ALEN], int size) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct net_device *ndev = priv->netdev; + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int i = 0; + int hi; + + addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + + if (is_uc) /* Make sure our own address is pushed first */ + ether_addr_copy(addr_array[i++], ndev->dev_addr); + else if (priv->eth_addr.broadcast_enabled) + ether_addr_copy(addr_array[i++], ndev->broadcast); + + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { + if (ether_addr_equal(ndev->dev_addr, hn->ai.addr)) + continue; + if (i >= size) + break; + ether_addr_copy(addr_array[i++], hn->ai.addr); + } +} + +static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, + int list_type) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct mlx5e_eth_addr_hash_node *hn; + u8 (*addr_array)[ETH_ALEN] = NULL; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int max_size; + int size; + int err; + int hi; + + size = is_uc ? 0 : (priv->eth_addr.broadcast_enabled ? 1 : 0); + max_size = is_uc ? + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list); + + addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) + size++; + + if (size > max_size) { + netdev_warn(priv->netdev, + "netdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n", + is_uc ? "UC" : "MC", size, max_size); + size = max_size; + } + + if (size) { + addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!addr_array) { + err = -ENOMEM; + goto out; + } + mlx5e_fill_addr_array(priv, list_type, addr_array, size); + } + + err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size); +out: + if (err) + netdev_err(priv->netdev, + "Failed to modify vport %s list err(%d)\n", + is_uc ? "UC" : "MC", err); + kfree(addr_array); +} + +static void mlx5e_vport_context_update(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + + mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC); + mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC); + mlx5_modify_nic_vport_promisc(priv->mdev, 0, + ea->allmulti_enabled, + ea->promisc_enabled); +} + +static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) + mlx5e_execute_action(priv, hn); + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) + mlx5e_execute_action(priv, hn); +} + +static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) + hn->action = MLX5E_ACTION_DEL; + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) + hn->action = MLX5E_ACTION_DEL; + + if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state)) + mlx5e_sync_netdev_addr(priv); + + mlx5e_apply_netdev_addr(priv); +} + +void mlx5e_set_rx_mode_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + set_rx_mode_work); + + struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + struct net_device *ndev = priv->netdev; + + bool rx_mode_enable = !test_bit(MLX5E_STATE_DESTROYING, &priv->state); + bool promisc_enabled = rx_mode_enable && (ndev->flags & IFF_PROMISC); + bool allmulti_enabled = rx_mode_enable && (ndev->flags & IFF_ALLMULTI); + bool broadcast_enabled = rx_mode_enable; + + bool enable_promisc = !ea->promisc_enabled && promisc_enabled; + bool disable_promisc = ea->promisc_enabled && !promisc_enabled; + bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled; + bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled; + bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled; + bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; + + if (enable_promisc) { + mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC); + if (!priv->vlan.filter_disabled) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + } + if (enable_allmulti) + mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); + if (enable_broadcast) + mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); + + mlx5e_handle_netdev_addr(priv); + + if (disable_broadcast) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast); + if (disable_allmulti) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti); + if (disable_promisc) { + if (!priv->vlan.filter_disabled) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc); + } + + ea->promisc_enabled = promisc_enabled; + ea->allmulti_enabled = allmulti_enabled; + ea->broadcast_enabled = broadcast_enabled; + + mlx5e_vport_context_update(priv); +} + +static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft) +{ + int i; + + for (i = ft->num_groups - 1; i >= 0; i--) { + if (!IS_ERR_OR_NULL(ft->g[i])) + mlx5_destroy_flow_group(ft->g[i]); + ft->g[i] = NULL; + } + ft->num_groups = 0; +} + +void mlx5e_init_eth_addr(struct mlx5e_priv *priv) +{ + ether_addr_copy(priv->eth_addr.broadcast.addr, priv->netdev->broadcast); +} + +#define MLX5E_MAIN_GROUP0_SIZE BIT(3) +#define MLX5E_MAIN_GROUP1_SIZE BIT(1) +#define MLX5E_MAIN_GROUP2_SIZE BIT(0) +#define MLX5E_MAIN_GROUP3_SIZE BIT(14) +#define MLX5E_MAIN_GROUP4_SIZE BIT(13) +#define MLX5E_MAIN_GROUP5_SIZE BIT(11) +#define MLX5E_MAIN_GROUP6_SIZE BIT(2) +#define MLX5E_MAIN_GROUP7_SIZE BIT(1) +#define MLX5E_MAIN_GROUP8_SIZE BIT(0) +#define MLX5E_MAIN_TABLE_SIZE (MLX5E_MAIN_GROUP0_SIZE +\ + MLX5E_MAIN_GROUP1_SIZE +\ + MLX5E_MAIN_GROUP2_SIZE +\ + MLX5E_MAIN_GROUP3_SIZE +\ + MLX5E_MAIN_GROUP4_SIZE +\ + MLX5E_MAIN_GROUP5_SIZE +\ + MLX5E_MAIN_GROUP6_SIZE +\ + MLX5E_MAIN_GROUP7_SIZE +\ + MLX5E_MAIN_GROUP8_SIZE) + +static int __mlx5e_create_main_groups(struct mlx5e_flow_table *ft, u32 *in, + int inlen) +{ + u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + u8 *dmac = MLX5_ADDR_OF(create_flow_group_in, in, + match_criteria.outer_headers.dmac_47_16); + int err; + int ix = 0; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP0_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + eth_broadcast_addr(dmac); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + eth_broadcast_addr(dmac); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP4_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + eth_broadcast_addr(dmac); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP5_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + dmac[0] = 0x01; + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP6_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + dmac[0] = 0x01; + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP7_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + dmac[0] = 0x01; + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP8_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + return 0; + +err_destroy_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); + + return err; +} + +static int mlx5e_create_main_groups(struct mlx5e_flow_table *ft) +{ + u32 *in; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + err = __mlx5e_create_main_groups(ft, in, inlen); + + kvfree(in); + return err; +} + +static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5e_flow_table *ft = &priv->fts.main; + int err; + + ft->num_groups = 0; + ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_MAIN_TABLE_SIZE); + + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + ft->g = kcalloc(MLX5E_NUM_MAIN_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) { + err = -ENOMEM; + goto err_destroy_main_flow_table; + } + + err = mlx5e_create_main_groups(ft); + if (err) + goto err_free_g; + return 0; + +err_free_g: + kfree(ft->g); + +err_destroy_main_flow_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return err; +} + +static void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) +{ + mlx5e_destroy_groups(ft); + kfree(ft->g); + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; +} + +static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) +{ + mlx5e_destroy_flow_table(&priv->fts.main); +} + +#define MLX5E_NUM_VLAN_GROUPS 2 +#define MLX5E_VLAN_GROUP0_SIZE BIT(12) +#define MLX5E_VLAN_GROUP1_SIZE BIT(1) +#define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ + MLX5E_VLAN_GROUP1_SIZE) + +static int __mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft, u32 *in, + int inlen) +{ + int err; + int ix = 0; + u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP0_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + return 0; + +err_destroy_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); + + return err; +} + +static int mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft) +{ + u32 *in; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + err = __mlx5e_create_vlan_groups(ft, in, inlen); + + kvfree(in); + return err; +} + +static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5e_flow_table *ft = &priv->fts.vlan; + int err; + + ft->num_groups = 0; + ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_VLAN_TABLE_SIZE); + + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) { + err = -ENOMEM; + goto err_destroy_vlan_flow_table; + } + + err = mlx5e_create_vlan_groups(ft); + if (err) + goto err_free_g; + + return 0; + +err_free_g: + kfree(ft->g); + +err_destroy_vlan_flow_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return err; +} + +static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv) +{ + mlx5e_destroy_flow_table(&priv->fts.vlan); +} + +int mlx5e_create_flow_tables(struct mlx5e_priv *priv) +{ + int err; + + priv->fts.ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + + if (!priv->fts.ns) + return -EINVAL; + + err = mlx5e_create_vlan_flow_table(priv); + if (err) + return err; + + err = mlx5e_create_main_flow_table(priv); + if (err) + goto err_destroy_vlan_flow_table; + + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + if (err) + goto err_destroy_main_flow_table; + + return 0; + +err_destroy_main_flow_table: + mlx5e_destroy_main_flow_table(priv); +err_destroy_vlan_flow_table: + mlx5e_destroy_vlan_flow_table(priv); + + return err; +} + +void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv) +{ + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + mlx5e_destroy_main_flow_table(priv); + mlx5e_destroy_vlan_flow_table(priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1e52db32c73d..5c74a734f158 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -30,8 +30,9 @@ * SOFTWARE. */ -#include <linux/mlx5/flow_table.h> +#include <linux/mlx5/fs.h> #include "en.h" +#include "eswitch.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -63,7 +64,7 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv) u8 port_state; port_state = mlx5_query_vport_state(mdev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT); + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); if (port_state == VPORT_STATE_UP) netif_carrier_on(priv->netdev); @@ -350,6 +351,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->pdev = c->pdev; rq->netdev = c->netdev; + rq->tstamp = &priv->tstamp; rq->channel = c; rq->ix = c->ix; rq->priv = c->priv; @@ -506,6 +508,7 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) static void mlx5e_free_sq_db(struct mlx5e_sq *sq) { + kfree(sq->wqe_info); kfree(sq->dma_fifo); kfree(sq->skb); } @@ -518,8 +521,10 @@ static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) sq->skb = kzalloc_node(wq_sz * sizeof(*sq->skb), GFP_KERNEL, numa); sq->dma_fifo = kzalloc_node(df_sz * sizeof(*sq->dma_fifo), GFP_KERNEL, numa); + sq->wqe_info = kzalloc_node(wq_sz * sizeof(*sq->wqe_info), GFP_KERNEL, + numa); - if (!sq->skb || !sq->dma_fifo) { + if (!sq->skb || !sq->dma_fifo || !sq->wqe_info) { mlx5e_free_sq_db(sq); return -ENOMEM; } @@ -567,6 +572,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix); sq->pdev = c->pdev; + sq->tstamp = &priv->tstamp; sq->mkey_be = c->mkey_be; sq->channel = c; sq->tc = tc; @@ -1020,6 +1026,7 @@ err_close_tx_cqs: err_napi_del: netif_napi_del(&c->napi); + napi_hash_del(&c->napi); kfree(c); return err; @@ -1033,6 +1040,10 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); netif_napi_del(&c->napi); + + napi_hash_del(&c->napi); + synchronize_rcu(); + kfree(c); } @@ -1421,6 +1432,7 @@ int mlx5e_open_locked(struct net_device *netdev) mlx5e_update_carrier(priv); mlx5e_redirect_rqts(priv); + mlx5e_timestamp_init(priv); schedule_delayed_work(&priv->update_stats_work, 0); @@ -1457,6 +1469,7 @@ int mlx5e_close_locked(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); + mlx5e_timestamp_cleanup(priv); mlx5e_redirect_rqts(priv); netif_carrier_off(priv->netdev); mlx5e_close_channels(priv); @@ -1926,6 +1939,91 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) return err; } +static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + case SIOCSHWTSTAMP: + return mlx5e_hwstamp_set(dev, ifr); + case SIOCGHWTSTAMP: + return mlx5e_hwstamp_get(dev, ifr); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); +} + +static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, + vlan, qos); +} + +static int mlx5_vport_link2ifla(u8 esw_link) +{ + switch (esw_link) { + case MLX5_ESW_VPORT_ADMIN_STATE_DOWN: + return IFLA_VF_LINK_STATE_DISABLE; + case MLX5_ESW_VPORT_ADMIN_STATE_UP: + return IFLA_VF_LINK_STATE_ENABLE; + } + return IFLA_VF_LINK_STATE_AUTO; +} + +static int mlx5_ifla_link2vport(u8 ifla_link) +{ + switch (ifla_link) { + case IFLA_VF_LINK_STATE_DISABLE: + return MLX5_ESW_VPORT_ADMIN_STATE_DOWN; + case IFLA_VF_LINK_STATE_ENABLE: + return MLX5_ESW_VPORT_ADMIN_STATE_UP; + } + return MLX5_ESW_VPORT_ADMIN_STATE_AUTO; +} + +static int mlx5e_set_vf_link_state(struct net_device *dev, int vf, + int link_state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, + mlx5_ifla_link2vport(link_state)); +} + +static int mlx5e_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); + if (err) + return err; + ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate); + return 0; +} + +static int mlx5e_get_vf_stats(struct net_device *dev, + int vf, struct ifla_vf_stats *vf_stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, + vf_stats); +} + static struct net_device_ops mlx5e_netdev_ops = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -1937,6 +2035,7 @@ static struct net_device_ops mlx5e_netdev_ops = { .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, .ndo_set_features = mlx5e_set_features, .ndo_change_mtu = mlx5e_change_mtu, + .ndo_do_ioctl = mlx5e_ioctl, }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -2023,7 +2122,12 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5_query_nic_vport_mac_address(priv->mdev, netdev->dev_addr); + mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr); + if (is_zero_ether_addr(netdev->dev_addr) && + !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) { + eth_hw_addr_random(netdev); + mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr); + } } static void mlx5e_build_netdev(struct net_device *netdev) @@ -2036,6 +2140,14 @@ static void mlx5e_build_netdev(struct net_device *netdev) if (priv->params.num_tc > 1) mlx5e_netdev_ops.ndo_select_queue = mlx5e_select_queue; + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + mlx5e_netdev_ops.ndo_set_vf_mac = mlx5e_set_vf_mac; + mlx5e_netdev_ops.ndo_set_vf_vlan = mlx5e_set_vf_vlan; + mlx5e_netdev_ops.ndo_get_vf_config = mlx5e_get_vf_config; + mlx5e_netdev_ops.ndo_set_vf_link_state = mlx5e_set_vf_link_state; + mlx5e_netdev_ops.ndo_get_vf_stats = mlx5e_get_vf_stats; + } + netdev->netdev_ops = &mlx5e_netdev_ops; netdev->watchdog_timeo = 15 * HZ; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index cf0098596e85..dd959d929aad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -33,8 +33,14 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> +#include <net/busy_poll.h> #include "en.h" +static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) +{ + return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL; +} + static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { @@ -189,6 +195,7 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, { struct net_device *netdev = rq->netdev; u32 cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + struct mlx5e_tstamp *tstamp = rq->tstamp; int lro_num_seg; skb_put(skb, cqe_bcnt); @@ -201,6 +208,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, rq->stats.lro_bytes += cqe_bcnt; } + if (unlikely(mlx5e_rx_hw_stamp(tstamp))) + mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb)); + mlx5e_handle_csum(netdev, cqe, rq, skb); skb->protocol = eth_type_trans(skb, netdev); @@ -215,16 +225,16 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, be16_to_cpu(cqe->vlan_info)); } -bool mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) +int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - int i; + int work_done; /* avoid accessing cq (dma coherent memory) if not needed */ if (!test_and_clear_bit(MLX5E_CQ_HAS_CQES, &cq->flags)) - return false; + return 0; - for (i = 0; i < budget; i++) { + for (work_done = 0; work_done < budget; work_done++) { struct mlx5e_rx_wqe *wqe; struct mlx5_cqe64 *cqe; struct sk_buff *skb; @@ -269,10 +279,8 @@ wq_ll_pop: /* ensure cq space is freed before enabling more cqes */ wmb(); - if (i == budget) { + if (work_done == budget) set_bit(MLX5E_CQ_HAS_CQES, &cq->flags); - return true; - } - return false; + return work_done; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 1341b1d3c421..2c3fba0fff54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -92,11 +92,11 @@ static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i) return &sq->dma_fifo[i & sq->dma_fifo_mask]; } -static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, struct sk_buff *skb) +static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma) { int i; - for (i = 0; i < MLX5E_TX_SKB_CB(skb)->num_dma; i++) { + for (i = 0; i < num_dma; i++) { struct mlx5e_sq_dma *last_pushed_dma = mlx5e_dma_get(sq, --sq->dma_fifo_pc); @@ -139,19 +139,28 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, return MLX5E_MIN_INLINE; } -static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) +static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, + unsigned int *skb_len, + unsigned int len) +{ + *skb_len -= len; + *skb_data += len; +} + +static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs, + unsigned char **skb_data, + unsigned int *skb_len) { struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; int cpy1_sz = 2 * ETH_ALEN; int cpy2_sz = ihs - cpy1_sz; - skb_copy_from_linear_data(skb, vhdr, cpy1_sz); - skb_pull_inline(skb, cpy1_sz); + memcpy(vhdr, *skb_data, cpy1_sz); + mlx5e_tx_skb_pull_inline(skb_data, skb_len, cpy1_sz); vhdr->h_vlan_proto = skb->vlan_proto; vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); - skb_copy_from_linear_data(skb, &vhdr->h_vlan_encapsulated_proto, - cpy2_sz); - skb_pull_inline(skb, cpy2_sz); + memcpy(&vhdr->h_vlan_encapsulated_proto, *skb_data, cpy2_sz); + mlx5e_tx_skb_pull_inline(skb_data, skb_len, cpy2_sz); } static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) @@ -160,11 +169,14 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) u16 pi = sq->pc & wq->sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5e_tx_wqe_info *wi = &sq->wqe_info[pi]; struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg; + unsigned char *skb_data = skb->data; + unsigned int skb_len = skb->len; u8 opcode = MLX5_OPCODE_SEND; dma_addr_t dma_addr = 0; bool bf = false; @@ -192,8 +204,8 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) opcode = MLX5_OPCODE_LSO; ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); payload_len = skb->len - ihs; - MLX5E_TX_SKB_CB(skb)->num_bytes = skb->len + - (skb_shinfo(skb)->gso_segs - 1) * ihs; + wi->num_bytes = skb->len + + (skb_shinfo(skb)->gso_segs - 1) * ihs; sq->stats.tso_packets++; sq->stats.tso_bytes += payload_len; } else { @@ -201,16 +213,16 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) !skb->xmit_more && !skb_shinfo(skb)->nr_frags; ihs = mlx5e_get_inline_hdr_size(sq, skb, bf); - MLX5E_TX_SKB_CB(skb)->num_bytes = max_t(unsigned int, skb->len, - ETH_ZLEN); + wi->num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); } if (skb_vlan_tag_present(skb)) { - mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs); + mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs, &skb_data, + &skb_len); ihs += VLAN_HLEN; } else { - skb_copy_from_linear_data(skb, eseg->inline_hdr_start, ihs); - skb_pull_inline(skb, ihs); + memcpy(eseg->inline_hdr_start, skb_data, ihs); + mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); } eseg->inline_hdr_sz = cpu_to_be16(ihs); @@ -220,11 +232,11 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) MLX5_SEND_WQE_DS); dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt; - MLX5E_TX_SKB_CB(skb)->num_dma = 0; + wi->num_dma = 0; - headlen = skb_headlen(skb); + headlen = skb_len - skb->data_len; if (headlen) { - dma_addr = dma_map_single(sq->pdev, skb->data, headlen, + dma_addr = dma_map_single(sq->pdev, skb_data, headlen, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) goto dma_unmap_wqe_err; @@ -234,7 +246,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) dseg->byte_count = cpu_to_be32(headlen); mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE); - MLX5E_TX_SKB_CB(skb)->num_dma++; + wi->num_dma++; dseg++; } @@ -253,23 +265,25 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) dseg->byte_count = cpu_to_be32(fsz); mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); - MLX5E_TX_SKB_CB(skb)->num_dma++; + wi->num_dma++; dseg++; } - ds_cnt += MLX5E_TX_SKB_CB(skb)->num_dma; + ds_cnt += wi->num_dma; cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); sq->skb[pi] = skb; - MLX5E_TX_SKB_CB(skb)->num_wqebbs = DIV_ROUND_UP(ds_cnt, - MLX5_SEND_WQEBB_NUM_DS); - sq->pc += MLX5E_TX_SKB_CB(skb)->num_wqebbs; + wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + sq->pc += wi->num_wqebbs; + + netdev_tx_sent_queue(sq->txq, wi->num_bytes); - netdev_tx_sent_queue(sq->txq, MLX5E_TX_SKB_CB(skb)->num_bytes); + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM))) { netif_tx_stop_queue(sq->txq); @@ -280,7 +294,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) int bf_sz = 0; if (bf && sq->uar_bf_map) - bf_sz = MLX5E_TX_SKB_CB(skb)->num_wqebbs << 3; + bf_sz = wi->num_wqebbs << 3; cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; mlx5e_tx_notify_hw(sq, wqe, bf_sz); @@ -297,7 +311,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) dma_unmap_wqe_err: sq->stats.dropped++; - mlx5e_dma_unmap_wqe_err(sq, skb); + mlx5e_dma_unmap_wqe_err(sq, wi->num_dma); dev_kfree_skb_any(skb); @@ -352,6 +366,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq) wqe_counter = be16_to_cpu(cqe->wqe_counter); do { + struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; u16 ci; int j; @@ -360,6 +375,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq) ci = sqcc & sq->wq.sz_m1; skb = sq->skb[ci]; + wi = &sq->wqe_info[ci]; if (unlikely(!skb)) { /* nop */ sq->stats.nop++; @@ -367,7 +383,16 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq) continue; } - for (j = 0; j < MLX5E_TX_SKB_CB(skb)->num_dma; j++) { + if (unlikely(skb_shinfo(skb)->tx_flags & + SKBTX_HW_TSTAMP)) { + struct skb_shared_hwtstamps hwts = {}; + + mlx5e_fill_hwstamp(sq->tstamp, + get_cqe_ts(cqe), &hwts); + skb_tstamp_tx(skb, &hwts); + } + + for (j = 0; j < wi->num_dma; j++) { struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, dma_fifo_cc++); @@ -375,8 +400,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq) } npkts++; - nbytes += MLX5E_TX_SKB_CB(skb)->num_bytes; - sqcc += MLX5E_TX_SKB_CB(skb)->num_wqebbs; + nbytes += wi->num_bytes; + sqcc += wi->num_wqebbs; dev_kfree_skb(skb); } while (!last_wqe); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 2c7cb6755d1d..4ac8d716dbdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -54,6 +54,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, napi); bool busy = false; + int work_done; int i; clear_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); @@ -61,26 +62,26 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq); - busy |= mlx5e_poll_rx_cq(&c->rq.cq, budget); - + work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); + busy |= work_done == budget; busy |= mlx5e_post_rx_wqes(&c->rq); if (busy) return budget; - napi_complete(napi); + napi_complete_done(napi, work_done); /* avoid losing completion event during/after polling cqs */ if (test_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags)) { napi_schedule(napi); - return 0; + return work_done; } for (i = 0; i < c->num_tc; i++) mlx5e_cq_arm(&c->sq[i].cq); mlx5e_cq_arm(&c->rq.cq); - return 0; + return work_done; } void mlx5e_completion_event(struct mlx5_core_cq *mcq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 713ead583347..23c244a7e5d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -35,6 +35,9 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif enum { MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), @@ -287,6 +290,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) break; #endif +#ifdef CONFIG_MLX5_CORE_EN + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); + break; +#endif default: mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn); @@ -459,6 +467,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, pg)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT); + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && + MLX5_CAP_GEN(dev, vport_group_manager) && + mlx5_core_is_pf(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, "mlx5_cmd_eq", &dev->priv.uuari.uars[0]); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c new file mode 100644 index 000000000000..bc3d9f8a75c1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -0,0 +1,1097 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "mlx5_core.h" +#include "eswitch.h" + +#define UPLINK_VPORT 0xFFFF + +#define MLX5_DEBUG_ESWITCH_MASK BIT(3) + +#define esw_info(dev, format, ...) \ + pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_warn(dev, format, ...) \ + pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_debug(dev, format, ...) \ + mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +enum { + MLX5_ACTION_NONE = 0, + MLX5_ACTION_ADD = 1, + MLX5_ACTION_DEL = 2, +}; + +/* E-Switch UC L2 table hash node */ +struct esw_uc_addr { + struct l2addr_node node; + u32 table_index; + u32 vport; +}; + +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; +}; + +/* Vport UC/MC hash node */ +struct vport_addr { + struct l2addr_node node; + u8 action; + u32 vport; + struct mlx5_flow_rule *flow_rule; /* SRIOV only */ +}; + +enum { + UC_ADDR_CHANGE = BIT(0), + MC_ADDR_CHANGE = BIT(1), +}; + +/* Vport context events */ +#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ + MC_ADDR_CHANGE) + +static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, + u32 events_mask) +{ + int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]; + int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int err; + + memset(out, 0, sizeof(out)); + memset(in, 0, sizeof(in)); + + MLX5_SET(modify_nic_vport_context_in, in, + opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); + + if (events_mask & UC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_uc_address_change, 1); + if (events_mask & MC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_mc_address_change, 1); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + goto ex; + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto ex; + return 0; +ex: + return err; +} + +/* E-Switch vport context HW commands */ +static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + + MLX5_SET(query_esw_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_esw_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); +} + +static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, + u16 *vlan, u8 *qos) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)]; + int err; + bool cvlan_strip; + bool cvlan_insert; + + memset(out, 0, sizeof(out)); + + *vlan = 0; + *qos = 0; + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -ENOTSUPP; + + err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out)); + if (err) + goto out; + + cvlan_strip = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.vport_cvlan_strip); + + cvlan_insert = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.vport_cvlan_insert); + + if (cvlan_strip || cvlan_insert) { + *vlan = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.cvlan_id); + *qos = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.cvlan_pcp); + } + + esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n", + vport, *vlan, *qos); +out: + return err; +} + +static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, + void *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)]; + + memset(out, 0, sizeof(out)); + + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + + MLX5_SET(modify_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); + + return mlx5_cmd_exec_check_status(dev, in, inlen, + out, sizeof(out)); +} + +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, + u16 vlan, u8 qos, bool set) +{ + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -ENOTSUPP; + + esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n", + vport, vlan, qos, set); + + if (set) { + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_strip, 1); + /* insert only if no vlan in packet */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, 1); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_pcp, qos); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_id, vlan); + } + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_strip, 1); + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_insert, 1); + + return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in)); +} + +/* HW L2 Table (MPFS) management */ +static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, + u8 *mac, u8 vlan_valid, u16 vlan) +{ + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)]; + u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)]; + u8 *in_mac_addr; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(set_l2_table_entry_in, in, opcode, + MLX5_CMD_OP_SET_L2_TABLE_ENTRY); + MLX5_SET(set_l2_table_entry_in, in, table_index, index); + MLX5_SET(set_l2_table_entry_in, in, vlan_valid, vlan_valid); + MLX5_SET(set_l2_table_entry_in, in, vlan, vlan); + + in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); + ether_addr_copy(&in_mac_addr[2], mac); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} + +static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index) +{ + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)]; + u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(delete_l2_table_entry_in, in, opcode, + MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + MLX5_SET(delete_l2_table_entry_in, in, table_index, index); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} + +static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix) +{ + int err = 0; + + *ix = find_first_zero_bit(l2_table->bitmap, l2_table->size); + if (*ix >= l2_table->size) + err = -ENOSPC; + else + __set_bit(*ix, l2_table->bitmap); + + return err; +} + +static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix) +{ + __clear_bit(ix, l2_table->bitmap); +} + +static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac, + u8 vlan_valid, u16 vlan, + u32 *index) +{ + struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; + int err; + + err = alloc_l2_table_index(l2_table, index); + if (err) + return err; + + err = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan); + if (err) + free_l2_table_index(l2_table, *index); + + return err; +} + +static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) +{ + struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; + + del_l2_table_entry_cmd(dev, index); + free_l2_table_index(l2_table, index); +} + +/* E-Switch FDB */ +static struct mlx5_flow_rule * +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +{ + int match_header = MLX5_MATCH_OUTER_HEADERS; + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule = NULL; + u32 *match_v; + u32 *match_c; + u8 *dmac_v; + u8 *dmac_c; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + pr_warn("FDB: Failed to alloc match parameters\n"); + goto out; + } + dmac_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers.dmac_47_16); + dmac_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers.dmac_47_16); + + ether_addr_copy(dmac_v, mac); + /* Match criteria mask */ + memset(dmac_c, 0xff, 6); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = vport; + + esw_debug(esw->dev, + "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", + dmac_v, dmac_c, vport); + flow_rule = + mlx5_add_flow_rule(esw->fdb_table.fdb, + match_header, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR_OR_NULL(flow_rule)) { + pr_warn( + "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", + dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); + flow_rule = NULL; + } +out: + kfree(match_v); + kfree(match_c); + return flow_rule; +} + +static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *g; + void *match_criteria; + int table_size; + u32 *flow_group_in; + u8 *dmac; + int err = 0; + + esw_debug(dev, "Create FDB log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -ENOMEM; + } + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + memset(flow_group_in, 0, inlen); + + table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + fdb = mlx5_create_flow_table(root_ns, 0, table_size); + if (IS_ERR_OR_NULL(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create FDB Table err %d\n", err); + goto out; + } + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); + eth_broadcast_addr(dmac); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR_OR_NULL(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create flow group err(%d)\n", err); + goto out; + } + + esw->fdb_table.addr_grp = g; + esw->fdb_table.fdb = fdb; +out: + kfree(flow_group_in); + if (err && !IS_ERR_OR_NULL(fdb)) + mlx5_destroy_flow_table(fdb); + return err; +} + +static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.fdb) + return; + + esw_debug(esw->dev, "Destroy FDB Table\n"); + mlx5_destroy_flow_group(esw->fdb_table.addr_grp); + mlx5_destroy_flow_table(esw->fdb_table.fdb); + esw->fdb_table.fdb = NULL; + esw->fdb_table.addr_grp = NULL; +} + +/* E-Switch vport UC/MC lists management */ +typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, + struct vport_addr *vaddr); + +static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + int err; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (esw_uc) { + esw_warn(esw->dev, + "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n", + mac, vport, esw_uc->vport); + return -EEXIST; + } + + esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL); + if (!esw_uc) + return -ENOMEM; + esw_uc->vport = vport; + + err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index); + if (err) + goto abort; + + if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + return err; +abort: + l2addr_hash_del(esw_uc); + return err; +} + +static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (!esw_uc || esw_uc->vport != vport) { + esw_debug(esw->dev, + "MAC(%pM) doesn't belong to vport (%d)\n", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + + del_l2_table_entry(esw->dev, esw_uc->table_index); + + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule); + vaddr->flow_rule = NULL; + + l2addr_hash_del(esw_uc); + return 0; +} + +static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (esw_mc) + goto add; + + esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL); + if (!esw_mc) + return -ENOMEM; + + esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ + esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); +add: + esw_mc->refcnt++; + /* Forward MC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + esw_debug(esw->dev, + "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, + esw_mc->refcnt, esw_mc->uplink_rule); + return 0; +} + +static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to find eswitch MC addr for MAC(%pM) vport(%d)", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, + "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, esw_mc->refcnt, + esw_mc->uplink_rule); + + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule); + vaddr->flow_rule = NULL; + + if (--esw_mc->refcnt) + return 0; + + if (esw_mc->uplink_rule) + mlx5_del_flow_rule(esw_mc->uplink_rule); + + l2addr_hash_del(esw_mc); + return 0; +} + +/* Apply vport UC/MC list to HW l2 table and FDB table */ +static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + vport_addr_action vport_addr_add; + vport_addr_action vport_addr_del; + struct vport_addr *addr; + struct l2addr_node *node; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + vport_addr_add = is_uc ? esw_add_uc_addr : + esw_add_mc_addr; + vport_addr_del = is_uc ? esw_del_uc_addr : + esw_del_mc_addr; + + hash = is_uc ? vport->uc_list : vport->mc_list; + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + switch (addr->action) { + case MLX5_ACTION_ADD: + vport_addr_add(esw, addr); + addr->action = MLX5_ACTION_NONE; + break; + case MLX5_ACTION_DEL: + vport_addr_del(esw, addr); + l2addr_hash_del(addr); + break; + } + } +} + +/* Sync vport UC/MC list from vport context */ +static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + u8 (*mac_list)[ETH_ALEN]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_head *hash; + struct hlist_node *tmp; + int size; + int err; + int hi; + int i; + + size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) : + MLX5_MAX_MC_PER_VPORT(esw->dev); + + mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!mac_list) + return; + + hash = is_uc ? vport->uc_list : vport->mc_list; + + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + + err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type, + mac_list, &size); + if (err) + return; + esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", + vport_num, is_uc ? "UC" : "MC", size); + + for (i = 0; i < size; i++) { + if (is_uc && !is_valid_ether_addr(mac_list[i])) + continue; + + if (!is_uc && !is_multicast_ether_addr(mac_list[i])) + continue; + + addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr); + if (addr) { + addr->action = MLX5_ACTION_NONE; + continue; + } + + addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add MAC(%pM) to vport[%d] DB\n", + mac_list[i], vport_num); + continue; + } + addr->vport = vport_num; + addr->action = MLX5_ACTION_ADD; + } + kfree(mac_list); +} + +static void esw_vport_change_handler(struct work_struct *work) +{ + struct mlx5_vport *vport = + container_of(work, struct mlx5_vport, vport_change_handler); + struct mlx5_core_dev *dev = vport->dev; + struct mlx5_eswitch *esw = dev->priv.eswitch; + u8 mac[ETH_ALEN]; + + mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", + vport->vport, mac); + + if (vport->enabled_events & UC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + } + + if (vport->enabled_events & MC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + } + + esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); + if (vport->enabled) + arm_vport_context_events_cmd(dev, vport->vport, + vport->enabled_events); +} + +static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, + int enable_events) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + unsigned long flags; + + WARN_ON(vport->enabled); + + esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_AUTO); + + /* Sync with current vport context */ + vport->enabled_events = enable_events; + esw_vport_change_handler(&vport->vport_change_handler); + + spin_lock_irqsave(&vport->lock, flags); + vport->enabled = true; + spin_unlock_irqrestore(&vport->lock, flags); + + arm_vport_context_events_cmd(esw->dev, vport_num, enable_events); + + esw->enabled_vports++; + esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); +} + +static void esw_cleanup_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_node *tmp; + int hi; + + for_each_l2hash_node(node, tmp, vport->uc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_UC); + + for_each_l2hash_node(node, tmp, vport->mc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_MC); +} + +static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + unsigned long flags; + + if (!vport->enabled) + return; + + esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); + /* Mark this vport as disabled to discard new events */ + spin_lock_irqsave(&vport->lock, flags); + vport->enabled = false; + vport->enabled_events = 0; + spin_unlock_irqrestore(&vport->lock, flags); + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_DOWN); + /* Wait for current already scheduled events to complete */ + flush_workqueue(esw->work_queue); + /* Disable events from this vport */ + arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + /* We don't assume VFs will cleanup after themselves */ + esw_cleanup_vport(esw, vport_num); + esw->enabled_vports--; +} + +/* Public E-Switch API */ +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) +{ + int err; + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); + return -ENOTSUPP; + } + + esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs); + + esw_disable_vport(esw, 0); + + err = esw_create_fdb_table(esw, nvfs + 1); + if (err) + goto abort; + + for (i = 0; i <= nvfs; i++) + esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS); + + esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", + esw->enabled_vports); + return 0; + +abort: + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + return err; +} + +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) +{ + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "disable SRIOV: active vports(%d)\n", + esw->enabled_vports); + + for (i = 0; i < esw->total_vports; i++) + esw_disable_vport(esw, i); + + esw_destroy_fdb_table(esw); + + /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); +} + +int mlx5_eswitch_init(struct mlx5_core_dev *dev) +{ + int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); + int total_vports = 1 + pci_sriov_get_totalvfs(dev->pdev); + struct mlx5_eswitch *esw; + int vport_num; + int err; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + esw_info(dev, + "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n", + total_vports, l2_table_size, + MLX5_MAX_UC_PER_VPORT(dev), + MLX5_MAX_MC_PER_VPORT(dev)); + + esw = kzalloc(sizeof(*esw), GFP_KERNEL); + if (!esw) + return -ENOMEM; + + esw->dev = dev; + + esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size), + sizeof(uintptr_t), GFP_KERNEL); + if (!esw->l2_table.bitmap) { + err = -ENOMEM; + goto abort; + } + esw->l2_table.size = l2_table_size; + + esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); + if (!esw->work_queue) { + err = -ENOMEM; + goto abort; + } + + esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport), + GFP_KERNEL); + if (!esw->vports) { + err = -ENOMEM; + goto abort; + } + + for (vport_num = 0; vport_num < total_vports; vport_num++) { + struct mlx5_vport *vport = &esw->vports[vport_num]; + + vport->vport = vport_num; + vport->dev = dev; + INIT_WORK(&vport->vport_change_handler, + esw_vport_change_handler); + spin_lock_init(&vport->lock); + } + + esw->total_vports = total_vports; + esw->enabled_vports = 0; + + dev->priv.eswitch = esw; + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + /* VF Vports will be enabled when SRIOV is enabled */ + return 0; +abort: + if (esw->work_queue) + destroy_workqueue(esw->work_queue); + kfree(esw->l2_table.bitmap); + kfree(esw->vports); + kfree(esw); + return err; +} + +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "cleanup\n"); + esw_disable_vport(esw, 0); + + esw->dev->priv.eswitch = NULL; + destroy_workqueue(esw->work_queue); + kfree(esw->l2_table.bitmap); + kfree(esw->vports); + kfree(esw); +} + +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) +{ + struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change; + u16 vport_num = be16_to_cpu(vc_eqe->vport_num); + struct mlx5_vport *vport; + + if (!esw) { + pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n", + vport_num); + return; + } + + vport = &esw->vports[vport_num]; + spin_lock(&vport->lock); + if (vport->enabled) + queue_work(esw->work_queue, &vport->vport_change_handler); + spin_unlock(&vport->lock); +} + +/* Vport Administration */ +#define ESW_ALLOWED(esw) \ + (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) +#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) + +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + int vport, u8 mac[ETH_ALEN]) +{ + int err = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n", + vport, err); + return err; + } + + return err; +} + +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + int vport, int link_state) +{ + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + return mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport, link_state); +} + +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + int vport, struct ifla_vf_info *ivi) +{ + u16 vlan; + u8 qos; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + memset(ivi, 0, sizeof(*ivi)); + ivi->vf = vport - 1; + + mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac); + ivi->linkstate = mlx5_query_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport); + query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos); + ivi->vlan = vlan; + ivi->qos = qos; + ivi->spoofchk = 0; + + return 0; +} + +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos) +{ + int set = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7)) + return -EINVAL; + + if (vlan || qos) + set = 1; + + return modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); +} + +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + int vport, + struct ifla_vf_stats *vf_stats) +{ + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + int err = 0; + u32 *out; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_counter_in, in, other_vport, 1); + + memset(out, 0, outlen); + err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); + if (err) + goto free_out; + + #define MLX5_GET_CTR(p, x) \ + MLX5_GET64(query_vport_counter_out, p, x) + + memset(vf_stats, 0, sizeof(*vf_stats)); + vf_stats->rx_packets = + MLX5_GET_CTR(out, received_eth_unicast.packets) + + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_eth_broadcast.packets); + + vf_stats->rx_bytes = + MLX5_GET_CTR(out, received_eth_unicast.octets) + + MLX5_GET_CTR(out, received_eth_multicast.octets) + + MLX5_GET_CTR(out, received_eth_broadcast.octets); + + vf_stats->tx_packets = + MLX5_GET_CTR(out, transmitted_eth_unicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + + vf_stats->tx_bytes = + MLX5_GET_CTR(out, transmitted_eth_unicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + vf_stats->multicast = + MLX5_GET_CTR(out, received_eth_multicast.packets); + + vf_stats->broadcast = + MLX5_GET_CTR(out, received_eth_broadcast.packets); + +free_out: + kvfree(out); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h new file mode 100644 index 000000000000..3416a428f70f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_ESWITCH_H__ +#define __MLX5_ESWITCH_H__ + +#include <linux/if_ether.h> +#include <linux/if_link.h> +#include <linux/mlx5/device.h> + +#define MLX5_MAX_UC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) + +#define MLX5_MAX_MC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) + +#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) +#define MLX5_L2_ADDR_HASH(addr) (addr[5]) + +/* L2 -mac address based- hash helpers */ +struct l2addr_node { + struct hlist_node hlist; + u8 addr[ETH_ALEN]; +}; + +#define for_each_l2hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) + +#define l2addr_hash_find(hash, mac, type) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + bool found = false; \ + type *ptr = NULL; \ + \ + hlist_for_each_entry(ptr, &hash[ix], node.hlist) \ + if (ether_addr_equal(ptr->node.addr, mac)) {\ + found = true; \ + break; \ + } \ + if (!found) \ + ptr = NULL; \ + ptr; \ +}) + +#define l2addr_hash_add(hash, mac, type, gfp) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + type *ptr = NULL; \ + \ + ptr = kzalloc(sizeof(type), gfp); \ + if (ptr) { \ + ether_addr_copy(ptr->node.addr, mac); \ + hlist_add_head(&ptr->node.hlist, &hash[ix]);\ + } \ + ptr; \ +}) + +#define l2addr_hash_del(ptr) ({ \ + hlist_del(&ptr->node.hlist); \ + kfree(ptr); \ +}) + +struct mlx5_vport { + struct mlx5_core_dev *dev; + int vport; + struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct work_struct vport_change_handler; + + /* This spinlock protects access to vport data, between + * "esw_vport_disable" and ongoing interrupt "mlx5_eswitch_vport_event" + * once vport marked as disabled new interrupts are discarded. + */ + spinlock_t lock; /* vport events sync */ + bool enabled; + u16 enabled_events; +}; + +struct mlx5_l2_table { + struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE]; + u32 size; + unsigned long *bitmap; +}; + +struct mlx5_eswitch_fdb { + void *fdb; + struct mlx5_flow_group *addr_grp; +}; + +struct mlx5_eswitch { + struct mlx5_core_dev *dev; + struct mlx5_l2_table l2_table; + struct mlx5_eswitch_fdb fdb_table; + struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; + struct workqueue_struct *work_queue; + struct mlx5_vport *vports; + int total_vports; + int enabled_vports; +}; + +/* E-Switch API */ +int mlx5_eswitch_init(struct mlx5_core_dev *dev); +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs); +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + int vport, u8 mac[ETH_ALEN]); +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + int vport, int link_state); +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos); +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + int vport, struct ifla_vf_info *ivi); +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + int vport, + struct ifla_vf_stats *vf_stats); + +#endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c deleted file mode 100644 index ca90b9bc3b95..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/export.h> -#include <linux/mlx5/driver.h> -#include <linux/mlx5/flow_table.h> -#include "mlx5_core.h" - -struct mlx5_ftg { - struct mlx5_flow_table_group g; - u32 id; - u32 start_ix; -}; - -struct mlx5_flow_table { - struct mlx5_core_dev *dev; - u8 level; - u8 type; - u32 id; - struct mutex mutex; /* sync bitmap alloc */ - u16 num_groups; - struct mlx5_ftg *group; - unsigned long *bitmap; - u32 size; -}; - -static int mlx5_set_flow_entry_cmd(struct mlx5_flow_table *ft, u32 group_ix, - u32 flow_index, void *flow_context) -{ - u32 out[MLX5_ST_SZ_DW(set_fte_out)]; - u32 *in; - void *in_flow_context; - int fcdls = - MLX5_GET(flow_context, flow_context, destination_list_size) * - MLX5_ST_SZ_BYTES(dest_format_struct); - int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fcdls; - int err; - - in = mlx5_vzalloc(inlen); - if (!in) { - mlx5_core_warn(ft->dev, "failed to allocate inbox\n"); - return -ENOMEM; - } - - MLX5_SET(set_fte_in, in, table_type, ft->type); - MLX5_SET(set_fte_in, in, table_id, ft->id); - MLX5_SET(set_fte_in, in, flow_index, flow_index); - MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); - - in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); - memcpy(in_flow_context, flow_context, - MLX5_ST_SZ_BYTES(flow_context) + fcdls); - - MLX5_SET(flow_context, in_flow_context, group_id, - ft->group[group_ix].id); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(ft->dev, in, inlen, out, - sizeof(out)); - kvfree(in); - - return err; -} - -static void mlx5_del_flow_entry_cmd(struct mlx5_flow_table *ft, u32 flow_index) -{ - u32 in[MLX5_ST_SZ_DW(delete_fte_in)]; - u32 out[MLX5_ST_SZ_DW(delete_fte_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - -#define MLX5_SET_DFTEI(p, x, v) MLX5_SET(delete_fte_in, p, x, v) - MLX5_SET_DFTEI(in, table_type, ft->type); - MLX5_SET_DFTEI(in, table_id, ft->id); - MLX5_SET_DFTEI(in, flow_index, flow_index); - MLX5_SET_DFTEI(in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); - - mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); -} - -static void mlx5_destroy_flow_group_cmd(struct mlx5_flow_table *ft, int i) -{ - u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - -#define MLX5_SET_DFGI(p, x, v) MLX5_SET(destroy_flow_group_in, p, x, v) - MLX5_SET_DFGI(in, table_type, ft->type); - MLX5_SET_DFGI(in, table_id, ft->id); - MLX5_SET_DFGI(in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); - MLX5_SET_DFGI(in, group_id, ft->group[i].id); - mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); -} - -static int mlx5_create_flow_group_cmd(struct mlx5_flow_table *ft, int i) -{ - u32 out[MLX5_ST_SZ_DW(create_flow_group_out)]; - u32 *in; - void *in_match_criteria; - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_table_group *g = &ft->group[i].g; - u32 start_ix = ft->group[i].start_ix; - u32 end_ix = start_ix + (1 << g->log_sz) - 1; - int err; - - in = mlx5_vzalloc(inlen); - if (!in) { - mlx5_core_warn(ft->dev, "failed to allocate inbox\n"); - return -ENOMEM; - } - in_match_criteria = MLX5_ADDR_OF(create_flow_group_in, in, - match_criteria); - - memset(out, 0, sizeof(out)); - -#define MLX5_SET_CFGI(p, x, v) MLX5_SET(create_flow_group_in, p, x, v) - MLX5_SET_CFGI(in, table_type, ft->type); - MLX5_SET_CFGI(in, table_id, ft->id); - MLX5_SET_CFGI(in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP); - MLX5_SET_CFGI(in, start_flow_index, start_ix); - MLX5_SET_CFGI(in, end_flow_index, end_ix); - MLX5_SET_CFGI(in, match_criteria_enable, g->match_criteria_enable); - - memcpy(in_match_criteria, g->match_criteria, - MLX5_ST_SZ_BYTES(fte_match_param)); - - err = mlx5_cmd_exec_check_status(ft->dev, in, inlen, out, - sizeof(out)); - if (!err) - ft->group[i].id = MLX5_GET(create_flow_group_out, out, - group_id); - - kvfree(in); - - return err; -} - -static void mlx5_destroy_flow_table_groups(struct mlx5_flow_table *ft) -{ - int i; - - for (i = 0; i < ft->num_groups; i++) - mlx5_destroy_flow_group_cmd(ft, i); -} - -static int mlx5_create_flow_table_groups(struct mlx5_flow_table *ft) -{ - int err; - int i; - - for (i = 0; i < ft->num_groups; i++) { - err = mlx5_create_flow_group_cmd(ft, i); - if (err) - goto err_destroy_flow_table_groups; - } - - return 0; - -err_destroy_flow_table_groups: - for (i--; i >= 0; i--) - mlx5_destroy_flow_group_cmd(ft, i); - - return err; -} - -static int mlx5_create_flow_table_cmd(struct mlx5_flow_table *ft) -{ - u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]; - u32 out[MLX5_ST_SZ_DW(create_flow_table_out)]; - int err; - - memset(in, 0, sizeof(in)); - - MLX5_SET(create_flow_table_in, in, table_type, ft->type); - MLX5_SET(create_flow_table_in, in, level, ft->level); - MLX5_SET(create_flow_table_in, in, log_size, order_base_2(ft->size)); - - MLX5_SET(create_flow_table_in, in, opcode, - MLX5_CMD_OP_CREATE_FLOW_TABLE); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, - sizeof(out)); - if (err) - return err; - - ft->id = MLX5_GET(create_flow_table_out, out, table_id); - - return 0; -} - -static void mlx5_destroy_flow_table_cmd(struct mlx5_flow_table *ft) -{ - u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - -#define MLX5_SET_DFTI(p, x, v) MLX5_SET(destroy_flow_table_in, p, x, v) - MLX5_SET_DFTI(in, table_type, ft->type); - MLX5_SET_DFTI(in, table_id, ft->id); - MLX5_SET_DFTI(in, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); - - mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); -} - -static int mlx5_find_group(struct mlx5_flow_table *ft, u8 match_criteria_enable, - u32 *match_criteria, int *group_ix) -{ - void *mc_outer = MLX5_ADDR_OF(fte_match_param, match_criteria, - outer_headers); - void *mc_misc = MLX5_ADDR_OF(fte_match_param, match_criteria, - misc_parameters); - void *mc_inner = MLX5_ADDR_OF(fte_match_param, match_criteria, - inner_headers); - int mc_outer_sz = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4); - int mc_misc_sz = MLX5_ST_SZ_BYTES(fte_match_set_misc); - int mc_inner_sz = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4); - int i; - - for (i = 0; i < ft->num_groups; i++) { - struct mlx5_flow_table_group *g = &ft->group[i].g; - void *gmc_outer = MLX5_ADDR_OF(fte_match_param, - g->match_criteria, - outer_headers); - void *gmc_misc = MLX5_ADDR_OF(fte_match_param, - g->match_criteria, - misc_parameters); - void *gmc_inner = MLX5_ADDR_OF(fte_match_param, - g->match_criteria, - inner_headers); - - if (g->match_criteria_enable != match_criteria_enable) - continue; - - if (match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) - if (memcmp(mc_outer, gmc_outer, mc_outer_sz)) - continue; - - if (match_criteria_enable & MLX5_MATCH_MISC_PARAMETERS) - if (memcmp(mc_misc, gmc_misc, mc_misc_sz)) - continue; - - if (match_criteria_enable & MLX5_MATCH_INNER_HEADERS) - if (memcmp(mc_inner, gmc_inner, mc_inner_sz)) - continue; - - *group_ix = i; - return 0; - } - - return -EINVAL; -} - -static int alloc_flow_index(struct mlx5_flow_table *ft, int group_ix, u32 *ix) -{ - struct mlx5_ftg *g = &ft->group[group_ix]; - int err = 0; - - mutex_lock(&ft->mutex); - - *ix = find_next_zero_bit(ft->bitmap, ft->size, g->start_ix); - if (*ix >= (g->start_ix + (1 << g->g.log_sz))) - err = -ENOSPC; - else - __set_bit(*ix, ft->bitmap); - - mutex_unlock(&ft->mutex); - - return err; -} - -static void mlx5_free_flow_index(struct mlx5_flow_table *ft, u32 ix) -{ - __clear_bit(ix, ft->bitmap); -} - -int mlx5_add_flow_table_entry(void *flow_table, u8 match_criteria_enable, - void *match_criteria, void *flow_context, - u32 *flow_index) -{ - struct mlx5_flow_table *ft = flow_table; - int group_ix; - int err; - - err = mlx5_find_group(ft, match_criteria_enable, match_criteria, - &group_ix); - if (err) { - mlx5_core_warn(ft->dev, "mlx5_find_group failed\n"); - return err; - } - - err = alloc_flow_index(ft, group_ix, flow_index); - if (err) { - mlx5_core_warn(ft->dev, "alloc_flow_index failed\n"); - return err; - } - - return mlx5_set_flow_entry_cmd(ft, group_ix, *flow_index, flow_context); -} -EXPORT_SYMBOL(mlx5_add_flow_table_entry); - -void mlx5_del_flow_table_entry(void *flow_table, u32 flow_index) -{ - struct mlx5_flow_table *ft = flow_table; - - mlx5_del_flow_entry_cmd(ft, flow_index); - mlx5_free_flow_index(ft, flow_index); -} -EXPORT_SYMBOL(mlx5_del_flow_table_entry); - -void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type, - u16 num_groups, - struct mlx5_flow_table_group *group) -{ - struct mlx5_flow_table *ft; - u32 start_ix = 0; - u32 ft_size = 0; - void *gr; - void *bm; - int err; - int i; - - for (i = 0; i < num_groups; i++) - ft_size += (1 << group[i].log_sz); - - ft = kzalloc(sizeof(*ft), GFP_KERNEL); - gr = kcalloc(num_groups, sizeof(struct mlx5_ftg), GFP_KERNEL); - bm = kcalloc(BITS_TO_LONGS(ft_size), sizeof(uintptr_t), GFP_KERNEL); - if (!ft || !gr || !bm) - goto err_free_ft; - - ft->group = gr; - ft->bitmap = bm; - ft->num_groups = num_groups; - ft->level = level; - ft->type = table_type; - ft->size = ft_size; - ft->dev = dev; - mutex_init(&ft->mutex); - - for (i = 0; i < ft->num_groups; i++) { - memcpy(&ft->group[i].g, &group[i], sizeof(*group)); - ft->group[i].start_ix = start_ix; - start_ix += 1 << group[i].log_sz; - } - - err = mlx5_create_flow_table_cmd(ft); - if (err) - goto err_free_ft; - - err = mlx5_create_flow_table_groups(ft); - if (err) - goto err_destroy_flow_table_cmd; - - return ft; - -err_destroy_flow_table_cmd: - mlx5_destroy_flow_table_cmd(ft); - -err_free_ft: - mlx5_core_warn(dev, "failed to alloc flow table\n"); - kfree(bm); - kfree(gr); - kfree(ft); - - return NULL; -} -EXPORT_SYMBOL(mlx5_create_flow_table); - -void mlx5_destroy_flow_table(void *flow_table) -{ - struct mlx5_flow_table *ft = flow_table; - - mlx5_destroy_flow_table_groups(ft); - mlx5_destroy_flow_table_cmd(ft); - kfree(ft->bitmap); - kfree(ft->group); - kfree(ft); -} -EXPORT_SYMBOL(mlx5_destroy_flow_table); - -u32 mlx5_get_flow_table_id(void *flow_table) -{ - struct mlx5_flow_table *ft = flow_table; - - return ft->id; -} -EXPORT_SYMBOL(mlx5_get_flow_table_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c new file mode 100644 index 000000000000..a9894d2e8e26 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> +#include <linux/mlx5/mlx5_ifc.h> + +#include "fs_core.h" +#include "fs_cmd.h" +#include "mlx5_core.h" + +int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]; + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); + MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, unsigned int level, + unsigned int log_size, struct mlx5_flow_table + *next_ft, unsigned int *table_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)]; + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(create_flow_table_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_TABLE); + + if (next_ft) { + MLX5_SET(create_flow_table_in, in, table_miss_mode, 1); + MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id); + } + MLX5_SET(create_flow_table_in, in, table_type, type); + MLX5_SET(create_flow_table_in, in, level, level); + MLX5_SET(create_flow_table_in, in, log_size, log_size); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); + + if (!err) + *table_id = MLX5_GET(create_flow_table_out, out, + table_id); + return err; +} + +int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(destroy_flow_table_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + MLX5_SET(destroy_flow_table_in, in, table_type, ft->type); + MLX5_SET(destroy_flow_table_in, in, table_id, ft->id); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(modify_flow_table_in, in, opcode, + MLX5_CMD_OP_MODIFY_FLOW_TABLE); + MLX5_SET(modify_flow_table_in, in, table_type, ft->type); + MLX5_SET(modify_flow_table_in, in, table_id, ft->id); + MLX5_SET(modify_flow_table_in, in, modify_field_select, + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); + if (next_ft) { + MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1); + MLX5_SET(modify_flow_table_in, in, table_miss_id, next_ft->id); + } else { + MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0); + } + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)]; + int err; + + memset(out, 0, sizeof(out)); + + MLX5_SET(create_flow_group_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET(create_flow_group_in, in, table_type, ft->type); + MLX5_SET(create_flow_group_in, in, table_id, ft->id); + + err = mlx5_cmd_exec_check_status(dev, in, + inlen, out, + sizeof(out)); + if (!err) + *group_id = MLX5_GET(create_flow_group_out, out, + group_id); + + return err; +} + +int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id) +{ + u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)]; + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(destroy_flow_group_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET(destroy_flow_group_in, in, table_type, ft->type); + MLX5_SET(destroy_flow_group_in, in, table_id, ft->id); + MLX5_SET(destroy_flow_group_in, in, group_id, group_id); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte) +{ + unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + + fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct); + u32 out[MLX5_ST_SZ_DW(set_fte_out)]; + struct mlx5_flow_rule *dst; + void *in_flow_context; + void *in_match_value; + void *in_dests; + u32 *in; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(dev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, op_mod, opmod); + MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask); + MLX5_SET(set_fte_in, in, table_type, ft->type); + MLX5_SET(set_fte_in, in, table_id, ft->id); + MLX5_SET(set_fte_in, in, flow_index, fte->index); + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag); + MLX5_SET(flow_context, in_flow_context, action, fte->action); + MLX5_SET(flow_context, in_flow_context, destination_list_size, + fte->dests_size); + in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, + match_value); + memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + list_for_each_entry(dst, &fte->node.children, node.list) { + unsigned int id; + + MLX5_SET(dest_format_struct, in_dests, destination_type, + dst->dest_attr.type); + if (dst->dest_attr.type == + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + id = dst->dest_attr.ft->id; + else + id = dst->dest_attr.tir_num; + MLX5_SET(dest_format_struct, in_dests, destination_id, id); + in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + } + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, + sizeof(out)); + kvfree(in); + + return err; +} + +int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte) +{ + return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte); +} + +int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte) +{ + int opmod; + int modify_mask; + int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive. + flow_modify_en); + if (!atomic_mod_cap) + return -ENOTSUPP; + opmod = 1; + modify_mask = 1 << + MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST; + + return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte); +} + +int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int index) +{ + u32 out[MLX5_ST_SZ_DW(delete_fte_out)]; + u32 in[MLX5_ST_SZ_DW(delete_fte_in)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + MLX5_SET(delete_fte_in, in, table_type, ft->type); + MLX5_SET(delete_fte_in, in, table_id, ft->id); + MLX5_SET(delete_fte_in, in, flow_index, index); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h new file mode 100644 index 000000000000..9814d4784803 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MLX5_FS_CMD_ +#define _MLX5_FS_CMD_ + +int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, unsigned int level, + unsigned int log_size, struct mlx5_flow_table + *next_ft, unsigned int *table_id); + +int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft); + +int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft); + +int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, unsigned int *group_id); + +int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id); + +int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte); + +int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte); + +int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int index); + +int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c new file mode 100644 index 000000000000..6f68dba8d7ed --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -0,0 +1,1514 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mutex.h> +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" +#include "fs_core.h" +#include "fs_cmd.h" + +#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ + sizeof(struct init_tree_node)) + +#define ADD_PRIO(num_prios_val, min_level_val, max_ft_val, caps_val,\ + ...) {.type = FS_TYPE_PRIO,\ + .min_ft_level = min_level_val,\ + .max_ft = max_ft_val,\ + .num_leaf_prios = num_prios_val,\ + .caps = caps_val,\ + .children = (struct init_tree_node[]) {__VA_ARGS__},\ + .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ +} + +#define ADD_MULTIPLE_PRIO(num_prios_val, max_ft_val, ...)\ + ADD_PRIO(num_prios_val, 0, max_ft_val, {},\ + __VA_ARGS__)\ + +#define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\ + .children = (struct init_tree_node[]) {__VA_ARGS__},\ + .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ +} + +#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\ + sizeof(long)) + +#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap)) + +#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \ + .caps = (long[]) {__VA_ARGS__} } + +#define LEFTOVERS_MAX_FT 1 +#define LEFTOVERS_NUM_PRIOS 1 +#define BY_PASS_PRIO_MAX_FT 1 +#define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ + LEFTOVERS_MAX_FT) + +#define KERNEL_MAX_FT 2 +#define KERNEL_NUM_PRIOS 1 +#define KENREL_MIN_LEVEL 2 + +struct node_caps { + size_t arr_sz; + long *caps; +}; +static struct init_tree_node { + enum fs_node_type type; + struct init_tree_node *children; + int ar_size; + struct node_caps caps; + int min_ft_level; + int num_leaf_prios; + int prio; + int max_ft; +} root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 3, + .children = (struct init_tree_node[]) { + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, + FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), + FS_CAP(flow_table_properties_nic_receive.modify_root), + FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), + FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), + ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_MAX_FT))), + ADD_PRIO(0, KENREL_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NUM_PRIOS, KERNEL_MAX_FT))), + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, + FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), + FS_CAP(flow_table_properties_nic_receive.modify_root), + FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), + FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), + ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))), + } +}; + +enum fs_i_mutex_lock_class { + FS_MUTEX_GRANDPARENT, + FS_MUTEX_PARENT, + FS_MUTEX_CHILD +}; + +static void del_rule(struct fs_node *node); +static void del_flow_table(struct fs_node *node); +static void del_flow_group(struct fs_node *node); +static void del_fte(struct fs_node *node); + +static void tree_init_node(struct fs_node *node, + unsigned int refcount, + void (*remove_func)(struct fs_node *)) +{ + atomic_set(&node->refcount, refcount); + INIT_LIST_HEAD(&node->list); + INIT_LIST_HEAD(&node->children); + mutex_init(&node->lock); + node->remove_func = remove_func; +} + +static void tree_add_node(struct fs_node *node, struct fs_node *parent) +{ + if (parent) + atomic_inc(&parent->refcount); + node->parent = parent; + + /* Parent is the root */ + if (!parent) + node->root = node; + else + node->root = parent->root; +} + +static void tree_get_node(struct fs_node *node) +{ + atomic_inc(&node->refcount); +} + +static void nested_lock_ref_node(struct fs_node *node, + enum fs_i_mutex_lock_class class) +{ + if (node) { + mutex_lock_nested(&node->lock, class); + atomic_inc(&node->refcount); + } +} + +static void lock_ref_node(struct fs_node *node) +{ + if (node) { + mutex_lock(&node->lock); + atomic_inc(&node->refcount); + } +} + +static void unlock_ref_node(struct fs_node *node) +{ + if (node) { + atomic_dec(&node->refcount); + mutex_unlock(&node->lock); + } +} + +static void tree_put_node(struct fs_node *node) +{ + struct fs_node *parent_node = node->parent; + + lock_ref_node(parent_node); + if (atomic_dec_and_test(&node->refcount)) { + if (parent_node) + list_del_init(&node->list); + if (node->remove_func) + node->remove_func(node); + kfree(node); + node = NULL; + } + unlock_ref_node(parent_node); + if (!node && parent_node) + tree_put_node(parent_node); +} + +static int tree_remove_node(struct fs_node *node) +{ + if (atomic_read(&node->refcount) > 1) + return -EPERM; + tree_put_node(node); + return 0; +} + +static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns, + unsigned int prio) +{ + struct fs_prio *iter_prio; + + fs_for_each_prio(iter_prio, ns) { + if (iter_prio->prio == prio) + return iter_prio; + } + + return NULL; +} + +static unsigned int find_next_free_level(struct fs_prio *prio) +{ + if (!list_empty(&prio->node.children)) { + struct mlx5_flow_table *ft; + + ft = list_last_entry(&prio->node.children, + struct mlx5_flow_table, + node.list); + return ft->level + 1; + } + return prio->start_level; +} + +static bool masked_memcmp(void *mask, void *val1, void *val2, size_t size) +{ + unsigned int i; + + for (i = 0; i < size; i++, mask++, val1++, val2++) + if ((*((u8 *)val1) & (*(u8 *)mask)) != + ((*(u8 *)val2) & (*(u8 *)mask))) + return false; + + return true; +} + +static bool compare_match_value(struct mlx5_flow_group_mask *mask, + void *fte_param1, void *fte_param2) +{ + if (mask->match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) { + void *fte_match1 = MLX5_ADDR_OF(fte_match_param, + fte_param1, outer_headers); + void *fte_match2 = MLX5_ADDR_OF(fte_match_param, + fte_param2, outer_headers); + void *fte_mask = MLX5_ADDR_OF(fte_match_param, + mask->match_criteria, outer_headers); + + if (!masked_memcmp(fte_mask, fte_match1, fte_match2, + MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4))) + return false; + } + + if (mask->match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) { + void *fte_match1 = MLX5_ADDR_OF(fte_match_param, + fte_param1, misc_parameters); + void *fte_match2 = MLX5_ADDR_OF(fte_match_param, + fte_param2, misc_parameters); + void *fte_mask = MLX5_ADDR_OF(fte_match_param, + mask->match_criteria, misc_parameters); + + if (!masked_memcmp(fte_mask, fte_match1, fte_match2, + MLX5_ST_SZ_BYTES(fte_match_set_misc))) + return false; + } + + if (mask->match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) { + void *fte_match1 = MLX5_ADDR_OF(fte_match_param, + fte_param1, inner_headers); + void *fte_match2 = MLX5_ADDR_OF(fte_match_param, + fte_param2, inner_headers); + void *fte_mask = MLX5_ADDR_OF(fte_match_param, + mask->match_criteria, inner_headers); + + if (!masked_memcmp(fte_mask, fte_match1, fte_match2, + MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4))) + return false; + } + return true; +} + +static bool compare_match_criteria(u8 match_criteria_enable1, + u8 match_criteria_enable2, + void *mask1, void *mask2) +{ + return match_criteria_enable1 == match_criteria_enable2 && + !memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param)); +} + +static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) +{ + struct fs_node *root; + struct mlx5_flow_namespace *ns; + + root = node->root; + + if (WARN_ON(root->type != FS_TYPE_NAMESPACE)) { + pr_warn("mlx5: flow steering node is not in tree or garbaged\n"); + return NULL; + } + + ns = container_of(root, struct mlx5_flow_namespace, node); + return container_of(ns, struct mlx5_flow_root_namespace, ns); +} + +static inline struct mlx5_core_dev *get_dev(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root = find_root(node); + + if (root) + return root->dev; + return NULL; +} + +static void del_flow_table(struct fs_node *node) +{ + struct mlx5_flow_table *ft; + struct mlx5_core_dev *dev; + struct fs_prio *prio; + int err; + + fs_get_obj(ft, node); + dev = get_dev(&ft->node); + + err = mlx5_cmd_destroy_flow_table(dev, ft); + if (err) + pr_warn("flow steering can't destroy ft\n"); + fs_get_obj(prio, ft->node.parent); + prio->num_ft--; +} + +static void del_rule(struct fs_node *node) +{ + struct mlx5_flow_rule *rule; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct fs_fte *fte; + u32 *match_value; + struct mlx5_core_dev *dev = get_dev(node); + int match_len = MLX5_ST_SZ_BYTES(fte_match_param); + int err; + + match_value = mlx5_vzalloc(match_len); + if (!match_value) { + pr_warn("failed to allocate inbox\n"); + return; + } + + fs_get_obj(rule, node); + fs_get_obj(fte, rule->node.parent); + fs_get_obj(fg, fte->node.parent); + memcpy(match_value, fte->val, sizeof(fte->val)); + fs_get_obj(ft, fg->node.parent); + list_del(&rule->node.list); + fte->dests_size--; + if (fte->dests_size) { + err = mlx5_cmd_update_fte(dev, ft, + fg->id, fte); + if (err) + pr_warn("%s can't del rule fg id=%d fte_index=%d\n", + __func__, fg->id, fte->index); + } + kvfree(match_value); +} + +static void del_fte(struct fs_node *node) +{ + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_core_dev *dev; + struct fs_fte *fte; + int err; + + fs_get_obj(fte, node); + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + + dev = get_dev(&ft->node); + err = mlx5_cmd_delete_fte(dev, ft, + fte->index); + if (err) + pr_warn("flow steering can't delete fte in index %d of flow group id %d\n", + fte->index, fg->id); + + fte->status = 0; + fg->num_ftes--; +} + +static void del_flow_group(struct fs_node *node) +{ + struct mlx5_flow_group *fg; + struct mlx5_flow_table *ft; + struct mlx5_core_dev *dev; + + fs_get_obj(fg, node); + fs_get_obj(ft, fg->node.parent); + dev = get_dev(&ft->node); + + if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) + pr_warn("flow steering can't destroy fg %d of ft %d\n", + fg->id, ft->id); +} + +static struct fs_fte *alloc_fte(u8 action, + u32 flow_tag, + u32 *match_value, + unsigned int index) +{ + struct fs_fte *fte; + + fte = kzalloc(sizeof(*fte), GFP_KERNEL); + if (!fte) + return ERR_PTR(-ENOMEM); + + memcpy(fte->val, match_value, sizeof(fte->val)); + fte->node.type = FS_TYPE_FLOW_ENTRY; + fte->flow_tag = flow_tag; + fte->index = index; + fte->action = action; + + return fte; +} + +static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) +{ + struct mlx5_flow_group *fg; + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + create_fg_in, match_criteria); + u8 match_criteria_enable = MLX5_GET(create_flow_group_in, + create_fg_in, + match_criteria_enable); + fg = kzalloc(sizeof(*fg), GFP_KERNEL); + if (!fg) + return ERR_PTR(-ENOMEM); + + fg->mask.match_criteria_enable = match_criteria_enable; + memcpy(&fg->mask.match_criteria, match_criteria, + sizeof(fg->mask.match_criteria)); + fg->node.type = FS_TYPE_FLOW_GROUP; + fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in, + start_flow_index); + fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in, + end_flow_index) - fg->start_index + 1; + return fg; +} + +static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte, + enum fs_flow_table_type table_type) +{ + struct mlx5_flow_table *ft; + + ft = kzalloc(sizeof(*ft), GFP_KERNEL); + if (!ft) + return NULL; + + ft->level = level; + ft->node.type = FS_TYPE_FLOW_TABLE; + ft->type = table_type; + ft->max_fte = max_fte; + + return ft; +} + +/* If reverse is false, then we search for the first flow table in the + * root sub-tree from start(closest from right), else we search for the + * last flow table in the root sub-tree till start(closest from left). + */ +static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, + struct list_head *start, + bool reverse) +{ +#define list_advance_entry(pos, reverse) \ + ((reverse) ? list_prev_entry(pos, list) : list_next_entry(pos, list)) + +#define list_for_each_advance_continue(pos, head, reverse) \ + for (pos = list_advance_entry(pos, reverse); \ + &pos->list != (head); \ + pos = list_advance_entry(pos, reverse)) + + struct fs_node *iter = list_entry(start, struct fs_node, list); + struct mlx5_flow_table *ft = NULL; + + if (!root) + return NULL; + + list_for_each_advance_continue(iter, &root->children, reverse) { + if (iter->type == FS_TYPE_FLOW_TABLE) { + fs_get_obj(ft, iter); + return ft; + } + ft = find_closest_ft_recursive(iter, &iter->children, reverse); + if (ft) + return ft; + } + + return ft; +} + +/* If reverse if false then return the first flow table in next priority of + * prio in the tree, else return the last flow table in the previous priority + * of prio in the tree. + */ +static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse) +{ + struct mlx5_flow_table *ft = NULL; + struct fs_node *curr_node; + struct fs_node *parent; + + parent = prio->node.parent; + curr_node = &prio->node; + while (!ft && parent) { + ft = find_closest_ft_recursive(parent, &curr_node->list, reverse); + curr_node = parent; + parent = curr_node->parent; + } + return ft; +} + +/* Assuming all the tree is locked by mutex chain lock */ +static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio) +{ + return find_closest_ft(prio, false); +} + +/* Assuming all the tree is locked by mutex chain lock */ +static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio) +{ + return find_closest_ft(prio, true); +} + +static int connect_fts_in_prio(struct mlx5_core_dev *dev, + struct fs_prio *prio, + struct mlx5_flow_table *ft) +{ + struct mlx5_flow_table *iter; + int i = 0; + int err; + + fs_for_each_ft(iter, prio) { + i++; + err = mlx5_cmd_modify_flow_table(dev, + iter, + ft); + if (err) { + mlx5_core_warn(dev, "Failed to modify flow table %d\n", + iter->id); + /* The driver is out of sync with the FW */ + if (i > 1) + WARN_ON(true); + return err; + } + } + return 0; +} + +/* Connect flow tables from previous priority of prio to ft */ +static int connect_prev_fts(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct mlx5_flow_table *prev_ft; + + prev_ft = find_prev_chained_ft(prio); + if (prev_ft) { + struct fs_prio *prev_prio; + + fs_get_obj(prev_prio, prev_ft->node.parent); + return connect_fts_in_prio(dev, prev_prio, ft); + } + return 0; +} + +static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio + *prio) +{ + struct mlx5_flow_root_namespace *root = find_root(&prio->node); + int min_level = INT_MAX; + int err; + + if (root->root_ft) + min_level = root->root_ft->level; + + if (ft->level >= min_level) + return 0; + + err = mlx5_cmd_update_root_ft(root->dev, ft); + if (err) + mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", + ft->id); + else + root->root_ft = ft; + + return err; +} + +static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + int err = 0; + + /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ + + if (list_empty(&prio->node.children)) { + err = connect_prev_fts(dev, ft, prio); + if (err) + return err; + } + + if (MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive.modify_root)) + err = update_root_ft_create(ft, prio); + return err; +} + +struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int max_fte) +{ + struct mlx5_flow_table *next_ft = NULL; + struct mlx5_flow_table *ft; + int err; + int log_table_sz; + struct mlx5_flow_root_namespace *root = + find_root(&ns->node); + struct fs_prio *fs_prio = NULL; + + if (!root) { + pr_err("mlx5: flow steering failed to find root of namespace\n"); + return ERR_PTR(-ENODEV); + } + + mutex_lock(&root->chain_lock); + fs_prio = find_prio(ns, prio); + if (!fs_prio) { + err = -EINVAL; + goto unlock_root; + } + if (fs_prio->num_ft == fs_prio->max_ft) { + err = -ENOSPC; + goto unlock_root; + } + + ft = alloc_flow_table(find_next_free_level(fs_prio), + roundup_pow_of_two(max_fte), + root->table_type); + if (!ft) { + err = -ENOMEM; + goto unlock_root; + } + + tree_init_node(&ft->node, 1, del_flow_table); + log_table_sz = ilog2(ft->max_fte); + next_ft = find_next_chained_ft(fs_prio); + err = mlx5_cmd_create_flow_table(root->dev, ft->type, ft->level, + log_table_sz, next_ft, &ft->id); + if (err) + goto free_ft; + + err = connect_flow_table(root->dev, ft, fs_prio); + if (err) + goto destroy_ft; + lock_ref_node(&fs_prio->node); + tree_add_node(&ft->node, &fs_prio->node); + list_add_tail(&ft->node.list, &fs_prio->node.children); + fs_prio->num_ft++; + unlock_ref_node(&fs_prio->node); + mutex_unlock(&root->chain_lock); + return ft; +destroy_ft: + mlx5_cmd_destroy_flow_table(root->dev, ft); +free_ft: + kfree(ft); +unlock_root: + mutex_unlock(&root->chain_lock); + return ERR_PTR(err); +} + +struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int num_flow_table_entries, + int max_num_groups) +{ + struct mlx5_flow_table *ft; + + if (max_num_groups > num_flow_table_entries) + return ERR_PTR(-EINVAL); + + ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries); + if (IS_ERR(ft)) + return ft; + + ft->autogroup.active = true; + ft->autogroup.required_groups = max_num_groups; + + return ft; +} +EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table); + +/* Flow table should be locked */ +static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *ft, + u32 *fg_in, + struct list_head + *prev_fg, + bool is_auto_fg) +{ + struct mlx5_flow_group *fg; + struct mlx5_core_dev *dev = get_dev(&ft->node); + int err; + + if (!dev) + return ERR_PTR(-ENODEV); + + fg = alloc_flow_group(fg_in); + if (IS_ERR(fg)) + return fg; + + err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); + if (err) { + kfree(fg); + return ERR_PTR(err); + } + + if (ft->autogroup.active) + ft->autogroup.num_groups++; + /* Add node to tree */ + tree_init_node(&fg->node, !is_auto_fg, del_flow_group); + tree_add_node(&fg->node, &ft->node); + /* Add node to group list */ + list_add(&fg->node.list, ft->node.children.prev); + + return fg; +} + +struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, + u32 *fg_in) +{ + struct mlx5_flow_group *fg; + + if (ft->autogroup.active) + return ERR_PTR(-EPERM); + + lock_ref_node(&ft->node); + fg = create_flow_group_common(ft, fg_in, &ft->node.children, false); + unlock_ref_node(&ft->node); + + return fg; +} + +static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return NULL; + + rule->node.type = FS_TYPE_FLOW_DEST; + memcpy(&rule->dest_attr, dest, sizeof(*dest)); + + return rule; +} + +/* fte should not be deleted while calling this function */ +static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte, + struct mlx5_flow_group *fg, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_table *ft; + struct mlx5_flow_rule *rule; + int err; + + rule = alloc_rule(dest); + if (!rule) + return ERR_PTR(-ENOMEM); + + fs_get_obj(ft, fg->node.parent); + /* Add dest to dests list- added as first element after the head */ + tree_init_node(&rule->node, 1, del_rule); + list_add_tail(&rule->node.list, &fte->node.children); + fte->dests_size++; + if (fte->dests_size == 1) + err = mlx5_cmd_create_fte(get_dev(&ft->node), + ft, fg->id, fte); + else + err = mlx5_cmd_update_fte(get_dev(&ft->node), + ft, fg->id, fte); + if (err) + goto free_rule; + + fte->status |= FS_FTE_STATUS_EXISTING; + + return rule; + +free_rule: + list_del(&rule->node.list); + kfree(rule); + fte->dests_size--; + return ERR_PTR(err); +} + +/* Assumed fg is locked */ +static unsigned int get_free_fte_index(struct mlx5_flow_group *fg, + struct list_head **prev) +{ + struct fs_fte *fte; + unsigned int start = fg->start_index; + + if (prev) + *prev = &fg->node.children; + + /* assumed list is sorted by index */ + fs_for_each_fte(fte, fg) { + if (fte->index != start) + return start; + start++; + if (prev) + *prev = &fte->node.list; + } + + return start; +} + +/* prev is output, prev->next = new_fte */ +static struct fs_fte *create_fte(struct mlx5_flow_group *fg, + u32 *match_value, + u8 action, + u32 flow_tag, + struct list_head **prev) +{ + struct fs_fte *fte; + int index; + + index = get_free_fte_index(fg, prev); + fte = alloc_fte(action, flow_tag, match_value, index); + if (IS_ERR(fte)) + return fte; + + return fte; +} + +static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct list_head *prev = &ft->node.children; + unsigned int candidate_index = 0; + struct mlx5_flow_group *fg; + void *match_criteria_addr; + unsigned int group_size = 0; + u32 *in; + + if (!ft->autogroup.active) + return ERR_PTR(-ENOENT); + + in = mlx5_vzalloc(inlen); + if (!in) + return ERR_PTR(-ENOMEM); + + if (ft->autogroup.num_groups < ft->autogroup.required_groups) + /* We save place for flow groups in addition to max types */ + group_size = ft->max_fte / (ft->autogroup.required_groups + 1); + + /* ft->max_fte == ft->autogroup.max_types */ + if (group_size == 0) + group_size = 1; + + /* sorted by start_index */ + fs_for_each_fg(fg, ft) { + if (candidate_index + group_size > fg->start_index) + candidate_index = fg->start_index + fg->max_ftes; + else + break; + prev = &fg->node.list; + } + + if (candidate_index + group_size > ft->max_fte) { + fg = ERR_PTR(-ENOSPC); + goto out; + } + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + match_criteria_enable); + MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index); + MLX5_SET(create_flow_group_in, in, end_flow_index, candidate_index + + group_size - 1); + match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in, + in, match_criteria); + memcpy(match_criteria_addr, match_criteria, + MLX5_ST_SZ_BYTES(fte_match_param)); + + fg = create_flow_group_common(ft, in, prev, true); +out: + kvfree(in); + return fg; +} + +static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, + u32 *match_value, + u8 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct fs_fte *fte; + struct mlx5_flow_rule *rule; + struct mlx5_flow_table *ft; + struct list_head *prev; + + nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT); + fs_for_each_fte(fte, fg) { + nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); + if (compare_match_value(&fg->mask, match_value, &fte->val) && + action == fte->action && flow_tag == fte->flow_tag) { + rule = add_rule_fte(fte, fg, dest); + unlock_ref_node(&fte->node); + if (IS_ERR(rule)) + goto unlock_fg; + else + goto add_rule; + } + unlock_ref_node(&fte->node); + } + fs_get_obj(ft, fg->node.parent); + if (fg->num_ftes >= fg->max_ftes) { + rule = ERR_PTR(-ENOSPC); + goto unlock_fg; + } + + fte = create_fte(fg, match_value, action, flow_tag, &prev); + if (IS_ERR(fte)) { + rule = (void *)fte; + goto unlock_fg; + } + tree_init_node(&fte->node, 0, del_fte); + rule = add_rule_fte(fte, fg, dest); + if (IS_ERR(rule)) { + kfree(fte); + goto unlock_fg; + } + + fg->num_ftes++; + + tree_add_node(&fte->node, &fg->node); + list_add(&fte->node.list, prev); +add_rule: + tree_add_node(&rule->node, &fte->node); +unlock_fg: + unlock_ref_node(&fg->node); + return rule; +} + +static struct mlx5_flow_rule *add_rule_to_auto_fg(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u8 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + struct mlx5_flow_group *g; + + g = create_autogroup(ft, match_criteria_enable, match_criteria); + if (IS_ERR(g)) + return (void *)g; + + rule = add_rule_fg(g, match_value, + action, flow_tag, dest); + if (IS_ERR(rule)) { + /* Remove assumes refcount > 0 and autogroup creates a group + * with a refcount = 0. + */ + tree_get_node(&g->node); + tree_remove_node(&g->node); + } + return rule; +} + +struct mlx5_flow_rule * +mlx5_add_flow_rule(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_group *g; + struct mlx5_flow_rule *rule; + + nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); + fs_for_each_fg(g, ft) + if (compare_match_criteria(g->mask.match_criteria_enable, + match_criteria_enable, + g->mask.match_criteria, + match_criteria)) { + rule = add_rule_fg(g, match_value, + action, flow_tag, dest); + if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) + goto unlock; + } + + rule = add_rule_to_auto_fg(ft, match_criteria_enable, match_criteria, + match_value, action, flow_tag, dest); +unlock: + unlock_ref_node(&ft->node); + return rule; +} +EXPORT_SYMBOL(mlx5_add_flow_rule); + +void mlx5_del_flow_rule(struct mlx5_flow_rule *rule) +{ + tree_remove_node(&rule->node); +} +EXPORT_SYMBOL(mlx5_del_flow_rule); + +/* Assuming prio->node.children(flow tables) is sorted by level */ +static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) +{ + struct fs_prio *prio; + + fs_get_obj(prio, ft->node.parent); + + if (!list_is_last(&ft->node.list, &prio->node.children)) + return list_next_entry(ft, node.list); + return find_next_chained_ft(prio); +} + +static int update_root_ft_destroy(struct mlx5_flow_table *ft) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_flow_table *new_root_ft = NULL; + + if (root->root_ft != ft) + return 0; + + new_root_ft = find_next_ft(ft); + if (new_root_ft) { + int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft); + + if (err) { + mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", + ft->id); + return err; + } + root->root_ft = new_root_ft; + } + return 0; +} + +/* Connect flow table from previous priority to + * the next flow table. + */ +static int disconnect_flow_table(struct mlx5_flow_table *ft) +{ + struct mlx5_core_dev *dev = get_dev(&ft->node); + struct mlx5_flow_table *next_ft; + struct fs_prio *prio; + int err = 0; + + err = update_root_ft_destroy(ft); + if (err) + return err; + + fs_get_obj(prio, ft->node.parent); + if (!(list_first_entry(&prio->node.children, + struct mlx5_flow_table, + node.list) == ft)) + return 0; + + next_ft = find_next_chained_ft(prio); + err = connect_prev_fts(dev, next_ft, prio); + if (err) + mlx5_core_warn(dev, "Failed to disconnect flow table %d\n", + ft->id); + return err; +} + +int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + int err = 0; + + mutex_lock(&root->chain_lock); + err = disconnect_flow_table(ft); + if (err) { + mutex_unlock(&root->chain_lock); + return err; + } + if (tree_remove_node(&ft->node)) + mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n", + ft->id); + mutex_unlock(&root->chain_lock); + + return err; +} +EXPORT_SYMBOL(mlx5_destroy_flow_table); + +void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) +{ + if (tree_remove_node(&fg->node)) + mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n", + fg->id); +} + +struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type) +{ + struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; + int prio; + static struct fs_prio *fs_prio; + struct mlx5_flow_namespace *ns; + + if (!root_ns) + return NULL; + + switch (type) { + case MLX5_FLOW_NAMESPACE_BYPASS: + case MLX5_FLOW_NAMESPACE_KERNEL: + case MLX5_FLOW_NAMESPACE_LEFTOVERS: + prio = type; + break; + case MLX5_FLOW_NAMESPACE_FDB: + if (dev->priv.fdb_root_ns) + return &dev->priv.fdb_root_ns->ns; + else + return NULL; + default: + return NULL; + } + + fs_prio = find_prio(&root_ns->ns, prio); + if (!fs_prio) + return NULL; + + ns = list_first_entry(&fs_prio->node.children, + typeof(*ns), + node.list); + + return ns; +} +EXPORT_SYMBOL(mlx5_get_flow_namespace); + +static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned prio, int max_ft) +{ + struct fs_prio *fs_prio; + + fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL); + if (!fs_prio) + return ERR_PTR(-ENOMEM); + + fs_prio->node.type = FS_TYPE_PRIO; + tree_init_node(&fs_prio->node, 1, NULL); + tree_add_node(&fs_prio->node, &ns->node); + fs_prio->max_ft = max_ft; + fs_prio->prio = prio; + list_add_tail(&fs_prio->node.list, &ns->node.children); + + return fs_prio; +} + +static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace + *ns) +{ + ns->node.type = FS_TYPE_NAMESPACE; + + return ns; +} + +static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) +{ + struct mlx5_flow_namespace *ns; + + ns = kzalloc(sizeof(*ns), GFP_KERNEL); + if (!ns) + return ERR_PTR(-ENOMEM); + + fs_init_namespace(ns); + tree_init_node(&ns->node, 1, NULL); + tree_add_node(&ns->node, &prio->node); + list_add_tail(&ns->node.list, &prio->node.children); + + return ns; +} + +static int create_leaf_prios(struct mlx5_flow_namespace *ns, struct init_tree_node + *prio_metadata) +{ + struct fs_prio *fs_prio; + int i; + + for (i = 0; i < prio_metadata->num_leaf_prios; i++) { + fs_prio = fs_create_prio(ns, i, prio_metadata->max_ft); + if (IS_ERR(fs_prio)) + return PTR_ERR(fs_prio); + } + return 0; +} + +#define FLOW_TABLE_BIT_SZ 1 +#define GET_FLOW_TABLE_CAP(dev, offset) \ + ((be32_to_cpu(*((__be32 *)(dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) + \ + offset / 32)) >> \ + (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ) +static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps) +{ + int i; + + for (i = 0; i < caps->arr_sz; i++) { + if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i])) + return false; + } + return true; +} + +static int init_root_tree_recursive(struct mlx5_core_dev *dev, + struct init_tree_node *init_node, + struct fs_node *fs_parent_node, + struct init_tree_node *init_parent_node, + int index) +{ + int max_ft_level = MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive. + max_ft_level); + struct mlx5_flow_namespace *fs_ns; + struct fs_prio *fs_prio; + struct fs_node *base; + int i; + int err; + + if (init_node->type == FS_TYPE_PRIO) { + if ((init_node->min_ft_level > max_ft_level) || + !has_required_caps(dev, &init_node->caps)) + return 0; + + fs_get_obj(fs_ns, fs_parent_node); + if (init_node->num_leaf_prios) + return create_leaf_prios(fs_ns, init_node); + fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft); + if (IS_ERR(fs_prio)) + return PTR_ERR(fs_prio); + base = &fs_prio->node; + } else if (init_node->type == FS_TYPE_NAMESPACE) { + fs_get_obj(fs_prio, fs_parent_node); + fs_ns = fs_create_namespace(fs_prio); + if (IS_ERR(fs_ns)) + return PTR_ERR(fs_ns); + base = &fs_ns->node; + } else { + return -EINVAL; + } + for (i = 0; i < init_node->ar_size; i++) { + err = init_root_tree_recursive(dev, &init_node->children[i], + base, init_node, i); + if (err) + return err; + } + + return 0; +} + +static int init_root_tree(struct mlx5_core_dev *dev, + struct init_tree_node *init_node, + struct fs_node *fs_parent_node) +{ + int i; + struct mlx5_flow_namespace *fs_ns; + int err; + + fs_get_obj(fs_ns, fs_parent_node); + for (i = 0; i < init_node->ar_size; i++) { + err = init_root_tree_recursive(dev, &init_node->children[i], + &fs_ns->node, + init_node, i); + if (err) + return err; + } + return 0; +} + +static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev, + enum fs_flow_table_type + table_type) +{ + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_namespace *ns; + + /* Create the root namespace */ + root_ns = mlx5_vzalloc(sizeof(*root_ns)); + if (!root_ns) + return NULL; + + root_ns->dev = dev; + root_ns->table_type = table_type; + + ns = &root_ns->ns; + fs_init_namespace(ns); + mutex_init(&root_ns->chain_lock); + tree_init_node(&ns->node, 1, NULL); + tree_add_node(&ns->node, NULL); + + return root_ns; +} + +static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level); + +static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level) +{ + struct fs_prio *prio; + + fs_for_each_prio(prio, ns) { + /* This updates prio start_level and max_ft */ + set_prio_attrs_in_prio(prio, acc_level); + acc_level += prio->max_ft; + } + return acc_level; +} + +static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level) +{ + struct mlx5_flow_namespace *ns; + int acc_level_ns = acc_level; + + prio->start_level = acc_level; + fs_for_each_ns(ns, prio) + /* This updates start_level and max_ft of ns's priority descendants */ + acc_level_ns = set_prio_attrs_in_ns(ns, acc_level); + if (!prio->max_ft) + prio->max_ft = acc_level_ns - prio->start_level; + WARN_ON(prio->max_ft < acc_level_ns - prio->start_level); +} + +static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) +{ + struct mlx5_flow_namespace *ns = &root_ns->ns; + struct fs_prio *prio; + int start_level = 0; + + fs_for_each_prio(prio, ns) { + set_prio_attrs_in_prio(prio, start_level); + start_level += prio->max_ft; + } +} + +static int init_root_ns(struct mlx5_core_dev *dev) +{ + + dev->priv.root_ns = create_root_ns(dev, FS_FT_NIC_RX); + if (IS_ERR_OR_NULL(dev->priv.root_ns)) + goto cleanup; + + if (init_root_tree(dev, &root_fs, &dev->priv.root_ns->ns.node)) + goto cleanup; + + set_prio_attrs(dev->priv.root_ns); + + return 0; + +cleanup: + mlx5_cleanup_fs(dev); + return -ENOMEM; +} + +static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev, + struct mlx5_flow_root_namespace *root_ns) +{ + struct fs_node *prio; + + if (!root_ns) + return; + + if (!list_empty(&root_ns->ns.node.children)) { + prio = list_first_entry(&root_ns->ns.node.children, + struct fs_node, + list); + if (tree_remove_node(prio)) + mlx5_core_warn(dev, + "Flow steering priority wasn't destroyed, refcount > 1\n"); + } + if (tree_remove_node(&root_ns->ns.node)) + mlx5_core_warn(dev, + "Flow steering namespace wasn't destroyed, refcount > 1\n"); + root_ns = NULL; +} + +static void cleanup_root_ns(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; + struct fs_prio *iter_prio; + + if (!MLX5_CAP_GEN(dev, nic_flow_table)) + return; + + if (!root_ns) + return; + + /* stage 1 */ + fs_for_each_prio(iter_prio, &root_ns->ns) { + struct fs_node *node; + struct mlx5_flow_namespace *iter_ns; + + fs_for_each_ns_or_ft(node, iter_prio) { + if (node->type == FS_TYPE_FLOW_TABLE) + continue; + fs_get_obj(iter_ns, node); + while (!list_empty(&iter_ns->node.children)) { + struct fs_prio *obj_iter_prio2; + struct fs_node *iter_prio2 = + list_first_entry(&iter_ns->node.children, + struct fs_node, + list); + + fs_get_obj(obj_iter_prio2, iter_prio2); + if (tree_remove_node(iter_prio2)) { + mlx5_core_warn(dev, + "Priority %d wasn't destroyed, refcount > 1\n", + obj_iter_prio2->prio); + return; + } + } + } + } + + /* stage 2 */ + fs_for_each_prio(iter_prio, &root_ns->ns) { + while (!list_empty(&iter_prio->node.children)) { + struct fs_node *iter_ns = + list_first_entry(&iter_prio->node.children, + struct fs_node, + list); + if (tree_remove_node(iter_ns)) { + mlx5_core_warn(dev, + "Namespace wasn't destroyed, refcount > 1\n"); + return; + } + } + } + + /* stage 3 */ + while (!list_empty(&root_ns->ns.node.children)) { + struct fs_prio *obj_prio_node; + struct fs_node *prio_node = + list_first_entry(&root_ns->ns.node.children, + struct fs_node, + list); + + fs_get_obj(obj_prio_node, prio_node); + if (tree_remove_node(prio_node)) { + mlx5_core_warn(dev, + "Priority %d wasn't destroyed, refcount > 1\n", + obj_prio_node->prio); + return; + } + } + + if (tree_remove_node(&root_ns->ns.node)) { + mlx5_core_warn(dev, + "root namespace wasn't destroyed, refcount > 1\n"); + return; + } + + dev->priv.root_ns = NULL; +} + +void mlx5_cleanup_fs(struct mlx5_core_dev *dev) +{ + cleanup_root_ns(dev); + cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); +} + +static int init_fdb_root_ns(struct mlx5_core_dev *dev) +{ + struct fs_prio *prio; + + dev->priv.fdb_root_ns = create_root_ns(dev, FS_FT_FDB); + if (!dev->priv.fdb_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1); + if (IS_ERR(prio)) { + cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); + return PTR_ERR(prio); + } else { + return 0; + } +} + +int mlx5_init_fs(struct mlx5_core_dev *dev) +{ + int err = 0; + + if (MLX5_CAP_GEN(dev, nic_flow_table)) { + err = init_root_ns(dev); + if (err) + return err; + } + if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { + err = init_fdb_root_ns(dev); + if (err) + cleanup_root_ns(dev); + } + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h new file mode 100644 index 000000000000..00245fd7e4bc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MLX5_FS_CORE_ +#define _MLX5_FS_CORE_ + +#include <linux/mlx5/fs.h> + +enum fs_node_type { + FS_TYPE_NAMESPACE, + FS_TYPE_PRIO, + FS_TYPE_FLOW_TABLE, + FS_TYPE_FLOW_GROUP, + FS_TYPE_FLOW_ENTRY, + FS_TYPE_FLOW_DEST +}; + +enum fs_flow_table_type { + FS_FT_NIC_RX = 0x0, + FS_FT_FDB = 0X4, +}; + +enum fs_fte_status { + FS_FTE_STATUS_EXISTING = 1UL << 0, +}; + +struct fs_node { + struct list_head list; + struct list_head children; + enum fs_node_type type; + struct fs_node *parent; + struct fs_node *root; + /* lock the node for writing and traversing */ + struct mutex lock; + atomic_t refcount; + void (*remove_func)(struct fs_node *); +}; + +struct mlx5_flow_rule { + struct fs_node node; + struct mlx5_flow_destination dest_attr; +}; + +/* Type of children is mlx5_flow_group */ +struct mlx5_flow_table { + struct fs_node node; + u32 id; + unsigned int max_fte; + unsigned int level; + enum fs_flow_table_type type; + struct { + bool active; + unsigned int required_groups; + unsigned int num_groups; + } autogroup; +}; + +/* Type of children is mlx5_flow_rule */ +struct fs_fte { + struct fs_node node; + u32 val[MLX5_ST_SZ_DW(fte_match_param)]; + u32 dests_size; + u32 flow_tag; + u32 index; + u32 action; + enum fs_fte_status status; +}; + +/* Type of children is mlx5_flow_table/namespace */ +struct fs_prio { + struct fs_node node; + unsigned int max_ft; + unsigned int start_level; + unsigned int prio; + unsigned int num_ft; +}; + +/* Type of children is fs_prio */ +struct mlx5_flow_namespace { + /* parent == NULL => root ns */ + struct fs_node node; +}; + +struct mlx5_flow_group_mask { + u8 match_criteria_enable; + u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; +}; + +/* Type of children is fs_fte */ +struct mlx5_flow_group { + struct fs_node node; + struct mlx5_flow_group_mask mask; + u32 start_index; + u32 max_ftes; + u32 num_ftes; + u32 id; +}; + +struct mlx5_flow_root_namespace { + struct mlx5_flow_namespace ns; + enum fs_flow_table_type table_type; + struct mlx5_core_dev *dev; + struct mlx5_flow_table *root_ft; + /* Should be held when chaining flow tables */ + struct mutex chain_lock; +}; + +int mlx5_init_fs(struct mlx5_core_dev *dev); +void mlx5_cleanup_fs(struct mlx5_core_dev *dev); + +#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } + +#define fs_list_for_each_entry(pos, root) \ + list_for_each_entry(pos, root, node.list) + +#define fs_for_each_ns_or_ft_reverse(pos, prio) \ + list_for_each_entry_reverse(pos, &(prio)->node.children, list) + +#define fs_for_each_ns_or_ft(pos, prio) \ + list_for_each_entry(pos, (&(prio)->node.children), list) + +#define fs_for_each_prio(pos, ns) \ + fs_list_for_each_entry(pos, &(ns)->node.children) + +#define fs_for_each_ns(pos, prio) \ + fs_list_for_each_entry(pos, &(prio)->node.children) + +#define fs_for_each_ft(pos, prio) \ + fs_list_for_each_entry(pos, &(prio)->node.children) + +#define fs_for_each_fg(pos, ft) \ + fs_list_for_each_entry(pos, &(ft)->node.children) + +#define fs_for_each_fte(pos, fg) \ + fs_list_for_each_entry(pos, &(fg)->node.children) + +#define fs_for_each_dst(pos, fte) \ + fs_list_for_each_entry(pos, &(fte)->node.children) + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 9335e5ae18cc..aa1ab4702385 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -160,6 +160,30 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) if (err) return err; } + + if (MLX5_CAP_GEN(dev, vport_group_manager) && + MLX5_CAP_GEN(dev, eswitch_flow_table)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4ac8d4cc4973..67676cf0d507 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -49,6 +49,10 @@ #include <linux/delay.h> #include <linux/mlx5/mlx5_ifc.h> #include "mlx5_core.h" +#include "fs_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); @@ -454,6 +458,9 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) struct mlx5_reg_host_endianess he_out; int err; + if (!mlx5_core_is_pf(dev)) + return 0; + memset(&he_in, 0, sizeof(he_in)); he_in.he = MLX5_SET_HOST_ENDIANNESS; err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in), @@ -462,42 +469,52 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) return err; } -static int mlx5_core_enable_hca(struct mlx5_core_dev *dev) +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) { + u32 out[MLX5_ST_SZ_DW(enable_hca_out)]; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)]; int err; - struct mlx5_enable_hca_mbox_in in; - struct mlx5_enable_hca_mbox_out out; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ENABLE_HCA); + memset(in, 0, sizeof(in)); + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, func_id); + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return 0; + return mlx5_cmd_status_to_err_v2(out); } -static int mlx5_core_disable_hca(struct mlx5_core_dev *dev) +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) { + u32 out[MLX5_ST_SZ_DW(disable_hca_out)]; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)]; int err; - struct mlx5_disable_hca_mbox_in in; - struct mlx5_disable_hca_mbox_out out; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DISABLE_HCA); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + memset(in, 0, sizeof(in)); + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, func_id); + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); + return mlx5_cmd_status_to_err_v2(out); +} - return 0; +cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev) +{ + u32 timer_h, timer_h1, timer_l; + + timer_h = ioread32be(&dev->iseg->internal_timer_h); + timer_l = ioread32be(&dev->iseg->internal_timer_l); + timer_h1 = ioread32be(&dev->iseg->internal_timer_h); + if (timer_h != timer_h1) /* wrap around */ + timer_l = ioread32be(&dev->iseg->internal_timer_l); + + return (cycle_t)timer_l | (cycle_t)timer_h1 << 32; } static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) @@ -942,7 +959,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_pagealloc_init(dev); - err = mlx5_core_enable_hca(dev); + err = mlx5_core_enable_hca(dev, 0); if (err) { dev_err(&pdev->dev, "enable hca failed\n"); goto err_pagealloc_cleanup; @@ -1052,6 +1069,25 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_srq_table(dev); mlx5_init_mr_table(dev); + err = mlx5_init_fs(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init flow steering\n"); + goto err_fs; + } +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_eswitch_init(dev); + if (err) { + dev_err(&pdev->dev, "eswitch init failed %d\n", err); + goto err_reg_dev; + } +#endif + + err = mlx5_sriov_init(dev); + if (err) { + dev_err(&pdev->dev, "sriov init failed %d\n", err); + goto err_sriov; + } + err = mlx5_register_device(dev); if (err) { dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); @@ -1068,7 +1104,16 @@ out: return 0; +err_sriov: + if (mlx5_sriov_cleanup(dev)) + dev_err(&dev->pdev->dev, "sriov cleanup failed\n"); + +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif err_reg_dev: + mlx5_cleanup_fs(dev); +err_fs: mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); @@ -1106,7 +1151,7 @@ reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); err_disable_hca: - mlx5_core_disable_hca(dev); + mlx5_core_disable_hca(dev, 0); err_pagealloc_cleanup: mlx5_pagealloc_cleanup(dev); @@ -1123,6 +1168,13 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { int err = 0; + err = mlx5_sriov_cleanup(dev); + if (err) { + dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n", + __func__); + return err; + } + mutex_lock(&dev->intf_state_mutex); if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", @@ -1130,6 +1182,11 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out; } mlx5_unregister_device(dev); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif + + mlx5_cleanup_fs(dev); mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); @@ -1149,7 +1206,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) } mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); - mlx5_core_disable_hca(dev); + mlx5_core_disable_hca(dev, 0); mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); @@ -1195,6 +1252,7 @@ static int init_one(struct pci_dev *pdev, return -ENOMEM; } priv = &dev->priv; + priv->pci_dev_data = id->driver_data; pci_set_drvdata(pdev, dev); @@ -1365,12 +1423,12 @@ static const struct pci_error_handlers mlx5_err_handler = { }; static const struct pci_device_id mlx5_core_pci_table[] = { - { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ - { PCI_VDEVICE(MELLANOX, 0x1012) }, /* Connect-IB VF */ - { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */ - { PCI_VDEVICE(MELLANOX, 0x1014) }, /* ConnectX-4 VF */ - { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ - { PCI_VDEVICE(MELLANOX, 0x1016) }, /* ConnectX-4LX VF */ + { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ + { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */ + { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */ + { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ + { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ + { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ { 0, } }; @@ -1381,7 +1439,8 @@ static struct pci_driver mlx5_core_driver = { .id_table = mlx5_core_pci_table, .probe = init_one, .remove = remove_one, - .err_handler = &mlx5_err_handler + .err_handler = &mlx5_err_handler, + .sriov_configure = mlx5_core_sriov_configure, }; static int __init init(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index cee5b7a839bc..0336847ec9a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -36,6 +36,7 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/if_link.h> #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "3.0-1" @@ -64,6 +65,9 @@ do { \ (__dev)->priv.name, __func__, __LINE__, current->pid, \ ##__VA_ARGS__) +#define mlx5_core_info(__dev, format, ...) \ + dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__) + enum { MLX5_CMD_DATA, /* print command payload only */ MLX5_CMD_TIME, /* print command execution time */ @@ -90,6 +94,11 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); +int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); +cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev); void mlx5e_init(void); void mlx5e_cleanup(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 4d3377b12657..9eeee0545f1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -33,6 +33,7 @@ #include <linux/highmem.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/delay.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" @@ -95,6 +96,7 @@ struct mlx5_manage_pages_outbox { enum { MAX_RECLAIM_TIME_MSECS = 5000, + MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60, }; enum { @@ -352,6 +354,10 @@ retry: goto out_4k; } + dev->priv.fw_pages += npages; + if (func_id) + dev->priv.vfs_pages += npages; + mlx5_core_dbg(dev, "err %d\n", err); kvfree(in); @@ -405,6 +411,12 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, } num_claimed = be32_to_cpu(out->num_entries); + if (num_claimed > npages) { + mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n", + num_claimed, npages); + err = -EINVAL; + goto out_free; + } if (nclaimed) *nclaimed = num_claimed; @@ -412,6 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, addr = be64_to_cpu(out->pas[i]); free_4k(dev, addr); } + dev->priv.fw_pages -= num_claimed; + if (func_id) + dev->priv.vfs_pages -= num_claimed; out_free: kvfree(out); @@ -548,3 +563,26 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) { destroy_workqueue(dev->priv.pg_wq); } + +int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) +{ + unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + int prev_vfs_pages = dev->priv.vfs_pages; + + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, + dev->priv.name); + while (dev->priv.vfs_pages) { + if (time_after(jiffies, end)) { + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages); + return -ETIMEDOUT; + } + if (dev->priv.vfs_pages < prev_vfs_pages) { + end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + prev_vfs_pages = dev->priv.vfs_pages; + } + msleep(50); + } + + mlx5_core_dbg(dev, "All pages received from %s\n", dev->priv.name); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c new file mode 100644 index 000000000000..7b24386794f9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/pci.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif + +static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + int vf; + + for (vf = 1; vf <= num_vfs; vf++) { + err = mlx5_core_enable_hca(dev, vf); + if (err) { + mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1); + } else { + sriov->vfs_ctx[vf - 1].enabled = 1; + mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1); + } + } +} + +static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int vf; + + for (vf = 1; vf <= num_vfs; vf++) { + if (sriov->vfs_ctx[vf - 1].enabled) { + if (mlx5_core_disable_hca(dev, vf)) + mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1); + else + sriov->vfs_ctx[vf - 1].enabled = 0; + } + } +} + +static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + + if (pci_num_vf(pdev)) + pci_disable_sriov(pdev); + + enable_vfs(dev, num_vfs); + + err = pci_enable_sriov(pdev, num_vfs); + if (err) { + dev_warn(&pdev->dev, "enable sriov failed %d\n", err); + goto ex; + } + + return 0; + +ex: + disable_vfs(dev, num_vfs); + return err; +} + +static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC); + if (!sriov->vfs_ctx) + return -ENOMEM; + + sriov->enabled_vfs = num_vfs; + err = mlx5_core_create_vfs(pdev, num_vfs); + if (err) { + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = NULL; + return err; + } + + return 0; +} + +static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + sriov->num_vfs = num_vfs; +} + +static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov; + + sriov = &dev->priv.sriov; + disable_vfs(dev, sriov->num_vfs); + + if (mlx5_wait_for_vf_pages(dev)) + mlx5_core_warn(dev, "timeout claiming VFs pages\n"); + + sriov->num_vfs = 0; +} + +int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + + mlx5_core_dbg(dev, "requsted num_vfs %d\n", num_vfs); + if (!mlx5_core_is_pf(dev)) + return -EPERM; + + mlx5_core_cleanup_vfs(dev); + + if (!num_vfs) { +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_disable_sriov(dev->priv.eswitch); +#endif + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = NULL; + if (!pci_vfs_assigned(pdev)) + pci_disable_sriov(pdev); + else + pr_info("unloading PF driver while leaving orphan VFs\n"); + return 0; + } + + err = mlx5_core_sriov_enable(pdev, num_vfs); + if (err) { + dev_warn(&pdev->dev, "mlx5_core_sriov_enable failed %d\n", err); + return err; + } + + mlx5_core_init_vfs(dev, num_vfs); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs); +#endif + + return num_vfs; +} + +static int sync_required(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int cur_vfs = pci_num_vf(pdev); + + if (cur_vfs != sriov->num_vfs) { + pr_info("current VFs %d, registered %d - sync needed\n", cur_vfs, sriov->num_vfs); + return 1; + } + + return 0; +} + +int mlx5_sriov_init(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct pci_dev *pdev = dev->pdev; + int cur_vfs; + + if (!mlx5_core_is_pf(dev)) + return 0; + + if (!sync_required(dev->pdev)) + return 0; + + cur_vfs = pci_num_vf(pdev); + sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); + if (!sriov->vfs_ctx) + return -ENOMEM; + + sriov->enabled_vfs = cur_vfs; + + mlx5_core_init_vfs(dev, cur_vfs); +#ifdef CONFIG_MLX5_CORE_EN + if (cur_vfs) + mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs); +#endif + + enable_vfs(dev, cur_vfs); + + return 0; +} + +int mlx5_sriov_cleanup(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + if (!mlx5_core_is_pf(dev)) + return 0; + + err = mlx5_core_sriov_configure(pdev, 0); + if (err) + return err; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index b94177ebcf3a..076197efea9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -36,54 +36,399 @@ #include <linux/mlx5/vport.h> #include "mlx5_core.h" -u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) +static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; - u32 out[MLX5_ST_SZ_DW(query_vport_state_out)]; int err; + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; memset(in, 0, sizeof(in)); MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); MLX5_SET(query_vport_state_in, in, op_mod, opmod); + MLX5_SET(query_vport_state_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_state_in, in, other_vport, 1); - err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, - sizeof(out)); + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); if (err) mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n"); + return err; +} + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0}; + + _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out)); + return MLX5_GET(query_vport_state_out, out, state); } -EXPORT_SYMBOL(mlx5_query_vport_state); +EXPORT_SYMBOL_GPL(mlx5_query_vport_state); + +u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0}; + + _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out)); + + return MLX5_GET(query_vport_state_out, out, admin_state); +} +EXPORT_SYMBOL(mlx5_query_vport_admin_state); -void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u8 state) +{ + u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)]; + u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(modify_vport_state_in, in, opcode, + MLX5_CMD_OP_MODIFY_VPORT_STATE); + MLX5_SET(modify_vport_state_in, in, op_mod, opmod); + MLX5_SET(modify_vport_state_in, in, vport_number, vport); + + if (vport) + MLX5_SET(modify_vport_state_in, in, other_vport, 1); + + MLX5_SET(modify_vport_state_in, in, admin_state, state); + + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, + sizeof(out)); + if (err) + mlx5_core_warn(mdev, "MLX5_CMD_OP_MODIFY_VPORT_STATE failed\n"); + + return err; +} +EXPORT_SYMBOL(mlx5_modify_vport_admin_state); + +static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); +} + +static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out)); +} + +int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr) { - u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); u8 *out_addr; + int err; out = mlx5_vzalloc(outlen); if (!out) - return; + return -ENOMEM; out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, nic_vport_context.permanent_address); + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; + + ether_addr_copy(addr, &out_addr[2]); + +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address); + +int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + void *nic_vport_ctx; + u8 *perm_mac; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.permanent_address, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + perm_mac = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx, + permanent_address); + + ether_addr_copy(&perm_mac[2], addr); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address); + +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u32 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int max_list_size; + int req_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *list_size; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; MLX5_SET(query_nic_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); - memset(out, 0, outlen); - mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); - ether_addr_copy(addr, &out_addr[2]); + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; - kvfree(out); + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *list_size = req_list_size; + for (i = 0; i < req_list_size; i++) { + u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(addr_list[i], mac_addr); + } +out: + kfree(out); + return err; } -EXPORT_SYMBOL(mlx5_query_nic_vport_mac_address); +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list); + +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, list_type); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(curr_mac, addr_list[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); + +int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, + u32 vport, + u16 vlans[], + int *size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int req_list_size; + int max_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *size; + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max list size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, + MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *size = req_list_size; + for (i = 0; i < req_list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + vlans[i] = MLX5_GET(vlan_layout, vlan_addr, vlan); + } +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlans); + +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, @@ -343,3 +688,65 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, return err; } EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); + +int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, + u32 vport, + int *promisc_uc, + int *promisc_mc, + int *promisc_all) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; + + *promisc_uc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_uc); + *promisc_mc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_mc); + *promisc_all = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_all); + +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc); + +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_err(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_uc, promisc_uc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_mc, promisc_mc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_all, promisc_all); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index e36e12219c9b..ce26adcb4988 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -10,6 +10,14 @@ config MLXSW_CORE To compile this driver as a module, choose M here: the module will be called mlxsw_core. +config MLXSW_CORE_HWMON + bool "HWMON support for Mellanox Technologies Switch ASICs" + depends on MLXSW_CORE && HWMON + depends on !(MLXSW_CORE=y && HWMON=m) + default y + ---help--- + Say Y here if you want to expose HWMON interface on mlxsw devices. + config MLXSW_PCI tristate "PCI bus implementation for Mellanox Technologies Switch ASICs" depends on PCI && HAS_DMA && HAS_IOMEM && MLXSW_CORE @@ -33,7 +41,7 @@ config MLXSW_SWITCHX2 config MLXSW_SPECTRUM tristate "Mellanox Technologies Spectrum support" - depends on MLXSW_CORE && NET_SWITCHDEV + depends on MLXSW_CORE && NET_SWITCHDEV && VLAN_8021Q default m ---help--- This driver supports Mellanox Technologies Spectrum Ethernet diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index af015818fd19..584cac444852 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_MLXSW_CORE) += mlxsw_core.o mlxsw_core-objs := core.o +mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o obj-$(CONFIG_MLXSW_PCI) += mlxsw_pci.o mlxsw_pci-objs := pci.o obj-$(CONFIG_MLXSW_SWITCHX2) += mlxsw_switchx2.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 97f0d93caf99..22379eb8e924 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -105,6 +105,10 @@ struct mlxsw_core { struct debugfs_blob_wrapper vsd_blob; struct debugfs_blob_wrapper psid_blob; } dbg; + struct { + u8 *mapping; /* lag_id+port_index to local_port mapping */ + } lag; + struct mlxsw_hwmon *hwmon; unsigned long driver_priv[0]; /* driver_priv has to be always the last item */ }; @@ -814,6 +818,17 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_alloc_stats; } + if (mlxsw_driver->profile->used_max_lag && + mlxsw_driver->profile->used_max_port_per_lag) { + alloc_size = sizeof(u8) * mlxsw_driver->profile->max_lag * + mlxsw_driver->profile->max_port_per_lag; + mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL); + if (!mlxsw_core->lag.mapping) { + err = -ENOMEM; + goto err_alloc_lag_mapping; + } + } + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile); if (err) goto err_bus_init; @@ -822,6 +837,10 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_emad_init; + err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon); + if (err) + goto err_hwmon_init; + err = mlxsw_driver->init(mlxsw_core->driver_priv, mlxsw_core, mlxsw_bus_info); if (err) @@ -836,10 +855,13 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, err_debugfs_init: mlxsw_core->driver->fini(mlxsw_core->driver_priv); err_driver_init: +err_hwmon_init: mlxsw_emad_fini(mlxsw_core); err_emad_init: mlxsw_bus->fini(bus_priv); err_bus_init: + kfree(mlxsw_core->lag.mapping); +err_alloc_lag_mapping: free_percpu(mlxsw_core->pcpu_stats); err_alloc_stats: kfree(mlxsw_core); @@ -857,6 +879,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core) mlxsw_core->driver->fini(mlxsw_core->driver_priv); mlxsw_emad_fini(mlxsw_core); mlxsw_core->bus->fini(mlxsw_core->bus_priv); + kfree(mlxsw_core->lag.mapping); free_percpu(mlxsw_core->pcpu_stats); kfree(mlxsw_core); mlxsw_core_driver_put(device_kind); @@ -1188,11 +1211,25 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, struct mlxsw_rx_listener_item *rxl_item; const struct mlxsw_rx_listener *rxl; struct mlxsw_core_pcpu_stats *pcpu_stats; - u8 local_port = rx_info->sys_port; + u8 local_port; bool found = false; - dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: sys_port = %d, trap_id = 0x%x\n", - __func__, rx_info->sys_port, rx_info->trap_id); + if (rx_info->is_lag) { + dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: lag_id = %d, lag_port_index = 0x%x\n", + __func__, rx_info->u.lag_id, + rx_info->trap_id); + /* Upper layer does not care if the skb came from LAG or not, + * so just get the local_port for the lag port and push it up. + */ + local_port = mlxsw_core_lag_mapping_get(mlxsw_core, + rx_info->u.lag_id, + rx_info->lag_port_index); + } else { + local_port = rx_info->u.sys_port; + } + + dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: local_port = %d, trap_id = 0x%x\n", + __func__, local_port, rx_info->trap_id); if ((rx_info->trap_id >= MLXSW_TRAP_ID_MAX) || (local_port >= MLXSW_PORT_MAX_PORTS)) @@ -1236,6 +1273,48 @@ drop: } EXPORT_SYMBOL(mlxsw_core_skb_receive); +static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index) +{ + return mlxsw_core->driver->profile->max_port_per_lag * lag_id + + port_index; +} + +void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index, u8 local_port) +{ + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, port_index); + + mlxsw_core->lag.mapping[index] = local_port; +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_set); + +u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index) +{ + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, port_index); + + return mlxsw_core->lag.mapping[index]; +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_get); + +void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 local_port) +{ + int i; + + for (i = 0; i < mlxsw_core->driver->profile->max_port_per_lag; i++) { + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, i); + + if (mlxsw_core->lag.mapping[index] == local_port) + mlxsw_core->lag.mapping[index] = 0; + } +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear); + int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, u32 in_mod, bool out_mbox_direct, char *in_mbox, size_t in_mbox_size, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 807827350a89..a01723600f0a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -112,13 +112,25 @@ int mlxsw_reg_write(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload); struct mlxsw_rx_info { - u16 sys_port; + bool is_lag; + union { + u16 sys_port; + u16 lag_id; + } u; + u8 lag_port_index; int trap_id; }; void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, struct mlxsw_rx_info *rx_info); +void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index, u8 local_port); +u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index); +void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 local_port); + #define MLXSW_CONFIG_PROFILE_SWID_COUNT 8 struct mlxsw_swid_config { @@ -209,4 +221,24 @@ struct mlxsw_bus_info { u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN]; }; +struct mlxsw_hwmon; + +#ifdef CONFIG_MLXSW_CORE_HWMON + +int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon); +void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon); + +#else + +static inline int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon) +{ + return 0; +} + +#endif + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c new file mode 100644 index 000000000000..1ac8bf187168 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -0,0 +1,372 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/device.h> +#include <linux/sysfs.h> +#include <linux/hwmon.h> +#include <linux/err.h> + +#include "core.h" + +#define MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT 127 +#define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT * 4 + \ + MLXSW_MFCR_TACHOS_MAX + MLXSW_MFCR_PWMS_MAX) + +struct mlxsw_hwmon_attr { + struct device_attribute dev_attr; + struct mlxsw_hwmon *hwmon; + unsigned int type_index; + char name[32]; +}; + +struct mlxsw_hwmon { + struct mlxsw_core *core; + const struct mlxsw_bus_info *bus_info; + struct device *hwmon_dev; + struct attribute_group group; + const struct attribute_group *groups[2]; + struct attribute *attrs[MLXSW_HWMON_ATTR_COUNT + 1]; + struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT]; + unsigned int attrs_count; +}; + +static ssize_t mlxsw_hwmon_temp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + unsigned int temp; + int err; + + mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, + false, false); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); + return err; + } + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + return sprintf(buf, "%u\n", temp); +} + +static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + unsigned int temp_max; + int err; + + mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, + false, false); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); + return err; + } + mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL); + return sprintf(buf, "%u\n", temp_max); +} + +static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + unsigned long val; + int err; + + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val != 1) + return -EINVAL; + + mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, true, true); + err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to reset temp sensor history\n"); + return err; + } + return len; +} + +static ssize_t mlxsw_hwmon_fan_rpm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mfsm_pl[MLXSW_REG_MFSM_LEN]; + int err; + + mlxsw_reg_mfsm_pack(mfsm_pl, mlwsw_hwmon_attr->type_index); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsm), mfsm_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n"); + return err; + } + return sprintf(buf, "%u\n", mlxsw_reg_mfsm_rpm_get(mfsm_pl)); +} + +static ssize_t mlxsw_hwmon_pwm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mfsc_pl[MLXSW_REG_MFSC_LEN]; + int err; + + mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, 0); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query PWM\n"); + return err; + } + return sprintf(buf, "%u\n", + mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl)); +} + +static ssize_t mlxsw_hwmon_pwm_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mfsc_pl[MLXSW_REG_MFSC_LEN]; + unsigned long val; + int err; + + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val > 255) + return -EINVAL; + + mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, val); + err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to write PWM\n"); + return err; + } + return len; +} + +enum mlxsw_hwmon_attr_type { + MLXSW_HWMON_ATTR_TYPE_TEMP, + MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, + MLXSW_HWMON_ATTR_TYPE_TEMP_RST, + MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + MLXSW_HWMON_ATTR_TYPE_PWM, +}; + +static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, + enum mlxsw_hwmon_attr_type attr_type, + unsigned int type_index, unsigned int num) { + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr; + unsigned int attr_index; + + attr_index = mlxsw_hwmon->attrs_count; + mlxsw_hwmon_attr = &mlxsw_hwmon->hwmon_attrs[attr_index]; + + switch (attr_type) { + case MLXSW_HWMON_ATTR_TYPE_TEMP: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_input", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MAX: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_max_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_highest", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_RST: + mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_temp_rst_store; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IWUSR; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_reset_history", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_FAN_RPM: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_rpm_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "fan%u_input", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_PWM: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_pwm_show; + mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_pwm_store; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IWUSR | S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "pwm%u", num + 1); + break; + default: + WARN_ON(1); + } + + mlxsw_hwmon_attr->type_index = type_index; + mlxsw_hwmon_attr->hwmon = mlxsw_hwmon; + mlxsw_hwmon_attr->dev_attr.attr.name = mlxsw_hwmon_attr->name; + sysfs_attr_init(&mlxsw_hwmon_attr->dev_attr.attr); + + mlxsw_hwmon->attrs[attr_index] = &mlxsw_hwmon_attr->dev_attr.attr; + mlxsw_hwmon->attrs_count++; +} + +static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) +{ + char mtcap_pl[MLXSW_REG_MTCAP_LEN]; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + u8 sensor_count; + int i; + int err; + + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtcap), mtcap_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get number of temp sensors\n"); + return err; + } + sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl); + for (i = 0; i < sensor_count; i++) { + mlxsw_reg_mtmp_pack(mtmp_pl, i, true, true); + err = mlxsw_reg_write(mlxsw_hwmon->core, + MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to setup temp sensor number %d\n", + i); + return err; + } + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP, i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_RST, i, i); + } + return 0; +} + +static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon) +{ + char mfcr_pl[MLXSW_REG_MFCR_LEN]; + enum mlxsw_reg_mfcr_pwm_frequency freq; + unsigned int type_index; + unsigned int num; + u16 tacho_active; + u8 pwm_active; + int err; + + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfcr), mfcr_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get to probe PWMs and Tachometers\n"); + return err; + } + mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active); + num = 0; + for (type_index = 0; type_index < MLXSW_MFCR_TACHOS_MAX; type_index++) { + if (tacho_active & BIT(type_index)) + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + type_index, num++); + } + num = 0; + for (type_index = 0; type_index < MLXSW_MFCR_PWMS_MAX; type_index++) { + if (pwm_active & BIT(type_index)) + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_PWM, + type_index, num++); + } + return 0; +} + +int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon) +{ + struct mlxsw_hwmon *mlxsw_hwmon; + struct device *hwmon_dev; + int err; + + mlxsw_hwmon = devm_kzalloc(mlxsw_bus_info->dev, sizeof(*mlxsw_hwmon), + GFP_KERNEL); + if (!mlxsw_hwmon) + return -ENOMEM; + mlxsw_hwmon->core = mlxsw_core; + mlxsw_hwmon->bus_info = mlxsw_bus_info; + + err = mlxsw_hwmon_temp_init(mlxsw_hwmon); + if (err) + goto err_temp_init; + + err = mlxsw_hwmon_fans_init(mlxsw_hwmon); + if (err) + goto err_fans_init; + + mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group; + mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs; + + hwmon_dev = devm_hwmon_device_register_with_groups(mlxsw_bus_info->dev, + "mlxsw", + mlxsw_hwmon, + mlxsw_hwmon->groups); + if (IS_ERR(hwmon_dev)) { + err = PTR_ERR(hwmon_dev); + goto err_hwmon_register; + } + + mlxsw_hwmon->hwmon_dev = hwmon_dev; + *p_hwmon = mlxsw_hwmon; + return 0; + +err_hwmon_register: +err_fans_init: +err_temp_init: + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index de69e719dc9d..c071077aafbd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -384,7 +384,7 @@ static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, /* Set CQ of same number of this SDQ. */ mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num); - mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, 7); + mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, 3); mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */ for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); @@ -686,11 +686,15 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, if (q->consumer_counter++ != consumer_counter_limit) dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n"); - /* We do not support lag now */ - if (mlxsw_pci_cqe_lag_get(cqe)) - goto drop; + if (mlxsw_pci_cqe_lag_get(cqe)) { + rx_info.is_lag = true; + rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe); + rx_info.lag_port_index = mlxsw_pci_cqe_lag_port_index_get(cqe); + } else { + rx_info.is_lag = false; + rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe); + } - rx_info.sys_port = mlxsw_pci_cqe_system_port_get(cqe); rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe); byte_count = mlxsw_pci_cqe_byte_count_get(cqe); @@ -699,7 +703,6 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, skb_put(skb, byte_count); mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); -put_new_skb: memset(wqe, 0, q->elem_size); err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); if (err) @@ -708,10 +711,6 @@ put_new_skb: q->producer_counter++; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); return; - -drop: - dev_kfree_skb_any(skb); - goto put_new_skb; } static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h index 142f33d978c5..912106054ff2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h @@ -129,13 +129,15 @@ MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false); */ MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1); -/* pci_cqe_system_port +/* pci_cqe_system_port/lag_id * When lag=0: System port on which the packet was received * When lag=1: * bits [15:4] LAG ID on which the packet was received * bits [3:0] sub_port on which the packet was received */ MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16); +MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12); +MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4); /* pci_cqe_wqe_counter * WQE count of the WQEs completed on the associated dqn diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 236fb5d2ad69..0c5237264e3e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -99,6 +99,55 @@ static const struct mlxsw_reg_info mlxsw_reg_spad = { */ MLXSW_ITEM_BUF(reg, spad, base_mac, 0x02, 6); +/* SMID - Switch Multicast ID + * -------------------------- + * The MID record maps from a MID (Multicast ID), which is a unique identifier + * of the multicast group within the stacking domain, into a list of local + * ports into which the packet is replicated. + */ +#define MLXSW_REG_SMID_ID 0x2007 +#define MLXSW_REG_SMID_LEN 0x240 + +static const struct mlxsw_reg_info mlxsw_reg_smid = { + .id = MLXSW_REG_SMID_ID, + .len = MLXSW_REG_SMID_LEN, +}; + +/* reg_smid_swid + * Switch partition ID. + * Access: Index + */ +MLXSW_ITEM32(reg, smid, swid, 0x00, 24, 8); + +/* reg_smid_mid + * Multicast identifier - global identifier that represents the multicast group + * across all devices. + * Access: Index + */ +MLXSW_ITEM32(reg, smid, mid, 0x00, 0, 16); + +/* reg_smid_port + * Local port memebership (1 bit per port). + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, smid, port, 0x20, 0x20, 1); + +/* reg_smid_port_mask + * Local port mask (1 bit per port). + * Access: W + */ +MLXSW_ITEM_BIT_ARRAY(reg, smid, port_mask, 0x220, 0x20, 1); + +static inline void mlxsw_reg_smid_pack(char *payload, u16 mid, + u8 port, bool set) +{ + MLXSW_REG_ZERO(smid, payload); + mlxsw_reg_smid_swid_set(payload, 0); + mlxsw_reg_smid_mid_set(payload, mid); + mlxsw_reg_smid_port_set(payload, port, set); + mlxsw_reg_smid_port_mask_set(payload, port, 1); +} + /* SSPR - Switch System Port Record Register * ----------------------------------------- * Configures the system port to local port mapping. @@ -286,6 +335,8 @@ MLXSW_ITEM32_INDEXED(reg, sfd, rec_swid, MLXSW_REG_SFD_BASE_LEN, 24, 8, enum mlxsw_reg_sfd_rec_type { MLXSW_REG_SFD_REC_TYPE_UNICAST = 0x0, + MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG = 0x1, + MLXSW_REG_SFD_REC_TYPE_MULTICAST = 0x2, }; /* reg_sfd_rec_type @@ -376,36 +427,144 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, MLXSW_ITEM32_INDEXED(reg, sfd, uc_system_port, MLXSW_REG_SFD_BASE_LEN, 0, 16, MLXSW_REG_SFD_REC_LEN, 0x0C, false); -static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, - enum mlxsw_reg_sfd_rec_policy policy, - const char *mac, u16 vid, - enum mlxsw_reg_sfd_rec_action action, - u8 local_port) +static inline void mlxsw_reg_sfd_rec_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_type rec_type, + const char *mac, + enum mlxsw_reg_sfd_rec_action action) { u8 num_rec = mlxsw_reg_sfd_num_rec_get(payload); if (rec_index >= num_rec) mlxsw_reg_sfd_num_rec_set(payload, rec_index + 1); mlxsw_reg_sfd_rec_swid_set(payload, rec_index, 0); - mlxsw_reg_sfd_rec_type_set(payload, rec_index, - MLXSW_REG_SFD_REC_TYPE_UNICAST); - mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy); + mlxsw_reg_sfd_rec_type_set(payload, rec_index, rec_type); mlxsw_reg_sfd_rec_mac_memcpy_to(payload, rec_index, mac); - mlxsw_reg_sfd_uc_sub_port_set(payload, rec_index, 0); - mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, vid); mlxsw_reg_sfd_rec_action_set(payload, rec_index, action); +} + +static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 fid_vid, + enum mlxsw_reg_sfd_rec_action action, + u8 local_port) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST, mac, action); + mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy); + mlxsw_reg_sfd_uc_sub_port_set(payload, rec_index, 0); + mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, fid_vid); mlxsw_reg_sfd_uc_system_port_set(payload, rec_index, local_port); } static inline void mlxsw_reg_sfd_uc_unpack(char *payload, int rec_index, - char *mac, u16 *p_vid, + char *mac, u16 *p_fid_vid, u8 *p_local_port) { mlxsw_reg_sfd_rec_mac_memcpy_from(payload, rec_index, mac); - *p_vid = mlxsw_reg_sfd_uc_fid_vid_get(payload, rec_index); + *p_fid_vid = mlxsw_reg_sfd_uc_fid_vid_get(payload, rec_index); *p_local_port = mlxsw_reg_sfd_uc_system_port_get(payload, rec_index); } +/* reg_sfd_uc_lag_sub_port + * LAG sub port. + * Must be 0 if multichannel VEPA is not enabled. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_lag_fid_vid + * Filtering ID or VLAN ID + * For SwitchX and SwitchX-2: + * - Dynamic entries (policy 2,3) use FID + * - Static entries (policy 0) use VID + * - When independent learning is configured, VID=FID + * For Spectrum: use FID for both Dynamic and Static entries. + * VID should not be used. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_lag_lag_vid + * Indicates VID in case of vFIDs. Reserved for FIDs. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_vid, MLXSW_REG_SFD_BASE_LEN, 16, 12, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +/* reg_sfd_uc_lag_lag_id + * LAG Identifier - pointer into the LAG descriptor table. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_id, MLXSW_REG_SFD_BASE_LEN, 0, 10, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +static inline void +mlxsw_reg_sfd_uc_lag_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 fid_vid, + enum mlxsw_reg_sfd_rec_action action, u16 lag_vid, + u16 lag_id) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG, + mac, action); + mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy); + mlxsw_reg_sfd_uc_lag_sub_port_set(payload, rec_index, 0); + mlxsw_reg_sfd_uc_lag_fid_vid_set(payload, rec_index, fid_vid); + mlxsw_reg_sfd_uc_lag_lag_vid_set(payload, rec_index, lag_vid); + mlxsw_reg_sfd_uc_lag_lag_id_set(payload, rec_index, lag_id); +} + +static inline void mlxsw_reg_sfd_uc_lag_unpack(char *payload, int rec_index, + char *mac, u16 *p_vid, + u16 *p_lag_id) +{ + mlxsw_reg_sfd_rec_mac_memcpy_from(payload, rec_index, mac); + *p_vid = mlxsw_reg_sfd_uc_lag_fid_vid_get(payload, rec_index); + *p_lag_id = mlxsw_reg_sfd_uc_lag_lag_id_get(payload, rec_index); +} + +/* reg_sfd_mc_pgi + * + * Multicast port group index - index into the port group table. + * Value 0x1FFF indicates the pgi should point to the MID entry. + * For Spectrum this value must be set to 0x1FFF + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, mc_pgi, MLXSW_REG_SFD_BASE_LEN, 16, 13, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_mc_fid_vid + * + * Filtering ID or VLAN ID + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, sfd, mc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_mc_mid + * + * Multicast identifier - global identifier that represents the multicast + * group across all devices. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, mc_mid, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +static inline void +mlxsw_reg_sfd_mc_pack(char *payload, int rec_index, + const char *mac, u16 fid_vid, + enum mlxsw_reg_sfd_rec_action action, u16 mid) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_MULTICAST, mac, action); + mlxsw_reg_sfd_mc_pgi_set(payload, rec_index, 0x1FFF); + mlxsw_reg_sfd_mc_fid_vid_set(payload, rec_index, fid_vid); + mlxsw_reg_sfd_mc_mid_set(payload, rec_index, mid); +} + /* SFN - Switch FDB Notification Register * ------------------------------------------- * The switch provides notifications on newly learned FDB entries and @@ -456,8 +615,12 @@ MLXSW_ITEM32_INDEXED(reg, sfn, rec_swid, MLXSW_REG_SFN_BASE_LEN, 24, 8, enum mlxsw_reg_sfn_rec_type { /* MAC addresses learned on a regular port. */ MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC = 0x5, - /* Aged-out MAC address on a regular port */ + /* MAC addresses learned on a LAG port. */ + MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG = 0x6, + /* Aged-out MAC address on a regular port. */ MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC = 0x7, + /* Aged-out MAC address on a LAG port. */ + MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG = 0x8, }; /* reg_sfn_rec_type @@ -505,6 +668,22 @@ static inline void mlxsw_reg_sfn_mac_unpack(char *payload, int rec_index, *p_local_port = mlxsw_reg_sfn_mac_system_port_get(payload, rec_index); } +/* reg_sfn_mac_lag_lag_id + * LAG ID (pointer into the LAG descriptor table). + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, mac_lag_lag_id, MLXSW_REG_SFN_BASE_LEN, 0, 10, + MLXSW_REG_SFN_REC_LEN, 0x0C, false); + +static inline void mlxsw_reg_sfn_mac_lag_unpack(char *payload, int rec_index, + char *mac, u16 *p_vid, + u16 *p_lag_id) +{ + mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac); + *p_vid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index); + *p_lag_id = mlxsw_reg_sfn_mac_lag_lag_id_get(payload, rec_index); +} + /* SPMS - Switch Port MSTP/RSTP State Register * ------------------------------------------- * Configures the spanning tree state of a physical port. @@ -865,6 +1044,293 @@ static inline void mlxsw_reg_sftr_pack(char *payload, mlxsw_reg_sftr_port_mask_set(payload, port, 1); } +/* SLDR - Switch LAG Descriptor Register + * ----------------------------------------- + * The switch LAG descriptor register is populated by LAG descriptors. + * Each LAG descriptor is indexed by lag_id. The LAG ID runs from 0 to + * max_lag-1. + */ +#define MLXSW_REG_SLDR_ID 0x2014 +#define MLXSW_REG_SLDR_LEN 0x0C /* counting in only one port in list */ + +static const struct mlxsw_reg_info mlxsw_reg_sldr = { + .id = MLXSW_REG_SLDR_ID, + .len = MLXSW_REG_SLDR_LEN, +}; + +enum mlxsw_reg_sldr_op { + /* Indicates a creation of a new LAG-ID, lag_id must be valid */ + MLXSW_REG_SLDR_OP_LAG_CREATE, + MLXSW_REG_SLDR_OP_LAG_DESTROY, + /* Ports that appear in the list have the Distributor enabled */ + MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST, + /* Removes ports from the disributor list */ + MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST, +}; + +/* reg_sldr_op + * Operation. + * Access: RW + */ +MLXSW_ITEM32(reg, sldr, op, 0x00, 29, 3); + +/* reg_sldr_lag_id + * LAG identifier. The lag_id is the index into the LAG descriptor table. + * Access: Index + */ +MLXSW_ITEM32(reg, sldr, lag_id, 0x00, 0, 10); + +static inline void mlxsw_reg_sldr_lag_create_pack(char *payload, u8 lag_id) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_CREATE); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); +} + +static inline void mlxsw_reg_sldr_lag_destroy_pack(char *payload, u8 lag_id) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_DESTROY); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); +} + +/* reg_sldr_num_ports + * The number of member ports of the LAG. + * Reserved for Create / Destroy operations + * For Add / Remove operations - indicates the number of ports in the list. + * Access: RW + */ +MLXSW_ITEM32(reg, sldr, num_ports, 0x04, 24, 8); + +/* reg_sldr_system_port + * System port. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sldr, system_port, 0x08, 0, 16, 4, 0, false); + +static inline void mlxsw_reg_sldr_lag_add_port_pack(char *payload, u8 lag_id, + u8 local_port) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); + mlxsw_reg_sldr_num_ports_set(payload, 1); + mlxsw_reg_sldr_system_port_set(payload, 0, local_port); +} + +static inline void mlxsw_reg_sldr_lag_remove_port_pack(char *payload, u8 lag_id, + u8 local_port) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); + mlxsw_reg_sldr_num_ports_set(payload, 1); + mlxsw_reg_sldr_system_port_set(payload, 0, local_port); +} + +/* SLCR - Switch LAG Configuration 2 Register + * ------------------------------------------- + * The Switch LAG Configuration register is used for configuring the + * LAG properties of the switch. + */ +#define MLXSW_REG_SLCR_ID 0x2015 +#define MLXSW_REG_SLCR_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_slcr = { + .id = MLXSW_REG_SLCR_ID, + .len = MLXSW_REG_SLCR_LEN, +}; + +enum mlxsw_reg_slcr_pp { + /* Global Configuration (for all ports) */ + MLXSW_REG_SLCR_PP_GLOBAL, + /* Per port configuration, based on local_port field */ + MLXSW_REG_SLCR_PP_PER_PORT, +}; + +/* reg_slcr_pp + * Per Port Configuration + * Note: Reading at Global mode results in reading port 1 configuration. + * Access: Index + */ +MLXSW_ITEM32(reg, slcr, pp, 0x00, 24, 1); + +/* reg_slcr_local_port + * Local port number + * Supported from CPU port + * Not supported from router port + * Reserved when pp = Global Configuration + * Access: Index + */ +MLXSW_ITEM32(reg, slcr, local_port, 0x00, 16, 8); + +enum mlxsw_reg_slcr_type { + MLXSW_REG_SLCR_TYPE_CRC, /* default */ + MLXSW_REG_SLCR_TYPE_XOR, + MLXSW_REG_SLCR_TYPE_RANDOM, +}; + +/* reg_slcr_type + * Hash type + * Access: RW + */ +MLXSW_ITEM32(reg, slcr, type, 0x00, 0, 4); + +/* Ingress port */ +#define MLXSW_REG_SLCR_LAG_HASH_IN_PORT BIT(0) +/* SMAC - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_SMAC_IP BIT(1) +/* SMAC - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP BIT(2) +#define MLXSW_REG_SLCR_LAG_HASH_SMAC \ + (MLXSW_REG_SLCR_LAG_HASH_SMAC_IP | \ + MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP) +/* DMAC - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_DMAC_IP BIT(3) +/* DMAC - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP BIT(4) +#define MLXSW_REG_SLCR_LAG_HASH_DMAC \ + (MLXSW_REG_SLCR_LAG_HASH_DMAC_IP | \ + MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP) +/* Ethertype - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP BIT(5) +/* Ethertype - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP BIT(6) +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE \ + (MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP | \ + MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP) +/* VLAN ID - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_VLANID_IP BIT(7) +/* VLAN ID - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP BIT(8) +#define MLXSW_REG_SLCR_LAG_HASH_VLANID \ + (MLXSW_REG_SLCR_LAG_HASH_VLANID_IP | \ + MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP) +/* Source IP address (can be IPv4 or IPv6) */ +#define MLXSW_REG_SLCR_LAG_HASH_SIP BIT(9) +/* Destination IP address (can be IPv4 or IPv6) */ +#define MLXSW_REG_SLCR_LAG_HASH_DIP BIT(10) +/* TCP/UDP source port */ +#define MLXSW_REG_SLCR_LAG_HASH_SPORT BIT(11) +/* TCP/UDP destination port*/ +#define MLXSW_REG_SLCR_LAG_HASH_DPORT BIT(12) +/* IPv4 Protocol/IPv6 Next Header */ +#define MLXSW_REG_SLCR_LAG_HASH_IPPROTO BIT(13) +/* IPv6 Flow label */ +#define MLXSW_REG_SLCR_LAG_HASH_FLOWLABEL BIT(14) +/* SID - FCoE source ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_SID BIT(15) +/* DID - FCoE destination ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_DID BIT(16) +/* OXID - FCoE originator exchange ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_OXID BIT(17) +/* Destination QP number - for RoCE packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ROCE_DQP BIT(19) + +/* reg_slcr_lag_hash + * LAG hashing configuration. This is a bitmask, in which each set + * bit includes the corresponding item in the LAG hash calculation. + * The default lag_hash contains SMAC, DMAC, VLANID and + * Ethertype (for all packet types). + * Access: RW + */ +MLXSW_ITEM32(reg, slcr, lag_hash, 0x04, 0, 20); + +static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash) +{ + MLXSW_REG_ZERO(slcr, payload); + mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL); + mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_XOR); + mlxsw_reg_slcr_lag_hash_set(payload, lag_hash); +} + +/* SLCOR - Switch LAG Collector Register + * ------------------------------------- + * The Switch LAG Collector register controls the Local Port membership + * in a LAG and enablement of the collector. + */ +#define MLXSW_REG_SLCOR_ID 0x2016 +#define MLXSW_REG_SLCOR_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_slcor = { + .id = MLXSW_REG_SLCOR_ID, + .len = MLXSW_REG_SLCOR_LEN, +}; + +enum mlxsw_reg_slcor_col { + /* Port is added with collector disabled */ + MLXSW_REG_SLCOR_COL_LAG_ADD_PORT, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_DISABLED, + MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT, +}; + +/* reg_slcor_col + * Collector configuration + * Access: RW + */ +MLXSW_ITEM32(reg, slcor, col, 0x00, 30, 2); + +/* reg_slcor_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32(reg, slcor, local_port, 0x00, 16, 8); + +/* reg_slcor_lag_id + * LAG Identifier. Index into the LAG descriptor table. + * Access: Index + */ +MLXSW_ITEM32(reg, slcor, lag_id, 0x00, 0, 10); + +/* reg_slcor_port_index + * Port index in the LAG list. Only valid on Add Port to LAG col. + * Valid range is from 0 to cap_max_lag_members-1 + * Access: RW + */ +MLXSW_ITEM32(reg, slcor, port_index, 0x04, 0, 10); + +static inline void mlxsw_reg_slcor_pack(char *payload, + u8 local_port, u16 lag_id, + enum mlxsw_reg_slcor_col col) +{ + MLXSW_REG_ZERO(slcor, payload); + mlxsw_reg_slcor_col_set(payload, col); + mlxsw_reg_slcor_local_port_set(payload, local_port); + mlxsw_reg_slcor_lag_id_set(payload, lag_id); +} + +static inline void mlxsw_reg_slcor_port_add_pack(char *payload, + u8 local_port, u16 lag_id, + u8 port_index) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_ADD_PORT); + mlxsw_reg_slcor_port_index_set(payload, port_index); +} + +static inline void mlxsw_reg_slcor_port_remove_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT); +} + +static inline void mlxsw_reg_slcor_col_enable_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED); +} + +static inline void mlxsw_reg_slcor_col_disable_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED); +} + /* SPMLR - Switch Port MAC Learning Register * ----------------------------------------- * Controls the Switch MAC learning policy per port. @@ -2087,6 +2553,284 @@ static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id) mlxsw_reg_hpkt_ctrl_set(payload, MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT); } +/* MFCR - Management Fan Control Register + * -------------------------------------- + * This register controls the settings of the Fan Speed PWM mechanism. + */ +#define MLXSW_REG_MFCR_ID 0x9001 +#define MLXSW_REG_MFCR_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mfcr = { + .id = MLXSW_REG_MFCR_ID, + .len = MLXSW_REG_MFCR_LEN, +}; + +enum mlxsw_reg_mfcr_pwm_frequency { + MLXSW_REG_MFCR_PWM_FEQ_11HZ = 0x00, + MLXSW_REG_MFCR_PWM_FEQ_14_7HZ = 0x01, + MLXSW_REG_MFCR_PWM_FEQ_22_1HZ = 0x02, + MLXSW_REG_MFCR_PWM_FEQ_1_4KHZ = 0x40, + MLXSW_REG_MFCR_PWM_FEQ_5KHZ = 0x41, + MLXSW_REG_MFCR_PWM_FEQ_20KHZ = 0x42, + MLXSW_REG_MFCR_PWM_FEQ_22_5KHZ = 0x43, + MLXSW_REG_MFCR_PWM_FEQ_25KHZ = 0x44, +}; + +/* reg_mfcr_pwm_frequency + * Controls the frequency of the PWM signal. + * Access: RW + */ +MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 6); + +#define MLXSW_MFCR_TACHOS_MAX 10 + +/* reg_mfcr_tacho_active + * Indicates which of the tachometer is active (bit per tachometer). + * Access: RO + */ +MLXSW_ITEM32(reg, mfcr, tacho_active, 0x04, 16, MLXSW_MFCR_TACHOS_MAX); + +#define MLXSW_MFCR_PWMS_MAX 5 + +/* reg_mfcr_pwm_active + * Indicates which of the PWM control is active (bit per PWM). + * Access: RO + */ +MLXSW_ITEM32(reg, mfcr, pwm_active, 0x04, 0, MLXSW_MFCR_PWMS_MAX); + +static inline void +mlxsw_reg_mfcr_pack(char *payload, + enum mlxsw_reg_mfcr_pwm_frequency pwm_frequency) +{ + MLXSW_REG_ZERO(mfcr, payload); + mlxsw_reg_mfcr_pwm_frequency_set(payload, pwm_frequency); +} + +static inline void +mlxsw_reg_mfcr_unpack(char *payload, + enum mlxsw_reg_mfcr_pwm_frequency *p_pwm_frequency, + u16 *p_tacho_active, u8 *p_pwm_active) +{ + *p_pwm_frequency = mlxsw_reg_mfcr_pwm_frequency_get(payload); + *p_tacho_active = mlxsw_reg_mfcr_tacho_active_get(payload); + *p_pwm_active = mlxsw_reg_mfcr_pwm_active_get(payload); +} + +/* MFSC - Management Fan Speed Control Register + * -------------------------------------------- + * This register controls the settings of the Fan Speed PWM mechanism. + */ +#define MLXSW_REG_MFSC_ID 0x9002 +#define MLXSW_REG_MFSC_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mfsc = { + .id = MLXSW_REG_MFSC_ID, + .len = MLXSW_REG_MFSC_LEN, +}; + +/* reg_mfsc_pwm + * Fan pwm to control / monitor. + * Access: Index + */ +MLXSW_ITEM32(reg, mfsc, pwm, 0x00, 24, 3); + +/* reg_mfsc_pwm_duty_cycle + * Controls the duty cycle of the PWM. Value range from 0..255 to + * represent duty cycle of 0%...100%. + * Access: RW + */ +MLXSW_ITEM32(reg, mfsc, pwm_duty_cycle, 0x04, 0, 8); + +static inline void mlxsw_reg_mfsc_pack(char *payload, u8 pwm, + u8 pwm_duty_cycle) +{ + MLXSW_REG_ZERO(mfsc, payload); + mlxsw_reg_mfsc_pwm_set(payload, pwm); + mlxsw_reg_mfsc_pwm_duty_cycle_set(payload, pwm_duty_cycle); +} + +/* MFSM - Management Fan Speed Measurement + * --------------------------------------- + * This register controls the settings of the Tacho measurements and + * enables reading the Tachometer measurements. + */ +#define MLXSW_REG_MFSM_ID 0x9003 +#define MLXSW_REG_MFSM_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mfsm = { + .id = MLXSW_REG_MFSM_ID, + .len = MLXSW_REG_MFSM_LEN, +}; + +/* reg_mfsm_tacho + * Fan tachometer index. + * Access: Index + */ +MLXSW_ITEM32(reg, mfsm, tacho, 0x00, 24, 4); + +/* reg_mfsm_rpm + * Fan speed (round per minute). + * Access: RO + */ +MLXSW_ITEM32(reg, mfsm, rpm, 0x04, 0, 16); + +static inline void mlxsw_reg_mfsm_pack(char *payload, u8 tacho) +{ + MLXSW_REG_ZERO(mfsm, payload); + mlxsw_reg_mfsm_tacho_set(payload, tacho); +} + +/* MTCAP - Management Temperature Capabilities + * ------------------------------------------- + * This register exposes the capabilities of the device and + * system temperature sensing. + */ +#define MLXSW_REG_MTCAP_ID 0x9009 +#define MLXSW_REG_MTCAP_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mtcap = { + .id = MLXSW_REG_MTCAP_ID, + .len = MLXSW_REG_MTCAP_LEN, +}; + +/* reg_mtcap_sensor_count + * Number of sensors supported by the device. + * This includes the QSFP module sensors (if exists in the QSFP module). + * Access: RO + */ +MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7); + +/* MTMP - Management Temperature + * ----------------------------- + * This register controls the settings of the temperature measurements + * and enables reading the temperature measurements. Note that temperature + * is in 0.125 degrees Celsius. + */ +#define MLXSW_REG_MTMP_ID 0x900A +#define MLXSW_REG_MTMP_LEN 0x20 + +static const struct mlxsw_reg_info mlxsw_reg_mtmp = { + .id = MLXSW_REG_MTMP_ID, + .len = MLXSW_REG_MTMP_LEN, +}; + +/* reg_mtmp_sensor_index + * Sensors index to access. + * 64-127 of sensor_index are mapped to the SFP+/QSFP modules sequentially + * (module 0 is mapped to sensor_index 64). + * Access: Index + */ +MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 7); + +/* Convert to milli degrees Celsius */ +#define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125) + +/* reg_mtmp_temperature + * Temperature reading from the sensor. Reading is in 0.125 Celsius + * degrees units. + * Access: RO + */ +MLXSW_ITEM32(reg, mtmp, temperature, 0x04, 0, 16); + +/* reg_mtmp_mte + * Max Temperature Enable - enables measuring the max temperature on a sensor. + * Access: RW + */ +MLXSW_ITEM32(reg, mtmp, mte, 0x08, 31, 1); + +/* reg_mtmp_mtr + * Max Temperature Reset - clears the value of the max temperature register. + * Access: WO + */ +MLXSW_ITEM32(reg, mtmp, mtr, 0x08, 30, 1); + +/* reg_mtmp_max_temperature + * The highest measured temperature from the sensor. + * When the bit mte is cleared, the field max_temperature is reserved. + * Access: RO + */ +MLXSW_ITEM32(reg, mtmp, max_temperature, 0x08, 0, 16); + +#define MLXSW_REG_MTMP_SENSOR_NAME_SIZE 8 + +/* reg_mtmp_sensor_name + * Sensor Name + * Access: RO + */ +MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE); + +static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index, + bool max_temp_enable, + bool max_temp_reset) +{ + MLXSW_REG_ZERO(mtmp, payload); + mlxsw_reg_mtmp_sensor_index_set(payload, sensor_index); + mlxsw_reg_mtmp_mte_set(payload, max_temp_enable); + mlxsw_reg_mtmp_mtr_set(payload, max_temp_reset); +} + +static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, + unsigned int *p_max_temp, + char *sensor_name) +{ + u16 temp; + + if (p_temp) { + temp = mlxsw_reg_mtmp_temperature_get(payload); + *p_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (p_max_temp) { + temp = mlxsw_reg_mtmp_max_temperature_get(payload); + *p_max_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (sensor_name) + mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); +} + +/* MLCR - Management LED Control Register + * -------------------------------------- + * Controls the system LEDs. + */ +#define MLXSW_REG_MLCR_ID 0x902B +#define MLXSW_REG_MLCR_LEN 0x0C + +static const struct mlxsw_reg_info mlxsw_reg_mlcr = { + .id = MLXSW_REG_MLCR_ID, + .len = MLXSW_REG_MLCR_LEN, +}; + +/* reg_mlcr_local_port + * Local port number. + * Access: RW + */ +MLXSW_ITEM32(reg, mlcr, local_port, 0x00, 16, 8); + +#define MLXSW_REG_MLCR_DURATION_MAX 0xFFFF + +/* reg_mlcr_beacon_duration + * Duration of the beacon to be active, in seconds. + * 0x0 - Will turn off the beacon. + * 0xFFFF - Will turn on the beacon until explicitly turned off. + * Access: RW + */ +MLXSW_ITEM32(reg, mlcr, beacon_duration, 0x04, 0, 16); + +/* reg_mlcr_beacon_remain + * Remaining duration of the beacon, in seconds. + * 0xFFFF indicates an infinite amount of time. + * Access: RO + */ +MLXSW_ITEM32(reg, mlcr, beacon_remain, 0x08, 0, 16); + +static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port, + bool active) +{ + MLXSW_REG_ZERO(mlcr, payload); + mlxsw_reg_mlcr_local_port_set(payload, local_port); + mlxsw_reg_mlcr_beacon_duration_set(payload, active ? + MLXSW_REG_MLCR_DURATION_MAX : 0); +} + /* SBPR - Shared Buffer Pools Register * ----------------------------------- * The SBPR configures and retrieves the shared buffer pools and configuration. @@ -2357,6 +3101,8 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SGCR"; case MLXSW_REG_SPAD_ID: return "SPAD"; + case MLXSW_REG_SMID_ID: + return "SMID"; case MLXSW_REG_SSPR_ID: return "SSPR"; case MLXSW_REG_SFDAT_ID: @@ -2375,6 +3121,12 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SFGC"; case MLXSW_REG_SFTR_ID: return "SFTR"; + case MLXSW_REG_SLDR_ID: + return "SLDR"; + case MLXSW_REG_SLCR_ID: + return "SLCR"; + case MLXSW_REG_SLCOR_ID: + return "SLCOR"; case MLXSW_REG_SPMLR_ID: return "SPMLR"; case MLXSW_REG_SVFA_ID: @@ -2405,6 +3157,18 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "HTGT"; case MLXSW_REG_HPKT_ID: return "HPKT"; + case MLXSW_REG_MFCR_ID: + return "MFCR"; + case MLXSW_REG_MFSC_ID: + return "MFSC"; + case MLXSW_REG_MFSM_ID: + return "MFSM"; + case MLXSW_REG_MTCAP_ID: + return "MTCAP"; + case MLXSW_REG_MTMP_ID: + return "MTMP"; + case MLXSW_REG_MLCR_ID: + return "MLCR"; case MLXSW_REG_SBPR_ID: return "SBPR"; case MLXSW_REG_SBCM_ID: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3be4a2355ead..ce6845d534a8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -48,6 +48,7 @@ #include <linux/workqueue.h> #include <linux/jiffies.h> #include <linux/bitops.h> +#include <linux/list.h> #include <net/switchdev.h> #include <generated/utsrelease.h> @@ -186,33 +187,6 @@ static int mlxsw_sp_port_oper_status_get(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } -static int mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, u16 vfid) -{ - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - int err; - - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, - MLXSW_SP_VFID_BASE + vfid, 0); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); - - if (err) - return err; - - set_bit(vfid, mlxsw_sp->active_vfids); - return 0; -} - -static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, u16 vfid) -{ - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - - clear_bit(vfid, mlxsw_sp->active_vfids); - - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_DESTROY_FID, - MLXSW_SP_VFID_BASE + vfid, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); -} - static int mlxsw_sp_port_dev_addr_set(struct mlxsw_sp_port *mlxsw_sp_port, unsigned char *addr) { @@ -417,6 +391,10 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } +static void mlxsw_sp_set_rx_mode(struct net_device *dev) +{ +} + static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); @@ -545,12 +523,132 @@ static int mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } +static struct mlxsw_sp_vfid * +mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp, u16 vid) +{ + struct mlxsw_sp_vfid *vfid; + + list_for_each_entry(vfid, &mlxsw_sp->port_vfids.list, list) { + if (vfid->vid == vid) + return vfid; + } + + return NULL; +} + +static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp) +{ + return find_first_zero_bit(mlxsw_sp->port_vfids.mapped, + MLXSW_SP_VFID_PORT_MAX); +} + +static int __mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, u16 vfid) +{ + u16 fid = mlxsw_sp_vfid_to_fid(vfid); + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + +static void __mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, u16 vfid) +{ + u16 fid = mlxsw_sp_vfid_to_fid(vfid); + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_DESTROY_FID, fid, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + +static struct mlxsw_sp_vfid *mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, + u16 vid) +{ + struct device *dev = mlxsw_sp->bus_info->dev; + struct mlxsw_sp_vfid *vfid; + u16 n_vfid; + int err; + + n_vfid = mlxsw_sp_avail_vfid_get(mlxsw_sp); + if (n_vfid == MLXSW_SP_VFID_PORT_MAX) { + dev_err(dev, "No available vFIDs\n"); + return ERR_PTR(-ERANGE); + } + + err = __mlxsw_sp_vfid_create(mlxsw_sp, n_vfid); + if (err) { + dev_err(dev, "Failed to create vFID=%d\n", n_vfid); + return ERR_PTR(err); + } + + vfid = kzalloc(sizeof(*vfid), GFP_KERNEL); + if (!vfid) + goto err_allocate_vfid; + + vfid->vfid = n_vfid; + vfid->vid = vid; + + list_add(&vfid->list, &mlxsw_sp->port_vfids.list); + set_bit(n_vfid, mlxsw_sp->port_vfids.mapped); + + return vfid; + +err_allocate_vfid: + __mlxsw_sp_vfid_destroy(mlxsw_sp, n_vfid); + return ERR_PTR(-ENOMEM); +} + +static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vfid *vfid) +{ + clear_bit(vfid->vfid, mlxsw_sp->port_vfids.mapped); + list_del(&vfid->list); + + __mlxsw_sp_vfid_destroy(mlxsw_sp, vfid->vfid); + + kfree(vfid); +} + +static struct mlxsw_sp_port * +mlxsw_sp_port_vport_create(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_vfid *vfid) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + + mlxsw_sp_vport = kzalloc(sizeof(*mlxsw_sp_vport), GFP_KERNEL); + if (!mlxsw_sp_vport) + return NULL; + + /* dev will be set correctly after the VLAN device is linked + * with the real device. In case of bridge SELF invocation, dev + * will remain as is. + */ + mlxsw_sp_vport->dev = mlxsw_sp_port->dev; + mlxsw_sp_vport->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + mlxsw_sp_vport->local_port = mlxsw_sp_port->local_port; + mlxsw_sp_vport->stp_state = BR_STATE_FORWARDING; + mlxsw_sp_vport->lagged = mlxsw_sp_port->lagged; + mlxsw_sp_vport->lag_id = mlxsw_sp_port->lag_id; + mlxsw_sp_vport->vport.vfid = vfid; + mlxsw_sp_vport->vport.vid = vfid->vid; + + list_add(&mlxsw_sp_vport->vport.list, &mlxsw_sp_port->vports_list); + + return mlxsw_sp_vport; +} + +static void mlxsw_sp_port_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_vport) +{ + list_del(&mlxsw_sp_vport->vport.list); + kfree(mlxsw_sp_vport); +} + int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char *sftr_pl; + struct mlxsw_sp_port *mlxsw_sp_vport; + struct mlxsw_sp_vfid *vfid; int err; /* VLAN 0 is added to HW filter when device goes up, but it is @@ -559,100 +657,105 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, if (!vid) return 0; - if (test_bit(vid, mlxsw_sp_port->active_vfids)) { + if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) { netdev_warn(dev, "VID=%d already configured\n", vid); return 0; } - if (!test_bit(vid, mlxsw_sp->active_vfids)) { - err = mlxsw_sp_vfid_create(mlxsw_sp, vid); - if (err) { - netdev_err(dev, "Failed to create vFID=%d\n", - MLXSW_SP_VFID_BASE + vid); - return err; + vfid = mlxsw_sp_vfid_find(mlxsw_sp, vid); + if (!vfid) { + vfid = mlxsw_sp_vfid_create(mlxsw_sp, vid); + if (IS_ERR(vfid)) { + netdev_err(dev, "Failed to create vFID for VID=%d\n", + vid); + return PTR_ERR(vfid); } + } - sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); - if (!sftr_pl) { - err = -ENOMEM; - goto err_flood_table_alloc; - } - mlxsw_reg_sftr_pack(sftr_pl, 0, vid, - MLXSW_REG_SFGC_TABLE_TYPE_FID, 0, - MLXSW_PORT_CPU_PORT, true); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); - kfree(sftr_pl); + mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vfid); + if (!mlxsw_sp_vport) { + netdev_err(dev, "Failed to create vPort for VID=%d\n", vid); + err = -ENOMEM; + goto err_port_vport_create; + } + + if (!vfid->nr_vports) { + err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, + true, false); if (err) { - netdev_err(dev, "Failed to configure flood table\n"); - goto err_flood_table_config; + netdev_err(dev, "Failed to setup flooding for vFID=%d\n", + vfid->vfid); + goto err_vport_flood_set; } } - /* In case we fail in the following steps, we intentionally do not - * destroy the associated vFID. - */ - /* When adding the first VLAN interface on a bridged port we need to * transition all the active 802.1Q bridge VLANs to use explicit * {Port, VID} to FID mappings and set the port's mode to Virtual mode. */ - if (!mlxsw_sp_port->nr_vfids) { + if (list_is_singular(&mlxsw_sp_port->vports_list)) { err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port); if (err) { netdev_err(dev, "Failed to set to Virtual mode\n"); - return err; + goto err_port_vp_mode_trans; } } - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - true, MLXSW_SP_VFID_BASE + vid, vid); + true, + mlxsw_sp_vfid_to_fid(vfid->vfid), + vid); if (err) { netdev_err(dev, "Failed to map {Port, VID=%d} to vFID=%d\n", - vid, MLXSW_SP_VFID_BASE + vid); + vid, vfid->vfid); goto err_port_vid_to_fid_set; } - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false); + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); if (err) { netdev_err(dev, "Failed to disable learning for VID=%d\n", vid); goto err_port_vid_learning_set; } - err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, true, false); + err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, false); if (err) { netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", vid); goto err_port_add_vid; } - err = mlxsw_sp_port_stp_state_set(mlxsw_sp_port, vid, + err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid, MLXSW_REG_SPMS_STATE_FORWARDING); if (err) { netdev_err(dev, "Failed to set STP state for VID=%d\n", vid); goto err_port_stp_state_set; } - mlxsw_sp_port->nr_vfids++; - set_bit(vid, mlxsw_sp_port->active_vfids); + vfid->nr_vports++; return 0; -err_flood_table_config: -err_flood_table_alloc: - mlxsw_sp_vfid_destroy(mlxsw_sp, vid); - return err; - err_port_stp_state_set: - mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); + mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); err_port_add_vid: - mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true); + mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); err_port_vid_learning_set: - mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, + mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, false, - MLXSW_SP_VFID_BASE + vid, vid); + mlxsw_sp_vfid_to_fid(vfid->vfid), vid); err_port_vid_to_fid_set: - mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); + if (list_is_singular(&mlxsw_sp_port->vports_list)) + mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); +err_port_vp_mode_trans: + if (!vfid->nr_vports) + mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false, + false); +err_vport_flood_set: + mlxsw_sp_port_vport_destroy(mlxsw_sp_vport); +err_port_vport_create: + if (!vfid->nr_vports) + mlxsw_sp_vfid_destroy(mlxsw_sp, vfid); return err; } @@ -660,6 +763,8 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp_port *mlxsw_sp_vport; + struct mlxsw_sp_vfid *vfid; int err; /* VLAN 0 is removed from HW filter when device goes down, but @@ -668,38 +773,42 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, if (!vid) return 0; - if (!test_bit(vid, mlxsw_sp_port->active_vfids)) { + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + if (!mlxsw_sp_vport) { netdev_warn(dev, "VID=%d does not exist\n", vid); return 0; } - err = mlxsw_sp_port_stp_state_set(mlxsw_sp_port, vid, + vfid = mlxsw_sp_vport->vport.vfid; + + err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid, MLXSW_REG_SPMS_STATE_DISCARDING); if (err) { netdev_err(dev, "Failed to set STP state for VID=%d\n", vid); return err; } - err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); + err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); if (err) { netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", vid); return err; } - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true); + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); if (err) { netdev_err(dev, "Failed to enable learning for VID=%d\n", vid); return err; } - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - false, MLXSW_SP_VFID_BASE + vid, + false, + mlxsw_sp_vfid_to_fid(vfid->vfid), vid); if (err) { netdev_err(dev, "Failed to invalidate {Port, VID=%d} to vFID=%d mapping\n", - vid, MLXSW_SP_VFID_BASE + vid); + vid, vfid->vfid); return err; } @@ -707,7 +816,7 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, * transition all active 802.1Q bridge VLANs to use VID to FID * mappings and set port's mode to VLAN mode. */ - if (mlxsw_sp_port->nr_vfids == 1) { + if (list_is_singular(&mlxsw_sp_port->vports_list)) { err = mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); if (err) { netdev_err(dev, "Failed to set to VLAN mode\n"); @@ -715,8 +824,12 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, } } - mlxsw_sp_port->nr_vfids--; - clear_bit(vid, mlxsw_sp_port->active_vfids); + vfid->nr_vports--; + mlxsw_sp_port_vport_destroy(mlxsw_sp_vport); + + /* Destroy the vFID if no vPorts are assigned to it anymore. */ + if (!vfid->nr_vports) + mlxsw_sp_vfid_destroy(mlxsw_sp_port->mlxsw_sp, vfid); return 0; } @@ -725,6 +838,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_open = mlxsw_sp_port_open, .ndo_stop = mlxsw_sp_port_stop, .ndo_start_xmit = mlxsw_sp_port_xmit, + .ndo_set_rx_mode = mlxsw_sp_set_rx_mode, .ndo_set_mac_address = mlxsw_sp_port_set_mac_address, .ndo_change_mtu = mlxsw_sp_port_change_mtu, .ndo_get_stats64 = mlxsw_sp_port_get_stats64, @@ -859,6 +973,29 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev, } } +static int mlxsw_sp_port_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char mlcr_pl[MLXSW_REG_MLCR_LEN]; + bool active; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + active = true; + break; + case ETHTOOL_ID_INACTIVE: + active = false; + break; + default: + return -EOPNOTSUPP; + } + + mlxsw_reg_mlcr_pack(mlcr_pl, mlxsw_sp_port->local_port, active); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mlcr), mlcr_pl); +} + static void mlxsw_sp_port_get_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { @@ -1205,6 +1342,7 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_drvinfo = mlxsw_sp_port_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlxsw_sp_port_get_strings, + .set_phys_id = mlxsw_sp_port_set_phys_id, .get_ethtool_stats = mlxsw_sp_port_get_stats, .get_sset_count = mlxsw_sp_port_get_sset_count, .get_settings = mlxsw_sp_port_get_settings, @@ -1216,6 +1354,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port) struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *dev; bool usable; + size_t bytes; int err; dev = alloc_etherdev(sizeof(struct mlxsw_sp_port)); @@ -1225,10 +1364,18 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp_port->dev = dev; mlxsw_sp_port->mlxsw_sp = mlxsw_sp; mlxsw_sp_port->local_port = local_port; - mlxsw_sp_port->learning = 1; - mlxsw_sp_port->learning_sync = 1; - mlxsw_sp_port->uc_flood = 1; - mlxsw_sp_port->pvid = 1; + bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE); + mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL); + if (!mlxsw_sp_port->active_vlans) { + err = -ENOMEM; + goto err_port_active_vlans_alloc; + } + mlxsw_sp_port->untagged_vlans = kzalloc(bytes, GFP_KERNEL); + if (!mlxsw_sp_port->untagged_vlans) { + err = -ENOMEM; + goto err_port_untagged_vlans_alloc; + } + INIT_LIST_HEAD(&mlxsw_sp_port->vports_list); mlxsw_sp_port->pcpu_stats = netdev_alloc_pcpu_stats(struct mlxsw_sp_port_pcpu_stats); @@ -1330,16 +1477,29 @@ err_port_module_check: err_dev_addr_init: free_percpu(mlxsw_sp_port->pcpu_stats); err_alloc_stats: + kfree(mlxsw_sp_port->untagged_vlans); +err_port_untagged_vlans_alloc: + kfree(mlxsw_sp_port->active_vlans); +err_port_active_vlans_alloc: free_netdev(dev); return err; } -static void mlxsw_sp_vfids_fini(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port) { - u16 vfid; + struct net_device *dev = mlxsw_sp_port->dev; + struct mlxsw_sp_port *mlxsw_sp_vport, *tmp; - for_each_set_bit(vfid, mlxsw_sp->active_vfids, VLAN_N_VID) - mlxsw_sp_vfid_destroy(mlxsw_sp, vfid); + list_for_each_entry_safe(mlxsw_sp_vport, tmp, + &mlxsw_sp_port->vports_list, vport.list) { + u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + + /* vPorts created for VLAN devices should already be gone + * by now, since we unregistered the port netdev. + */ + WARN_ON(is_vlan_dev(mlxsw_sp_vport->dev)); + mlxsw_sp_port_kill_vid(dev, 0, vid); + } } static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) @@ -1348,10 +1508,12 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) if (!mlxsw_sp_port) return; - mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ + mlxsw_sp_port_vports_fini(mlxsw_sp_port); mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); free_percpu(mlxsw_sp_port->pcpu_stats); + kfree(mlxsw_sp_port->untagged_vlans); + kfree(mlxsw_sp_port->active_vlans); free_netdev(mlxsw_sp_port->dev); } @@ -1633,16 +1795,15 @@ static int __mlxsw_sp_flood_init(struct mlxsw_core *mlxsw_core, enum mlxsw_sp_flood_table flood_table; char sfgc_pl[MLXSW_REG_SFGC_LEN]; - if (bridge_type == MLXSW_REG_SFGC_BRIDGE_TYPE_VFID) { + if (bridge_type == MLXSW_REG_SFGC_BRIDGE_TYPE_VFID) table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID; - flood_table = 0; - } else { + else table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; - if (type == MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST) - flood_table = MLXSW_SP_FLOOD_TABLE_UC; - else - flood_table = MLXSW_SP_FLOOD_TABLE_BM; - } + + if (type == MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST) + flood_table = MLXSW_SP_FLOOD_TABLE_UC; + else + flood_table = MLXSW_SP_FLOOD_TABLE_BM; mlxsw_reg_sfgc_pack(sfgc_pl, type, bridge_type, table_type, flood_table); @@ -1653,9 +1814,6 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) { int type, err; - /* For non-offloaded netdevs, flood all traffic types to CPU - * port. - */ for (type = 0; type < MLXSW_REG_SFGC_TYPE_MAX; type++) { if (type == MLXSW_REG_SFGC_TYPE_RESERVED) continue; @@ -1664,15 +1822,6 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) MLXSW_REG_SFGC_BRIDGE_TYPE_VFID); if (err) return err; - } - - /* For bridged ports, use one flooding table for unknown unicast - * traffic and a second table for unregistered multicast and - * broadcast. - */ - for (type = 0; type < MLXSW_REG_SFGC_TYPE_MAX; type++) { - if (type == MLXSW_REG_SFGC_TYPE_RESERVED) - continue; err = __mlxsw_sp_flood_init(mlxsw_sp->core, type, MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID); @@ -1683,6 +1832,22 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) +{ + char slcr_pl[MLXSW_REG_SLCR_LEN]; + + mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC | + MLXSW_REG_SLCR_LAG_HASH_DMAC | + MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE | + MLXSW_REG_SLCR_LAG_HASH_VLANID | + MLXSW_REG_SLCR_LAG_HASH_SIP | + MLXSW_REG_SLCR_LAG_HASH_DIP | + MLXSW_REG_SLCR_LAG_HASH_SPORT | + MLXSW_REG_SLCR_LAG_HASH_DPORT | + MLXSW_REG_SLCR_LAG_HASH_IPPROTO); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); +} + static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { @@ -1691,6 +1856,9 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, mlxsw_sp->core = mlxsw_core; mlxsw_sp->bus_info = mlxsw_bus_info; + INIT_LIST_HEAD(&mlxsw_sp->port_vfids.list); + INIT_LIST_HEAD(&mlxsw_sp->br_vfids.list); + INIT_LIST_HEAD(&mlxsw_sp->br_mids.list); err = mlxsw_sp_base_mac_get(mlxsw_sp); if (err) { @@ -1701,7 +1869,7 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, err = mlxsw_sp_ports_create(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); - goto err_ports_create; + return err; } err = mlxsw_sp_event_register(mlxsw_sp, MLXSW_TRAP_ID_PUDE); @@ -1728,6 +1896,12 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, goto err_buffers_init; } + err = mlxsw_sp_lag_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize LAG\n"); + goto err_lag_init; + } + err = mlxsw_sp_switchdev_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize switchdev\n"); @@ -1737,6 +1911,7 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, return 0; err_switchdev_init: +err_lag_init: err_buffers_init: err_flood_init: mlxsw_sp_traps_fini(mlxsw_sp); @@ -1744,8 +1919,6 @@ err_rx_listener_register: mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); err_event_register: mlxsw_sp_ports_remove(mlxsw_sp); -err_ports_create: - mlxsw_sp_vfids_fini(mlxsw_sp); return err; } @@ -1757,18 +1930,17 @@ static void mlxsw_sp_fini(void *priv) mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); mlxsw_sp_ports_remove(mlxsw_sp); - mlxsw_sp_vfids_fini(mlxsw_sp); } static struct mlxsw_config_profile mlxsw_sp_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, .used_max_lag = 1, - .max_lag = 64, + .max_lag = MLXSW_SP_LAG_MAX, .used_max_port_per_lag = 1, - .max_port_per_lag = 16, + .max_port_per_lag = MLXSW_SP_PORT_PER_LAG_MAX, .used_max_mid = 1, - .max_mid = 7000, + .max_mid = MLXSW_SP_MID_MAX, .used_max_pgt = 1, .max_pgt = 0, .used_max_system_port = 1, @@ -1782,8 +1954,8 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .flood_mode = 3, .max_fid_offset_flood_tables = 2, .fid_offset_flood_table_size = VLAN_N_VID - 1, - .max_fid_flood_tables = 1, - .fid_flood_table_size = VLAN_N_VID, + .max_fid_flood_tables = 2, + .fid_flood_table_size = MLXSW_SP_VFID_MAX, .used_max_ib_mc = 1, .max_ib_mc = 0, .used_max_pkey = 1, @@ -1824,24 +1996,29 @@ static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port) */ err = mlxsw_sp_port_kill_vid(dev, 0, 1); if (err) - netdev_err(dev, "Failed to remove VID 1\n"); + return err; - return err; + mlxsw_sp_port->learning = 1; + mlxsw_sp_port->learning_sync = 1; + mlxsw_sp_port->uc_flood = 1; + mlxsw_sp_port->bridged = 1; + + return 0; } static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port) { struct net_device *dev = mlxsw_sp_port->dev; - int err; + + mlxsw_sp_port->learning = 0; + mlxsw_sp_port->learning_sync = 0; + mlxsw_sp_port->uc_flood = 0; + mlxsw_sp_port->bridged = 0; /* Add implicit VLAN interface in the device, so that untagged * packets will be classified to the default vFID. */ - err = mlxsw_sp_port_add_vid(dev, 0, 1); - if (err) - netdev_err(dev, "Failed to add VID 1\n"); - - return err; + return mlxsw_sp_port_add_vid(dev, 0, 1); } static bool mlxsw_sp_master_bridge_check(struct mlxsw_sp *mlxsw_sp, @@ -1865,19 +2042,293 @@ static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp, mlxsw_sp->master_bridge.dev = NULL; } -static int mlxsw_sp_netdevice_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_create_pack(sldr_pl, lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_destroy(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_destroy_pack(sldr_pl, lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_col_port_add(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id, u8 port_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_port_add_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id, port_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_port_remove_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_enable(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_col_enable_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_disable(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_col_disable_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, + struct net_device *lag_dev, + u16 *p_lag_id) +{ + struct mlxsw_sp_upper *lag; + int free_lag_id = -1; + int i; + + for (i = 0; i < MLXSW_SP_LAG_MAX; i++) { + lag = mlxsw_sp_lag_get(mlxsw_sp, i); + if (lag->ref_count) { + if (lag->dev == lag_dev) { + *p_lag_id = i; + return 0; + } + } else if (free_lag_id < 0) { + free_lag_id = i; + } + } + if (free_lag_id < 0) + return -EBUSY; + *p_lag_id = free_lag_id; + return 0; +} + +static bool +mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, + struct net_device *lag_dev, + struct netdev_lag_upper_info *lag_upper_info) +{ + u16 lag_id; + + if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) + return false; + if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) + return false; + return true; +} + +static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, + u16 lag_id, u8 *p_port_index) +{ + int i; + + for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) { + *p_port_index = i; + return 0; + } + } + return -EBUSY; +} + +static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_upper *lag; + u16 lag_id; + u8 port_index; + int err; + + err = mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id); + if (err) + return err; + lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); + if (!lag->ref_count) { + err = mlxsw_sp_lag_create(mlxsw_sp, lag_id); + if (err) + return err; + lag->dev = lag_dev; + } + + err = mlxsw_sp_port_lag_index_get(mlxsw_sp, lag_id, &port_index); + if (err) + return err; + err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index); + if (err) + goto err_col_port_add; + err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, lag_id); + if (err) + goto err_col_port_enable; + + mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index, + mlxsw_sp_port->local_port); + mlxsw_sp_port->lag_id = lag_id; + mlxsw_sp_port->lagged = 1; + lag->ref_count++; + return 0; + +err_col_port_add: + if (!lag->ref_count) + mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); +err_col_port_enable: + mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); + return err; +} + +static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_upper *lag; + u16 lag_id = mlxsw_sp_port->lag_id; + int err; + + if (!mlxsw_sp_port->lagged) + return 0; + lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); + WARN_ON(lag->ref_count == 0); + + err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); + if (err) + return err; + err = mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); + if (err) + return err; + + if (lag->ref_count == 1) { + err = mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); + if (err) + return err; + } + + mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id, + mlxsw_sp_port->local_port); + mlxsw_sp_port->lagged = 0; + lag->ref_count--; + return 0; +} + +static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_add_port_pack(sldr_pl, lag_id, + mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_remove_port_pack(sldr_pl, lag_id, + mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_port_lag_tx_en_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool lag_tx_enabled) +{ + if (lag_tx_enabled) + return mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + else + return mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, + mlxsw_sp_port->lag_id); +} + +static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port, + struct netdev_lag_lower_state_info *info) +{ + return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled); +} + +static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *br_dev); + +static int mlxsw_sp_port_vlan_link(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *vlan_dev) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + u16 vid = vlan_dev_vlan_id(vlan_dev); + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + if (!mlxsw_sp_vport) { + WARN_ON(!mlxsw_sp_vport); + return -EINVAL; + } + + mlxsw_sp_vport->dev = vlan_dev; + + return 0; +} + +static int mlxsw_sp_port_vlan_unlink(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *vlan_dev) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + u16 vid = vlan_dev_vlan_id(vlan_dev); + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + if (!mlxsw_sp_vport) { + WARN_ON(!mlxsw_sp_vport); + return -EINVAL; + } + + /* When removing a VLAN device while still bridged we should first + * remove it from the bridge, as we receive the bridge's notification + * when the vPort is already gone. + */ + if (mlxsw_sp_vport->bridged) { + struct net_device *br_dev; + + br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport); + mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev); + } + + mlxsw_sp_vport->dev = mlxsw_sp_port->dev; + + return 0; +} + +static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, + unsigned long event, void *ptr) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info; struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *upper_dev; struct mlxsw_sp *mlxsw_sp; int err; - if (!mlxsw_sp_port_dev_check(dev)) - return NOTIFY_DONE; - mlxsw_sp_port = netdev_priv(dev); mlxsw_sp = mlxsw_sp_port->mlxsw_sp; info = ptr; @@ -1885,28 +2336,66 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; + if (!info->master || !info->linking) + break; /* HW limitation forbids to put ports to multiple bridges. */ - if (info->master && info->linking && - netif_is_bridge_master(upper_dev) && + if (netif_is_bridge_master(upper_dev) && !mlxsw_sp_master_bridge_check(mlxsw_sp, upper_dev)) return NOTIFY_BAD; + if (netif_is_lag_master(upper_dev) && + !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, + info->upper_info)) + return NOTIFY_BAD; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; - if (info->master && - netif_is_bridge_master(upper_dev)) { + if (is_vlan_dev(upper_dev)) { + if (info->linking) { + err = mlxsw_sp_port_vlan_link(mlxsw_sp_port, + upper_dev); + if (err) { + netdev_err(dev, "Failed to link VLAN device\n"); + return NOTIFY_BAD; + } + } else { + err = mlxsw_sp_port_vlan_unlink(mlxsw_sp_port, + upper_dev); + if (err) { + netdev_err(dev, "Failed to unlink VLAN device\n"); + return NOTIFY_BAD; + } + } + } else if (netif_is_bridge_master(upper_dev)) { if (info->linking) { err = mlxsw_sp_port_bridge_join(mlxsw_sp_port); - if (err) + if (err) { netdev_err(dev, "Failed to join bridge\n"); + return NOTIFY_BAD; + } mlxsw_sp_master_bridge_inc(mlxsw_sp, upper_dev); - mlxsw_sp_port->bridged = 1; } else { err = mlxsw_sp_port_bridge_leave(mlxsw_sp_port); - if (err) - netdev_err(dev, "Failed to leave bridge\n"); - mlxsw_sp_port->bridged = 0; mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev); + if (err) { + netdev_err(dev, "Failed to leave bridge\n"); + return NOTIFY_BAD; + } + } + } else if (netif_is_lag_master(upper_dev)) { + if (info->linking) { + err = mlxsw_sp_port_lag_join(mlxsw_sp_port, + upper_dev); + if (err) { + netdev_err(dev, "Failed to join link aggregation\n"); + return NOTIFY_BAD; + } + } else { + err = mlxsw_sp_port_lag_leave(mlxsw_sp_port, + upper_dev); + if (err) { + netdev_err(dev, "Failed to leave link aggregation\n"); + return NOTIFY_BAD; + } } } break; @@ -1915,6 +2404,443 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, return NOTIFY_DONE; } +static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_changelowerstate_info *info; + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + mlxsw_sp_port = netdev_priv(dev); + info = ptr; + + switch (event) { + case NETDEV_CHANGELOWERSTATE: + if (netif_is_lag_port(dev) && mlxsw_sp_port->lagged) { + err = mlxsw_sp_port_lag_changed(mlxsw_sp_port, + info->lower_state_info); + if (err) + netdev_err(dev, "Failed to reflect link aggregation lower state change\n"); + } + break; + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_port_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + switch (event) { + case NETDEV_PRECHANGEUPPER: + case NETDEV_CHANGEUPPER: + return mlxsw_sp_netdevice_port_upper_event(dev, event, ptr); + case NETDEV_CHANGELOWERSTATE: + return mlxsw_sp_netdevice_port_lower_event(dev, event, ptr); + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, + unsigned long event, void *ptr) +{ + struct net_device *dev; + struct list_head *iter; + int ret; + + netdev_for_each_lower_dev(lag_dev, dev, iter) { + if (mlxsw_sp_port_dev_check(dev)) { + ret = mlxsw_sp_netdevice_port_event(dev, event, ptr); + if (ret == NOTIFY_BAD) + return ret; + } + } + + return NOTIFY_DONE; +} + +static struct mlxsw_sp_vfid * +mlxsw_sp_br_vfid_find(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev) +{ + struct mlxsw_sp_vfid *vfid; + + list_for_each_entry(vfid, &mlxsw_sp->br_vfids.list, list) { + if (vfid->br_dev == br_dev) + return vfid; + } + + return NULL; +} + +static u16 mlxsw_sp_vfid_to_br_vfid(u16 vfid) +{ + return vfid - MLXSW_SP_VFID_PORT_MAX; +} + +static u16 mlxsw_sp_br_vfid_to_vfid(u16 br_vfid) +{ + return MLXSW_SP_VFID_PORT_MAX + br_vfid; +} + +static u16 mlxsw_sp_avail_br_vfid_get(const struct mlxsw_sp *mlxsw_sp) +{ + return find_first_zero_bit(mlxsw_sp->br_vfids.mapped, + MLXSW_SP_VFID_BR_MAX); +} + +static struct mlxsw_sp_vfid *mlxsw_sp_br_vfid_create(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev) +{ + struct device *dev = mlxsw_sp->bus_info->dev; + struct mlxsw_sp_vfid *vfid; + u16 n_vfid; + int err; + + n_vfid = mlxsw_sp_br_vfid_to_vfid(mlxsw_sp_avail_br_vfid_get(mlxsw_sp)); + if (n_vfid == MLXSW_SP_VFID_MAX) { + dev_err(dev, "No available vFIDs\n"); + return ERR_PTR(-ERANGE); + } + + err = __mlxsw_sp_vfid_create(mlxsw_sp, n_vfid); + if (err) { + dev_err(dev, "Failed to create vFID=%d\n", n_vfid); + return ERR_PTR(err); + } + + vfid = kzalloc(sizeof(*vfid), GFP_KERNEL); + if (!vfid) + goto err_allocate_vfid; + + vfid->vfid = n_vfid; + vfid->br_dev = br_dev; + + list_add(&vfid->list, &mlxsw_sp->br_vfids.list); + set_bit(mlxsw_sp_vfid_to_br_vfid(n_vfid), mlxsw_sp->br_vfids.mapped); + + return vfid; + +err_allocate_vfid: + __mlxsw_sp_vfid_destroy(mlxsw_sp, n_vfid); + return ERR_PTR(-ENOMEM); +} + +static void mlxsw_sp_br_vfid_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vfid *vfid) +{ + u16 br_vfid = mlxsw_sp_vfid_to_br_vfid(vfid->vfid); + + clear_bit(br_vfid, mlxsw_sp->br_vfids.mapped); + list_del(&vfid->list); + + __mlxsw_sp_vfid_destroy(mlxsw_sp, vfid->vfid); + + kfree(vfid); +} + +static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *br_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + struct net_device *dev = mlxsw_sp_vport->dev; + struct mlxsw_sp_vfid *vfid, *new_vfid; + int err; + + vfid = mlxsw_sp_br_vfid_find(mlxsw_sp, br_dev); + if (!vfid) { + WARN_ON(!vfid); + return -EINVAL; + } + + /* We need a vFID to go back to after leaving the bridge's vFID. */ + new_vfid = mlxsw_sp_vfid_find(mlxsw_sp, vid); + if (!new_vfid) { + new_vfid = mlxsw_sp_vfid_create(mlxsw_sp, vid); + if (IS_ERR(new_vfid)) { + netdev_err(dev, "Failed to create vFID for VID=%d\n", + vid); + return PTR_ERR(new_vfid); + } + } + + /* Invalidate existing {Port, VID} to vFID mapping and create a new + * one for the new vFID. + */ + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, + false, + mlxsw_sp_vfid_to_fid(vfid->vfid), + vid); + if (err) { + netdev_err(dev, "Failed to invalidate {Port, VID} to vFID=%d mapping\n", + vfid->vfid); + goto err_port_vid_to_fid_invalidate; + } + + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, + true, + mlxsw_sp_vfid_to_fid(new_vfid->vfid), + vid); + if (err) { + netdev_err(dev, "Failed to map {Port, VID} to vFID=%d\n", + new_vfid->vfid); + goto err_port_vid_to_fid_validate; + } + + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); + if (err) { + netdev_err(dev, "Failed to disable learning\n"); + goto err_port_vid_learning_set; + } + + err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false, + false); + if (err) { + netdev_err(dev, "Failed clear to clear flooding\n"); + goto err_vport_flood_set; + } + + /* Switch between the vFIDs and destroy the old one if needed. */ + new_vfid->nr_vports++; + mlxsw_sp_vport->vport.vfid = new_vfid; + vfid->nr_vports--; + if (!vfid->nr_vports) + mlxsw_sp_br_vfid_destroy(mlxsw_sp, vfid); + + mlxsw_sp_vport->learning = 0; + mlxsw_sp_vport->learning_sync = 0; + mlxsw_sp_vport->uc_flood = 0; + mlxsw_sp_vport->bridged = 0; + + return 0; + +err_vport_flood_set: +err_port_vid_learning_set: +err_port_vid_to_fid_validate: +err_port_vid_to_fid_invalidate: + /* Rollback vFID only if new. */ + if (!new_vfid->nr_vports) + mlxsw_sp_vfid_destroy(mlxsw_sp, new_vfid); + return err; +} + +static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *br_dev) +{ + struct mlxsw_sp_vfid *old_vfid = mlxsw_sp_vport->vport.vfid; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + struct net_device *dev = mlxsw_sp_vport->dev; + struct mlxsw_sp_vfid *vfid; + int err; + + vfid = mlxsw_sp_br_vfid_find(mlxsw_sp, br_dev); + if (!vfid) { + vfid = mlxsw_sp_br_vfid_create(mlxsw_sp, br_dev); + if (IS_ERR(vfid)) { + netdev_err(dev, "Failed to create bridge vFID\n"); + return PTR_ERR(vfid); + } + } + + err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, true, false); + if (err) { + netdev_err(dev, "Failed to setup flooding for vFID=%d\n", + vfid->vfid); + goto err_port_flood_set; + } + + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); + if (err) { + netdev_err(dev, "Failed to enable learning\n"); + goto err_port_vid_learning_set; + } + + /* We need to invalidate existing {Port, VID} to vFID mapping and + * create a new one for the bridge's vFID. + */ + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, + false, + mlxsw_sp_vfid_to_fid(old_vfid->vfid), + vid); + if (err) { + netdev_err(dev, "Failed to invalidate {Port, VID} to vFID=%d mapping\n", + old_vfid->vfid); + goto err_port_vid_to_fid_invalidate; + } + + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, + true, + mlxsw_sp_vfid_to_fid(vfid->vfid), + vid); + if (err) { + netdev_err(dev, "Failed to map {Port, VID} to vFID=%d\n", + vfid->vfid); + goto err_port_vid_to_fid_validate; + } + + /* Switch between the vFIDs and destroy the old one if needed. */ + vfid->nr_vports++; + mlxsw_sp_vport->vport.vfid = vfid; + old_vfid->nr_vports--; + if (!old_vfid->nr_vports) + mlxsw_sp_vfid_destroy(mlxsw_sp, old_vfid); + + mlxsw_sp_vport->learning = 1; + mlxsw_sp_vport->learning_sync = 1; + mlxsw_sp_vport->uc_flood = 1; + mlxsw_sp_vport->bridged = 1; + + return 0; + +err_port_vid_to_fid_validate: + mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, false, + mlxsw_sp_vfid_to_fid(old_vfid->vfid), vid); +err_port_vid_to_fid_invalidate: + mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); +err_port_vid_learning_set: + mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false, false); +err_port_flood_set: + if (!vfid->nr_vports) + mlxsw_sp_br_vfid_destroy(mlxsw_sp, vfid); + return err; +} + +static bool +mlxsw_sp_port_master_bridge_check(const struct mlxsw_sp_port *mlxsw_sp_port, + const struct net_device *br_dev) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + + list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, + vport.list) { + if (mlxsw_sp_vport_br_get(mlxsw_sp_vport) == br_dev) + return false; + } + + return true; +} + +static int mlxsw_sp_netdevice_vport_event(struct net_device *dev, + unsigned long event, void *ptr, + u16 vid) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct netdev_notifier_changeupper_info *info = ptr; + struct mlxsw_sp_port *mlxsw_sp_vport; + struct net_device *upper_dev; + int err; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + + switch (event) { + case NETDEV_PRECHANGEUPPER: + upper_dev = info->upper_dev; + if (!info->master || !info->linking) + break; + if (!netif_is_bridge_master(upper_dev)) + return NOTIFY_BAD; + /* We can't have multiple VLAN interfaces configured on + * the same port and being members in the same bridge. + */ + if (!mlxsw_sp_port_master_bridge_check(mlxsw_sp_port, + upper_dev)) + return NOTIFY_BAD; + break; + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; + if (!info->master) + break; + if (info->linking) { + if (!mlxsw_sp_vport) { + WARN_ON(!mlxsw_sp_vport); + return NOTIFY_BAD; + } + err = mlxsw_sp_vport_bridge_join(mlxsw_sp_vport, + upper_dev); + if (err) { + netdev_err(dev, "Failed to join bridge\n"); + return NOTIFY_BAD; + } + } else { + /* We ignore bridge's unlinking notifications if vPort + * is gone, since we already left the bridge when the + * VLAN device was unlinked from the real device. + */ + if (!mlxsw_sp_vport) + return NOTIFY_DONE; + err = mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, + upper_dev); + if (err) { + netdev_err(dev, "Failed to leave bridge\n"); + return NOTIFY_BAD; + } + } + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_lag_vport_event(struct net_device *lag_dev, + unsigned long event, void *ptr, + u16 vid) +{ + struct net_device *dev; + struct list_head *iter; + int ret; + + netdev_for_each_lower_dev(lag_dev, dev, iter) { + if (mlxsw_sp_port_dev_check(dev)) { + ret = mlxsw_sp_netdevice_vport_event(dev, event, ptr, + vid); + if (ret == NOTIFY_BAD) + return ret; + } + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, + unsigned long event, void *ptr) +{ + struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); + u16 vid = vlan_dev_vlan_id(vlan_dev); + + if (mlxsw_sp_port_dev_check(real_dev)) + return mlxsw_sp_netdevice_vport_event(real_dev, event, ptr, + vid); + else if (netif_is_lag_master(real_dev)) + return mlxsw_sp_netdevice_lag_vport_event(real_dev, event, ptr, + vid); + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (mlxsw_sp_port_dev_check(dev)) + return mlxsw_sp_netdevice_port_event(dev, event, ptr); + + if (netif_is_lag_master(dev)) + return mlxsw_sp_netdevice_lag_event(dev, event, ptr); + + if (is_vlan_dev(dev)) + return mlxsw_sp_netdevice_vlan_event(dev, event, ptr); + + return NOTIFY_DONE; +} + static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = { .notifier_call = mlxsw_sp_netdevice_event, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4365c8bccc6d..a23dc610d259 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -41,16 +41,73 @@ #include <linux/netdevice.h> #include <linux/bitops.h> #include <linux/if_vlan.h> +#include <linux/list.h> #include <net/switchdev.h> +#include "port.h" #include "core.h" #define MLXSW_SP_VFID_BASE VLAN_N_VID +#define MLXSW_SP_VFID_PORT_MAX 512 /* Non-bridged VLAN interfaces */ +#define MLXSW_SP_VFID_BR_MAX 8192 /* Bridged VLAN interfaces */ +#define MLXSW_SP_VFID_MAX (MLXSW_SP_VFID_PORT_MAX + MLXSW_SP_VFID_BR_MAX) + +#define MLXSW_SP_LAG_MAX 64 +#define MLXSW_SP_PORT_PER_LAG_MAX 16 + +#define MLXSW_SP_MID_MAX 7000 struct mlxsw_sp_port; +struct mlxsw_sp_upper { + struct net_device *dev; + unsigned int ref_count; +}; + +struct mlxsw_sp_vfid { + struct list_head list; + u16 nr_vports; + u16 vfid; /* Starting at 0 */ + struct net_device *br_dev; + u16 vid; +}; + +struct mlxsw_sp_mid { + struct list_head list; + unsigned char addr[ETH_ALEN]; + u16 vid; + u16 mid; + unsigned int ref_count; +}; + +static inline u16 mlxsw_sp_vfid_to_fid(u16 vfid) +{ + return MLXSW_SP_VFID_BASE + vfid; +} + +static inline u16 mlxsw_sp_fid_to_vfid(u16 fid) +{ + return fid - MLXSW_SP_VFID_BASE; +} + +static inline bool mlxsw_sp_fid_is_vfid(u16 fid) +{ + return fid >= MLXSW_SP_VFID_BASE; +} + struct mlxsw_sp { - unsigned long active_vfids[BITS_TO_LONGS(VLAN_N_VID)]; + struct { + struct list_head list; + unsigned long mapped[BITS_TO_LONGS(MLXSW_SP_VFID_PORT_MAX)]; + } port_vfids; + struct { + struct list_head list; + unsigned long mapped[BITS_TO_LONGS(MLXSW_SP_VFID_BR_MAX)]; + } br_vfids; + struct { + struct list_head list; + unsigned long mapped[BITS_TO_LONGS(MLXSW_SP_MID_MAX)]; + } br_mids; unsigned long active_fids[BITS_TO_LONGS(VLAN_N_VID)]; struct mlxsw_sp_port **ports; struct mlxsw_core *core; @@ -63,12 +120,17 @@ struct mlxsw_sp { } fdb_notify; #define MLXSW_SP_DEFAULT_AGEING_TIME 300 u32 ageing_time; - struct { - struct net_device *dev; - unsigned int ref_count; - } master_bridge; + struct mutex fdb_lock; /* Make sure FDB sessions are atomic. */ + struct mlxsw_sp_upper master_bridge; + struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; }; +static inline struct mlxsw_sp_upper * +mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + return &mlxsw_sp->lags[lag_id]; +} + struct mlxsw_sp_port_pcpu_stats { u64 rx_packets; u64 rx_bytes; @@ -87,15 +149,87 @@ struct mlxsw_sp_port { u8 learning:1, learning_sync:1, uc_flood:1, - bridged:1; + bridged:1, + lagged:1; u16 pvid; + u16 lag_id; + struct { + struct list_head list; + struct mlxsw_sp_vfid *vfid; + u16 vid; + } vport; /* 802.1Q bridge VLANs */ - unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + unsigned long *active_vlans; + unsigned long *untagged_vlans; /* VLAN interfaces */ - unsigned long active_vfids[BITS_TO_LONGS(VLAN_N_VID)]; - u16 nr_vfids; + struct list_head vports_list; }; +static inline struct mlxsw_sp_port * +mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + u8 local_port; + + local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core, + lag_id, port_index); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + return mlxsw_sp_port && mlxsw_sp_port->lagged ? mlxsw_sp_port : NULL; +} + +static inline bool +mlxsw_sp_port_is_vport(const struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_port->vport.vfid; +} + +static inline struct net_device * +mlxsw_sp_vport_br_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +{ + return mlxsw_sp_vport->vport.vfid->br_dev; +} + +static inline u16 +mlxsw_sp_vport_vid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +{ + return mlxsw_sp_vport->vport.vid; +} + +static inline u16 +mlxsw_sp_vport_vfid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +{ + return mlxsw_sp_vport->vport.vfid->vfid; +} + +static inline struct mlxsw_sp_port * +mlxsw_sp_port_vport_find(const struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + + list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, + vport.list) { + if (mlxsw_sp_vport_vid_get(mlxsw_sp_vport) == vid) + return mlxsw_sp_vport; + } + + return NULL; +} + +static inline struct mlxsw_sp_port * +mlxsw_sp_port_vport_find_by_vfid(const struct mlxsw_sp_port *mlxsw_sp_port, + u16 vfid) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + + list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, + vport.list) { + if (mlxsw_sp_vport_vfid_get(mlxsw_sp_vport) == vfid) + return mlxsw_sp_vport; + } + + return NULL; +} + enum mlxsw_sp_flood_table { MLXSW_SP_FLOOD_TABLE_UC, MLXSW_SP_FLOOD_TABLE_BM, @@ -118,5 +252,7 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid); int mlxsw_sp_port_kill_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid); +int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid, + bool set, bool only_uc); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 617fb22b5d81..ffe894e6d287 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -51,12 +51,50 @@ #include "core.h" #include "reg.h" +static u16 mlxsw_sp_port_vid_to_fid_get(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid) +{ + u16 fid = vid; + + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_port); + + fid = mlxsw_sp_vfid_to_fid(vfid); + } + + if (!fid) + fid = mlxsw_sp_port->pvid; + + return fid; +} + +static struct mlxsw_sp_port * +mlxsw_sp_port_orig_get(struct net_device *dev, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + u16 vid; + + if (!is_vlan_dev(dev)) + return mlxsw_sp_port; + + vid = vlan_dev_vlan_id(dev); + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + WARN_ON(!mlxsw_sp_vport); + + return mlxsw_sp_vport; +} + static int mlxsw_sp_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + mlxsw_sp_port = mlxsw_sp_port_orig_get(attr->orig_dev, mlxsw_sp_port); + if (!mlxsw_sp_port) + return -EINVAL; + switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac); @@ -105,8 +143,14 @@ static int mlxsw_sp_port_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, if (!spms_pl) return -ENOMEM; mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port); - for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) + + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state); + } else { + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) + mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state); + } err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl); kfree(spms_pl); @@ -124,22 +168,38 @@ static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_sp_port_stp_state_set(mlxsw_sp_port, state); } +static bool mlxsw_sp_vfid_is_vport_br(u16 vfid) +{ + return vfid >= MLXSW_SP_VFID_PORT_MAX; +} + static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 fid_begin, u16 fid_end, bool set, + u16 idx_begin, u16 idx_end, bool set, bool only_uc) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u16 range = fid_end - fid_begin + 1; + u16 local_port = mlxsw_sp_port->local_port; + enum mlxsw_flood_table_type table_type; + u16 range = idx_end - idx_begin + 1; char *sftr_pl; int err; + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID; + if (mlxsw_sp_vfid_is_vport_br(idx_begin)) + local_port = mlxsw_sp_port->local_port; + else + local_port = MLXSW_PORT_CPU_PORT; + } else { + table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; + } + sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); if (!sftr_pl) return -ENOMEM; - mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, fid_begin, - MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST, range, - mlxsw_sp_port->local_port, set); + mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin, + table_type, range, local_port, set); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); if (err) goto buffer_out; @@ -150,9 +210,8 @@ static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, if (only_uc) goto buffer_out; - mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, fid_begin, - MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST, range, - mlxsw_sp_port->local_port, set); + mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, idx_begin, + table_type, range, local_port, set); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); buffer_out: @@ -167,6 +226,13 @@ static int mlxsw_sp_port_uc_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, last_visited_vid; int err; + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_port); + + return __mlxsw_sp_port_flood_set(mlxsw_sp_port, vfid, vfid, + set, true); + } + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid, vid, set, true); @@ -185,6 +251,16 @@ err_port_flood_set: return err; } +int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid, + bool set, bool only_uc) +{ + /* In case of vFIDs, index into the flooding table is relative to + * the start of the vFIDs range. + */ + return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set, + only_uc); +} + static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_trans *trans, unsigned long brport_flags) @@ -193,6 +269,9 @@ static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, bool set; int err; + if (!mlxsw_sp_port->bridged) + return -EINVAL; + if (switchdev_trans_ph_prepare(trans)) return 0; @@ -237,6 +316,22 @@ static int mlxsw_sp_port_attr_br_ageing_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_sp_ageing_set(mlxsw_sp, ageing_time); } +static int mlxsw_sp_port_attr_br_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_trans *trans, + struct net_device *orig_dev, + bool vlan_enabled) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + /* SWITCHDEV_TRANS_PREPARE phase */ + if ((!vlan_enabled) && (mlxsw_sp->master_bridge.dev == orig_dev)) { + netdev_err(mlxsw_sp_port->dev, "Bridge must be vlan-aware\n"); + return -EINVAL; + } + + return 0; +} + static int mlxsw_sp_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct switchdev_trans *trans) @@ -244,6 +339,10 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); int err = 0; + mlxsw_sp_port = mlxsw_sp_port_orig_get(attr->orig_dev, mlxsw_sp_port); + if (!mlxsw_sp_port) + return -EINVAL; + switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, trans, @@ -257,6 +356,11 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, err = mlxsw_sp_port_attr_br_ageing_set(mlxsw_sp_port, trans, attr->u.ageing_time); break; + case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: + err = mlxsw_sp_port_attr_br_vlan_set(mlxsw_sp_port, trans, + attr->orig_dev, + attr->u.vlan_filtering); + break; default: err = -EOPNOTSUPP; break; @@ -304,7 +408,7 @@ static int mlxsw_sp_port_fid_map(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) { enum mlxsw_reg_svfa_mt mt; - if (mlxsw_sp_port->nr_vfids) + if (!list_empty(&mlxsw_sp_port->vports_list)) mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; else mt = MLXSW_REG_SVFA_MT_VID_TO_FID; @@ -316,7 +420,7 @@ static int mlxsw_sp_port_fid_unmap(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) { enum mlxsw_reg_svfa_mt mt; - if (!mlxsw_sp_port->nr_vfids) + if (list_empty(&mlxsw_sp_port->vports_list)) return 0; mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; @@ -342,14 +446,35 @@ err_port_add_vid: return err; } +static int __mlxsw_sp_port_vlans_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid_begin, u16 vid_end, bool is_member, + bool untagged) +{ + u16 vid, vid_e; + int err; + + for (vid = vid_begin; vid <= vid_end; + vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { + vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), + vid_end); + + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, + is_member, untagged); + if (err) + return err; + } + + return 0; +} + static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool flag_untagged, bool flag_pvid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct net_device *dev = mlxsw_sp_port->dev; + u16 vid, last_visited_vid, old_pvid; enum mlxsw_reg_svfa_mt mt; - u16 vid, vid_e; int err; /* In case this is invoked with BRIDGE_FLAGS_SELF and port is @@ -377,15 +502,18 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, if (err) { netdev_err(dev, "Failed to create FID=VID=%d mapping\n", vid); - return err; + goto err_port_vid_to_fid_set; } } + } - /* Set FID mapping according to port's mode */ + /* Set FID mapping according to port's mode */ + for (vid = vid_begin; vid <= vid_end; vid++) { err = mlxsw_sp_port_fid_map(mlxsw_sp_port, vid); if (err) { netdev_err(dev, "Failed to map FID=%d", vid); - return err; + last_visited_vid = --vid; + goto err_port_fid_map; } } @@ -393,83 +521,133 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, true, false); if (err) { netdev_err(dev, "Failed to configure flooding\n"); - return err; + goto err_port_flood_set; } - for (vid = vid_begin; vid <= vid_end; - vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { - vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), - vid_end); - - err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, true, - flag_untagged); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Unable to add VIDs %d-%d\n", - vid, vid_e); - return err; - } + err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, + true, flag_untagged); + if (err) { + netdev_err(dev, "Unable to add VIDs %d-%d\n", vid_begin, + vid_end); + goto err_port_vlans_set; } - vid = vid_begin; - if (flag_pvid && mlxsw_sp_port->pvid != vid) { - err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid); + old_pvid = mlxsw_sp_port->pvid; + if (flag_pvid && old_pvid != vid_begin) { + err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid_begin); if (err) { - netdev_err(mlxsw_sp_port->dev, "Unable to add PVID %d\n", - vid); - return err; + netdev_err(dev, "Unable to add PVID %d\n", vid_begin); + goto err_port_pvid_set; } - mlxsw_sp_port->pvid = vid; + mlxsw_sp_port->pvid = vid_begin; } /* Changing activity bits only if HW operation succeded */ - for (vid = vid_begin; vid <= vid_end; vid++) + for (vid = vid_begin; vid <= vid_end; vid++) { set_bit(vid, mlxsw_sp_port->active_vlans); + if (flag_untagged) + set_bit(vid, mlxsw_sp_port->untagged_vlans); + else + clear_bit(vid, mlxsw_sp_port->untagged_vlans); + } - return mlxsw_sp_port_stp_state_set(mlxsw_sp_port, - mlxsw_sp_port->stp_state); + /* STP state change must be done after we set active VLANs */ + err = mlxsw_sp_port_stp_state_set(mlxsw_sp_port, + mlxsw_sp_port->stp_state); + if (err) { + netdev_err(dev, "Failed to set STP state\n"); + goto err_port_stp_state_set; + } + + return 0; + +err_port_vid_to_fid_set: + mlxsw_sp_fid_destroy(mlxsw_sp, vid); + return err; + +err_port_stp_state_set: + for (vid = vid_begin; vid <= vid_end; vid++) + clear_bit(vid, mlxsw_sp_port->active_vlans); + if (old_pvid != mlxsw_sp_port->pvid) + mlxsw_sp_port_pvid_set(mlxsw_sp_port, old_pvid); +err_port_pvid_set: + __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false, + false); +err_port_vlans_set: + __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end, false, + false); +err_port_flood_set: + last_visited_vid = vid_end; +err_port_fid_map: + for (vid = last_visited_vid; vid >= vid_begin; vid--) + mlxsw_sp_port_fid_unmap(mlxsw_sp_port, vid); + return err; } static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - bool untagged_flag = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; - bool pvid_flag = vlan->flags & BRIDGE_VLAN_INFO_PVID; + bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; + bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; if (switchdev_trans_ph_prepare(trans)) return 0; return __mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan->vid_begin, vlan->vid_end, - untagged_flag, pvid_flag); + flag_untagged, flag_pvid); +} + +static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic) +{ + return dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : + MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; +} + +static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) +{ + return adding ? MLXSW_REG_SFD_OP_WRITE_EDIT : + MLXSW_REG_SFD_OP_WRITE_REMOVE; } -static int mlxsw_sp_port_fdb_op(struct mlxsw_sp_port *mlxsw_sp_port, - const char *mac, u16 vid, bool adding, - bool dynamic) +static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, + const char *mac, u16 fid, bool adding, + bool dynamic) { - enum mlxsw_reg_sfd_rec_policy policy; - enum mlxsw_reg_sfd_op op; char *sfd_pl; int err; - if (!vid) - vid = mlxsw_sp_port->pvid; + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), + mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP, + local_port); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + kfree(sfd_pl); + + return err; +} + +static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, + const char *mac, u16 fid, u16 lag_vid, + bool adding, bool dynamic) +{ + char *sfd_pl; + int err; sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); if (!sfd_pl) return -ENOMEM; - policy = dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : - MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; - op = adding ? MLXSW_REG_SFD_OP_WRITE_EDIT : - MLXSW_REG_SFD_OP_WRITE_REMOVE; - mlxsw_reg_sfd_pack(sfd_pl, op, 0); - mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy, - mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP, - mlxsw_sp_port->local_port); - err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(sfd), - sfd_pl); + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), + mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP, + lag_vid, lag_id); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); kfree(sfd_pl); return err; @@ -480,11 +658,162 @@ mlxsw_sp_port_fdb_static_add(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { + u16 fid = mlxsw_sp_port_vid_to_fid_get(mlxsw_sp_port, fdb->vid); + u16 lag_vid = 0; + + if (switchdev_trans_ph_prepare(trans)) + return 0; + + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + lag_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + } + + if (!mlxsw_sp_port->lagged) + return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + fdb->addr, fid, true, false); + else + return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->lag_id, + fdb->addr, fid, lag_vid, + true, false); +} + +static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr, + u16 fid, u16 mid, bool adding) +{ + char *sfd_pl; + int err; + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_mc_pack(sfd_pl, 0, addr, fid, + MLXSW_REG_SFD_REC_ACTION_NOP, mid); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + kfree(sfd_pl); + return err; +} + +static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid, + bool add, bool clear_all_ports) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char *smid_pl; + int err, i; + + smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL); + if (!smid_pl) + return -ENOMEM; + + mlxsw_reg_smid_pack(smid_pl, mid, mlxsw_sp_port->local_port, add); + if (clear_all_ports) { + for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) + if (mlxsw_sp->ports[i]) + mlxsw_reg_smid_port_mask_set(smid_pl, i, 1); + } + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl); + kfree(smid_pl); + return err; +} + +static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp, + const unsigned char *addr, + u16 vid) +{ + struct mlxsw_sp_mid *mid; + + list_for_each_entry(mid, &mlxsw_sp->br_mids.list, list) { + if (ether_addr_equal(mid->addr, addr) && mid->vid == vid) + return mid; + } + return NULL; +} + +static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, + const unsigned char *addr, + u16 vid) +{ + struct mlxsw_sp_mid *mid; + u16 mid_idx; + + mid_idx = find_first_zero_bit(mlxsw_sp->br_mids.mapped, + MLXSW_SP_MID_MAX); + if (mid_idx == MLXSW_SP_MID_MAX) + return NULL; + + mid = kzalloc(sizeof(*mid), GFP_KERNEL); + if (!mid) + return NULL; + + set_bit(mid_idx, mlxsw_sp->br_mids.mapped); + ether_addr_copy(mid->addr, addr); + mid->vid = vid; + mid->mid = mid_idx; + mid->ref_count = 0; + list_add_tail(&mid->list, &mlxsw_sp->br_mids.list); + + return mid; +} + +static int __mlxsw_sp_mc_dec_ref(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mid *mid) +{ + if (--mid->ref_count == 0) { + list_del(&mid->list); + clear_bit(mid->mid, mlxsw_sp->br_mids.mapped); + kfree(mid); + return 1; + } + return 0; +} + +static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct net_device *dev = mlxsw_sp_port->dev; + struct mlxsw_sp_mid *mid; + u16 fid = mlxsw_sp_port_vid_to_fid_get(mlxsw_sp_port, mdb->vid); + int err = 0; + if (switchdev_trans_ph_prepare(trans)) return 0; - return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid, - true, false); + mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, mdb->vid); + if (!mid) { + mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, mdb->vid); + if (!mid) { + netdev_err(dev, "Unable to allocate MC group\n"); + return -ENOMEM; + } + } + mid->ref_count++; + + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true, + mid->ref_count == 1); + if (err) { + netdev_err(dev, "Unable to set SMID\n"); + goto err_out; + } + + if (mid->ref_count == 1) { + err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid, mid->mid, + true); + if (err) { + netdev_err(dev, "Unable to set MC SFD\n"); + goto err_out; + } + } + + return 0; + +err_out: + __mlxsw_sp_mc_dec_ref(mlxsw_sp, mid); + return err; } static int mlxsw_sp_port_obj_add(struct net_device *dev, @@ -494,8 +823,15 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); int err = 0; + mlxsw_sp_port = mlxsw_sp_port_orig_get(obj->orig_dev, mlxsw_sp_port); + if (!mlxsw_sp_port) + return -EINVAL; + switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) + return 0; + err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); @@ -505,6 +841,11 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, SWITCHDEV_OBJ_PORT_FDB(obj), trans); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = mlxsw_sp_port_mdb_add(mlxsw_sp_port, + SWITCHDEV_OBJ_PORT_MDB(obj), + trans); + break; default: err = -EOPNOTSUPP; break; @@ -532,7 +873,7 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool init) { struct net_device *dev = mlxsw_sp_port->dev; - u16 vid, vid_e; + u16 vid, pvid; int err; /* In case this is invoked with BRIDGE_FLAGS_SELF and port is @@ -542,30 +883,23 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, if (!init && !mlxsw_sp_port->bridged) return mlxsw_sp_port_kill_vids(dev, vid_begin, vid_end); - for (vid = vid_begin; vid <= vid_end; - vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { - vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), - vid_end); - err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, false, - false); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Unable to del VIDs %d-%d\n", - vid, vid_e); - return err; - } + err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, + false, false); + if (err) { + netdev_err(dev, "Unable to del VIDs %d-%d\n", vid_begin, + vid_end); + return err; } - if ((mlxsw_sp_port->pvid >= vid_begin) && - (mlxsw_sp_port->pvid <= vid_end)) { + pvid = mlxsw_sp_port->pvid; + if (pvid >= vid_begin && pvid <= vid_end && pvid != 1) { /* Default VLAN is always 1 */ - mlxsw_sp_port->pvid = 1; - err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, - mlxsw_sp_port->pvid); + err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); if (err) { - netdev_err(mlxsw_sp_port->dev, "Unable to del PVID %d\n", - vid); + netdev_err(dev, "Unable to del PVID %d\n", pvid); return err; } + mlxsw_sp_port->pvid = 1; } if (init) @@ -606,8 +940,54 @@ static int mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_fdb *fdb) { - return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid, - false, false); + u16 fid = mlxsw_sp_port_vid_to_fid_get(mlxsw_sp_port, fdb->vid); + u16 lag_vid = 0; + + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + lag_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + } + + if (!mlxsw_sp_port->lagged) + return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + fdb->addr, fid, + false, false); + else + return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->lag_id, + fdb->addr, fid, lag_vid, + false, false); +} + +static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_port_mdb *mdb) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct net_device *dev = mlxsw_sp_port->dev; + struct mlxsw_sp_mid *mid; + u16 fid = mlxsw_sp_port_vid_to_fid_get(mlxsw_sp_port, mdb->vid); + u16 mid_idx; + int err = 0; + + mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, mdb->vid); + if (!mid) { + netdev_err(dev, "Unable to remove port from MC DB\n"); + return -EINVAL; + } + + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false, false); + if (err) + netdev_err(dev, "Unable to remove port from SMID\n"); + + mid_idx = mid->mid; + if (__mlxsw_sp_mc_dec_ref(mlxsw_sp, mid)) { + err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid, mid_idx, + false); + if (err) + netdev_err(dev, "Unable to remove MC SFD\n"); + } + + return err; } static int mlxsw_sp_port_obj_del(struct net_device *dev, @@ -616,8 +996,15 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); int err = 0; + mlxsw_sp_port = mlxsw_sp_port_orig_get(obj->orig_dev, mlxsw_sp_port); + if (!mlxsw_sp_port) + return -EINVAL; + switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) + return 0; + err = mlxsw_sp_port_vlans_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; @@ -625,6 +1012,9 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, err = mlxsw_sp_port_fdb_static_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_FDB(obj)); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = mlxsw_sp_port_mdb_del(mlxsw_sp_port, + SWITCHDEV_OBJ_PORT_MDB(obj)); default: err = -EOPNOTSUPP; break; @@ -633,14 +1023,31 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, return err; } +static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, + u16 lag_id) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + int i; + + for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); + if (mlxsw_sp_port) + return mlxsw_sp_port; + } + return NULL; +} + static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_obj_port_fdb *fdb, switchdev_obj_dump_cb_t *cb) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 vport_vid = 0, vport_fid = 0; char *sfd_pl; char mac[ETH_ALEN]; - u16 vid; + u16 fid; u8 local_port; + u16 lag_id; u8 num_rec; int stored_err = 0; int i; @@ -650,11 +1057,19 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, if (!sfd_pl) return -ENOMEM; + mutex_lock(&mlxsw_sp_port->mlxsw_sp->fdb_lock); + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + u16 tmp; + + tmp = mlxsw_sp_vport_vfid_get(mlxsw_sp_port); + vport_fid = mlxsw_sp_vfid_to_fid(tmp); + vport_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + } + mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0); do { mlxsw_reg_sfd_num_rec_set(sfd_pl, MLXSW_REG_SFD_REC_MAX_COUNT); - err = mlxsw_reg_query(mlxsw_sp_port->mlxsw_sp->core, - MLXSW_REG(sfd), sfd_pl); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); if (err) goto out; @@ -669,21 +1084,46 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, for (i = 0; i < num_rec; i++) { switch (mlxsw_reg_sfd_rec_type_get(sfd_pl, i)) { case MLXSW_REG_SFD_REC_TYPE_UNICAST: - mlxsw_reg_sfd_uc_unpack(sfd_pl, i, mac, &vid, + mlxsw_reg_sfd_uc_unpack(sfd_pl, i, mac, &fid, &local_port); if (local_port == mlxsw_sp_port->local_port) { + if (vport_fid && vport_fid != fid) + continue; + else if (vport_fid) + fdb->vid = vport_vid; + else + fdb->vid = fid; + ether_addr_copy(fdb->addr, mac); + fdb->ndm_state = NUD_REACHABLE; + err = cb(&fdb->obj); + if (err) + stored_err = err; + } + break; + case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG: + mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i, + mac, &fid, &lag_id); + if (mlxsw_sp_port == + mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id)) { + if (vport_fid && vport_fid != fid) + continue; + else if (vport_fid) + fdb->vid = vport_vid; + else + fdb->vid = fid; ether_addr_copy(fdb->addr, mac); fdb->ndm_state = NUD_REACHABLE; - fdb->vid = vid; err = cb(&fdb->obj); if (err) stored_err = err; } + break; } } } while (num_rec == MLXSW_REG_SFD_REC_MAX_COUNT); out: + mutex_unlock(&mlxsw_sp_port->mlxsw_sp->fdb_lock); kfree(sfd_pl); return stored_err ? stored_err : err; } @@ -695,10 +1135,19 @@ static int mlxsw_sp_port_vlan_dump(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid; int err = 0; + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + vlan->flags = 0; + vlan->vid_begin = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + vlan->vid_end = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + return cb(&vlan->obj); + } + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { vlan->flags = 0; if (vid == mlxsw_sp_port->pvid) vlan->flags |= BRIDGE_VLAN_INFO_PVID; + if (test_bit(vid, mlxsw_sp_port->untagged_vlans)) + vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED; vlan->vid_begin = vid; vlan->vid_end = vid; err = cb(&vlan->obj); @@ -715,6 +1164,10 @@ static int mlxsw_sp_port_obj_dump(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); int err = 0; + mlxsw_sp_port = mlxsw_sp_port_orig_get(obj->orig_dev, mlxsw_sp_port); + if (!mlxsw_sp_port) + return -EINVAL; + switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: err = mlxsw_sp_port_vlan_dump(mlxsw_sp_port, @@ -740,6 +1193,21 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = { .switchdev_port_obj_dump = mlxsw_sp_port_obj_dump, }; +static void mlxsw_sp_fdb_call_notifiers(bool learning, bool learning_sync, + bool adding, char *mac, u16 vid, + struct net_device *dev) +{ + struct switchdev_notifier_fdb_info info; + unsigned long notifier_type; + + if (learning && learning_sync) { + info.addr = mac; + info.vid = vid; + notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL; + call_switchdev_notifiers(notifier_type, dev, &info.info); + } +} + static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, char *sfn_pl, int rec_index, bool adding) @@ -747,34 +1215,119 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_port *mlxsw_sp_port; char mac[ETH_ALEN]; u8 local_port; - u16 vid; + u16 vid, fid; + bool do_notification = true; int err; - mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &vid, &local_port); + mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &fid, &local_port); mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) { dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect local port in FDB notification\n"); - return; + goto just_remove; + } + + if (mlxsw_sp_fid_is_vfid(fid)) { + u16 vfid = mlxsw_sp_fid_to_vfid(fid); + struct mlxsw_sp_port *mlxsw_sp_vport; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_vfid(mlxsw_sp_port, + vfid); + if (!mlxsw_sp_vport) { + netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n"); + goto just_remove; + } + vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + /* Override the physical port with the vPort. */ + mlxsw_sp_port = mlxsw_sp_vport; + } else { + vid = fid; } - err = mlxsw_sp_port_fdb_op(mlxsw_sp_port, mac, vid, - adding && mlxsw_sp_port->learning, true); + adding = adding && mlxsw_sp_port->learning; + +do_fdb_op: + err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, + adding, true); if (err) { if (net_ratelimit()) netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n"); return; } - if (mlxsw_sp_port->learning && mlxsw_sp_port->learning_sync) { - struct switchdev_notifier_fdb_info info; - unsigned long notifier_type; + if (!do_notification) + return; + mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning, + mlxsw_sp_port->learning_sync, + adding, mac, vid, mlxsw_sp_port->dev); + return; + +just_remove: + adding = false; + do_notification = false; + goto do_fdb_op; +} - info.addr = mac; - info.vid = vid; - notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL; - call_switchdev_notifiers(notifier_type, mlxsw_sp_port->dev, - &info.info); +static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, + char *sfn_pl, int rec_index, + bool adding) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + char mac[ETH_ALEN]; + u16 lag_vid = 0; + u16 lag_id; + u16 vid, fid; + bool do_notification = true; + int err; + + mlxsw_reg_sfn_mac_lag_unpack(sfn_pl, rec_index, mac, &fid, &lag_id); + mlxsw_sp_port = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id); + if (!mlxsw_sp_port) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Cannot find port representor for LAG\n"); + goto just_remove; } + + if (mlxsw_sp_fid_is_vfid(fid)) { + u16 vfid = mlxsw_sp_fid_to_vfid(fid); + struct mlxsw_sp_port *mlxsw_sp_vport; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_vfid(mlxsw_sp_port, + vfid); + if (!mlxsw_sp_vport) { + netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n"); + goto just_remove; + } + + vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + lag_vid = vid; + /* Override the physical port with the vPort. */ + mlxsw_sp_port = mlxsw_sp_vport; + } else { + vid = fid; + } + + adding = adding && mlxsw_sp_port->learning; + +do_fdb_op: + err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid, + adding, true); + if (err) { + if (net_ratelimit()) + netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n"); + return; + } + + if (!do_notification) + return; + mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning, + mlxsw_sp_port->learning_sync, + adding, mac, vid, + mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev); + return; + +just_remove: + adding = false; + do_notification = false; + goto do_fdb_op; } static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, @@ -789,6 +1342,14 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fdb_notify_mac_process(mlxsw_sp, sfn_pl, rec_index, false); break; + case MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG: + mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl, + rec_index, true); + break; + case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG: + mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl, + rec_index, false); + break; } } @@ -812,6 +1373,7 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work) mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work); + mutex_lock(&mlxsw_sp->fdb_lock); do { mlxsw_reg_sfn_pack(sfn_pl); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl); @@ -824,6 +1386,7 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work) mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i); } while (num_rec); + mutex_unlock(&mlxsw_sp->fdb_lock); kfree(sfn_pl); mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); @@ -838,6 +1401,7 @@ static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp) dev_err(mlxsw_sp->bus_info->dev, "Failed to set default ageing time\n"); return err; } + mutex_init(&mlxsw_sp->fdb_lock); INIT_DELAYED_WORK(&mlxsw_sp->fdb_notify.dw, mlxsw_sp_fdb_notify_work); mlxsw_sp->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL; mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); @@ -877,7 +1441,8 @@ int mlxsw_sp_port_vlan_init(struct mlxsw_sp_port *mlxsw_sp_port) * with VID 1. */ mlxsw_sp_port->pvid = 1; - err = __mlxsw_sp_port_vlans_del(mlxsw_sp_port, 0, VLAN_N_VID, true); + err = __mlxsw_sp_port_vlans_del(mlxsw_sp_port, 0, VLAN_N_VID - 1, + true); if (err) { netdev_err(dev, "Unable to init VLANs\n"); return err; |