From d2b007374551ac09db16badde575cdd698f6fc92 Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 24 Aug 2025 11:43:50 +0300 Subject: devlink: Move graceful period parameter to reporter ops Move the default graceful period from a parameter to devlink_health_reporter_create() to a field in the devlink_health_reporter_ops structure. This change improves consistency, as the graceful period is inherently tied to the reporter's behavior and recovery policy. It simplifies the signature of devlink_health_reporter_create() and its internal helper functions. It also centralizes the reporter configuration at the ops structure, preparing the groundwork for a downstream patch that will introduce a devlink health reporter burst period attribute whose default value will similarly be provided by the driver via the ops structure. Signed-off-by: Shahar Shitrit Reviewed-by: Jiri Pirko Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250824084354.533182-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/health.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/netdevsim') diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c index 688f05316b5e..3bd0e7a489c3 100644 --- a/drivers/net/netdevsim/health.c +++ b/drivers/net/netdevsim/health.c @@ -183,14 +183,14 @@ int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink) health->empty_reporter = devl_health_reporter_create(devlink, &nsim_dev_empty_reporter_ops, - 0, health); + health); if (IS_ERR(health->empty_reporter)) return PTR_ERR(health->empty_reporter); health->dummy_reporter = devl_health_reporter_create(devlink, &nsim_dev_dummy_reporter_ops, - 0, health); + health); if (IS_ERR(health->dummy_reporter)) { err = PTR_ERR(health->dummy_reporter); goto err_empty_reporter_destroy; -- cgit v1.2.3 From 9870d350e45a5724ee25f77aa0b6d053c9b766db Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Thu, 18 Sep 2025 16:24:25 +0200 Subject: net: replace use of system_unbound_wq with system_dfl_wq Currently if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistentcy cannot be addressed without refactoring the API. system_unbound_wq should be the default workqueue so as not to enforce locality constraints for random work whenever it's not required. Adding system_dfl_wq to encourage its use when unbound work should be used. The old system_unbound_wq will be kept for a few release cycles. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20250918142427.309519-2-marco.crivellari@suse.com Signed-off-by: Jakub Kicinski --- drivers/net/macvlan.c | 2 +- drivers/net/netdevsim/dev.c | 6 +++--- net/core/link_watch.c | 4 ++-- net/unix/garbage.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/net/netdevsim') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 4df991e494bd..7966545512cf 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -369,7 +369,7 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port, } spin_unlock(&port->bc_queue.lock); - queue_work(system_unbound_wq, &port->bc_work); + queue_work(system_dfl_wq, &port->bc_work); if (err) goto free_nskb; diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 2672d071b325..95f66c1f59db 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -851,7 +851,7 @@ static void nsim_dev_trap_report_work(struct work_struct *work) nsim_dev = nsim_trap_data->nsim_dev; if (!devl_trylock(priv_to_devlink(nsim_dev))) { - queue_delayed_work(system_unbound_wq, + queue_delayed_work(system_dfl_wq, &nsim_dev->trap_data->trap_report_dw, 1); return; } @@ -867,7 +867,7 @@ static void nsim_dev_trap_report_work(struct work_struct *work) cond_resched(); } devl_unlock(priv_to_devlink(nsim_dev)); - queue_delayed_work(system_unbound_wq, + queue_delayed_work(system_dfl_wq, &nsim_dev->trap_data->trap_report_dw, msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS)); } @@ -924,7 +924,7 @@ static int nsim_dev_traps_init(struct devlink *devlink) INIT_DELAYED_WORK(&nsim_dev->trap_data->trap_report_dw, nsim_dev_trap_report_work); - queue_delayed_work(system_unbound_wq, + queue_delayed_work(system_dfl_wq, &nsim_dev->trap_data->trap_report_dw, msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS)); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 864f3bbc3a4c..212cde35affa 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -157,9 +157,9 @@ static void linkwatch_schedule_work(int urgent) * override the existing timer. */ if (test_bit(LW_URGENT, &linkwatch_flags)) - mod_delayed_work(system_unbound_wq, &linkwatch_work, 0); + mod_delayed_work(system_dfl_wq, &linkwatch_work, 0); else - queue_delayed_work(system_unbound_wq, &linkwatch_work, delay); + queue_delayed_work(system_dfl_wq, &linkwatch_work, delay); } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 01e2b9452c75..684ab03137b6 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -592,7 +592,7 @@ static DECLARE_WORK(unix_gc_work, __unix_gc); void unix_gc(void) { WRITE_ONCE(gc_in_progress, true); - queue_work(system_unbound_wq, &unix_gc_work); + queue_work(system_dfl_wq, &unix_gc_work); } #define UNIX_INFLIGHT_TRIGGER_GC 16000 -- cgit v1.2.3 From cc2f08129925b437bf28f7f7822f20dac083a87c Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Wed, 24 Sep 2025 12:40:33 +0000 Subject: ethtool: add FEC bins histogram report IEEE 802.3ck-2022 defines counters for FEC bins and 802.3df-2024 clarifies it a bit further. Implement reporting interface through as addition to FEC stats available in ethtool. Drivers can leave bin counter uninitialized if per-lane values are provided. In this case the core will recalculate summ for the bin. Signed-off-by: Vadim Fedorenko Reviewed-by: Aleksandr Loktionov Link: https://patch.msgid.link/20250924124037.1508846-2-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 29 +++++++++ Documentation/networking/ethtool-netlink.rst | 5 ++ drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 3 +- .../net/ethernet/fungible/funeth/funeth_ethtool.c | 3 +- drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 3 +- drivers/net/ethernet/intel/ice/ice_ethtool.c | 4 +- .../ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 3 +- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +- drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c | 3 +- drivers/net/ethernet/sfc/ethtool.c | 3 +- drivers/net/ethernet/sfc/siena/ethtool.c | 3 +- drivers/net/netdevsim/ethtool.c | 25 +++++++- include/linux/ethtool.h | 25 +++++++- include/uapi/linux/ethtool_netlink_generated.h | 12 ++++ net/ethtool/fec.c | 75 +++++++++++++++++++++- 15 files changed, 186 insertions(+), 13 deletions(-) (limited to 'drivers/net/netdevsim') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 7a7594713f1f..6a0fb1974513 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1219,6 +1219,30 @@ attribute-sets: name: udp-ports type: nest nested-attributes: tunnel-udp + - + name: fec-hist + attr-cnt-name: --ethtool-a-fec-hist-cnt + attributes: + - + name: pad + type: pad + - + name: bin-low + type: u32 + doc: Low bound of FEC bin (inclusive) + - + name: bin-high + type: u32 + doc: High bound of FEC bin (inclusive) + - + name: bin-val + type: uint + doc: Error count in the bin (optional if per-lane values exist) + - + name: bin-val-per-lane + type: binary + sub-type: u64 + doc: An array of per-lane error counters in the bin (optional) - name: fec-stat attr-cnt-name: __ethtool-a-fec-stat-cnt @@ -1242,6 +1266,11 @@ attribute-sets: name: corr-bits type: binary sub-type: u64 + - + name: hist + type: nest + multi-attr: True + nested-attributes: fec-hist - name: fec attr-cnt-name: __ethtool-a-fec-cnt diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index ab20c644af24..b270886c5f5d 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1541,6 +1541,11 @@ Drivers fill in the statistics in the following structure: .. kernel-doc:: include/linux/ethtool.h :identifiers: ethtool_fec_stats +Statistics may have FEC bins histogram attribute ``ETHTOOL_A_FEC_STAT_HIST`` +as defined in IEEE 802.3ck-2022 and 802.3df-2024. Nested attributes will have +the range of FEC errors in the bin (inclusive) and the amount of error events +in the bin. + FEC_SET ======= diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index be32ef8f5c96..41686a6f84b5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -3208,7 +3208,8 @@ static int bnxt_get_fecparam(struct net_device *dev, } static void bnxt_get_fec_stats(struct net_device *dev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct bnxt *bp = netdev_priv(dev); u64 *rx; diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c index ba83dbf4ed22..1966dba512f8 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c @@ -930,7 +930,8 @@ static void fun_get_rmon_stats(struct net_device *netdev, } static void fun_get_fec_stats(struct net_device *netdev, - struct ethtool_fec_stats *stats) + struct ethtool_fec_stats *stats, + struct ethtool_fec_hist *hist) { const struct funeth_priv *fp = netdev_priv(netdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index a752d0e3db3a..a5eefa28454c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -1659,7 +1659,8 @@ static void hns3_set_msglevel(struct net_device *netdev, u32 msg_level) } static void hns3_get_fec_stats(struct net_device *netdev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct hnae3_handle *handle = hns3_get_handle(netdev); struct hnae3_ae_dev *ae_dev = hns3_get_ae_dev(handle); diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 348acd46a0ef..dc131779d426 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4624,10 +4624,12 @@ static int ice_get_port_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port, * ice_get_fec_stats - returns FEC correctable, uncorrectable stats per netdev * @netdev: network interface device structure * @fec_stats: buffer to hold FEC statistics for given port + * @hist: buffer to put FEC histogram statistics for given port * */ static void ice_get_fec_stats(struct net_device *netdev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_port_topology port_topology; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 998c734ff839..b90e23dc49de 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -1283,7 +1283,8 @@ end: } static void otx2_get_fec_stats(struct net_device *netdev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct otx2_nic *pfvf = netdev_priv(netdev); struct cgx_fw_data *rsp; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d507366d773e..bcc3bbb78cc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1927,7 +1927,8 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) } static void mlx5e_get_fec_stats(struct net_device *netdev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct mlx5e_priv *priv = netdev_priv(netdev); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index fecb8c602024..d55d2ac1c3b9 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -1718,7 +1718,8 @@ fbnic_get_pause_stats(struct net_device *netdev, static void fbnic_get_fec_stats(struct net_device *netdev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct fbnic_net *fbn = netdev_priv(netdev); struct fbnic_phy_stats *phy_stats; diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 23c6a7df78d0..18fe5850a978 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -217,7 +217,8 @@ static int efx_ethtool_set_wol(struct net_device *net_dev, } static void efx_ethtool_get_fec_stats(struct net_device *net_dev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct efx_nic *efx = efx_netdev_priv(net_dev); diff --git a/drivers/net/ethernet/sfc/siena/ethtool.c b/drivers/net/ethernet/sfc/siena/ethtool.c index 994909789bfe..8c3ebd0617fb 100644 --- a/drivers/net/ethernet/sfc/siena/ethtool.c +++ b/drivers/net/ethernet/sfc/siena/ethtool.c @@ -217,7 +217,8 @@ static int efx_ethtool_set_wol(struct net_device *net_dev, } static void efx_ethtool_get_fec_stats(struct net_device *net_dev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct efx_nic *efx = netdev_priv(net_dev); diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index f631d90c428a..36a201533aae 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -165,11 +165,34 @@ nsim_set_fecparam(struct net_device *dev, struct ethtool_fecparam *fecparam) return 0; } +static const struct ethtool_fec_hist_range netdevsim_fec_ranges[] = { + { 0, 0}, + { 1, 3}, + { 4, 7}, + { 0, 0} +}; + static void -nsim_get_fec_stats(struct net_device *dev, struct ethtool_fec_stats *fec_stats) +nsim_get_fec_stats(struct net_device *dev, struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { + struct ethtool_fec_hist_value *values = hist->values; + + hist->ranges = netdevsim_fec_ranges; + fec_stats->corrected_blocks.total = 123; fec_stats->uncorrectable_blocks.total = 4; + + values[0].per_lane[0] = 125; + values[0].per_lane[1] = 120; + values[0].per_lane[2] = 100; + values[0].per_lane[3] = 100; + values[1].sum = 12; + values[2].sum = 2; + values[2].per_lane[0] = 2; + values[2].per_lane[1] = 0; + values[2].per_lane[2] = 0; + values[2].per_lane[3] = 0; } static int nsim_get_ts_info(struct net_device *dev, diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c869b7f8bce8..c2d8b4ec62eb 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -492,7 +492,29 @@ struct ethtool_pause_stats { }; #define ETHTOOL_MAX_LANES 8 +/** + * IEEE 802.3ck/df defines 16 bins for FEC histogram plus one more for + * the end-of-list marker, total 17 items + */ +#define ETHTOOL_FEC_HIST_MAX 17 +/** + * struct ethtool_fec_hist_range - error bits range for FEC histogram + * statistics + * @low: low bound of the bin (inclusive) + * @high: high bound of the bin (inclusive) + */ +struct ethtool_fec_hist_range { + u16 low; + u16 high; +}; +struct ethtool_fec_hist { + struct ethtool_fec_hist_value { + u64 sum; + u64 per_lane[ETHTOOL_MAX_LANES]; + } values[ETHTOOL_FEC_HIST_MAX]; + const struct ethtool_fec_hist_range *ranges; +}; /** * struct ethtool_fec_stats - statistics for IEEE 802.3 FEC * @corrected_blocks: number of received blocks corrected by FEC @@ -1214,7 +1236,8 @@ struct ethtool_ops { int (*set_link_ksettings)(struct net_device *, const struct ethtool_link_ksettings *); void (*get_fec_stats)(struct net_device *dev, - struct ethtool_fec_stats *fec_stats); + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist); int (*get_fecparam)(struct net_device *, struct ethtool_fecparam *); int (*set_fecparam)(struct net_device *, diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index e3b8813465d7..0e8ac0d974e2 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -561,12 +561,24 @@ enum { ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) }; +enum { + ETHTOOL_A_FEC_HIST_PAD = 1, + ETHTOOL_A_FEC_HIST_BIN_LOW, + ETHTOOL_A_FEC_HIST_BIN_HIGH, + ETHTOOL_A_FEC_HIST_BIN_VAL, + ETHTOOL_A_FEC_HIST_BIN_VAL_PER_LANE, + + __ETHTOOL_A_FEC_HIST_CNT, + ETHTOOL_A_FEC_HIST_MAX = (__ETHTOOL_A_FEC_HIST_CNT - 1) +}; + enum { ETHTOOL_A_FEC_STAT_UNSPEC, ETHTOOL_A_FEC_STAT_PAD, ETHTOOL_A_FEC_STAT_CORRECTED, ETHTOOL_A_FEC_STAT_UNCORR, ETHTOOL_A_FEC_STAT_CORR_BITS, + ETHTOOL_A_FEC_STAT_HIST, __ETHTOOL_A_FEC_STAT_CNT, ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1) diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c index e7d3f2c352a3..4669e74cbcaa 100644 --- a/net/ethtool/fec.c +++ b/net/ethtool/fec.c @@ -17,6 +17,7 @@ struct fec_reply_data { u64 stats[1 + ETHTOOL_MAX_LANES]; u8 cnt; } corr, uncorr, corr_bits; + struct ethtool_fec_hist fec_stat_hist; }; #define FEC_REPDATA(__reply_base) \ @@ -113,7 +114,10 @@ static int fec_prepare_data(const struct ethnl_req_info *req_base, struct ethtool_fec_stats stats; ethtool_stats_init((u64 *)&stats, sizeof(stats) / 8); - dev->ethtool_ops->get_fec_stats(dev, &stats); + ethtool_stats_init((u64 *)data->fec_stat_hist.values, + sizeof(data->fec_stat_hist.values) / 8); + dev->ethtool_ops->get_fec_stats(dev, &stats, + &data->fec_stat_hist); fec_stats_recalc(&data->corr, &stats.corrected_blocks); fec_stats_recalc(&data->uncorr, &stats.uncorrectable_blocks); @@ -157,13 +161,77 @@ static int fec_reply_size(const struct ethnl_req_info *req_base, len += nla_total_size(sizeof(u8)) + /* _FEC_AUTO */ nla_total_size(sizeof(u32)); /* _FEC_ACTIVE */ - if (req_base->flags & ETHTOOL_FLAG_STATS) + if (req_base->flags & ETHTOOL_FLAG_STATS) { len += 3 * nla_total_size_64bit(sizeof(u64) * (1 + ETHTOOL_MAX_LANES)); + /* add FEC bins information */ + len += (nla_total_size(0) + /* _A_FEC_HIST */ + nla_total_size(4) + /* _A_FEC_HIST_BIN_LOW */ + nla_total_size(4) + /* _A_FEC_HIST_BIN_HI */ + /* _A_FEC_HIST_BIN_VAL + per-lane values */ + nla_total_size_64bit(sizeof(u64)) + + nla_total_size_64bit(sizeof(u64) * ETHTOOL_MAX_LANES)) * + ETHTOOL_FEC_HIST_MAX; + } return len; } +static int fec_put_hist(struct sk_buff *skb, + const struct ethtool_fec_hist *hist) +{ + const struct ethtool_fec_hist_range *ranges = hist->ranges; + const struct ethtool_fec_hist_value *values = hist->values; + struct nlattr *nest; + int i, j; + u64 sum; + + if (!ranges) + return 0; + + for (i = 0; i < ETHTOOL_FEC_HIST_MAX; i++) { + if (i && !ranges[i].low && !ranges[i].high) + break; + + if (WARN_ON_ONCE(values[i].sum == ETHTOOL_STAT_NOT_SET && + values[i].per_lane[0] == ETHTOOL_STAT_NOT_SET)) + break; + + nest = nla_nest_start(skb, ETHTOOL_A_FEC_STAT_HIST); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, ETHTOOL_A_FEC_HIST_BIN_LOW, + ranges[i].low) || + nla_put_u32(skb, ETHTOOL_A_FEC_HIST_BIN_HIGH, + ranges[i].high)) + goto err_cancel_hist; + sum = 0; + for (j = 0; j < ETHTOOL_MAX_LANES; j++) { + if (values[i].per_lane[j] == ETHTOOL_STAT_NOT_SET) + break; + sum += values[i].per_lane[j]; + } + if (nla_put_uint(skb, ETHTOOL_A_FEC_HIST_BIN_VAL, + values[i].sum == ETHTOOL_STAT_NOT_SET ? + sum : values[i].sum)) + goto err_cancel_hist; + if (j && nla_put_64bit(skb, ETHTOOL_A_FEC_HIST_BIN_VAL_PER_LANE, + sizeof(u64) * j, + values[i].per_lane, + ETHTOOL_A_FEC_HIST_PAD)) + goto err_cancel_hist; + + nla_nest_end(skb, nest); + } + + return 0; + +err_cancel_hist: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + static int fec_put_stats(struct sk_buff *skb, const struct fec_reply_data *data) { struct nlattr *nest; @@ -183,6 +251,9 @@ static int fec_put_stats(struct sk_buff *skb, const struct fec_reply_data *data) data->corr_bits.stats, ETHTOOL_A_FEC_STAT_PAD)) goto err_cancel; + if (fec_put_hist(skb, &data->fec_stat_hist)) + goto err_cancel; + nla_nest_end(skb, nest); return 0; -- cgit v1.2.3 From f857478d62066ee94831a5e0679fc18c246cd534 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 27 Sep 2025 15:54:13 -0700 Subject: netdevsim: a basic test PSP implementation Provide a PSP implementation for netdevsim. Use psp_dev_encapsulate() and psp_dev_rcv() to do actual encapsulation and decapsulation on skbs, but perform no encryption or decryption. In order to make encryption with a bad key result in a drop on the peer's rx side, we stash our psd's generation number in the first byte of each key before handing to the peer. Signed-off-by: Jakub Kicinski Co-developed-by: Daniel Zahka Signed-off-by: Daniel Zahka Link: https://patch.msgid.link/20250927225420.1443468-2-kuba@kernel.org Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- drivers/net/netdevsim/Makefile | 4 + drivers/net/netdevsim/netdev.c | 43 +++++++- drivers/net/netdevsim/netdevsim.h | 27 +++++ drivers/net/netdevsim/psp.c | 225 ++++++++++++++++++++++++++++++++++++++ net/core/skbuff.c | 1 + 5 files changed, 294 insertions(+), 6 deletions(-) create mode 100644 drivers/net/netdevsim/psp.c (limited to 'drivers/net/netdevsim') diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile index f8de93bc5f5b..14a553e000ec 100644 --- a/drivers/net/netdevsim/Makefile +++ b/drivers/net/netdevsim/Makefile @@ -18,6 +18,10 @@ ifneq ($(CONFIG_PSAMPLE),) netdevsim-objs += psample.o endif +ifneq ($(CONFIG_INET_PSP),) +netdevsim-objs += psp.o +endif + ifneq ($(CONFIG_MACSEC),) netdevsim-objs += macsec.o endif diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 0178219f0db5..ebc3833e95b4 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -103,28 +103,42 @@ static int nsim_napi_rx(struct net_device *tx_dev, struct net_device *rx_dev, static int nsim_forward_skb(struct net_device *tx_dev, struct net_device *rx_dev, struct sk_buff *skb, - struct nsim_rq *rq) + struct nsim_rq *rq, + struct skb_ext *psp_ext) { - return __dev_forward_skb(rx_dev, skb) ?: - nsim_napi_rx(tx_dev, rx_dev, rq, skb); + int ret; + + ret = __dev_forward_skb(rx_dev, skb); + if (ret) + return ret; + + nsim_psp_handle_ext(skb, psp_ext); + + return nsim_napi_rx(tx_dev, rx_dev, rq, skb); } static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); + struct skb_ext *psp_ext = NULL; struct net_device *peer_dev; unsigned int len = skb->len; struct netdevsim *peer_ns; struct netdev_config *cfg; struct nsim_rq *rq; int rxq; + int dr; rcu_read_lock(); if (!nsim_ipsec_tx(ns, skb)) - goto out_drop_free; + goto out_drop_any; peer_ns = rcu_dereference(ns->peer); if (!peer_ns) + goto out_drop_any; + + dr = nsim_do_psp(skb, ns, peer_ns, &psp_ext); + if (dr) goto out_drop_free; peer_dev = peer_ns->netdev; @@ -141,7 +155,8 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) skb_linearize(skb); skb_tx_timestamp(skb); - if (unlikely(nsim_forward_skb(dev, peer_dev, skb, rq) == NET_RX_DROP)) + if (unlikely(nsim_forward_skb(dev, peer_dev, + skb, rq, psp_ext) == NET_RX_DROP)) goto out_drop_cnt; if (!hrtimer_active(&rq->napi_timer)) @@ -151,8 +166,10 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) dev_dstats_tx_add(dev, len); return NETDEV_TX_OK; +out_drop_any: + dr = SKB_DROP_REASON_NOT_SPECIFIED; out_drop_free: - dev_kfree_skb(skb); + kfree_skb_reason(skb, dr); out_drop_cnt: rcu_read_unlock(); dev_dstats_tx_dropped(dev); @@ -1002,6 +1019,7 @@ static void nsim_queue_uninit(struct netdevsim *ns) static int nsim_init_netdevsim(struct netdevsim *ns) { + struct netdevsim *peer; struct mock_phc *phc; int err; @@ -1036,6 +1054,10 @@ static int nsim_init_netdevsim(struct netdevsim *ns) goto err_ipsec_teardown; rtnl_unlock(); + err = nsim_psp_init(ns); + if (err) + goto err_unregister_netdev; + if (IS_ENABLED(CONFIG_DEBUG_NET)) { ns->nb.notifier_call = netdev_debug_event; if (register_netdevice_notifier_dev_net(ns->netdev, &ns->nb, @@ -1045,6 +1067,13 @@ static int nsim_init_netdevsim(struct netdevsim *ns) return 0; +err_unregister_netdev: + rtnl_lock(); + peer = rtnl_dereference(ns->peer); + if (peer) + RCU_INIT_POINTER(peer->peer, NULL); + RCU_INIT_POINTER(ns->peer, NULL); + unregister_netdevice(ns->netdev); err_ipsec_teardown: nsim_ipsec_teardown(ns); nsim_macsec_teardown(ns); @@ -1132,6 +1161,8 @@ void nsim_destroy(struct netdevsim *ns) unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb, &ns->nn); + nsim_psp_uninit(ns); + rtnl_lock(); peer = rtnl_dereference(ns->peer); if (peer) diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index bddd24c1389d..02c1c97b7008 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -108,6 +108,12 @@ struct netdevsim { int rq_reset_mode; + struct { + struct psp_dev *dev; + u32 spi; + u32 assoc_cnt; + } psp; + struct nsim_bus_dev *nsim_bus_dev; struct bpf_prog *bpf_offloaded; @@ -421,6 +427,27 @@ static inline void nsim_macsec_teardown(struct netdevsim *ns) } #endif +#if IS_ENABLED(CONFIG_INET_PSP) +int nsim_psp_init(struct netdevsim *ns); +void nsim_psp_uninit(struct netdevsim *ns); +void nsim_psp_handle_ext(struct sk_buff *skb, struct skb_ext *psp_ext); +enum skb_drop_reason +nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, + struct netdevsim *peer_ns, struct skb_ext **psp_ext); +#else +static inline int nsim_psp_init(struct netdevsim *ns) { return 0; } +static inline void nsim_psp_uninit(struct netdevsim *ns) {} +static inline enum skb_drop_reason +nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, + struct netdevsim *peer_ns, struct skb_ext **psp_ext) +{ + return 0; +} + +static inline void +nsim_psp_handle_ext(struct sk_buff *skb, struct skb_ext *psp_ext) {} +#endif + struct nsim_bus_dev { struct device dev; struct list_head list; diff --git a/drivers/net/netdevsim/psp.c b/drivers/net/netdevsim/psp.c new file mode 100644 index 000000000000..332b5b744f01 --- /dev/null +++ b/drivers/net/netdevsim/psp.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include + +#include "netdevsim.h" + +void nsim_psp_handle_ext(struct sk_buff *skb, struct skb_ext *psp_ext) +{ + if (psp_ext) + __skb_ext_set(skb, SKB_EXT_PSP, psp_ext); +} + +enum skb_drop_reason +nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, + struct netdevsim *peer_ns, struct skb_ext **psp_ext) +{ + enum skb_drop_reason rc = 0; + struct psp_assoc *pas; + struct net *net; + void **ptr; + + rcu_read_lock(); + pas = psp_skb_get_assoc_rcu(skb); + if (!pas) { + rc = SKB_NOT_DROPPED_YET; + goto out_unlock; + } + + if (!skb_transport_header_was_set(skb)) { + rc = SKB_DROP_REASON_PSP_OUTPUT; + goto out_unlock; + } + + ptr = psp_assoc_drv_data(pas); + if (*ptr != ns) { + rc = SKB_DROP_REASON_PSP_OUTPUT; + goto out_unlock; + } + + net = sock_net(skb->sk); + if (!psp_dev_encapsulate(net, skb, pas->tx.spi, pas->version, 0)) { + rc = SKB_DROP_REASON_PSP_OUTPUT; + goto out_unlock; + } + + /* Now pretend we just received this frame */ + if (peer_ns->psp.dev->config.versions & (1 << pas->version)) { + bool strip_icv = false; + u8 generation; + + /* We cheat a bit and put the generation in the key. + * In real life if generation was too old, then decryption would + * fail. Here, we just make it so a bad key causes a bad + * generation too, and psp_sk_rx_policy_check() will fail. + */ + generation = pas->tx.key[0]; + + skb_ext_reset(skb); + skb->mac_len = ETH_HLEN; + if (psp_dev_rcv(skb, peer_ns->psp.dev->id, generation, + strip_icv)) { + rc = SKB_DROP_REASON_PSP_OUTPUT; + goto out_unlock; + } + + *psp_ext = skb->extensions; + refcount_inc(&(*psp_ext)->refcnt); + skb->decrypted = 1; + } else { + struct ipv6hdr *ip6h __maybe_unused; + struct iphdr *iph; + struct udphdr *uh; + __wsum csum; + + /* Do not decapsulate. Receive the skb with the udp and psp + * headers still there as if this is a normal udp packet. + * psp_dev_encapsulate() sets udp checksum to 0, so we need to + * provide a valid checksum here, so the skb isn't dropped. + */ + uh = udp_hdr(skb); + csum = skb_checksum(skb, skb_transport_offset(skb), + ntohs(uh->len), 0); + + switch (skb->protocol) { + case htons(ETH_P_IP): + iph = ip_hdr(skb); + uh->check = udp_v4_check(ntohs(uh->len), iph->saddr, + iph->daddr, csum); + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + ip6h = ipv6_hdr(skb); + uh->check = udp_v6_check(ntohs(uh->len), &ip6h->saddr, + &ip6h->daddr, csum); + break; +#endif + } + + uh->check = uh->check ?: CSUM_MANGLED_0; + skb->ip_summed = CHECKSUM_NONE; + } + +out_unlock: + rcu_read_unlock(); + return rc; +} + +static int +nsim_psp_set_config(struct psp_dev *psd, struct psp_dev_config *conf, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static int +nsim_rx_spi_alloc(struct psp_dev *psd, u32 version, + struct psp_key_parsed *assoc, + struct netlink_ext_ack *extack) +{ + struct netdevsim *ns = psd->drv_priv; + unsigned int new; + int i; + + new = ++ns->psp.spi & PSP_SPI_KEY_ID; + if (psd->generation & 1) + new |= PSP_SPI_KEY_PHASE; + + assoc->spi = cpu_to_be32(new); + assoc->key[0] = psd->generation; + for (i = 1; i < PSP_MAX_KEY; i++) + assoc->key[i] = ns->psp.spi + i; + + return 0; +} + +static int nsim_assoc_add(struct psp_dev *psd, struct psp_assoc *pas, + struct netlink_ext_ack *extack) +{ + struct netdevsim *ns = psd->drv_priv; + void **ptr = psp_assoc_drv_data(pas); + + /* Copy drv_priv from psd to assoc */ + *ptr = psd->drv_priv; + ns->psp.assoc_cnt++; + + return 0; +} + +static int nsim_key_rotate(struct psp_dev *psd, struct netlink_ext_ack *extack) +{ + return 0; +} + +static void nsim_assoc_del(struct psp_dev *psd, struct psp_assoc *pas) +{ + struct netdevsim *ns = psd->drv_priv; + void **ptr = psp_assoc_drv_data(pas); + + *ptr = NULL; + ns->psp.assoc_cnt--; +} + +static struct psp_dev_ops nsim_psp_ops = { + .set_config = nsim_psp_set_config, + .rx_spi_alloc = nsim_rx_spi_alloc, + .tx_key_add = nsim_assoc_add, + .tx_key_del = nsim_assoc_del, + .key_rotate = nsim_key_rotate, +}; + +static struct psp_dev_caps nsim_psp_caps = { + .versions = 1 << PSP_VERSION_HDR0_AES_GCM_128 | + 1 << PSP_VERSION_HDR0_AES_GMAC_128 | + 1 << PSP_VERSION_HDR0_AES_GCM_256 | + 1 << PSP_VERSION_HDR0_AES_GMAC_256, + .assoc_drv_spc = sizeof(void *), +}; + +void nsim_psp_uninit(struct netdevsim *ns) +{ + if (!IS_ERR(ns->psp.dev)) + psp_dev_unregister(ns->psp.dev); + WARN_ON(ns->psp.assoc_cnt); +} + +static ssize_t +nsim_psp_rereg_write(struct file *file, const char __user *data, size_t count, + loff_t *ppos) +{ + struct netdevsim *ns = file->private_data; + int err; + + nsim_psp_uninit(ns); + + ns->psp.dev = psp_dev_create(ns->netdev, &nsim_psp_ops, + &nsim_psp_caps, ns); + err = PTR_ERR_OR_ZERO(ns->psp.dev); + return err ?: count; +} + +static const struct file_operations nsim_psp_rereg_fops = { + .open = simple_open, + .write = nsim_psp_rereg_write, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + +int nsim_psp_init(struct netdevsim *ns) +{ + struct dentry *ddir = ns->nsim_dev_port->ddir; + int err; + + ns->psp.dev = psp_dev_create(ns->netdev, &nsim_psp_ops, + &nsim_psp_caps, ns); + err = PTR_ERR_OR_ZERO(ns->psp.dev); + if (err) + return err; + + debugfs_create_file("psp_rereg", 0200, ddir, ns, &nsim_psp_rereg_fops); + return 0; +} diff --git a/net/core/skbuff.c b/net/core/skbuff.c index daaf6da43cc9..618afd59afff 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -7048,6 +7048,7 @@ void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, skb->active_extensions = 1 << id; return skb_ext_get_ptr(ext, id); } +EXPORT_SYMBOL_NS_GPL(__skb_ext_set, "NETDEV_INTERNAL"); /** * skb_ext_add - allocate space for given extension, COW if needed -- cgit v1.2.3 From 1a8fed52f7be14e45785e8e54d0d0b50fc17dbd8 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 14 Oct 2025 02:17:25 -0700 Subject: netdevsim: set the carrier when the device goes up Bringing a linked netdevsim device down and then up causes communication failure because both interfaces lack carrier. Basically a ifdown/ifup on the interface make the link broken. Commit 3762ec05a9fbda ("netdevsim: add NAPI support") added supported for NAPI, calling netif_carrier_off() in nsim_stop(). This patch re-enables the carrier symmetrically on nsim_open(), in case the device is linked and the peer is up. Signed-off-by: Breno Leitao Fixes: 3762ec05a9fbda ("netdevsim: add NAPI support") Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251014-netdevsim_fix-v2-1-53b40590dae1@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdev.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/net/netdevsim') diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index ebc3833e95b4..fa1d97885caa 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -545,6 +545,7 @@ static void nsim_enable_napi(struct netdevsim *ns) static int nsim_open(struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); + struct netdevsim *peer; int err; netdev_assert_locked(dev); @@ -555,6 +556,12 @@ static int nsim_open(struct net_device *dev) nsim_enable_napi(ns); + peer = rtnl_dereference(ns->peer); + if (peer && netif_running(peer->netdev)) { + netif_carrier_on(dev); + netif_carrier_on(peer->netdev); + } + return 0; } -- cgit v1.2.3