diff options
111 files changed, 1344 insertions, 647 deletions
diff --git a/Documentation/networking/devlink-params-sja1105.txt b/Documentation/networking/devlink-params-sja1105.txt deleted file mode 100644 index 1d71742e270a..000000000000 --- a/Documentation/networking/devlink-params-sja1105.txt +++ /dev/null @@ -1,27 +0,0 @@ -best_effort_vlan_filtering - [DEVICE, DRIVER-SPECIFIC] - Allow plain ETH_P_8021Q headers to be used as DSA tags. - Benefits: - - Can terminate untagged traffic over switch net - devices even when enslaved to a bridge with - vlan_filtering=1. - - Can terminate VLAN-tagged traffic over switch net - devices even when enslaved to a bridge with - vlan_filtering=1, with some constraints (no more than - 7 non-pvid VLANs per user port). - - Can do QoS based on VLAN PCP and VLAN membership - admission control for autonomously forwarded frames - (regardless of whether they can be terminated on the - CPU or not). - Drawbacks: - - User cannot use VLANs in range 1024-3071. If the - switch receives frames with such VIDs, it will - misinterpret them as DSA tags. - - Switch uses Shared VLAN Learning (FDB lookup uses - only DMAC as key). - - When VLANs span cross-chip topologies, the total - number of permitted VLANs may be less than 7 per - port, due to a maximum number of 32 VLAN retagging - rules per switch. - Configuration mode: runtime - Type: bool. diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index c536db2cc0f9..7684ae5c4a4a 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -40,5 +40,6 @@ parameters, info versions, and other features it supports. mv88e6xxx netdevsim nfp + sja1105 qed ti-cpsw-switch diff --git a/Documentation/networking/devlink/sja1105.rst b/Documentation/networking/devlink/sja1105.rst new file mode 100644 index 000000000000..e2679c274085 --- /dev/null +++ b/Documentation/networking/devlink/sja1105.rst @@ -0,0 +1,49 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +sja1105 devlink support +======================= + +This document describes the devlink features implemented +by the ``sja1105`` device driver. + +Parameters +========== + +.. list-table:: Driver-specific parameters implemented + :widths: 5 5 5 85 + + * - Name + - Type + - Mode + - Description + * - ``best_effort_vlan_filtering`` + - Boolean + - runtime + - Allow plain ETH_P_8021Q headers to be used as DSA tags. + + Benefits: + + - Can terminate untagged traffic over switch net + devices even when enslaved to a bridge with + vlan_filtering=1. + - Can terminate VLAN-tagged traffic over switch net + devices even when enslaved to a bridge with + vlan_filtering=1, with some constraints (no more than + 7 non-pvid VLANs per user port). + - Can do QoS based on VLAN PCP and VLAN membership + admission control for autonomously forwarded frames + (regardless of whether they can be terminated on the + CPU or not). + + Drawbacks: + + - User cannot use VLANs in range 1024-3071. If the + switch receives frames with such VIDs, it will + misinterpret them as DSA tags. + - Switch uses Shared VLAN Learning (FDB lookup uses + only DMAC as key). + - When VLANs span cross-chip topologies, the total + number of permitted VLANs may be less than 7 per + port, due to a maximum number of 32 VLAN retagging + rules per switch. diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst index b6bbc17814fb..7395a33baaf9 100644 --- a/Documentation/networking/dsa/sja1105.rst +++ b/Documentation/networking/dsa/sja1105.rst @@ -103,11 +103,11 @@ the switch net devices: +-------------+-----------+--------------+------------+ | | Mode 1 | Mode 2 | Mode 3 | +=============+===========+==============+============+ -| Regular | Yes | No | Yes | +| Regular | Yes | No | Yes | | traffic | | (use master) | | +-------------+-----------+--------------+------------+ | Management | Yes | Yes | Yes | -| traffic | | | | +| traffic | | | | | (BPDU, PTP) | | | | +-------------+-----------+--------------+------------+ @@ -241,6 +241,7 @@ switch. In this case, SJA1105 switch 1 consumes a total of 11 retagging entries, as follows: + - 8 retagging entries for VLANs 1 and 100 installed on its user ports (``sw1p0`` - ``sw1p3``) - 3 retagging entries for VLAN 100 installed on the user ports of SJA1105 @@ -249,6 +250,7 @@ follows: reverse retagging. SJA1105 switch 2 also consumes 11 retagging entries, but organized as follows: + - 7 retagging entries for the bridge VLANs on its user ports (``sw2p0`` - ``sw2p3``). - 4 retagging entries for VLAN 100 installed on the user ports of SJA1105 diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index d42661b91128..82470c36c27a 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1028,11 +1028,11 @@ Request contents: +--------------------------------------------+--------+-----------------------+ | ``ETHTOOL_A_CABLE_TEST_TDR_CFG`` | nested | test configuration | +-+------------------------------------------+--------+-----------------------+ - | | ``ETHTOOL_A_CABLE_STEP_FIRST_DISTANCE `` | u32 | first data distance | + | | ``ETHTOOL_A_CABLE_STEP_FIRST_DISTANCE`` | u32 | first data distance | +-+-+----------------------------------------+--------+-----------------------+ - | | ``ETHTOOL_A_CABLE_STEP_LAST_DISTANCE `` | u32 | last data distance | + | | ``ETHTOOL_A_CABLE_STEP_LAST_DISTANCE`` | u32 | last data distance | +-+-+----------------------------------------+--------+-----------------------+ - | | ``ETHTOOL_A_CABLE_STEP_STEP_DISTANCE `` | u32 | distance of each step | + | | ``ETHTOOL_A_CABLE_STEP_STEP_DISTANCE`` | u32 | distance of each step | +-+-+----------------------------------------+--------+-----------------------+ | | ``ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR`` | u8 | pair to test | +-+-+----------------------------------------+--------+-----------------------+ @@ -1085,11 +1085,11 @@ used to report the amplitude of the reflection for a given pair. +-+-+-----------------------------------------+--------+----------------------+ | | ``ETHTOOL_A_CABLE_NEST_STEP`` | nested | TDR step info | +-+-+-----------------------------------------+--------+----------------------+ - | | | ``ETHTOOL_A_CABLE_STEP_FIRST_DISTANCE ``| u32 | First data distance | + | | | ``ETHTOOL_A_CABLE_STEP_FIRST_DISTANCE`` | u32 | First data distance | +-+-+-----------------------------------------+--------+----------------------+ - | | | ``ETHTOOL_A_CABLE_STEP_LAST_DISTANCE `` | u32 | Last data distance | + | | | ``ETHTOOL_A_CABLE_STEP_LAST_DISTANCE`` | u32 | Last data distance | +-+-+-----------------------------------------+--------+----------------------+ - | | | ``ETHTOOL_A_CABLE_STEP_STEP_DISTANCE `` | u32 | distance of each step| + | | | ``ETHTOOL_A_CABLE_STEP_STEP_DISTANCE`` | u32 | distance of each step| +-+-+-----------------------------------------+--------+----------------------+ | | ``ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE`` | nested | Reflection amplitude | +-+-+-----------------------------------------+--------+----------------------+ diff --git a/Documentation/networking/mac80211-injection.rst b/Documentation/networking/mac80211-injection.rst index be65f886ff1f..63ba6611fdff 100644 --- a/Documentation/networking/mac80211-injection.rst +++ b/Documentation/networking/mac80211-injection.rst @@ -101,6 +101,6 @@ interface), along the following lines::: You can also find a link to a complete inject application here: -http://wireless.kernel.org/en/users/Documentation/packetspammer +https://wireless.wiki.kernel.org/en/users/Documentation/packetspammer Andy Green <andy@warmcat.com> diff --git a/Documentation/networking/regulatory.rst b/Documentation/networking/regulatory.rst index 8701b91e81ee..16782a95b74a 100644 --- a/Documentation/networking/regulatory.rst +++ b/Documentation/networking/regulatory.rst @@ -9,7 +9,7 @@ regulatory infrastructure works. More up to date information can be obtained at the project's web page: -http://wireless.kernel.org/en/developers/Regulatory +https://wireless.wiki.kernel.org/en/developers/Regulatory Keeping regulatory domains in userspace --------------------------------------- @@ -37,7 +37,7 @@ expected regulatory domains will be respected by the kernel. A currently available userspace agent which can accomplish this is CRDA - central regulatory domain agent. Its documented here: -http://wireless.kernel.org/en/developers/Regulatory/CRDA +https://wireless.wiki.kernel.org/en/developers/Regulatory/CRDA Essentially the kernel will send a udev event when it knows it needs a new regulatory domain. A udev rule can be put in place @@ -58,7 +58,7 @@ Who asks for regulatory domains? Users can use iw: -http://wireless.kernel.org/en/users/Documentation/iw +https://wireless.wiki.kernel.org/en/users/Documentation/iw An example:: diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index f26a7a15551a..4c2553672b6f 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -2590,11 +2590,22 @@ int chcr_aead_dma_map(struct device *dev, struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); unsigned int authsize = crypto_aead_authsize(tfm); - int dst_size; + int src_len, dst_len; - dst_size = req->assoclen + req->cryptlen + (op_type ? - 0 : authsize); - if (!req->cryptlen || !dst_size) + /* calculate and handle src and dst sg length separately + * for inplace and out-of place operations + */ + if (req->src == req->dst) { + src_len = req->assoclen + req->cryptlen + (op_type ? + 0 : authsize); + dst_len = src_len; + } else { + src_len = req->assoclen + req->cryptlen; + dst_len = req->assoclen + req->cryptlen + (op_type ? + -authsize : authsize); + } + + if (!req->cryptlen || !src_len || !dst_len) return 0; reqctx->iv_dma = dma_map_single(dev, reqctx->iv, (IV + reqctx->b0_len), DMA_BIDIRECTIONAL); @@ -2606,20 +2617,23 @@ int chcr_aead_dma_map(struct device *dev, reqctx->b0_dma = 0; if (req->src == req->dst) { error = dma_map_sg(dev, req->src, - sg_nents_for_len(req->src, dst_size), + sg_nents_for_len(req->src, src_len), DMA_BIDIRECTIONAL); if (!error) goto err; } else { - error = dma_map_sg(dev, req->src, sg_nents(req->src), + error = dma_map_sg(dev, req->src, + sg_nents_for_len(req->src, src_len), DMA_TO_DEVICE); if (!error) goto err; - error = dma_map_sg(dev, req->dst, sg_nents(req->dst), + error = dma_map_sg(dev, req->dst, + sg_nents_for_len(req->dst, dst_len), DMA_FROM_DEVICE); if (!error) { - dma_unmap_sg(dev, req->src, sg_nents(req->src), - DMA_TO_DEVICE); + dma_unmap_sg(dev, req->src, + sg_nents_for_len(req->src, src_len), + DMA_TO_DEVICE); goto err; } } @@ -2637,24 +2651,37 @@ void chcr_aead_dma_unmap(struct device *dev, struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); unsigned int authsize = crypto_aead_authsize(tfm); - int dst_size; + int src_len, dst_len; - dst_size = req->assoclen + req->cryptlen + (op_type ? - 0 : authsize); - if (!req->cryptlen || !dst_size) + /* calculate and handle src and dst sg length separately + * for inplace and out-of place operations + */ + if (req->src == req->dst) { + src_len = req->assoclen + req->cryptlen + (op_type ? + 0 : authsize); + dst_len = src_len; + } else { + src_len = req->assoclen + req->cryptlen; + dst_len = req->assoclen + req->cryptlen + (op_type ? + -authsize : authsize); + } + + if (!req->cryptlen || !src_len || !dst_len) return; dma_unmap_single(dev, reqctx->iv_dma, (IV + reqctx->b0_len), DMA_BIDIRECTIONAL); if (req->src == req->dst) { dma_unmap_sg(dev, req->src, - sg_nents_for_len(req->src, dst_size), + sg_nents_for_len(req->src, src_len), DMA_BIDIRECTIONAL); } else { - dma_unmap_sg(dev, req->src, sg_nents(req->src), - DMA_TO_DEVICE); - dma_unmap_sg(dev, req->dst, sg_nents(req->dst), - DMA_FROM_DEVICE); + dma_unmap_sg(dev, req->src, + sg_nents_for_len(req->src, src_len), + DMA_TO_DEVICE); + dma_unmap_sg(dev, req->dst, + sg_nents_for_len(req->dst, dst_len), + DMA_FROM_DEVICE); } } @@ -4364,22 +4391,32 @@ static int chcr_unregister_alg(void) for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { switch (driver_algs[i].type & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_SKCIPHER: - if (driver_algs[i].is_registered) + if (driver_algs[i].is_registered && refcount_read( + &driver_algs[i].alg.skcipher.base.cra_refcnt) + == 1) { crypto_unregister_skcipher( &driver_algs[i].alg.skcipher); + driver_algs[i].is_registered = 0; + } break; case CRYPTO_ALG_TYPE_AEAD: - if (driver_algs[i].is_registered) + if (driver_algs[i].is_registered && refcount_read( + &driver_algs[i].alg.aead.base.cra_refcnt) == 1) { crypto_unregister_aead( &driver_algs[i].alg.aead); + driver_algs[i].is_registered = 0; + } break; case CRYPTO_ALG_TYPE_AHASH: - if (driver_algs[i].is_registered) + if (driver_algs[i].is_registered && refcount_read( + &driver_algs[i].alg.hash.halg.base.cra_refcnt) + == 1) { crypto_unregister_ahash( &driver_algs[i].alg.hash); + driver_algs[i].is_registered = 0; + } break; } - driver_algs[i].is_registered = 0; } return 0; } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a25c65d4af71..004919aea5fb 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3687,8 +3687,6 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd case BOND_RELEASE_OLD: case SIOCBONDRELEASE: res = bond_release(bond_dev, slave_dev); - if (!res) - netdev_update_lockdep_key(slave_dev); break; case BOND_SETHWADDR_OLD: case SIOCBONDSETHWADDR: diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 215c10923289..ddb3916d3506 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1398,8 +1398,6 @@ static int bond_option_slaves_set(struct bonding *bond, case '-': slave_dbg(bond->dev, dev, "Releasing interface\n"); ret = bond_release(bond->dev, dev); - if (!ret) - netdev_update_lockdep_key(dev); break; default: diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 36290a8e2a84..5b9d7c60eebc 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2558,13 +2558,16 @@ static int macb_open(struct net_device *dev) err = macb_phylink_connect(bp); if (err) - goto pm_exit; + goto napi_exit; netif_tx_start_all_queues(dev); if (bp->ptp_info) bp->ptp_info->ptp_init(dev); +napi_exit: + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) + napi_disable(&queue->napi); pm_exit: if (err) { pm_runtime_put_sync(&bp->pdev->dev); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 8fb48de5d18c..f150cd454fa4 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -2907,8 +2907,9 @@ static int setup_dpni(struct fsl_mc_device *ls_dev) if (err && err != -EOPNOTSUPP) goto close; - priv->cls_rules = devm_kzalloc(dev, sizeof(struct dpaa2_eth_cls_rule) * - dpaa2_eth_fs_count(priv), GFP_KERNEL); + priv->cls_rules = devm_kcalloc(dev, dpaa2_eth_fs_count(priv), + sizeof(struct dpaa2_eth_cls_rule), + GFP_KERNEL); if (!priv->cls_rules) { err = -ENOMEM; goto close; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 197dc5b2c090..1b4d04e4474b 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -5184,6 +5184,9 @@ static int ibmvnic_remove(struct vio_dev *dev) adapter->state = VNIC_REMOVING; spin_unlock_irqrestore(&adapter->state_lock, flags); + flush_work(&adapter->ibmvnic_reset); + flush_delayed_work(&adapter->ibmvnic_delayed_reset); + rtnl_lock(); unregister_netdevice(netdev); diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index bcd11b4b29df..10b805ba03ee 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -87,6 +87,10 @@ struct iavf_vsi { #define IAVF_HLUT_ARRAY_SIZE ((IAVF_VFQF_HLUT_MAX_INDEX + 1) * 4) #define IAVF_MBPS_DIVISOR 125000 /* divisor to convert to Mbps */ +#define IAVF_VIRTCHNL_VF_RESOURCE_SIZE (sizeof(struct virtchnl_vf_resource) + \ + (IAVF_MAX_VF_VSI * \ + sizeof(struct virtchnl_vsi_resource))) + /* MAX_MSIX_Q_VECTORS of these are allocated, * but we only use one per queue-specific vector. */ @@ -215,6 +219,10 @@ struct iavf_cloud_filter { bool add; /* filter needs to be added */ }; +#define IAVF_RESET_WAIT_MS 10 +#define IAVF_RESET_WAIT_DETECTED_COUNT 500 +#define IAVF_RESET_WAIT_COMPLETE_COUNT 2000 + /* board specific private data structure */ struct iavf_adapter { struct work_struct reset_task; @@ -306,6 +314,14 @@ struct iavf_adapter { bool netdev_registered; bool link_up; enum virtchnl_link_speed link_speed; + /* This is only populated if the VIRTCHNL_VF_CAP_ADV_LINK_SPEED is set + * in vf_res->vf_cap_flags. Use ADV_LINK_SUPPORT macro to determine if + * this field is valid. This field should be used going forward and the + * enum virtchnl_link_speed above should be considered the legacy way of + * storing/communicating link speeds. + */ + u32 link_speed_mbps; + enum virtchnl_ops current_op; #define CLIENT_ALLOWED(_a) ((_a)->vf_res ? \ (_a)->vf_res->vf_cap_flags & \ @@ -322,6 +338,8 @@ struct iavf_adapter { VIRTCHNL_VF_OFFLOAD_RSS_PF))) #define VLAN_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \ VIRTCHNL_VF_OFFLOAD_VLAN) +#define ADV_LINK_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \ + VIRTCHNL_VF_CAP_ADV_LINK_SPEED) struct virtchnl_vf_resource *vf_res; /* incl. all VSIs */ struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */ struct virtchnl_version_info pf_version; diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 2c39d46b6138..181573822942 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -278,35 +278,46 @@ static int iavf_get_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_zero_link_mode(cmd, supported); cmd->base.autoneg = AUTONEG_DISABLE; cmd->base.port = PORT_NONE; - /* Set speed and duplex */ + cmd->base.duplex = DUPLEX_FULL; + + if (ADV_LINK_SUPPORT(adapter)) { + if (adapter->link_speed_mbps && + adapter->link_speed_mbps < U32_MAX) + cmd->base.speed = adapter->link_speed_mbps; + else + cmd->base.speed = SPEED_UNKNOWN; + + return 0; + } + switch (adapter->link_speed) { - case IAVF_LINK_SPEED_40GB: + case VIRTCHNL_LINK_SPEED_40GB: cmd->base.speed = SPEED_40000; break; - case IAVF_LINK_SPEED_25GB: -#ifdef SPEED_25000 + case VIRTCHNL_LINK_SPEED_25GB: cmd->base.speed = SPEED_25000; -#else - netdev_info(netdev, - "Speed is 25G, display not supported by this version of ethtool.\n"); -#endif break; - case IAVF_LINK_SPEED_20GB: + case VIRTCHNL_LINK_SPEED_20GB: cmd->base.speed = SPEED_20000; break; - case IAVF_LINK_SPEED_10GB: + case VIRTCHNL_LINK_SPEED_10GB: cmd->base.speed = SPEED_10000; break; - case IAVF_LINK_SPEED_1GB: + case VIRTCHNL_LINK_SPEED_5GB: + cmd->base.speed = SPEED_5000; + break; + case VIRTCHNL_LINK_SPEED_2_5GB: + cmd->base.speed = SPEED_2500; + break; + case VIRTCHNL_LINK_SPEED_1GB: cmd->base.speed = SPEED_1000; break; - case IAVF_LINK_SPEED_100MB: + case VIRTCHNL_LINK_SPEED_100MB: cmd->base.speed = SPEED_100; break; default: break; } - cmd->base.duplex = DUPLEX_FULL; return 0; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 2050649848ba..fa82768e5eda 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1756,17 +1756,17 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; struct iavf_hw *hw = &adapter->hw; - int err = 0, bufsz; + int err; WARN_ON(adapter->state != __IAVF_INIT_GET_RESOURCES); /* aq msg sent, awaiting reply */ if (!adapter->vf_res) { - bufsz = sizeof(struct virtchnl_vf_resource) + - (IAVF_MAX_VF_VSI * - sizeof(struct virtchnl_vsi_resource)); - adapter->vf_res = kzalloc(bufsz, GFP_KERNEL); - if (!adapter->vf_res) + adapter->vf_res = kzalloc(IAVF_VIRTCHNL_VF_RESOURCE_SIZE, + GFP_KERNEL); + if (!adapter->vf_res) { + err = -ENOMEM; goto err; + } } err = iavf_get_vf_config(adapter); if (err == IAVF_ERR_ADMIN_QUEUE_NO_WORK) { @@ -2036,7 +2036,7 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) iavf_reset_interrupt_capability(adapter); iavf_free_queues(adapter); iavf_free_q_vectors(adapter); - kfree(adapter->vf_res); + memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE); iavf_shutdown_adminq(&adapter->hw); adapter->netdev->flags &= ~IFF_UP; clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); @@ -2046,8 +2046,6 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n"); } -#define IAVF_RESET_WAIT_MS 10 -#define IAVF_RESET_WAIT_COUNT 500 /** * iavf_reset_task - Call-back task to handle hardware reset * @work: pointer to work_struct @@ -2101,20 +2099,20 @@ static void iavf_reset_task(struct work_struct *work) adapter->flags |= IAVF_FLAG_RESET_PENDING; /* poll until we see the reset actually happen */ - for (i = 0; i < IAVF_RESET_WAIT_COUNT; i++) { + for (i = 0; i < IAVF_RESET_WAIT_DETECTED_COUNT; i++) { reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; if (!reg_val) break; usleep_range(5000, 10000); } - if (i == IAVF_RESET_WAIT_COUNT) { + if (i == IAVF_RESET_WAIT_DETECTED_COUNT) { dev_info(&adapter->pdev->dev, "Never saw reset\n"); goto continue_reset; /* act like the reset happened */ } /* wait until the reset is complete and the PF is responding to us */ - for (i = 0; i < IAVF_RESET_WAIT_COUNT; i++) { + for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { /* sleep first to make sure a minimum wait time is met */ msleep(IAVF_RESET_WAIT_MS); @@ -2126,7 +2124,7 @@ static void iavf_reset_task(struct work_struct *work) pci_set_master(adapter->pdev); - if (i == IAVF_RESET_WAIT_COUNT) { + if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n", reg_val); iavf_disable_vf(adapter); @@ -2487,29 +2485,46 @@ static int iavf_validate_tx_bandwidth(struct iavf_adapter *adapter, { int speed = 0, ret = 0; + if (ADV_LINK_SUPPORT(adapter)) { + if (adapter->link_speed_mbps < U32_MAX) { + speed = adapter->link_speed_mbps; + goto validate_bw; + } else { + dev_err(&adapter->pdev->dev, "Unknown link speed\n"); + return -EINVAL; + } + } + switch (adapter->link_speed) { - case IAVF_LINK_SPEED_40GB: - speed = 40000; + case VIRTCHNL_LINK_SPEED_40GB: + speed = SPEED_40000; + break; + case VIRTCHNL_LINK_SPEED_25GB: + speed = SPEED_25000; + break; + case VIRTCHNL_LINK_SPEED_20GB: + speed = SPEED_20000; break; - case IAVF_LINK_SPEED_25GB: - speed = 25000; + case VIRTCHNL_LINK_SPEED_10GB: + speed = SPEED_10000; break; - case IAVF_LINK_SPEED_20GB: - speed = 20000; + case VIRTCHNL_LINK_SPEED_5GB: + speed = SPEED_5000; break; - case IAVF_LINK_SPEED_10GB: - speed = 10000; + case VIRTCHNL_LINK_SPEED_2_5GB: + speed = SPEED_2500; break; - case IAVF_LINK_SPEED_1GB: - speed = 1000; + case VIRTCHNL_LINK_SPEED_1GB: + speed = SPEED_1000; break; - case IAVF_LINK_SPEED_100MB: - speed = 100; + case VIRTCHNL_LINK_SPEED_100MB: + speed = SPEED_100; break; default: break; } +validate_bw: if (max_tx_rate > speed) { dev_err(&adapter->pdev->dev, "Invalid tx rate specified\n"); @@ -3412,7 +3427,7 @@ static int iavf_check_reset_complete(struct iavf_hw *hw) u32 rstat; int i; - for (i = 0; i < 100; i++) { + for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { rstat = rd32(hw, IAVF_VFGEN_RSTAT) & IAVF_VFGEN_RSTAT_VFR_STATE_MASK; if ((rstat == VIRTCHNL_VFR_VFACTIVE) || diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 7a30d5d5ef53..e091bab7e770 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -379,19 +379,19 @@ static inline unsigned int iavf_itr_divisor(struct iavf_q_vector *q_vector) unsigned int divisor; switch (q_vector->adapter->link_speed) { - case IAVF_LINK_SPEED_40GB: + case VIRTCHNL_LINK_SPEED_40GB: divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 1024; break; - case IAVF_LINK_SPEED_25GB: - case IAVF_LINK_SPEED_20GB: + case VIRTCHNL_LINK_SPEED_25GB: + case VIRTCHNL_LINK_SPEED_20GB: divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 512; break; default: - case IAVF_LINK_SPEED_10GB: + case VIRTCHNL_LINK_SPEED_10GB: divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 256; break; - case IAVF_LINK_SPEED_1GB: - case IAVF_LINK_SPEED_100MB: + case VIRTCHNL_LINK_SPEED_1GB: + case VIRTCHNL_LINK_SPEED_100MB: divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 32; break; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index d58374c2c33d..ed08ace4f05a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -139,7 +139,8 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter) VIRTCHNL_VF_OFFLOAD_ENCAP | VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM | VIRTCHNL_VF_OFFLOAD_REQ_QUEUES | - VIRTCHNL_VF_OFFLOAD_ADQ; + VIRTCHNL_VF_OFFLOAD_ADQ | + VIRTCHNL_VF_CAP_ADV_LINK_SPEED; adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES; adapter->aq_required &= ~IAVF_FLAG_AQ_GET_CONFIG; @@ -891,6 +892,8 @@ void iavf_disable_vlan_stripping(struct iavf_adapter *adapter) iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING, NULL, 0); } +#define IAVF_MAX_SPEED_STRLEN 13 + /** * iavf_print_link_message - print link up or down * @adapter: adapter structure @@ -900,37 +903,105 @@ void iavf_disable_vlan_stripping(struct iavf_adapter *adapter) static void iavf_print_link_message(struct iavf_adapter *adapter) { struct net_device *netdev = adapter->netdev; - char *speed = "Unknown "; + int link_speed_mbps; + char *speed; if (!adapter->link_up) { netdev_info(netdev, "NIC Link is Down\n"); return; } + speed = kcalloc(1, IAVF_MAX_SPEED_STRLEN, GFP_KERNEL); + if (!speed) + return; + + if (ADV_LINK_SUPPORT(adapter)) { + link_speed_mbps = adapter->link_speed_mbps; + goto print_link_msg; + } + switch (adapter->link_speed) { - case IAVF_LINK_SPEED_40GB: - speed = "40 G"; + case VIRTCHNL_LINK_SPEED_40GB: + link_speed_mbps = SPEED_40000; + break; + case VIRTCHNL_LINK_SPEED_25GB: + link_speed_mbps = SPEED_25000; + break; + case VIRTCHNL_LINK_SPEED_20GB: + link_speed_mbps = SPEED_20000; break; - case IAVF_LINK_SPEED_25GB: - speed = "25 G"; + case VIRTCHNL_LINK_SPEED_10GB: + link_speed_mbps = SPEED_10000; break; - case IAVF_LINK_SPEED_20GB: - speed = "20 G"; + case VIRTCHNL_LINK_SPEED_5GB: + link_speed_mbps = SPEED_5000; break; - case IAVF_LINK_SPEED_10GB: - speed = "10 G"; + case VIRTCHNL_LINK_SPEED_2_5GB: + link_speed_mbps = SPEED_2500; break; - case IAVF_LINK_SPEED_1GB: - speed = "1000 M"; + case VIRTCHNL_LINK_SPEED_1GB: + link_speed_mbps = SPEED_1000; break; - case IAVF_LINK_SPEED_100MB: - speed = "100 M"; + case VIRTCHNL_LINK_SPEED_100MB: + link_speed_mbps = SPEED_100; break; default: + link_speed_mbps = SPEED_UNKNOWN; break; } - netdev_info(netdev, "NIC Link is Up %sbps Full Duplex\n", speed); +print_link_msg: + if (link_speed_mbps > SPEED_1000) { + if (link_speed_mbps == SPEED_2500) + snprintf(speed, IAVF_MAX_SPEED_STRLEN, "2.5 Gbps"); + else + /* convert to Gbps inline */ + snprintf(speed, IAVF_MAX_SPEED_STRLEN, "%d %s", + link_speed_mbps / 1000, "Gbps"); + } else if (link_speed_mbps == SPEED_UNKNOWN) { + snprintf(speed, IAVF_MAX_SPEED_STRLEN, "%s", "Unknown Mbps"); + } else { + snprintf(speed, IAVF_MAX_SPEED_STRLEN, "%u %s", + link_speed_mbps, "Mbps"); + } + + netdev_info(netdev, "NIC Link is Up Speed is %s Full Duplex\n", speed); + kfree(speed); +} + +/** + * iavf_get_vpe_link_status + * @adapter: adapter structure + * @vpe: virtchnl_pf_event structure + * + * Helper function for determining the link status + **/ +static bool +iavf_get_vpe_link_status(struct iavf_adapter *adapter, + struct virtchnl_pf_event *vpe) +{ + if (ADV_LINK_SUPPORT(adapter)) + return vpe->event_data.link_event_adv.link_status; + else + return vpe->event_data.link_event.link_status; +} + +/** + * iavf_set_adapter_link_speed_from_vpe + * @adapter: adapter structure for which we are setting the link speed + * @vpe: virtchnl_pf_event structure that contains the link speed we are setting + * + * Helper function for setting iavf_adapter link speed + **/ +static void +iavf_set_adapter_link_speed_from_vpe(struct iavf_adapter *adapter, + struct virtchnl_pf_event *vpe) +{ + if (ADV_LINK_SUPPORT(adapter)) + adapter->link_speed_mbps = + vpe->event_data.link_event_adv.link_speed; + else + adapter->link_speed = vpe->event_data.link_event.link_speed; } /** @@ -1160,12 +1231,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, if (v_opcode == VIRTCHNL_OP_EVENT) { struct virtchnl_pf_event *vpe = (struct virtchnl_pf_event *)msg; - bool link_up = vpe->event_data.link_event.link_status; + bool link_up = iavf_get_vpe_link_status(adapter, vpe); switch (vpe->event) { case VIRTCHNL_EVENT_LINK_CHANGE: - adapter->link_speed = - vpe->event_data.link_event.link_speed; + iavf_set_adapter_link_speed_from_vpe(adapter, vpe); /* we've already got the right link status, bail */ if (adapter->link_up == link_up) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 4cc9abd61c43..946925bbcb2d 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -452,11 +452,17 @@ struct mvneta_pcpu_port { u32 cause_rx_tx; }; +enum { + __MVNETA_DOWN, +}; + struct mvneta_port { u8 id; struct mvneta_pcpu_port __percpu *ports; struct mvneta_pcpu_stats __percpu *stats; + unsigned long state; + int pkt_size; void __iomem *base; struct mvneta_rx_queue *rxqs; @@ -2113,6 +2119,9 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame, struct netdev_queue *nq; u32 ret; + if (unlikely(test_bit(__MVNETA_DOWN, &pp->state))) + return -ENETDOWN; + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; @@ -3568,12 +3577,16 @@ static void mvneta_start_dev(struct mvneta_port *pp) phylink_start(pp->phylink); netif_tx_start_all_queues(pp->dev); + + clear_bit(__MVNETA_DOWN, &pp->state); } static void mvneta_stop_dev(struct mvneta_port *pp) { unsigned int cpu; + set_bit(__MVNETA_DOWN, &pp->state); + phylink_stop(pp->phylink); if (!pp->neta_armada3700) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index e94f0c4d74a7..a99fe4b02b9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -283,7 +283,6 @@ int mlx5_devlink_register(struct devlink *devlink, struct device *dev) goto params_reg_err; mlx5_devlink_set_params_init_values(devlink); devlink_params_publish(devlink); - devlink_reload_enable(devlink); return 0; params_reg_err: @@ -293,7 +292,6 @@ params_reg_err: void mlx5_devlink_unregister(struct devlink *devlink) { - devlink_reload_disable(devlink); devlink_params_unregister(devlink, mlx5_devlink_params, ARRAY_SIZE(mlx5_devlink_params)); devlink_unregister(devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index afc19dca1f5f..430025550fad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -328,21 +328,21 @@ mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, case FLOW_ACT_MANGLE_HDR_TYPE_IP6: MLX5_SET(set_action_in, modact, length, 0); - if (offset == offsetof(struct ipv6hdr, saddr)) + if (offset == offsetof(struct ipv6hdr, saddr) + 12) field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0; - else if (offset == offsetof(struct ipv6hdr, saddr) + 4) - field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; else if (offset == offsetof(struct ipv6hdr, saddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, saddr) + 4) field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64; - else if (offset == offsetof(struct ipv6hdr, saddr) + 12) + else if (offset == offsetof(struct ipv6hdr, saddr)) field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96; - else if (offset == offsetof(struct ipv6hdr, daddr)) + else if (offset == offsetof(struct ipv6hdr, daddr) + 12) field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0; - else if (offset == offsetof(struct ipv6hdr, daddr) + 4) - field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; else if (offset == offsetof(struct ipv6hdr, daddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, daddr) + 4) field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64; - else if (offset == offsetof(struct ipv6hdr, daddr) + 12) + else if (offset == offsetof(struct ipv6hdr, daddr)) field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96; else return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index c28cbae42331..2c80205dc939 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -152,6 +152,10 @@ void mlx5e_close_xsk(struct mlx5e_channel *c) mlx5e_close_cq(&c->xskicosq.cq); mlx5e_close_xdpsq(&c->xsksq); mlx5e_close_cq(&c->xsksq.cq); + + memset(&c->xskrq, 0, sizeof(c->xskrq)); + memset(&c->xsksq, 0, sizeof(c->xsksq)); + memset(&c->xskicosq, 0, sizeof(c->xskicosq)); } void mlx5e_activate_xsk(struct mlx5e_channel *c) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 3ef2525e8de9..ec5658bbe3c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1173,7 +1173,8 @@ int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rss_params *rss = &priv->rss_params; int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); - bool hash_changed = false; + bool refresh_tirs = false; + bool refresh_rqt = false; void *in; if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && @@ -1189,36 +1190,38 @@ int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hfunc) { rss->hfunc = hfunc; - hash_changed = true; + refresh_rqt = true; + refresh_tirs = true; } if (indir) { memcpy(rss->indirection_rqt, indir, sizeof(rss->indirection_rqt)); - - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - u32 rqtn = priv->indir_rqt.rqtn; - struct mlx5e_redirect_rqt_param rrp = { - .is_rss = true, - { - .rss = { - .hfunc = rss->hfunc, - .channels = &priv->channels, - }, - }, - }; - - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); - } + refresh_rqt = true; } if (key) { memcpy(rss->toeplitz_hash_key, key, sizeof(rss->toeplitz_hash_key)); - hash_changed = hash_changed || rss->hfunc == ETH_RSS_HASH_TOP; + refresh_tirs = refresh_tirs || rss->hfunc == ETH_RSS_HASH_TOP; + } + + if (refresh_rqt && test_bit(MLX5E_STATE_OPENED, &priv->state)) { + struct mlx5e_redirect_rqt_param rrp = { + .is_rss = true, + { + .rss = { + .hfunc = rss->hfunc, + .channels = &priv->channels, + }, + }, + }; + u32 rqtn = priv->indir_rqt.rqtn; + + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); } - if (hash_changed) + if (refresh_tirs) mlx5e_modify_tirs_hash(priv, in); mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c index 9bda4fe2eafa..5dc335e621c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -162,10 +162,12 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { counter = mlx5_fc_create(esw->dev, false); - if (IS_ERR(counter)) + if (IS_ERR(counter)) { esw_warn(esw->dev, "vport[%d] configure ingress drop rule counter failed\n", vport->vport); + counter = NULL; + } vport->ingress.legacy.drop_counter = counter; } @@ -272,7 +274,7 @@ void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw, esw_acl_ingress_table_destroy(vport); clean_drop_counter: - if (!IS_ERR_OR_NULL(vport->ingress.legacy.drop_counter)) { + if (vport->ingress.legacy.drop_counter) { mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter); vport->ingress.legacy.drop_counter = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index c0cfbab15fe9..b31f769d2df9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -192,15 +192,23 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev) void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) { + bool err_detected = false; + + /* Mark the device as fatal in order to abort FW commands */ + if ((check_fatal_sensors(dev) || force) && + dev->state == MLX5_DEVICE_STATE_UP) { + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + err_detected = true; + } mutex_lock(&dev->intf_state_mutex); - if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) - goto unlock; + if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto unlock;/* a previous error is still being handled */ if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; goto unlock; } - if (check_fatal_sensors(dev) || force) { + if (check_fatal_sensors(dev) || force) { /* protected state setting */ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mlx5_cmd_flush(dev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index df46b1fce3a7..8b658908f044 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -785,6 +785,11 @@ err_disable: static void mlx5_pci_close(struct mlx5_core_dev *dev) { + /* health work might still be active, and it needs pci bar in + * order to know the NIC state. Therefore, drain the health WQ + * before removing the pci bars + */ + mlx5_drain_health_wq(dev); iounmap(dev->iseg); pci_clear_master(dev->pdev); release_bar(dev->pdev); @@ -1194,23 +1199,22 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) if (err) goto err_load; + set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + if (boot) { err = mlx5_devlink_register(priv_to_devlink(dev), dev->device); if (err) goto err_devlink_reg; - } - - if (mlx5_device_registered(dev)) - mlx5_attach_device(dev); - else mlx5_register_device(dev); - - set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + } else { + mlx5_attach_device(dev); + } mutex_unlock(&dev->intf_state_mutex); return 0; err_devlink_reg: + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); mlx5_unload(dev); err_load: if (boot) @@ -1226,10 +1230,15 @@ out: void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) { - if (cleanup) + mutex_lock(&dev->intf_state_mutex); + + if (cleanup) { mlx5_unregister_device(dev); + mlx5_devlink_unregister(priv_to_devlink(dev)); + } else { + mlx5_detach_device(dev); + } - mutex_lock(&dev->intf_state_mutex); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "%s: interface is down, NOP\n", __func__); @@ -1240,9 +1249,6 @@ void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); - if (mlx5_device_registered(dev)) - mlx5_detach_device(dev); - mlx5_unload(dev); if (cleanup) @@ -1275,11 +1281,6 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) priv->dbg_root = debugfs_create_dir(dev_name(dev->device), mlx5_debugfs_root); - if (!priv->dbg_root) { - dev_err(dev->device, "mlx5_core: error, Cannot create debugfs dir, aborting\n"); - goto err_dbg_root; - } - err = mlx5_health_init(dev); if (err) goto err_health_init; @@ -1294,7 +1295,6 @@ err_pagealloc_init: mlx5_health_cleanup(dev); err_health_init: debugfs_remove(dev->priv.dbg_root); -err_dbg_root: mutex_destroy(&priv->pgdir_mutex); mutex_destroy(&priv->alloc_mutex); mutex_destroy(&priv->bfregs.wc_head.lock); @@ -1362,6 +1362,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); pci_save_state(pdev); + devlink_reload_enable(devlink); return 0; err_load_one: @@ -1379,9 +1380,8 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); + devlink_reload_disable(devlink); mlx5_crdump_disable(dev); - mlx5_devlink_unregister(devlink); - mlx5_drain_health_wq(dev); mlx5_unload_one(dev, true); mlx5_pci_close(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index f421013b0b54..2ca79b9bde1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -179,7 +179,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn); - kfree(in); + kvfree(in); if (err) goto err_in; dr_qp->uar = attr->uar; diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h index 23ccc0da2341..f5a910c458ba 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic.h +++ b/drivers/net/ethernet/pensando/ionic/ionic.h @@ -17,7 +17,6 @@ struct ionic_lif; #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF 0x1002 #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF 0x1003 -#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_MGMT 0x1004 #define DEVCMD_TIMEOUT 10 @@ -42,7 +41,6 @@ struct ionic { struct dentry *dentry; struct ionic_dev_bar bars[IONIC_BARS_MAX]; unsigned int num_bars; - bool is_mgmt_nic; struct ionic_identity ident; struct list_head lifs; struct ionic_lif *master_lif; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 60fc191a35e5..2924cde440aa 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -15,7 +15,6 @@ static const struct pci_device_id ionic_id_table[] = { { PCI_VDEVICE(PENSANDO, PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF) }, { PCI_VDEVICE(PENSANDO, PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF) }, - { PCI_VDEVICE(PENSANDO, PCI_DEVICE_ID_PENSANDO_IONIC_ETH_MGMT) }, { 0, } /* end of table */ }; MODULE_DEVICE_TABLE(pci, ionic_id_table); @@ -225,9 +224,6 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, ionic); mutex_init(&ionic->dev_cmd_lock); - ionic->is_mgmt_nic = - ent->device == PCI_DEVICE_ID_PENSANDO_IONIC_ETH_MGMT; - /* Query system for DMA addressing limitation for the device. */ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(IONIC_ADDR_LEN)); if (err) { @@ -252,8 +248,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } pci_set_master(pdev); - if (!ionic->is_mgmt_nic) - pcie_print_link_status(pdev); + pcie_print_link_status(pdev); err = ionic_map_bars(ionic); if (err) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index 273c889faaad..2d590e571133 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -77,10 +77,6 @@ int ionic_devlink_register(struct ionic *ionic) return err; } - /* don't register the mgmt_nic as a port */ - if (ionic->is_mgmt_nic) - return 0; - devlink_port_attrs_set(&ionic->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, 0, false, 0, NULL, 0); err = devlink_port_register(dl, &ionic->dl_port, 0); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 7321a92f8395..9d8c969f21cb 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -99,9 +99,6 @@ static void ionic_link_status_check(struct ionic_lif *lif) if (!test_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state)) return; - if (lif->ionic->is_mgmt_nic) - return; - link_status = le16_to_cpu(lif->info->status.link_status); link_up = link_status == IONIC_PORT_OPER_STATUS_UP; @@ -116,7 +113,7 @@ static void ionic_link_status_check(struct ionic_lif *lif) netif_carrier_on(netdev); } - if (netif_running(lif->netdev)) + if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) ionic_start_queues(lif); } else { if (netif_carrier_ok(netdev)) { @@ -124,7 +121,7 @@ static void ionic_link_status_check(struct ionic_lif *lif) netif_carrier_off(netdev); } - if (netif_running(lif->netdev)) + if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) ionic_stop_queues(lif); } @@ -1193,10 +1190,6 @@ static int ionic_init_nic_features(struct ionic_lif *lif) netdev_features_t features; int err; - /* no netdev features on the management device */ - if (lif->ionic->is_mgmt_nic) - return 0; - /* set up what we expect to support by default */ features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | @@ -2594,12 +2587,6 @@ int ionic_lifs_register(struct ionic *ionic) { int err; - /* the netdev is not registered on the management device, it is - * only used as a vehicle for napi operations on the adminq - */ - if (ionic->is_mgmt_nic) - return 0; - INIT_WORK(&ionic->nb_work, ionic_lif_notify_work); ionic->nb.notifier_call = ionic_lif_notify; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 87a4775ed53a..1492648247d9 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1981,7 +1981,7 @@ MODULE_DEVICE_TABLE(of, am65_cpsw_nuss_of_mtable); static int am65_cpsw_nuss_probe(struct platform_device *pdev) { - struct cpsw_ale_params ale_params; + struct cpsw_ale_params ale_params = { 0 }; const struct of_device_id *of_id; struct device *dev = &pdev->dev; struct am65_cpsw_common *common; diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 8dc6be11b2ff..9ad872bfae3a 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -604,10 +604,44 @@ void cpsw_ale_set_unreg_mcast(struct cpsw_ale *ale, int unreg_mcast_mask, } } +static void cpsw_ale_vlan_set_unreg_mcast(struct cpsw_ale *ale, u32 *ale_entry, + int allmulti) +{ + int unreg_mcast; + + unreg_mcast = + cpsw_ale_get_vlan_unreg_mcast(ale_entry, + ale->vlan_field_bits); + if (allmulti) + unreg_mcast |= ALE_PORT_HOST; + else + unreg_mcast &= ~ALE_PORT_HOST; + cpsw_ale_set_vlan_unreg_mcast(ale_entry, unreg_mcast, + ale->vlan_field_bits); +} + +static void +cpsw_ale_vlan_set_unreg_mcast_idx(struct cpsw_ale *ale, u32 *ale_entry, + int allmulti) +{ + int unreg_mcast; + int idx; + + idx = cpsw_ale_get_vlan_unreg_mcast_idx(ale_entry); + + unreg_mcast = readl(ale->params.ale_regs + ALE_VLAN_MASK_MUX(idx)); + + if (allmulti) + unreg_mcast |= ALE_PORT_HOST; + else + unreg_mcast &= ~ALE_PORT_HOST; + + writel(unreg_mcast, ale->params.ale_regs + ALE_VLAN_MASK_MUX(idx)); +} + void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti, int port) { u32 ale_entry[ALE_ENTRY_WORDS]; - int unreg_mcast = 0; int type, idx; for (idx = 0; idx < ale->params.ale_entries; idx++) { @@ -624,15 +658,12 @@ void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti, int port) if (port != -1 && !(vlan_members & BIT(port))) continue; - unreg_mcast = - cpsw_ale_get_vlan_unreg_mcast(ale_entry, - ale->vlan_field_bits); - if (allmulti) - unreg_mcast |= ALE_PORT_HOST; + if (!ale->params.nu_switch_ale) + cpsw_ale_vlan_set_unreg_mcast(ale, ale_entry, allmulti); else - unreg_mcast &= ~ALE_PORT_HOST; - cpsw_ale_set_vlan_unreg_mcast(ale_entry, unreg_mcast, - ale->vlan_field_bits); + cpsw_ale_vlan_set_unreg_mcast_idx(ale, ale_entry, + allmulti); + cpsw_ale_write(ale, idx, ale_entry); } } diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 60dcaf2a04a9..1ad6085994b1 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -113,6 +113,7 @@ static LIST_HEAD(bpq_devices); * off into a separate class since they always nest. */ static struct lock_class_key bpq_netdev_xmit_lock_key; +static struct lock_class_key bpq_netdev_addr_lock_key; static void bpq_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, @@ -123,6 +124,7 @@ static void bpq_set_lockdep_class_one(struct net_device *dev, static void bpq_set_lockdep_class(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, &bpq_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, bpq_set_lockdep_class_one, NULL); } diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c index 43faa35ae726..d4c2bc7ad24b 100644 --- a/drivers/net/ipa/ipa_data-sc7180.c +++ b/drivers/net/ipa/ipa_data-sc7180.c @@ -106,7 +106,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { [IPA_ENDPOINT_MODEM_LAN_RX] = { .ee_id = GSI_EE_MODEM, .channel_id = 3, - .endpoint_id = 13, + .endpoint_id = 11, .toward_ipa = false, }, [IPA_ENDPOINT_MODEM_AP_TX] = { diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 66649a806dd1..9f50d0d11704 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -32,6 +32,9 @@ /* The amount of RX buffer space consumed by standard skb overhead */ #define IPA_RX_BUFFER_OVERHEAD (PAGE_SIZE - SKB_MAX_ORDER(NET_SKB_PAD, 0)) +/* Where to find the QMAP mux_id for a packet within modem-supplied metadata */ +#define IPA_ENDPOINT_QMAP_METADATA_MASK 0x000000ff /* host byte order */ + #define IPA_ENDPOINT_RESET_AGGR_RETRY_MAX 3 #define IPA_AGGR_TIME_LIMIT_DEFAULT 1000 /* microseconds */ @@ -433,6 +436,24 @@ static void ipa_endpoint_init_cfg(struct ipa_endpoint *endpoint) iowrite32(val, endpoint->ipa->reg_virt + offset); } +/** + * We program QMAP endpoints so each packet received is preceded by a QMAP + * header structure. The QMAP header contains a 1-byte mux_id and 2-byte + * packet size field, and we have the IPA hardware populate both for each + * received packet. The header is configured (in the HDR_EXT register) + * to use big endian format. + * + * The packet size is written into the QMAP header's pkt_len field. That + * location is defined here using the HDR_OFST_PKT_SIZE field. + * + * The mux_id comes from a 4-byte metadata value supplied with each packet + * by the modem. It is *not* a QMAP header, but it does contain the mux_id + * value that we want, in its low-order byte. A bitmask defined in the + * endpoint's METADATA_MASK register defines which byte within the modem + * metadata contains the mux_id. And the OFST_METADATA field programmed + * here indicates where the extracted byte should be placed within the QMAP + * header. + */ static void ipa_endpoint_init_hdr(struct ipa_endpoint *endpoint) { u32 offset = IPA_REG_ENDP_INIT_HDR_N_OFFSET(endpoint->endpoint_id); @@ -441,25 +462,31 @@ static void ipa_endpoint_init_hdr(struct ipa_endpoint *endpoint) if (endpoint->data->qmap) { size_t header_size = sizeof(struct rmnet_map_header); + /* We might supply a checksum header after the QMAP header */ if (endpoint->toward_ipa && endpoint->data->checksum) header_size += sizeof(struct rmnet_map_ul_csum_header); - val |= u32_encode_bits(header_size, HDR_LEN_FMASK); - /* metadata is the 4 byte rmnet_map header itself */ - val |= HDR_OFST_METADATA_VALID_FMASK; - val |= u32_encode_bits(0, HDR_OFST_METADATA_FMASK); - /* HDR_ADDITIONAL_CONST_LEN is 0; (IPA->AP only) */ + + /* Define how to fill fields in a received QMAP header */ if (!endpoint->toward_ipa) { - u32 size_offset = offsetof(struct rmnet_map_header, - pkt_len); + u32 off; /* Field offset within header */ + + /* Where IPA will write the metadata value */ + off = offsetof(struct rmnet_map_header, mux_id); + val |= u32_encode_bits(off, HDR_OFST_METADATA_FMASK); + /* Where IPA will write the length */ + off = offsetof(struct rmnet_map_header, pkt_len); val |= HDR_OFST_PKT_SIZE_VALID_FMASK; - val |= u32_encode_bits(size_offset, - HDR_OFST_PKT_SIZE_FMASK); + val |= u32_encode_bits(off, HDR_OFST_PKT_SIZE_FMASK); } + /* For QMAP TX, metadata offset is 0 (modem assumes this) */ + val |= HDR_OFST_METADATA_VALID_FMASK; + + /* HDR_ADDITIONAL_CONST_LEN is 0; (RX only) */ /* HDR_A5_MUX is 0 */ /* HDR_LEN_INC_DEAGG_HDR is 0 */ - /* HDR_METADATA_REG_VALID is 0; (AP->IPA only) */ + /* HDR_METADATA_REG_VALID is 0 (TX only) */ } iowrite32(val, endpoint->ipa->reg_virt + offset); @@ -472,38 +499,27 @@ static void ipa_endpoint_init_hdr_ext(struct ipa_endpoint *endpoint) u32 val = 0; val |= HDR_ENDIANNESS_FMASK; /* big endian */ - val |= HDR_TOTAL_LEN_OR_PAD_VALID_FMASK; - /* HDR_TOTAL_LEN_OR_PAD is 0 (pad, not total_len) */ + + /* A QMAP header contains a 6 bit pad field at offset 0. The RMNet + * driver assumes this field is meaningful in packets it receives, + * and assumes the header's payload length includes that padding. + * The RMNet driver does *not* pad packets it sends, however, so + * the pad field (although 0) should be ignored. + */ + if (endpoint->data->qmap && !endpoint->toward_ipa) { + val |= HDR_TOTAL_LEN_OR_PAD_VALID_FMASK; + /* HDR_TOTAL_LEN_OR_PAD is 0 (pad, not total_len) */ + val |= HDR_PAYLOAD_LEN_INC_PADDING_FMASK; + /* HDR_TOTAL_LEN_OR_PAD_OFFSET is 0 */ + } + /* HDR_PAYLOAD_LEN_INC_PADDING is 0 */ - /* HDR_TOTAL_LEN_OR_PAD_OFFSET is 0 */ if (!endpoint->toward_ipa) val |= u32_encode_bits(pad_align, HDR_PAD_TO_ALIGNMENT_FMASK); iowrite32(val, endpoint->ipa->reg_virt + offset); } -/** - * Generate a metadata mask value that will select only the mux_id - * field in an rmnet_map header structure. The mux_id is at offset - * 1 byte from the beginning of the structure, but the metadata - * value is treated as a 4-byte unit. So this mask must be computed - * with endianness in mind. Note that ipa_endpoint_init_hdr_metadata_mask() - * will convert this value to the proper byte order. - * - * Marked __always_inline because this is really computing a - * constant value. - */ -static __always_inline __be32 ipa_rmnet_mux_id_metadata_mask(void) -{ - size_t mux_id_offset = offsetof(struct rmnet_map_header, mux_id); - u32 mux_id_mask = 0; - u8 *bytes; - - bytes = (u8 *)&mux_id_mask; - bytes[mux_id_offset] = 0xff; /* mux_id is 1 byte */ - - return cpu_to_be32(mux_id_mask); -} static void ipa_endpoint_init_hdr_metadata_mask(struct ipa_endpoint *endpoint) { @@ -513,8 +529,9 @@ static void ipa_endpoint_init_hdr_metadata_mask(struct ipa_endpoint *endpoint) offset = IPA_REG_ENDP_INIT_HDR_METADATA_MASK_N_OFFSET(endpoint_id); + /* Note that HDR_ENDIANNESS indicates big endian header fields */ if (!endpoint->toward_ipa && endpoint->data->qmap) - val = ipa_rmnet_mux_id_metadata_mask(); + val = cpu_to_be32(IPA_ENDPOINT_QMAP_METADATA_MASK); iowrite32(val, endpoint->ipa->reg_virt + offset); } @@ -693,10 +710,12 @@ static void ipa_endpoint_init_seq(struct ipa_endpoint *endpoint) u32 seq_type = endpoint->seq_type; u32 val = 0; + /* Sequencer type is made up of four nibbles */ val |= u32_encode_bits(seq_type & 0xf, HPS_SEQ_TYPE_FMASK); val |= u32_encode_bits((seq_type >> 4) & 0xf, DPS_SEQ_TYPE_FMASK); - /* HPS_REP_SEQ_TYPE is 0 */ - /* DPS_REP_SEQ_TYPE is 0 */ + /* The second two apply to replicated packets */ + val |= u32_encode_bits((seq_type >> 8) & 0xf, HPS_REP_SEQ_TYPE_FMASK); + val |= u32_encode_bits((seq_type >> 12) & 0xf, DPS_REP_SEQ_TYPE_FMASK); iowrite32(val, endpoint->ipa->reg_virt + offset); } diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h index 3b8106aa277a..0a688d8c1d7c 100644 --- a/drivers/net/ipa/ipa_reg.h +++ b/drivers/net/ipa/ipa_reg.h @@ -455,6 +455,8 @@ enum ipa_mode { * second packet processing pass + no decipher + microcontroller * @IPA_SEQ_DMA_DEC: DMA + cipher/decipher * @IPA_SEQ_DMA_COMP_DECOMP: DMA + compression/decompression + * @IPA_SEQ_PKT_PROCESS_NO_DEC_NO_UCP_DMAP: + * packet processing + no decipher + no uCP + HPS REP DMA parser * @IPA_SEQ_INVALID: invalid sequencer type * * The values defined here are broken into 4-bit nibbles that are written diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 20b53e255f68..e56547bfdac9 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3999,6 +3999,8 @@ static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) return 0; } +static struct lock_class_key macsec_netdev_addr_lock_key; + static int macsec_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -4050,6 +4052,9 @@ static int macsec_newlink(struct net *net, struct net_device *dev, return err; netdev_lockdep_set_classes(dev); + lockdep_set_class_and_subclass(&dev->addr_list_lock, + &macsec_netdev_addr_lock_key, + dev->lower_level); err = netdev_upper_dev_link(real_dev, dev, extack); if (err < 0) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 563aed5b3d9f..6a6cc9f75307 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -860,6 +860,8 @@ static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) * "super class" of normal network devices; split their locks off into a * separate class since they always nest. */ +static struct lock_class_key macvlan_netdev_addr_lock_key; + #define ALWAYS_ON_OFFLOADS \ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \ NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL) @@ -875,6 +877,14 @@ static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) #define MACVLAN_STATE_MASK \ ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) +static void macvlan_set_lockdep_class(struct net_device *dev) +{ + netdev_lockdep_set_classes(dev); + lockdep_set_class_and_subclass(&dev->addr_list_lock, + &macvlan_netdev_addr_lock_key, + dev->lower_level); +} + static int macvlan_init(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); @@ -892,8 +902,7 @@ static int macvlan_init(struct net_device *dev) dev->gso_max_size = lowerdev->gso_max_size; dev->gso_max_segs = lowerdev->gso_max_segs; dev->hard_header_len = lowerdev->hard_header_len; - - netdev_lockdep_set_classes(dev); + macvlan_set_lockdep_class(dev); vlan->pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan->pcpu_stats) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 5bb448ae6c9c..e8085ab6d484 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -857,7 +857,6 @@ static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, u32 nhid, struct netlink_ext_ack *extack) { struct nexthop *old_nh = rtnl_dereference(fdb->nh); - struct nh_group *nhg; struct nexthop *nh; int err = -EINVAL; @@ -876,13 +875,12 @@ static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, nh = NULL; goto err_inval; } - if (!nh->is_fdb_nh) { + if (!nexthop_is_fdb(nh)) { NL_SET_ERR_MSG(extack, "Nexthop is not a fdb nexthop"); goto err_inval; } - nhg = rtnl_dereference(nh->nh_grp); - if (!nh->is_group || !nhg->mpath) { + if (!nexthop_is_multipath(nh)) { NL_SET_ERR_MSG(extack, "Nexthop is not a multipath group"); goto err_inval; } @@ -890,14 +888,14 @@ static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, /* check nexthop group family */ switch (vxlan->default_dst.remote_ip.sa.sa_family) { case AF_INET: - if (!nhg->has_v4) { + if (!nexthop_has_v4(nh)) { err = -EAFNOSUPPORT; NL_SET_ERR_MSG(extack, "Nexthop group family not supported"); goto err_inval; } break; case AF_INET6: - if (nhg->has_v4) { + if (nexthop_has_v4(nh)) { err = -EAFNOSUPPORT; NL_SET_ERR_MSG(extack, "Nexthop group family not supported"); goto err_inval; @@ -4245,10 +4243,8 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], mod_timer(&vxlan->age_timer, jiffies); netdev_adjacent_change_commit(dst->remote_dev, lowerdev, dev); - if (lowerdev && lowerdev != dst->remote_dev) { + if (lowerdev && lowerdev != dst->remote_dev) dst->remote_dev = lowerdev; - netdev_update_lockdep_key(lowerdev); - } vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true); return 0; } diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c index aadf3dec5bf3..2ab34cf74ecc 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_hw.c +++ b/drivers/net/wireless/intersil/hostap/hostap_hw.c @@ -3048,6 +3048,7 @@ static void prism2_clear_set_tim_queue(local_info_t *local) * This is a natural nesting, which needs a split lock type. */ static struct lock_class_key hostap_netdev_xmit_lock_key; +static struct lock_class_key hostap_netdev_addr_lock_key; static void prism2_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, @@ -3059,6 +3060,8 @@ static void prism2_set_lockdep_class_one(struct net_device *dev, static void prism2_set_lockdep_class(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, + &hostap_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, prism2_set_lockdep_class_one, NULL); } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5b364a2e0006..6fc613ed8eae 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1821,8 +1821,6 @@ enum netdev_priv_flags { * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. * - * @addr_list_lock_key: lockdep class annotating - * net_device->addr_list_lock spinlock * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount * @@ -2125,7 +2123,6 @@ struct net_device { #endif struct phy_device *phydev; struct sfp_bus *sfp_bus; - struct lock_class_key addr_list_lock_key; struct lock_class_key *qdisc_tx_busylock; struct lock_class_key *qdisc_running_key; bool proto_down; @@ -2217,10 +2214,13 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, static struct lock_class_key qdisc_tx_busylock_key; \ static struct lock_class_key qdisc_running_key; \ static struct lock_class_key qdisc_xmit_lock_key; \ + static struct lock_class_key dev_addr_list_lock_key; \ unsigned int i; \ \ (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ (dev)->qdisc_running_key = &qdisc_running_key; \ + lockdep_set_class(&(dev)->addr_list_lock, \ + &dev_addr_list_lock_key); \ for (i = 0; i < (dev)->num_tx_queues; i++) \ lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ &qdisc_xmit_lock_key); \ @@ -3253,7 +3253,6 @@ static inline void netif_stop_queue(struct net_device *dev) } void netif_tx_stop_all_queues(struct net_device *dev); -void netdev_update_lockdep_key(struct net_device *dev); static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue) { @@ -4239,6 +4238,11 @@ static inline void netif_addr_lock(struct net_device *dev) spin_lock(&dev->addr_list_lock); } +static inline void netif_addr_lock_nested(struct net_device *dev) +{ + spin_lock_nested(&dev->addr_list_lock, dev->lower_level); +} + static inline void netif_addr_lock_bh(struct net_device *dev) { spin_lock_bh(&dev->addr_list_lock); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index b58ad1a3f695..fc7e8807838d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5075,7 +5075,8 @@ struct cfg80211_cqm_config; * by cfg80211 on change_interface * @mgmt_registrations: list of registrations for management frames * @mgmt_registrations_lock: lock for the list - * @mgmt_registrations_update_wk: update work to defer from atomic context + * @mgmt_registrations_need_update: mgmt registrations were updated, + * need to propagate the update to the driver * @mtx: mutex used to lock data in this struct, may be used by drivers * and some API functions require it held * @beacon_interval: beacon interval used on this device for transmitting @@ -5121,7 +5122,7 @@ struct wireless_dev { struct list_head mgmt_registrations; spinlock_t mgmt_registrations_lock; - struct work_struct mgmt_registrations_update_wk; + u8 mgmt_registrations_need_update:1; struct mutex mtx; diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 69e13c8b6b3a..f2c8311a0433 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -542,28 +542,4 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct flow_block_offload *bo, void (*cleanup)(struct flow_block_cb *block_cb)); -typedef void flow_indr_block_cmd_t(struct net_device *dev, - flow_indr_block_bind_cb_t *cb, void *cb_priv, - enum flow_block_command command); - -int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, - flow_indr_block_bind_cb_t *cb, - void *cb_ident); - -void __flow_indr_block_cb_unregister(struct net_device *dev, - flow_indr_block_bind_cb_t *cb, - void *cb_ident); - -int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, - flow_indr_block_bind_cb_t *cb, void *cb_ident); - -void flow_indr_block_cb_unregister(struct net_device *dev, - flow_indr_block_bind_cb_t *cb, - void *cb_ident); - -void flow_indr_block_call(struct net_device *dev, - struct flow_block_offload *bo, - enum flow_block_command command, - enum tc_setup_type type); - #endif /* _NET_FLOW_OFFLOAD_H */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ad64ba6a057f..92560974ea67 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -185,6 +185,12 @@ static inline spinlock_t *inet_ehash_lockp( int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo); +static inline void inet_hashinfo2_free_mod(struct inet_hashinfo *h) +{ + kfree(h->lhash2); + h->lhash2 = NULL; +} + static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) { kvfree(hashinfo->ehash_locks); diff --git a/include/net/nexthop.h b/include/net/nexthop.h index e4b55b43e907..3a4f9e3b91a5 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -76,6 +76,7 @@ struct nh_group { struct nh_group *spare; /* spare group for removals */ u16 num_nh; bool mpath; + bool fdb_nh; bool has_v4; struct nh_grp_entry nh_entries[]; }; @@ -93,7 +94,6 @@ struct nexthop { u8 protocol; /* app managing this nh */ u8 nh_flags; bool is_group; - bool is_fdb_nh; refcount_t refcnt; struct rcu_head rcu; @@ -136,6 +136,32 @@ static inline bool nexthop_cmp(const struct nexthop *nh1, return nh1 == nh2; } +static inline bool nexthop_is_fdb(const struct nexthop *nh) +{ + if (nh->is_group) { + const struct nh_group *nh_grp; + + nh_grp = rcu_dereference_rtnl(nh->nh_grp); + return nh_grp->fdb_nh; + } else { + const struct nh_info *nhi; + + nhi = rcu_dereference_rtnl(nh->nh_info); + return nhi->fdb_nh; + } +} + +static inline bool nexthop_has_v4(const struct nexthop *nh) +{ + if (nh->is_group) { + struct nh_group *nh_grp; + + nh_grp = rcu_dereference_rtnl(nh->nh_grp); + return nh_grp->has_v4; + } + return false; +} + static inline bool nexthop_is_multipath(const struct nexthop *nh) { if (nh->is_group) { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c65b374a5090..19684813faae 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3761,6 +3761,19 @@ struct xdp_md { __u32 egress_ifindex; /* txq->dev->ifindex */ }; +/* DEVMAP map-value layout + * + * The struct data-layout of map-value is a configuration interface. + * New members can only be added to the end of this structure. + */ +struct bpf_devmap_val { + __u32 ifindex; /* device index */ + union { + int fd; /* prog fd on map write */ + __u32 id; /* prog id on map read */ + } bpf_prog; +}; + enum sk_action { SK_DROP = 0, SK_PASS, diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index dad8c8f8581f..4e6339ab1fce 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -794,7 +794,7 @@ * various triggers. These triggers can be configured through this * command with the %NL80211_ATTR_WOWLAN_TRIGGERS attribute. For * more background information, see - * http://wireless.kernel.org/en/users/Documentation/WoWLAN. + * https://wireless.wiki.kernel.org/en/users/Documentation/WoWLAN. * The @NL80211_CMD_SET_WOWLAN command can also be used as a notification * from the driver reporting the wakeup reason. In this case, the * @NL80211_ATTR_WOWLAN_TRIGGERS attribute will contain the reason diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index fdf7836750a3..4d76f16524cc 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -378,7 +378,7 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs, } list_for_each_entry(pl, progs, node) { - if (prog && pl->prog == prog) + if (prog && pl->prog == prog && prog != replace_prog) /* disallow attaching the same prog twice */ return ERR_PTR(-EINVAL); if (link && pl->link == link) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 854b09beb16b..0cbb72cdaf63 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -60,15 +60,6 @@ struct xdp_dev_bulk_queue { unsigned int count; }; -/* DEVMAP values */ -struct bpf_devmap_val { - u32 ifindex; /* device index */ - union { - int fd; /* prog fd on map write */ - u32 id; /* prog id on map read */ - } bpf_prog; -}; - struct bpf_dtab_netdev { struct net_device *dev; /* must be first member, due to tracepoint */ struct hlist_node index_hlist; @@ -479,6 +470,7 @@ static struct xdp_buff *dev_map_run_prog(struct net_device *dev, struct xdp_txq_info txq = { .dev = dev }; u32 act; + xdp_set_data_meta_invalid(xdp); xdp->txq = &txq; act = bpf_prog_run_xdp(xdp_prog, xdp); @@ -618,7 +610,7 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net, if (!dev->dev) goto err_out; - if (val->bpf_prog.fd >= 0) { + if (val->bpf_prog.fd > 0) { prog = bpf_prog_get_type_dev(val->bpf_prog.fd, BPF_PROG_TYPE_XDP, false); if (IS_ERR(prog)) @@ -652,8 +644,8 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - struct bpf_devmap_val val = { .bpf_prog.fd = -1 }; struct bpf_dtab_netdev *dev, *old_dev; + struct bpf_devmap_val val = {}; u32 i = *(u32 *)key; if (unlikely(map_flags > BPF_EXIST)) @@ -669,7 +661,7 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map, if (!val.ifindex) { dev = NULL; /* can not specify fd if ifindex is 0 */ - if (val.bpf_prog.fd != -1) + if (val.bpf_prog.fd > 0) return -EINVAL; } else { dev = __dev_map_alloc_node(net, dtab, &val, i); @@ -699,8 +691,8 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - struct bpf_devmap_val val = { .bpf_prog.fd = -1 }; struct bpf_dtab_netdev *dev, *old_dev; + struct bpf_devmap_val val = {}; u32 idx = *(u32 *)key; unsigned long flags; int err = -EEXIST; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 9693730833d2..8da159936bab 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3145,6 +3145,7 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog) struct bpf_insn *insns; u32 off, type; u64 imm; + u8 code; int i; insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), @@ -3153,21 +3154,27 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog) return insns; for (i = 0; i < prog->len; i++) { - if (insns[i].code == (BPF_JMP | BPF_TAIL_CALL)) { + code = insns[i].code; + + if (code == (BPF_JMP | BPF_TAIL_CALL)) { insns[i].code = BPF_JMP | BPF_CALL; insns[i].imm = BPF_FUNC_tail_call; /* fall-through */ } - if (insns[i].code == (BPF_JMP | BPF_CALL) || - insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) { - if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) + if (code == (BPF_JMP | BPF_CALL) || + code == (BPF_JMP | BPF_CALL_ARGS)) { + if (code == (BPF_JMP | BPF_CALL_ARGS)) insns[i].code = BPF_JMP | BPF_CALL; if (!bpf_dump_raw_ok()) insns[i].imm = 0; continue; } + if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) { + insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM; + continue; + } - if (insns[i].code != (BPF_LD | BPF_IMM | BPF_DW)) + if (code != (BPF_LD | BPF_IMM | BPF_DW)) continue; imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5c7bbaac81ef..34cde841ab68 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7552,7 +7552,7 @@ static int check_btf_func(struct bpf_verifier_env *env, const struct btf *btf; void __user *urecord; u32 prev_offset = 0; - int ret = 0; + int ret = -ENOMEM; nfuncs = attr->func_info_cnt; if (!nfuncs) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index ea8d0b094f1b..6048f1be26d2 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1643,7 +1643,7 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, if (perf_type_tracepoint) tk = find_trace_kprobe(pevent, group); else - tk = event->tp_event->data; + tk = trace_kprobe_primary_from_call(event->tp_event); if (!tk) return -EINVAL; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 2a8e8e9c1c75..fdd47f99b18f 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1412,7 +1412,7 @@ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, if (perf_type_tracepoint) tu = find_probe_event(pevent, group); else - tu = event->tp_event->data; + tu = trace_uprobe_primary_from_call(event->tp_event); if (!tu) return -EINVAL; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index f00bb57f0f60..c8d6a07e23c5 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -494,6 +494,7 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) * separate class since they always nest. */ static struct lock_class_key vlan_netdev_xmit_lock_key; +static struct lock_class_key vlan_netdev_addr_lock_key; static void vlan_dev_set_lockdep_one(struct net_device *dev, struct netdev_queue *txq, @@ -502,8 +503,11 @@ static void vlan_dev_set_lockdep_one(struct net_device *dev, lockdep_set_class(&txq->_xmit_lock, &vlan_netdev_xmit_lock_key); } -static void vlan_dev_set_lockdep_class(struct net_device *dev) +static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass) { + lockdep_set_class_and_subclass(&dev->addr_list_lock, + &vlan_netdev_addr_lock_key, + subclass); netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL); } @@ -597,7 +601,7 @@ static int vlan_dev_init(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &vlan_type); - vlan_dev_set_lockdep_class(dev); + vlan_dev_set_lockdep_class(dev, dev->lower_level); vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan->vlan_pcpu_stats) diff --git a/net/atm/lec.c b/net/atm/lec.c index ca37f5a71f5e..875fc0bc1780 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1536,10 +1536,8 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv, struct lec_arp_table *to_return; to_return = kzalloc(sizeof(struct lec_arp_table), GFP_ATOMIC); - if (!to_return) { - pr_info("LEC: Arp entry kmalloc failed\n"); + if (!to_return) return NULL; - } ether_addr_copy(to_return->mac_addr, mac_addr); INIT_HLIST_NODE(&to_return->next); timer_setup(&to_return->timer, lec_arp_expire_arp, 0); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 0ddd80130ea3..f1f1c86f3419 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -745,6 +745,7 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, * separate class since they always nest. */ static struct lock_class_key batadv_netdev_xmit_lock_key; +static struct lock_class_key batadv_netdev_addr_lock_key; /** * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue @@ -765,6 +766,7 @@ static void batadv_set_lockdep_class_one(struct net_device *dev, */ static void batadv_set_lockdep_class(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL); } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 8ec1362588af..8c7b78f8bc23 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -105,6 +105,13 @@ out: return NETDEV_TX_OK; } +static struct lock_class_key bridge_netdev_addr_lock_key; + +static void br_set_lockdep_class(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key); +} + static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -143,6 +150,7 @@ static int br_dev_init(struct net_device *dev) br_fdb_hash_fini(br); } + br_set_lockdep_class(dev); return err; } diff --git a/net/core/dev.c b/net/core/dev.c index 061496a1f640..6bc2388141f6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -439,6 +439,7 @@ static const char *const netdev_lock_name[] = { "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; +static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; static inline unsigned short netdev_lock_pos(unsigned short dev_type) { @@ -460,11 +461,25 @@ static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], netdev_lock_name[i]); } + +static inline void netdev_set_addr_lockdep_class(struct net_device *dev) +{ + int i; + + i = netdev_lock_pos(dev->type); + lockdep_set_class_and_name(&dev->addr_list_lock, + &netdev_addr_lock_key[i], + netdev_lock_name[i]); +} #else static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, unsigned short dev_type) { } + +static inline void netdev_set_addr_lockdep_class(struct net_device *dev) +{ +} #endif /******************************************************************************* @@ -9373,15 +9388,6 @@ void netif_tx_stop_all_queues(struct net_device *dev) } EXPORT_SYMBOL(netif_tx_stop_all_queues); -void netdev_update_lockdep_key(struct net_device *dev) -{ - lockdep_unregister_key(&dev->addr_list_lock_key); - lockdep_register_key(&dev->addr_list_lock_key); - - lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); -} -EXPORT_SYMBOL(netdev_update_lockdep_key); - /** * register_netdevice - register a network device * @dev: device to register @@ -9420,7 +9426,7 @@ int register_netdevice(struct net_device *dev) return ret; spin_lock_init(&dev->addr_list_lock); - lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); + netdev_set_addr_lockdep_class(dev); ret = dev_get_valid_name(net, dev, dev->name); if (ret < 0) @@ -9939,8 +9945,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); - lockdep_register_key(&dev->addr_list_lock_key); - dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; dev->upper_level = 1; @@ -10028,8 +10032,6 @@ void free_netdev(struct net_device *dev) free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL; - lockdep_unregister_key(&dev->addr_list_lock_key); - /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { netdev_freemem(dev); diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 2f949b5a1eb9..6393ba930097 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock(to); + netif_addr_lock_nested(to); err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock(to); + netif_addr_lock_nested(to); err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -691,7 +691,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from) return; netif_addr_lock_bh(from); - netif_addr_lock(to); + netif_addr_lock_nested(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); @@ -858,7 +858,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock(to); + netif_addr_lock_nested(to); err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -888,7 +888,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock(to); + netif_addr_lock_nested(to); err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -912,7 +912,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) return; netif_addr_lock_bh(from); - netif_addr_lock(to); + netif_addr_lock_nested(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); diff --git a/net/core/filter.c b/net/core/filter.c index 209482a4eaa2..73395384afe2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1755,25 +1755,27 @@ BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, u32, offset, void *, to, u32, len, u32, start_header) { u8 *end = skb_tail_pointer(skb); - u8 *net = skb_network_header(skb); - u8 *mac = skb_mac_header(skb); - u8 *ptr; + u8 *start, *ptr; - if (unlikely(offset > 0xffff || len > (end - mac))) + if (unlikely(offset > 0xffff)) goto err_clear; switch (start_header) { case BPF_HDR_START_MAC: - ptr = mac + offset; + if (unlikely(!skb_mac_header_was_set(skb))) + goto err_clear; + start = skb_mac_header(skb); break; case BPF_HDR_START_NET: - ptr = net + offset; + start = skb_network_header(skb); break; default: goto err_clear; } - if (likely(ptr >= mac && ptr + len <= end)) { + ptr = start + offset; + + if (likely(ptr + len <= end)) { memcpy(to, ptr, len); return 0; } @@ -4340,8 +4342,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, } break; case SO_BINDTODEVICE: - ret = -ENOPROTOOPT; -#ifdef CONFIG_NETDEVICES optlen = min_t(long, optlen, IFNAMSIZ - 1); strncpy(devname, optval, optlen); devname[optlen] = 0; @@ -4360,7 +4360,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, dev_put(dev); } ret = sock_bindtoindex(sk, ifindex, false); -#endif break; default: ret = -EINVAL; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2269199c5891..9aedc15736ad 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2462,7 +2462,6 @@ static int do_set_master(struct net_device *dev, int ifindex, err = ops->ndo_del_slave(upper_dev, dev); if (err) return err; - netdev_update_lockdep_key(dev); } else { return -EOPNOTSUPP; } diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 00a26cf2cfe9..4059f94e9bb5 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -424,10 +424,7 @@ static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next) return 0; } -static bool sock_map_redirect_allowed(const struct sock *sk) -{ - return sk->sk_state != TCP_LISTEN; -} +static bool sock_map_redirect_allowed(const struct sock *sk); static int sock_map_update_common(struct bpf_map *map, u32 idx, struct sock *sk, u64 flags) @@ -508,6 +505,11 @@ static bool sk_is_udp(const struct sock *sk) sk->sk_protocol == IPPROTO_UDP; } +static bool sock_map_redirect_allowed(const struct sock *sk) +{ + return sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN; +} + static bool sock_map_sk_is_suitable(const struct sock *sk) { return sk_is_tcp(sk) || sk_is_udp(sk); @@ -989,11 +991,15 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) err = -EINVAL; goto free_htab; } + err = bpf_map_charge_init(&htab->map.memory, cost); + if (err) + goto free_htab; htab->buckets = bpf_map_area_alloc(htab->buckets_num * sizeof(struct bpf_htab_bucket), htab->map.numa_node); if (!htab->buckets) { + bpf_map_charge_finish(&htab->map.memory); err = -ENOMEM; goto free_htab; } @@ -1013,6 +1019,7 @@ static void sock_hash_free(struct bpf_map *map) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct bpf_htab_bucket *bucket; + struct hlist_head unlink_list; struct bpf_htab_elem *elem; struct hlist_node *node; int i; @@ -1024,13 +1031,32 @@ static void sock_hash_free(struct bpf_map *map) synchronize_rcu(); for (i = 0; i < htab->buckets_num; i++) { bucket = sock_hash_select_bucket(htab, i); - hlist_for_each_entry_safe(elem, node, &bucket->head, node) { - hlist_del_rcu(&elem->node); + + /* We are racing with sock_hash_delete_from_link to + * enter the spin-lock critical section. Every socket on + * the list is still linked to sockhash. Since link + * exists, psock exists and holds a ref to socket. That + * lets us to grab a socket ref too. + */ + raw_spin_lock_bh(&bucket->lock); + hlist_for_each_entry(elem, &bucket->head, node) + sock_hold(elem->sk); + hlist_move_list(&bucket->head, &unlink_list); + raw_spin_unlock_bh(&bucket->lock); + + /* Process removed entries out of atomic context to + * block for socket lock before deleting the psock's + * link to sockhash. + */ + hlist_for_each_entry_safe(elem, node, &unlink_list, node) { + hlist_del(&elem->node); lock_sock(elem->sk); rcu_read_lock(); sock_map_unref(elem->sk, elem); rcu_read_unlock(); release_sock(elem->sk); + sock_put(elem->sk); + sock_hash_free_elem(htab, elem); } } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 4af8a98fe784..c13b6609474b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1139,14 +1139,14 @@ static int __init dccp_init(void) inet_hashinfo_init(&dccp_hashinfo); rc = inet_hashinfo2_init_mod(&dccp_hashinfo); if (rc) - goto out_fail; + goto out_free_percpu; rc = -ENOBUFS; dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL); if (!dccp_hashinfo.bind_bucket_cachep) - goto out_free_percpu; + goto out_free_hashinfo2; /* * Size and allocate the main established and bind bucket @@ -1242,6 +1242,8 @@ out_free_dccp_ehash: free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); +out_free_hashinfo2: + inet_hashinfo2_free_mod(&dccp_hashinfo); out_free_percpu: percpu_counter_destroy(&dccp_orphan_count); out_fail: @@ -1265,6 +1267,7 @@ static void __exit dccp_fini(void) kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); dccp_ackvec_exit(); dccp_sysctl_exit(); + inet_hashinfo2_free_mod(&dccp_hashinfo); percpu_counter_destroy(&dccp_orphan_count); } diff --git a/net/dsa/master.c b/net/dsa/master.c index a621367c6e8c..480a61460c23 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -327,6 +327,8 @@ static void dsa_master_reset_mtu(struct net_device *dev) rtnl_unlock(); } +static struct lock_class_key dsa_master_addr_list_lock_key; + int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { int ret; @@ -345,6 +347,8 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) wmb(); dev->dsa_ptr = cpu_dp; + lockdep_set_class(&dev->addr_list_lock, + &dsa_master_addr_list_lock_key); ret = dsa_master_ethtool_setup(dev); if (ret) return ret; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 400a9f89ebdb..cc8049b100b2 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -247,12 +247,11 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nla_put_u32(skb, NHA_ID, nh->id)) goto nla_put_failure; - if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB)) - goto nla_put_failure; - if (nh->is_group) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB)) + goto nla_put_failure; if (nla_put_nh_group(skb, nhg)) goto nla_put_failure; goto out; @@ -264,7 +263,10 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nla_put_flag(skb, NHA_BLACKHOLE)) goto nla_put_failure; goto out; - } else if (!nh->is_fdb_nh) { + } else if (nhi->fdb_nh) { + if (nla_put_flag(skb, NHA_FDB)) + goto nla_put_failure; + } else { const struct net_device *dev; dev = nhi->fib_nhc.nhc_dev; @@ -385,7 +387,7 @@ errout: } static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, - struct netlink_ext_ack *extack) + bool *is_fdb, struct netlink_ext_ack *extack) { if (nh->is_group) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); @@ -398,6 +400,7 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, "Multipath group can not be a nexthop within a group"); return false; } + *is_fdb = nhg->fdb_nh; } else { struct nh_info *nhi = rtnl_dereference(nh->nh_info); @@ -406,6 +409,7 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, "Blackhole nexthop can not be used in a group with more than 1 path"); return false; } + *is_fdb = nhi->fdb_nh; } return true; @@ -416,12 +420,13 @@ static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family, { struct nh_info *nhi; - if (!nh->is_fdb_nh) { + nhi = rtnl_dereference(nh->nh_info); + + if (!nhi->fdb_nh) { NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops"); return -EINVAL; } - nhi = rtnl_dereference(nh->nh_info); if (*nh_family == AF_UNSPEC) { *nh_family = nhi->family; } else if (*nh_family != nhi->family) { @@ -473,19 +478,20 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], nhg = nla_data(tb[NHA_GROUP]); for (i = 0; i < len; ++i) { struct nexthop *nh; + bool is_fdb_nh; nh = nexthop_find_by_id(net, nhg[i].id); if (!nh) { NL_SET_ERR_MSG(extack, "Invalid nexthop id"); return -EINVAL; } - if (!valid_group_nh(nh, len, extack)) + if (!valid_group_nh(nh, len, &is_fdb_nh, extack)) return -EINVAL; if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack)) return -EINVAL; - if (!nhg_fdb && nh->is_fdb_nh) { + if (!nhg_fdb && is_fdb_nh) { NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops"); return -EINVAL; } @@ -553,13 +559,13 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) if (hash > atomic_read(&nhge->upper_bound)) continue; - if (nhge->nh->is_fdb_nh) + nhi = rcu_dereference(nhge->nh->nh_info); + if (nhi->fdb_nh) return nhge->nh; /* nexthops always check if it is good and does * not rely on a sysctl for this behavior */ - nhi = rcu_dereference(nhge->nh->nh_info); switch (nhi->family) { case AF_INET: if (ipv4_good_nh(&nhi->fib_nh)) @@ -624,11 +630,7 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, struct netlink_ext_ack *extack) { struct nh_info *nhi; - - if (nh->is_fdb_nh) { - NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); - return -EINVAL; - } + bool is_fdb_nh; /* fib6_src is unique to a fib6_info and limits the ability to cache * routes in fib6_nh within a nexthop that is potentially shared @@ -645,10 +647,17 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, nhg = rtnl_dereference(nh->nh_grp); if (nhg->has_v4) goto no_v4_nh; + is_fdb_nh = nhg->fdb_nh; } else { nhi = rtnl_dereference(nh->nh_info); if (nhi->family == AF_INET) goto no_v4_nh; + is_fdb_nh = nhi->fdb_nh; + } + + if (is_fdb_nh) { + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); + return -EINVAL; } return 0; @@ -677,12 +686,9 @@ static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new, return fib6_check_nexthop(new, NULL, extack); } -static int nexthop_check_scope(struct nexthop *nh, u8 scope, +static int nexthop_check_scope(struct nh_info *nhi, u8 scope, struct netlink_ext_ack *extack) { - struct nh_info *nhi; - - nhi = rtnl_dereference(nh->nh_info); if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) { NL_SET_ERR_MSG(extack, "Route with host scope can not have a gateway"); @@ -704,29 +710,38 @@ static int nexthop_check_scope(struct nexthop *nh, u8 scope, int fib_check_nexthop(struct nexthop *nh, u8 scope, struct netlink_ext_ack *extack) { + struct nh_info *nhi; int err = 0; - if (nh->is_fdb_nh) { - NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); - err = -EINVAL; - goto out; - } - if (nh->is_group) { struct nh_group *nhg; + nhg = rtnl_dereference(nh->nh_grp); + if (nhg->fdb_nh) { + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); + err = -EINVAL; + goto out; + } + if (scope == RT_SCOPE_HOST) { NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops"); err = -EINVAL; goto out; } - nhg = rtnl_dereference(nh->nh_grp); /* all nexthops in a group have the same scope */ - err = nexthop_check_scope(nhg->nh_entries[0].nh, scope, extack); + nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info); + err = nexthop_check_scope(nhi, scope, extack); } else { - err = nexthop_check_scope(nh, scope, extack); + nhi = rtnl_dereference(nh->nh_info); + if (nhi->fdb_nh) { + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); + err = -EINVAL; + goto out; + } + err = nexthop_check_scope(nhi, scope, extack); } + out: return err; } @@ -787,6 +802,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, newg->has_v4 = nhg->has_v4; newg->mpath = nhg->mpath; + newg->fdb_nh = nhg->fdb_nh; newg->num_nh = nhg->num_nh; /* copy old entries to new except the one getting removed */ @@ -1216,7 +1232,7 @@ static struct nexthop *nexthop_create_group(struct net *net, } if (cfg->nh_fdb) - nh->is_fdb_nh = 1; + nhg->fdb_nh = 1; rcu_assign_pointer(nh->nh_grp, nhg); @@ -1255,7 +1271,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, goto out; } - if (nh->is_fdb_nh) + if (nhi->fdb_nh) goto out; /* sets nh_dev if successful */ @@ -1326,7 +1342,7 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK; if (cfg->nh_fdb) - nh->is_fdb_nh = 1; + nhi->fdb_nh = 1; if (cfg->nh_blackhole) { nhi->reject_nh = 1; @@ -1349,7 +1365,7 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, } /* add the entry to the device based hash */ - if (!nh->is_fdb_nh) + if (!nhi->fdb_nh) nexthop_devhash_add(net, nhi); rcu_assign_pointer(nh->nh_info, nhi); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 27716e4932bc..810cc164f795 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1742,14 +1742,48 @@ int tcp_mmap(struct file *file, struct socket *sock, } EXPORT_SYMBOL(tcp_mmap); +static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma, + struct page **pages, + unsigned long pages_to_map, + unsigned long *insert_addr, + u32 *length_with_pending, + u32 *seq, + struct tcp_zerocopy_receive *zc) +{ + unsigned long pages_remaining = pages_to_map; + int bytes_mapped; + int ret; + + ret = vm_insert_pages(vma, *insert_addr, pages, &pages_remaining); + bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining); + /* Even if vm_insert_pages fails, it may have partially succeeded in + * mapping (some but not all of the pages). + */ + *seq += bytes_mapped; + *insert_addr += bytes_mapped; + if (ret) { + /* But if vm_insert_pages did fail, we have to unroll some state + * we speculatively touched before. + */ + const int bytes_not_mapped = PAGE_SIZE * pages_remaining; + *length_with_pending -= bytes_not_mapped; + zc->recv_skip_hint += bytes_not_mapped; + } + return ret; +} + static int tcp_zerocopy_receive(struct sock *sk, struct tcp_zerocopy_receive *zc) { unsigned long address = (unsigned long)zc->address; u32 length = 0, seq, offset, zap_len; + #define PAGE_BATCH_SIZE 8 + struct page *pages[PAGE_BATCH_SIZE]; const skb_frag_t *frags = NULL; struct vm_area_struct *vma; struct sk_buff *skb = NULL; + unsigned long pg_idx = 0; + unsigned long curr_addr; struct tcp_sock *tp; int inq; int ret; @@ -1762,6 +1796,8 @@ static int tcp_zerocopy_receive(struct sock *sk, sock_rps_record_flow(sk); + tp = tcp_sk(sk); + mmap_read_lock(current->mm); vma = find_vma(current->mm, address); @@ -1771,7 +1807,6 @@ static int tcp_zerocopy_receive(struct sock *sk, } zc->length = min_t(unsigned long, zc->length, vma->vm_end - address); - tp = tcp_sk(sk); seq = tp->copied_seq; inq = tcp_inq(sk); zc->length = min_t(u32, zc->length, inq); @@ -1783,8 +1818,20 @@ static int tcp_zerocopy_receive(struct sock *sk, zc->recv_skip_hint = zc->length; } ret = 0; + curr_addr = address; while (length + PAGE_SIZE <= zc->length) { if (zc->recv_skip_hint < PAGE_SIZE) { + /* If we're here, finish the current batch. */ + if (pg_idx) { + ret = tcp_zerocopy_vm_insert_batch(vma, pages, + pg_idx, + &curr_addr, + &length, + &seq, zc); + if (ret) + goto out; + pg_idx = 0; + } if (skb) { if (zc->recv_skip_hint > 0) break; @@ -1793,7 +1840,6 @@ static int tcp_zerocopy_receive(struct sock *sk, } else { skb = tcp_recv_skb(sk, seq, &offset); } - zc->recv_skip_hint = skb->len - offset; offset -= skb_headlen(skb); if ((int)offset < 0 || skb_has_frag_list(skb)) @@ -1817,14 +1863,24 @@ static int tcp_zerocopy_receive(struct sock *sk, zc->recv_skip_hint -= remaining; break; } - ret = vm_insert_page(vma, address + length, - skb_frag_page(frags)); - if (ret) - break; + pages[pg_idx] = skb_frag_page(frags); + pg_idx++; length += PAGE_SIZE; - seq += PAGE_SIZE; zc->recv_skip_hint -= PAGE_SIZE; frags++; + if (pg_idx == PAGE_BATCH_SIZE) { + ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx, + &curr_addr, &length, + &seq, zc); + if (ret) + goto out; + pg_idx = 0; + } + } + if (pg_idx) { + ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx, + &curr_addr, &length, &seq, + zc); } out: mmap_read_unlock(current->mm); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 629aaa9a1eb9..7aa68f4aae6c 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -64,6 +64,9 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, } while (i != msg_rx->sg.end); if (unlikely(peek)) { + if (msg_rx == list_last_entry(&psock->ingress_msg, + struct sk_msg, list)) + break; msg_rx = list_next_entry(msg_rx, list); continue; } @@ -242,6 +245,9 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock, DEFINE_WAIT_FUNC(wait, woken_wake_function); int ret = 0; + if (sk->sk_shutdown & RCV_SHUTDOWN) + return 1; + if (!timeo) return ret; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5820ef02a587..b2a9d47cf86d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -167,6 +167,8 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT | IEEE80211_STA_DISABLE_HE; + else + ret = 0; vht_chandef = *chandef; goto out; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 21854a61a2b7..a88ab6fb16f2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4694,7 +4694,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, * rate_idx is MCS index, which can be [0-76] * as documented on: * - * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n + * https://wireless.wiki.kernel.org/en/developers/Documentation/ieee80211/802.11n * * Anything else would be some sort of driver or * hardware error. The driver should catch hardware diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 01f1f4cf4902..490b92534afc 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -273,6 +273,8 @@ static void mptcp_parse_option(const struct sk_buff *skb, if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE) break; + ptr++; + mp_opt->rm_addr = 1; mp_opt->rm_id = *ptr++; pr_debug("RM_ADDR: id=%d", mp_opt->rm_id); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 14b253d10ccf..3980fbb6f31e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -374,6 +374,27 @@ void mptcp_subflow_eof(struct sock *sk) sock_hold(sk); } +static void mptcp_check_for_eof(struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + int receivers = 0; + + mptcp_for_each_subflow(msk, subflow) + receivers += !subflow->rx_eof; + + if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) { + /* hopefully temporary hack: propagate shutdown status + * to msk, when all subflows agree on it + */ + sk->sk_shutdown |= RCV_SHUTDOWN; + + smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ + set_bit(MPTCP_DATA_READY, &msk->flags); + sk->sk_data_ready(sk); + } +} + static void mptcp_stop_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -1011,6 +1032,9 @@ fallback: break; } + if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) + mptcp_check_for_eof(msk); + if (sk->sk_shutdown & RCV_SHUTDOWN) break; @@ -1148,27 +1172,6 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) return 0; } -static void mptcp_check_for_eof(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = (struct sock *)msk; - int receivers = 0; - - mptcp_for_each_subflow(msk, subflow) - receivers += !subflow->rx_eof; - - if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) { - /* hopefully temporary hack: propagate shutdown status - * to msk, when all subflows agree on it - */ - sk->sk_shutdown |= RCV_SHUTDOWN; - - smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ - set_bit(MPTCP_DATA_READY, &msk->flags); - sk->sk_data_ready(sk); - } -} - static void mptcp_worker(struct work_struct *work) { struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 493b98a0825c..bf132575040d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -393,6 +393,7 @@ static void mptcp_sock_destruct(struct sock *sk) sock_orphan(sk); } + mptcp_token_destroy(mptcp_sk(sk)->token); inet_sock_destruct(sk); } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 6c19b91bbb86..55ee680e9db1 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -474,8 +474,7 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family, struct netlink_ext_ack *extack, const struct genl_ops *ops, int hdrlen, - enum genl_validate_flags no_strict_flag, - bool parallel) + enum genl_validate_flags no_strict_flag) { enum netlink_validation validate = ops->validate & no_strict_flag ? NL_VALIDATE_LIBERAL : @@ -486,7 +485,7 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family, if (!family->maxattr) return NULL; - if (parallel) { + if (family->parallel_ops) { attrbuf = kmalloc_array(family->maxattr + 1, sizeof(struct nlattr *), GFP_KERNEL); if (!attrbuf) @@ -498,7 +497,7 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family, err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, family->policy, validate, extack); if (err) { - if (parallel) + if (family->parallel_ops) kfree(attrbuf); return ERR_PTR(err); } @@ -506,10 +505,9 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family, } static void genl_family_rcv_msg_attrs_free(const struct genl_family *family, - struct nlattr **attrbuf, - bool parallel) + struct nlattr **attrbuf) { - if (parallel) + if (family->parallel_ops) kfree(attrbuf); } @@ -537,15 +535,14 @@ static int genl_start(struct netlink_callback *cb) attrs = genl_family_rcv_msg_attrs_parse(ctx->family, ctx->nlh, ctx->extack, ops, ctx->hdrlen, - GENL_DONT_VALIDATE_DUMP_STRICT, - true); + GENL_DONT_VALIDATE_DUMP_STRICT); if (IS_ERR(attrs)) return PTR_ERR(attrs); no_attrs: info = genl_dumpit_info_alloc(); if (!info) { - kfree(attrs); + genl_family_rcv_msg_attrs_free(ctx->family, attrs); return -ENOMEM; } info->family = ctx->family; @@ -562,7 +559,7 @@ no_attrs: } if (rc) { - kfree(attrs); + genl_family_rcv_msg_attrs_free(info->family, info->attrs); genl_dumpit_info_free(info); cb->data = NULL; } @@ -591,7 +588,7 @@ static int genl_lock_done(struct netlink_callback *cb) rc = ops->done(cb); genl_unlock(); } - genl_family_rcv_msg_attrs_free(info->family, info->attrs, false); + genl_family_rcv_msg_attrs_free(info->family, info->attrs); genl_dumpit_info_free(info); return rc; } @@ -604,7 +601,7 @@ static int genl_parallel_done(struct netlink_callback *cb) if (ops->done) rc = ops->done(cb); - genl_family_rcv_msg_attrs_free(info->family, info->attrs, true); + genl_family_rcv_msg_attrs_free(info->family, info->attrs); genl_dumpit_info_free(info); return rc; } @@ -671,8 +668,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family, attrbuf = genl_family_rcv_msg_attrs_parse(family, nlh, extack, ops, hdrlen, - GENL_DONT_VALIDATE_STRICT, - family->parallel_ops); + GENL_DONT_VALIDATE_STRICT); if (IS_ERR(attrbuf)) return PTR_ERR(attrbuf); @@ -698,7 +694,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family, family->post_doit(ops, skb, &info); out: - genl_family_rcv_msg_attrs_free(family, attrbuf, family->parallel_ops); + genl_family_rcv_msg_attrs_free(family, attrbuf); return err; } diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index eccc7d366e17..f90ef6934b8f 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -70,6 +70,7 @@ static const struct proto_ops nr_proto_ops; * separate class since they always nest. */ static struct lock_class_key nr_netdev_xmit_lock_key; +static struct lock_class_key nr_netdev_addr_lock_key; static void nr_set_lockdep_one(struct net_device *dev, struct netdev_queue *txq, @@ -80,6 +81,7 @@ static void nr_set_lockdep_one(struct net_device *dev, static void nr_set_lockdep_key(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, &nr_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL); } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index e7a872207b46..ce85656ac9c1 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -71,6 +71,7 @@ ax25_address rose_callsign; * separate class since they always nest. */ static struct lock_class_key rose_netdev_xmit_lock_key; +static struct lock_class_key rose_netdev_addr_lock_key; static void rose_set_lockdep_one(struct net_device *dev, struct netdev_queue *txq, @@ -81,6 +82,7 @@ static void rose_set_lockdep_one(struct net_device *dev, static void rose_set_lockdep_key(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9fe264bec70c..9a2139ebd67d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -810,100 +810,6 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) } /* - * Transition a call to the complete state. - */ -static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call, - enum rxrpc_call_completion compl, - u32 abort_code, - int error) -{ - if (call->state < RXRPC_CALL_COMPLETE) { - call->abort_code = abort_code; - call->error = error; - call->completion = compl, - call->state = RXRPC_CALL_COMPLETE; - trace_rxrpc_call_complete(call); - wake_up(&call->waitq); - return true; - } - return false; -} - -static inline bool rxrpc_set_call_completion(struct rxrpc_call *call, - enum rxrpc_call_completion compl, - u32 abort_code, - int error) -{ - bool ret; - - write_lock_bh(&call->state_lock); - ret = __rxrpc_set_call_completion(call, compl, abort_code, error); - write_unlock_bh(&call->state_lock); - return ret; -} - -/* - * Record that a call successfully completed. - */ -static inline bool __rxrpc_call_completed(struct rxrpc_call *call) -{ - return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); -} - -static inline bool rxrpc_call_completed(struct rxrpc_call *call) -{ - bool ret; - - write_lock_bh(&call->state_lock); - ret = __rxrpc_call_completed(call); - write_unlock_bh(&call->state_lock); - return ret; -} - -/* - * Record that a call is locally aborted. - */ -static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, - rxrpc_seq_t seq, - u32 abort_code, int error) -{ - trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, - abort_code, error); - return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, - abort_code, error); -} - -static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, - rxrpc_seq_t seq, u32 abort_code, int error) -{ - bool ret; - - write_lock_bh(&call->state_lock); - ret = __rxrpc_abort_call(why, call, seq, abort_code, error); - write_unlock_bh(&call->state_lock); - return ret; -} - -/* - * Abort a call due to a protocol error. - */ -static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, - struct sk_buff *skb, - const char *eproto_why, - const char *why, - u32 abort_code) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why); - return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO); -} - -#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \ - __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \ - (abort_why), (abort_code)) - -/* * conn_client.c */ extern unsigned int rxrpc_max_client_connections; @@ -1101,9 +1007,34 @@ extern const struct seq_operations rxrpc_peer_seq_ops; * recvmsg.c */ void rxrpc_notify_socket(struct rxrpc_call *); +bool __rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); +bool rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); +bool __rxrpc_call_completed(struct rxrpc_call *); +bool rxrpc_call_completed(struct rxrpc_call *); +bool __rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int); +bool rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* + * Abort a call due to a protocol error. + */ +static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, + struct sk_buff *skb, + const char *eproto_why, + const char *why, + u32 abort_code) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why); + return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO); +} + +#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \ + __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \ + (abort_why), (abort_code)) + +/* * rtt.c */ void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 2a65ac41055f..aa1c8eee6557 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -248,7 +248,18 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) if (anno_type != RXRPC_TX_ANNO_RETRANS) continue; + /* We need to reset the retransmission state, but we need to do + * so before we drop the lock as a new ACK/NAK may come in and + * confuse things + */ + annotation &= ~RXRPC_TX_ANNO_MASK; + annotation |= RXRPC_TX_ANNO_RESENT; + call->rxtx_annotations[ix] = annotation; + skb = call->rxtx_buffer[ix]; + if (!skb) + continue; + rxrpc_get_skb(skb, rxrpc_skb_got); spin_unlock_bh(&call->lock); @@ -262,24 +273,6 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) rxrpc_free_skb(skb, rxrpc_skb_freed); spin_lock_bh(&call->lock); - - /* We need to clear the retransmit state, but there are two - * things we need to be aware of: A new ACK/NAK might have been - * received and the packet might have been hard-ACK'd (in which - * case it will no longer be in the buffer). - */ - if (after(seq, call->tx_hard_ack)) { - annotation = call->rxtx_annotations[ix]; - anno_type = annotation & RXRPC_TX_ANNO_MASK; - if (anno_type == RXRPC_TX_ANNO_RETRANS || - anno_type == RXRPC_TX_ANNO_NAK) { - annotation &= ~RXRPC_TX_ANNO_MASK; - annotation |= RXRPC_TX_ANNO_UNACK; - } - annotation |= RXRPC_TX_ANNO_RESENT; - call->rxtx_annotations[ix] = annotation; - } - if (after(call->tx_hard_ack, seq)) seq = call->tx_hard_ack; } @@ -320,7 +313,6 @@ recheck_state: if (call->state == RXRPC_CALL_COMPLETE) { del_timer_sync(&call->timer); - rxrpc_notify_socket(call); goto out_put; } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 06fcff2ebbba..447f55ca6886 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -173,10 +173,9 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, else trace_rxrpc_rx_abort(call, serial, conn->abort_code); - if (rxrpc_set_call_completion(call, compl, - conn->abort_code, - conn->error)) - rxrpc_notify_socket(call); + rxrpc_set_call_completion(call, compl, + conn->abort_code, + conn->error); } } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 3be4177baf70..299ac98e9754 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -275,7 +275,6 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, case RXRPC_CALL_SERVER_AWAIT_ACK: __rxrpc_call_completed(call); - rxrpc_notify_socket(call); state = call->state; break; @@ -1013,9 +1012,8 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); - if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, -ECONNABORTED)) - rxrpc_notify_socket(call); + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + abort_code, -ECONNABORTED); } /* @@ -1102,7 +1100,6 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx, spin_lock(&rx->incoming_lock); __rxrpc_disconnect_call(conn, call); spin_unlock(&rx->incoming_lock); - rxrpc_notify_socket(call); } /* diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 112e490ebbcd..a852f46d5234 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -292,9 +292,7 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error, hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) { rxrpc_see_call(call); - if (call->state < RXRPC_CALL_COMPLETE && - rxrpc_set_call_completion(call, compl, 0, -error)) - rxrpc_notify_socket(call); + rxrpc_set_call_completion(call, compl, 0, -error); } } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 8578c39ec839..2989742a4aa1 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -59,6 +59,85 @@ void rxrpc_notify_socket(struct rxrpc_call *call) } /* + * Transition a call to the complete state. + */ +bool __rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + if (call->state < RXRPC_CALL_COMPLETE) { + call->abort_code = abort_code; + call->error = error; + call->completion = compl, + call->state = RXRPC_CALL_COMPLETE; + trace_rxrpc_call_complete(call); + wake_up(&call->waitq); + rxrpc_notify_socket(call); + return true; + } + return false; +} + +bool rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + bool ret = false; + + if (call->state < RXRPC_CALL_COMPLETE) { + write_lock_bh(&call->state_lock); + ret = __rxrpc_set_call_completion(call, compl, abort_code, error); + write_unlock_bh(&call->state_lock); + } + return ret; +} + +/* + * Record that a call successfully completed. + */ +bool __rxrpc_call_completed(struct rxrpc_call *call) +{ + return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); +} + +bool rxrpc_call_completed(struct rxrpc_call *call) +{ + bool ret = false; + + if (call->state < RXRPC_CALL_COMPLETE) { + write_lock_bh(&call->state_lock); + ret = __rxrpc_call_completed(call); + write_unlock_bh(&call->state_lock); + } + return ret; +} + +/* + * Record that a call is locally aborted. + */ +bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) +{ + trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, + abort_code, error); + return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, + abort_code, error); +} + +bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) +{ + bool ret; + + write_lock_bh(&call->state_lock); + ret = __rxrpc_abort_call(why, call, seq, abort_code, error); + write_unlock_bh(&call->state_lock); + return ret; +} + +/* * Pass a call terminating message to userspace. */ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 5e9c43d4a314..1304b8608f56 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -261,10 +261,8 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, case -ENETUNREACH: case -EHOSTUNREACH: case -ECONNREFUSED: - rxrpc_set_call_completion(call, - RXRPC_CALL_LOCAL_ERROR, + rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); - rxrpc_notify_socket(call); goto out; } _debug("need instant resend %d", ret); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b19a0021a0bd..265a61d011df 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -464,6 +464,7 @@ void __netdev_watchdog_up(struct net_device *dev) dev_hold(dev); } } +EXPORT_SYMBOL_GPL(__netdev_watchdog_up); static void dev_watchdog_up(struct net_device *dev) { diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 34ca7b789eba..e366ec9a7e4d 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -316,7 +316,6 @@ static int tipc_enable_bearer(struct net *net, const char *name, b->domain = disc_domain; b->net_plane = bearer_id + 'A'; b->priority = prio; - test_and_set_bit_lock(0, &b->up); refcount_set(&b->refcnt, 1); res = tipc_disc_create(net, b, &b->bcast_addr, &skb); @@ -326,6 +325,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, goto rejected; } + test_and_set_bit_lock(0, &b->up); rcu_assign_pointer(tn->bearer_list[bearer_id], b); if (skb) tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 046e4cb3acea..01b64869a173 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -238,14 +238,14 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen, hdr = buf_msg(skb); curr = msg_blocks(hdr); mlen = msg_size(hdr); - cpy = min_t(int, rem, mss - mlen); + cpy = min_t(size_t, rem, mss - mlen); if (cpy != copy_from_iter(skb->data + mlen, cpy, &m->msg_iter)) return -EFAULT; msg_set_size(hdr, mlen + cpy); skb_put(skb, cpy); rem -= cpy; total += msg_blocks(hdr) - curr; - } while (rem); + } while (rem > 0); return total - accounted; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 26123f4177fd..a94f38333698 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1574,7 +1574,8 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) break; send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE); blocks = tsk->snd_backlog; - if (tsk->oneway++ >= tsk->nagle_start && send <= maxnagle) { + if (tsk->oneway++ >= tsk->nagle_start && maxnagle && + send <= maxnagle) { rc = tipc_msg_append(hdr, m, send, maxnagle, txq); if (unlikely(rc < 0)) break; diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index e9a712fb2392..faf74850a1b5 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -31,7 +31,7 @@ config CFG80211 For more information refer to documentation on the wireless wiki: - http://wireless.kernel.org/en/developers/Documentation/cfg80211 + https://wireless.wiki.kernel.org/en/developers/Documentation/cfg80211 When built as a module it will be called cfg80211. diff --git a/net/wireless/core.c b/net/wireless/core.c index f0226ae9561c..c623d9bf5096 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -497,6 +497,8 @@ use_default_name: INIT_WORK(&rdev->propagate_radar_detect_wk, cfg80211_propagate_radar_detect_wk); INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk); + INIT_WORK(&rdev->mgmt_registrations_update_wk, + cfg80211_mgmt_registrations_update_wk); #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -1047,6 +1049,7 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->sched_scan_stop_wk); flush_work(&rdev->propagate_radar_detect_wk); flush_work(&rdev->propagate_cac_done_wk); + flush_work(&rdev->mgmt_registrations_update_wk); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) @@ -1108,7 +1111,6 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync) rdev->devlist_generation++; cfg80211_mlme_purge_registrations(wdev); - flush_work(&wdev->mgmt_registrations_update_wk); switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: @@ -1253,8 +1255,6 @@ void cfg80211_init_wdev(struct cfg80211_registered_device *rdev, spin_lock_init(&wdev->event_lock); INIT_LIST_HEAD(&wdev->mgmt_registrations); spin_lock_init(&wdev->mgmt_registrations_lock); - INIT_WORK(&wdev->mgmt_registrations_update_wk, - cfg80211_mgmt_registrations_update_wk); INIT_LIST_HEAD(&wdev->pmsr_list); spin_lock_init(&wdev->pmsr_lock); INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk); diff --git a/net/wireless/core.h b/net/wireless/core.h index e0e5b3ee9699..67b0389fca4d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -99,6 +99,8 @@ struct cfg80211_registered_device { struct cfg80211_chan_def cac_done_chandef; struct work_struct propagate_cac_done_wk; + struct work_struct mgmt_registrations_update_wk; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 189334314cba..a6c61a2e6569 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -440,9 +440,15 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev) ASSERT_RTNL(); + spin_lock_bh(&wdev->mgmt_registrations_lock); + if (!wdev->mgmt_registrations_need_update) { + spin_unlock_bh(&wdev->mgmt_registrations_lock); + return; + } + rcu_read_lock(); list_for_each_entry_rcu(tmp, &rdev->wiphy.wdev_list, list) { - list_for_each_entry_rcu(reg, &tmp->mgmt_registrations, list) { + list_for_each_entry(reg, &tmp->mgmt_registrations, list) { u32 mask = BIT(le16_to_cpu(reg->frame_type) >> 4); u32 mcast_mask = 0; @@ -460,16 +466,23 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev) } rcu_read_unlock(); + wdev->mgmt_registrations_need_update = 0; + spin_unlock_bh(&wdev->mgmt_registrations_lock); + rdev_update_mgmt_frame_registrations(rdev, wdev, &upd); } void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk) { - struct wireless_dev *wdev = container_of(wk, struct wireless_dev, - mgmt_registrations_update_wk); + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + + rdev = container_of(wk, struct cfg80211_registered_device, + mgmt_registrations_update_wk); rtnl_lock(); - cfg80211_mgmt_registrations_update(wdev); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) + cfg80211_mgmt_registrations_update(wdev); rtnl_unlock(); } @@ -557,6 +570,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, nreg->multicast_rx = multicast_rx; list_add(&nreg->list, &wdev->mgmt_registrations); } + wdev->mgmt_registrations_need_update = 1; spin_unlock_bh(&wdev->mgmt_registrations_lock); cfg80211_mgmt_registrations_update(wdev); @@ -585,7 +599,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) list_del(®->list); kfree(reg); - schedule_work(&wdev->mgmt_registrations_update_wk); + wdev->mgmt_registrations_need_update = 1; + schedule_work(&rdev->mgmt_registrations_update_wk); } spin_unlock_bh(&wdev->mgmt_registrations_lock); @@ -608,6 +623,7 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) list_del(®->list); kfree(reg); } + wdev->mgmt_registrations_need_update = 1; spin_unlock_bh(&wdev->mgmt_registrations_lock); cfg80211_mgmt_registrations_update(wdev); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index b6c0f08bd80d..3700266229f6 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -352,10 +352,8 @@ static int xsk_generic_xmit(struct sock *sk) len = desc.len; skb = sock_alloc_send_skb(sk, len, 1, &err); - if (unlikely(!skb)) { - err = -EAGAIN; + if (unlikely(!skb)) goto out; - } skb_put(skb, len); addr = desc.addr; diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 57cb14bd8925..92dd745906f4 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -143,8 +143,8 @@ gen_btf() fi pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/') - if [ "${pahole_ver}" -lt "113" ]; then - echo >&2 "BTF: ${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13" + if [ "${pahole_ver}" -lt "116" ]; then + echo >&2 "BTF: ${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.16" return 1 fi diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 77472e28c8fd..6df1850f8353 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -3,7 +3,6 @@ include ../scripts/Makefile.include prefix ?= /usr/local -CC = gcc LEX = flex YACC = bison MAKE = make diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index a3c4bb86c05a..10de76b296ba 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -200,7 +200,7 @@ out: return err; } -static int codegen(const char *template, ...) +static void codegen(const char *template, ...) { const char *src, *end; int skip_tabs = 0, n; @@ -211,7 +211,7 @@ static int codegen(const char *template, ...) n = strlen(template); s = malloc(n + 1); if (!s) - return -ENOMEM; + exit(-1); src = template; dst = s; @@ -224,7 +224,8 @@ static int codegen(const char *template, ...) } else { p_err("unrecognized character at pos %td in template '%s'", src - template - 1, template); - return -EINVAL; + free(s); + exit(-1); } } @@ -234,7 +235,8 @@ static int codegen(const char *template, ...) if (*src != '\t') { p_err("not enough tabs at pos %td in template '%s'", src - template - 1, template); - return -EINVAL; + free(s); + exit(-1); } } /* trim trailing whitespace */ @@ -255,7 +257,6 @@ static int codegen(const char *template, ...) va_end(args); free(s); - return n; } static int do_skeleton(int argc, char **argv) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c65b374a5090..19684813faae 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3761,6 +3761,19 @@ struct xdp_md { __u32 egress_ifindex; /* txq->dev->ifindex */ }; +/* DEVMAP map-value layout + * + * The struct data-layout of map-value is a configuration interface. + * New members can only be added to the end of this structure. + */ +struct bpf_devmap_val { + __u32 ifindex; /* device index */ + union { + int fd; /* prog fd on map write */ + __u32 id; /* prog id on map read */ + } bpf_prog; +}; + enum sk_action { SK_DROP = 0, SK_PASS, diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index de07e559a11d..bbb430317260 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1137,6 +1137,20 @@ static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack) } } +static void btf_dump_drop_mods(struct btf_dump *d, struct id_stack *decl_stack) +{ + const struct btf_type *t; + __u32 id; + + while (decl_stack->cnt) { + id = decl_stack->ids[decl_stack->cnt - 1]; + t = btf__type_by_id(d->btf, id); + if (!btf_is_mod(t)) + return; + decl_stack->cnt--; + } +} + static void btf_dump_emit_name(const struct btf_dump *d, const char *name, bool last_was_ptr) { @@ -1235,14 +1249,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, * a const/volatile modifier for array, so we are * going to silently skip them here. */ - while (decls->cnt) { - next_id = decls->ids[decls->cnt - 1]; - next_t = btf__type_by_id(d->btf, next_id); - if (btf_is_mod(next_t)) - decls->cnt--; - else - break; - } + btf_dump_drop_mods(d, decls); if (decls->cnt == 0) { btf_dump_emit_name(d, fname, last_was_ptr); @@ -1270,7 +1277,15 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, __u16 vlen = btf_vlen(t); int i; - btf_dump_emit_mods(d, decls); + /* + * GCC emits extra volatile qualifier for + * __attribute__((noreturn)) function pointers. Clang + * doesn't do it. It's a GCC quirk for backwards + * compatibility with code written for GCC <2.5. So, + * similarly to extra qualifiers for array, just drop + * them, instead of handling them. + */ + btf_dump_drop_mods(d, decls); if (decls->cnt) { btf_dump_printf(d, " ("); btf_dump_emit_type_chain(d, decls, fname, lvl); diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index e823b35e7371..df59fd4fc95b 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -10,10 +10,9 @@ #include <stdbool.h> #include <stddef.h> -#ifdef __GLIBC__ -#include <bits/wordsize.h> -#else -#include <bits/reg.h> +#include <limits.h> +#ifndef __WORDSIZE +#define __WORDSIZE (__SIZEOF_LONG__ * 8) #endif static inline size_t hash_bits(size_t h, int bits) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7f01be2b88b8..477c679ed945 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3564,10 +3564,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) char *cp, errmsg[STRERR_BUFSIZE]; int err, zero = 0; - /* kernel already zero-initializes .bss map. */ - if (map_type == LIBBPF_MAP_BSS) - return 0; - err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); if (err) { err = -errno; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index 139f8e82c7c6..b549fcfacc0b 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -230,6 +230,13 @@ void test_cgroup_attach_multi(void) "prog_replace", "errno=%d\n", errno)) goto err; + /* replace program with itself */ + attach_opts.replace_prog_fd = allow_prog[6]; + if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1, + BPF_CGROUP_INET_EGRESS, &attach_opts), + "prog_replace", "errno=%d\n", errno)) + goto err; + value = 0; CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0)); CHECK_FAIL(system(PING_CMD)); diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c new file mode 100644 index 000000000000..c1168e4a9036 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2020 Google LLC. + */ + +#include <test_progs.h> +#include <network_helpers.h> + +void test_load_bytes_relative(void) +{ + int server_fd, cgroup_fd, prog_fd, map_fd, client_fd; + int err; + struct bpf_object *obj; + struct bpf_program *prog; + struct bpf_map *test_result; + __u32 duration = 0; + + __u32 map_key = 0; + __u32 map_value = 0; + + cgroup_fd = test__join_cgroup("/load_bytes_relative"); + if (CHECK_FAIL(cgroup_fd < 0)) + return; + + server_fd = start_server(AF_INET, SOCK_STREAM); + if (CHECK_FAIL(server_fd < 0)) + goto close_cgroup_fd; + + err = bpf_prog_load("./load_bytes_relative.o", BPF_PROG_TYPE_CGROUP_SKB, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + goto close_server_fd; + + test_result = bpf_object__find_map_by_name(obj, "test_result"); + if (CHECK_FAIL(!test_result)) + goto close_bpf_object; + + map_fd = bpf_map__fd(test_result); + if (map_fd < 0) + goto close_bpf_object; + + prog = bpf_object__find_program_by_name(obj, "load_bytes_relative"); + if (CHECK_FAIL(!prog)) + goto close_bpf_object; + + err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI); + if (CHECK_FAIL(err)) + goto close_bpf_object; + + client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd); + if (CHECK_FAIL(client_fd < 0)) + goto close_bpf_object; + close(client_fd); + + err = bpf_map_lookup_elem(map_fd, &map_key, &map_value); + if (CHECK_FAIL(err)) + goto close_bpf_object; + + CHECK(map_value != 1, "bpf", "bpf program returned failure"); + +close_bpf_object: + bpf_object__close(obj); + +close_server_fd: + close(server_fd); + +close_cgroup_fd: + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 2bba908dfa63..c1650548433c 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -25,13 +25,23 @@ struct sample { char comm[16]; }; -static volatile int sample_cnt; +static int sample_cnt; + +static void atomic_inc(int *cnt) +{ + __atomic_add_fetch(cnt, 1, __ATOMIC_SEQ_CST); +} + +static int atomic_xchg(int *cnt, int val) +{ + return __atomic_exchange_n(cnt, val, __ATOMIC_SEQ_CST); +} static int process_sample(void *ctx, void *data, size_t len) { struct sample *s = data; - sample_cnt++; + atomic_inc(&sample_cnt); switch (s->seq) { case 0: @@ -76,7 +86,7 @@ void test_ringbuf(void) const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); pthread_t thread; long bg_ret = -1; - int err; + int err, cnt; skel = test_ringbuf__open_and_load(); if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n")) @@ -116,11 +126,15 @@ void test_ringbuf(void) /* -EDONE is used as an indicator that we are done */ if (CHECK(err != -EDONE, "err_done", "done err: %d\n", err)) goto cleanup; + cnt = atomic_xchg(&sample_cnt, 0); + CHECK(cnt != 2, "cnt", "exp %d samples, got %d\n", 2, cnt); /* we expect extra polling to return nothing */ err = ring_buffer__poll(ringbuf, 0); if (CHECK(err != 0, "extra_samples", "poll result: %d\n", err)) goto cleanup; + cnt = atomic_xchg(&sample_cnt, 0); + CHECK(cnt != 0, "cnt", "exp %d samples, got %d\n", 0, cnt); CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n", 0L, skel->bss->dropped); @@ -136,6 +150,8 @@ void test_ringbuf(void) 3L * rec_sz, skel->bss->cons_pos); err = ring_buffer__poll(ringbuf, -1); CHECK(err <= 0, "poll_err", "err %d\n", err); + cnt = atomic_xchg(&sample_cnt, 0); + CHECK(cnt != 2, "cnt", "exp %d samples, got %d\n", 2, cnt); /* start poll in background w/ long timeout */ err = pthread_create(&thread, NULL, poll_thread, (void *)(long)10000); @@ -164,6 +180,8 @@ void test_ringbuf(void) 2L, skel->bss->total); CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n", 1L, skel->bss->discarded); + cnt = atomic_xchg(&sample_cnt, 0); + CHECK(cnt != 0, "cnt", "exp %d samples, got %d\n", 0, cnt); /* clear flags to return to "adaptive" notification mode */ skel->bss->flags = 0; @@ -178,10 +196,20 @@ void test_ringbuf(void) if (CHECK(err != EBUSY, "try_join", "err %d\n", err)) goto cleanup; + /* still no samples, because consumer is behind */ + cnt = atomic_xchg(&sample_cnt, 0); + CHECK(cnt != 0, "cnt", "exp %d samples, got %d\n", 0, cnt); + + skel->bss->dropped = 0; + skel->bss->total = 0; + skel->bss->discarded = 0; + + skel->bss->value = 333; + syscall(__NR_getpgid); /* now force notifications */ skel->bss->flags = BPF_RB_FORCE_WAKEUP; - sample_cnt = 0; - trigger_samples(); + skel->bss->value = 777; + syscall(__NR_getpgid); /* now we should get a pending notification */ usleep(50000); @@ -193,8 +221,8 @@ void test_ringbuf(void) goto cleanup; /* 3 rounds, 2 samples each */ - CHECK(sample_cnt != 6, "wrong_sample_cnt", - "expected to see %d samples, got %d\n", 6, sample_cnt); + cnt = atomic_xchg(&sample_cnt, 0); + CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt); /* BPF side did everything right */ CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n", diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index 9264a2736018..fa153cf67b1b 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -15,6 +15,8 @@ void test_skeleton(void) int duration = 0, err; struct test_skeleton* skel; struct test_skeleton__bss *bss; + struct test_skeleton__data *data; + struct test_skeleton__rodata *rodata; struct test_skeleton__kconfig *kcfg; skel = test_skeleton__open(); @@ -24,13 +26,45 @@ void test_skeleton(void) if (CHECK(skel->kconfig, "skel_kconfig", "kconfig is mmaped()!\n")) goto cleanup; + bss = skel->bss; + data = skel->data; + rodata = skel->rodata; + + /* validate values are pre-initialized correctly */ + CHECK(data->in1 != -1, "in1", "got %d != exp %d\n", data->in1, -1); + CHECK(data->out1 != -1, "out1", "got %d != exp %d\n", data->out1, -1); + CHECK(data->in2 != -1, "in2", "got %lld != exp %lld\n", data->in2, -1LL); + CHECK(data->out2 != -1, "out2", "got %lld != exp %lld\n", data->out2, -1LL); + + CHECK(bss->in3 != 0, "in3", "got %d != exp %d\n", bss->in3, 0); + CHECK(bss->out3 != 0, "out3", "got %d != exp %d\n", bss->out3, 0); + CHECK(bss->in4 != 0, "in4", "got %lld != exp %lld\n", bss->in4, 0LL); + CHECK(bss->out4 != 0, "out4", "got %lld != exp %lld\n", bss->out4, 0LL); + + CHECK(rodata->in6 != 0, "in6", "got %d != exp %d\n", rodata->in6, 0); + CHECK(bss->out6 != 0, "out6", "got %d != exp %d\n", bss->out6, 0); + + /* validate we can pre-setup global variables, even in .bss */ + data->in1 = 10; + data->in2 = 11; + bss->in3 = 12; + bss->in4 = 13; + rodata->in6 = 14; + err = test_skeleton__load(skel); if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) goto cleanup; - bss = skel->bss; - bss->in1 = 1; - bss->in2 = 2; + /* validate pre-setup values are still there */ + CHECK(data->in1 != 10, "in1", "got %d != exp %d\n", data->in1, 10); + CHECK(data->in2 != 11, "in2", "got %lld != exp %lld\n", data->in2, 11LL); + CHECK(bss->in3 != 12, "in3", "got %d != exp %d\n", bss->in3, 12); + CHECK(bss->in4 != 13, "in4", "got %lld != exp %lld\n", bss->in4, 13LL); + CHECK(rodata->in6 != 14, "in6", "got %d != exp %d\n", rodata->in6, 14); + + /* now set new values and attach to get them into outX variables */ + data->in1 = 1; + data->in2 = 2; bss->in3 = 3; bss->in4 = 4; bss->in5.a = 5; @@ -44,14 +78,15 @@ void test_skeleton(void) /* trigger tracepoint */ usleep(1); - CHECK(bss->out1 != 1, "res1", "got %d != exp %d\n", bss->out1, 1); - CHECK(bss->out2 != 2, "res2", "got %lld != exp %d\n", bss->out2, 2); + CHECK(data->out1 != 1, "res1", "got %d != exp %d\n", data->out1, 1); + CHECK(data->out2 != 2, "res2", "got %lld != exp %d\n", data->out2, 2); CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3); CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4); CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n", bss->handler_out5.a, 5); CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n", bss->handler_out5.b, 6); + CHECK(bss->out6 != 14, "res7", "got %d != exp %d\n", bss->out6, 14); CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1", "got %d != exp %d\n", bss->bpf_syscall, kcfg->CONFIG_BPF_SYSCALL); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index d19dbd668f6a..88ef3ec8ac4c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -8,14 +8,6 @@ #define IFINDEX_LO 1 -struct bpf_devmap_val { - u32 ifindex; /* device index */ - union { - int fd; /* prog fd on map write */ - u32 id; /* prog id on map read */ - } bpf_prog; -}; - void test_xdp_with_devmap_helpers(void) { struct test_xdp_with_devmap_helpers *skel; diff --git a/tools/testing/selftests/bpf/progs/load_bytes_relative.c b/tools/testing/selftests/bpf/progs/load_bytes_relative.c new file mode 100644 index 000000000000..dc1d04a7a3d6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/load_bytes_relative.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2020 Google LLC. + */ + +#include <errno.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} test_result SEC(".maps"); + +SEC("cgroup_skb/egress") +int load_bytes_relative(struct __sk_buff *skb) +{ + struct ethhdr eth; + struct iphdr iph; + + __u32 map_key = 0; + __u32 test_passed = 0; + + /* MAC header is not set by the time cgroup_skb/egress triggers */ + if (bpf_skb_load_bytes_relative(skb, 0, ð, sizeof(eth), + BPF_HDR_START_MAC) != -EFAULT) + goto fail; + + if (bpf_skb_load_bytes_relative(skb, 0, &iph, sizeof(iph), + BPF_HDR_START_NET)) + goto fail; + + if (bpf_skb_load_bytes_relative(skb, 0xffff, &iph, sizeof(iph), + BPF_HDR_START_NET) != -EFAULT) + goto fail; + + test_passed = 1; + +fail: + bpf_map_update_elem(&test_result, &map_key, &test_passed, BPF_ANY); + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index de03a90f78ca..77ae86f44db5 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -10,16 +10,26 @@ struct s { long long b; } __attribute__((packed)); -int in1 = 0; -long long in2 = 0; +/* .data section */ +int in1 = -1; +long long in2 = -1; + +/* .bss section */ char in3 = '\0'; long long in4 __attribute__((aligned(64))) = 0; struct s in5 = {}; -long long out2 = 0; +/* .rodata section */ +const volatile int in6 = 0; + +/* .data section */ +int out1 = -1; +long long out2 = -1; + +/* .bss section */ char out3 = 0; long long out4 = 0; -int out1 = 0; +int out6 = 0; extern bool CONFIG_BPF_SYSCALL __kconfig; extern int LINUX_KERNEL_VERSION __kconfig; @@ -36,6 +46,7 @@ int handler(const void *ctx) out3 = in3; out4 = in4; out5 = in5; + out6 = in6; bpf_syscall = CONFIG_BPF_SYSCALL; kern_ver = LINUX_KERNEL_VERSION; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c index e5c0f131c8a7..b360ba2bd441 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c @@ -2,7 +2,7 @@ /* fails to load without expected_attach_type = BPF_XDP_DEVMAP * because of access to egress_ifindex */ -#include "vmlinux.h" +#include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("xdp_dm_log") diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c index deef0e050863..330811260123 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 - -#include "vmlinux.h" +#include <linux/bpf.h> #include <bpf/bpf_helpers.h> struct { diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c index 6dee9e636a95..422e7761254d 100644 --- a/tools/testing/selftests/net/rxtimestamp.c +++ b/tools/testing/selftests/net/rxtimestamp.c @@ -115,6 +115,7 @@ static struct option long_options[] = { { "tcp", no_argument, 0, 't' }, { "udp", no_argument, 0, 'u' }, { "ip", no_argument, 0, 'i' }, + { NULL, 0, NULL, 0 }, }; static int next_port = 19999; diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c index aca3491174a1..f4bb4fef0f39 100644 --- a/tools/testing/selftests/net/timestamping.c +++ b/tools/testing/selftests/net/timestamping.c @@ -313,10 +313,16 @@ int main(int argc, char **argv) int val; socklen_t len; struct timeval next; + size_t if_len; if (argc < 2) usage(0); interface = argv[1]; + if_len = strlen(interface); + if (if_len >= IFNAMSIZ) { + printf("interface name exceeds IFNAMSIZ\n"); + exit(1); + } for (i = 2; i < argc; i++) { if (!strcasecmp(argv[i], "SO_TIMESTAMP")) @@ -350,12 +356,12 @@ int main(int argc, char **argv) bail("socket"); memset(&device, 0, sizeof(device)); - strncpy(device.ifr_name, interface, sizeof(device.ifr_name)); + memcpy(device.ifr_name, interface, if_len + 1); if (ioctl(sock, SIOCGIFADDR, &device) < 0) bail("getting interface IP address"); memset(&hwtstamp, 0, sizeof(hwtstamp)); - strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name)); + memcpy(hwtstamp.ifr_name, interface, if_len + 1); hwtstamp.ifr_data = (void *)&hwconfig; memset(&hwconfig, 0, sizeof(hwconfig)); hwconfig.tx_type = diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index c5282e62df75..b599f1fa99b5 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -213,6 +213,64 @@ TEST_F(tls, send_then_sendfile) EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size); } +static void chunked_sendfile(struct __test_metadata *_metadata, + struct _test_data_tls *self, + uint16_t chunk_size, + uint16_t extra_payload_size) +{ + char buf[TLS_PAYLOAD_MAX_LEN]; + uint16_t test_payload_size; + int size = 0; + int ret; + char filename[] = "/tmp/mytemp.XXXXXX"; + int fd = mkstemp(filename); + off_t offset = 0; + + unlink(filename); + ASSERT_GE(fd, 0); + EXPECT_GE(chunk_size, 1); + test_payload_size = chunk_size + extra_payload_size; + ASSERT_GE(TLS_PAYLOAD_MAX_LEN, test_payload_size); + memset(buf, 1, test_payload_size); + size = write(fd, buf, test_payload_size); + EXPECT_EQ(size, test_payload_size); + fsync(fd); + + while (size > 0) { + ret = sendfile(self->fd, fd, &offset, chunk_size); + EXPECT_GE(ret, 0); + size -= ret; + } + + EXPECT_EQ(recv(self->cfd, buf, test_payload_size, MSG_WAITALL), + test_payload_size); + + close(fd); +} + +TEST_F(tls, multi_chunk_sendfile) +{ + chunked_sendfile(_metadata, self, 4096, 4096); + chunked_sendfile(_metadata, self, 4096, 0); + chunked_sendfile(_metadata, self, 4096, 1); + chunked_sendfile(_metadata, self, 4096, 2048); + chunked_sendfile(_metadata, self, 8192, 2048); + chunked_sendfile(_metadata, self, 4096, 8192); + chunked_sendfile(_metadata, self, 8192, 4096); + chunked_sendfile(_metadata, self, 12288, 1024); + chunked_sendfile(_metadata, self, 12288, 2000); + chunked_sendfile(_metadata, self, 15360, 100); + chunked_sendfile(_metadata, self, 15360, 300); + chunked_sendfile(_metadata, self, 1, 4096); + chunked_sendfile(_metadata, self, 2048, 4096); + chunked_sendfile(_metadata, self, 2048, 8192); + chunked_sendfile(_metadata, self, 4096, 8192); + chunked_sendfile(_metadata, self, 1024, 12288); + chunked_sendfile(_metadata, self, 2000, 12288); + chunked_sendfile(_metadata, self, 100, 15360); + chunked_sendfile(_metadata, self, 300, 15360); +} + TEST_F(tls, recv_max) { unsigned int send_len = TLS_PAYLOAD_MAX_LEN; |