summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/i40e.rst12
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pf.c120
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pf.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c107
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h11
-rw-r--r--drivers/net/ethernet/intel/igb/Makefile2
-rw-r--r--drivers/net/ethernet/intel/igb/igb.h58
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c270
-rw-r--r--drivers/net/ethernet/intel/igb/igb_xsk.c562
-rw-r--r--drivers/net/ethernet/intel/igc/igc.h2
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c79
-rw-r--r--drivers/net/ethernet/intel/igc/igc_xdp.c8
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c23
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c23
18 files changed, 1002 insertions, 287 deletions
diff --git a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
index 4fbaa1a2d674..53d9d5829d69 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
@@ -299,6 +299,18 @@ Use ethtool to view and set link-down-on-close, as follows::
ethtool --show-priv-flags ethX
ethtool --set-priv-flags ethX link-down-on-close [on|off]
+Setting the mdd-auto-reset-vf Private Flag
+------------------------------------------
+
+When the mdd-auto-reset-vf private flag is set to "on", the problematic VF will
+be automatically reset if a malformed descriptor is detected. If the flag is
+set to "off", the problematic VF will be disabled.
+
+Use ethtool to view and set mdd-auto-reset-vf, as follows::
+
+ ethtool --show-priv-flags ethX
+ ethtool --set-priv-flags ethX mdd-auto-reset-vf [on|off]
+
Viewing Link Messages
---------------------
Link messages will not be displayed to the console if the distribution is
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index 98861cc6df7c..b9dd7b719832 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1180,126 +1180,6 @@ s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid)
}
/**
- * fm10k_iov_msg_mac_vlan_pf - Message handler for MAC/VLAN request from VF
- * @hw: Pointer to hardware structure
- * @results: Pointer array to message, results[0] is pointer to message
- * @mbx: Pointer to mailbox information structure
- *
- * This function is a default handler for MAC/VLAN requests from the VF.
- * The assumption is that in this case it is acceptable to just directly
- * hand off the message from the VF to the underlying shared code.
- **/
-s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results,
- struct fm10k_mbx_info *mbx)
-{
- struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx;
- u8 mac[ETH_ALEN];
- u32 *result;
- int err = 0;
- bool set;
- u16 vlan;
- u32 vid;
-
- /* we shouldn't be updating rules on a disabled interface */
- if (!FM10K_VF_FLAG_ENABLED(vf_info))
- err = FM10K_ERR_PARAM;
-
- if (!err && !!results[FM10K_MAC_VLAN_MSG_VLAN]) {
- result = results[FM10K_MAC_VLAN_MSG_VLAN];
-
- /* record VLAN id requested */
- err = fm10k_tlv_attr_get_u32(result, &vid);
- if (err)
- return err;
-
- set = !(vid & FM10K_VLAN_CLEAR);
- vid &= ~FM10K_VLAN_CLEAR;
-
- /* if the length field has been set, this is a multi-bit
- * update request. For multi-bit requests, simply disallow
- * them when the pf_vid has been set. In this case, the PF
- * should have already cleared the VLAN_TABLE, and if we
- * allowed them, it could allow a rogue VF to receive traffic
- * on a VLAN it was not assigned. In the single-bit case, we
- * need to modify requests for VLAN 0 to use the default PF or
- * SW vid when assigned.
- */
-
- if (vid >> 16) {
- /* prevent multi-bit requests when PF has
- * administratively set the VLAN for this VF
- */
- if (vf_info->pf_vid)
- return FM10K_ERR_PARAM;
- } else {
- err = fm10k_iov_select_vid(vf_info, (u16)vid);
- if (err < 0)
- return err;
-
- vid = err;
- }
-
- /* update VSI info for VF in regards to VLAN table */
- err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set);
- }
-
- if (!err && !!results[FM10K_MAC_VLAN_MSG_MAC]) {
- result = results[FM10K_MAC_VLAN_MSG_MAC];
-
- /* record unicast MAC address requested */
- err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan);
- if (err)
- return err;
-
- /* block attempts to set MAC for a locked device */
- if (is_valid_ether_addr(vf_info->mac) &&
- !ether_addr_equal(mac, vf_info->mac))
- return FM10K_ERR_PARAM;
-
- set = !(vlan & FM10K_VLAN_CLEAR);
- vlan &= ~FM10K_VLAN_CLEAR;
-
- err = fm10k_iov_select_vid(vf_info, vlan);
- if (err < 0)
- return err;
-
- vlan = (u16)err;
-
- /* notify switch of request for new unicast address */
- err = hw->mac.ops.update_uc_addr(hw, vf_info->glort,
- mac, vlan, set, 0);
- }
-
- if (!err && !!results[FM10K_MAC_VLAN_MSG_MULTICAST]) {
- result = results[FM10K_MAC_VLAN_MSG_MULTICAST];
-
- /* record multicast MAC address requested */
- err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan);
- if (err)
- return err;
-
- /* verify that the VF is allowed to request multicast */
- if (!(vf_info->vf_flags & FM10K_VF_FLAG_MULTI_ENABLED))
- return FM10K_ERR_PARAM;
-
- set = !(vlan & FM10K_VLAN_CLEAR);
- vlan &= ~FM10K_VLAN_CLEAR;
-
- err = fm10k_iov_select_vid(vf_info, vlan);
- if (err < 0)
- return err;
-
- vlan = (u16)err;
-
- /* notify switch of request for new multicast address */
- err = hw->mac.ops.update_mc_addr(hw, vf_info->glort,
- mac, vlan, set);
- }
-
- return err;
-}
-
-/**
* fm10k_iov_supported_xcast_mode_pf - Determine best match for xcast mode
* @vf_info: VF info structure containing capability flags
* @mode: Requested xcast mode
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
index 8e814df709d2..ad3696893cb1 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
@@ -99,8 +99,6 @@ extern const struct fm10k_tlv_attr fm10k_err_msg_attr[];
s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid);
s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *);
-s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *, u32 **,
- struct fm10k_mbx_info *);
s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *, u32 **,
struct fm10k_mbx_info *);
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index ce63a7cfe955..c67963bfe14e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -88,6 +88,7 @@ enum i40e_state {
__I40E_SERVICE_SCHED,
__I40E_ADMINQ_EVENT_PENDING,
__I40E_MDD_EVENT_PENDING,
+ __I40E_MDD_VF_PRINT_PENDING,
__I40E_VFLR_EVENT_PENDING,
__I40E_RESET_RECOVERY_PENDING,
__I40E_TIMEOUT_RECOVERY_PENDING,
@@ -191,6 +192,7 @@ enum i40e_pf_flags {
*/
I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
I40E_FLAG_VF_VLAN_PRUNING_ENA,
+ I40E_FLAG_MDD_AUTO_RESET_VF,
I40E_PF_FLAGS_NBITS, /* must be last */
};
@@ -572,7 +574,7 @@ struct i40e_pf {
int num_alloc_vfs; /* actual number of VFs allocated */
u32 vf_aq_requests;
u32 arq_overflows; /* Not fatal, possibly indicative of problems */
-
+ struct ratelimit_state mdd_message_rate_limit;
/* DCBx/DCBNL capability for PF that indicates
* whether DCBx is managed by firmware or host
* based agent (LLDPAD). Also, indicates what
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 208c2f0857b6..6cd9da662ae1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -722,7 +722,7 @@ static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id)
dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n",
vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs);
dev_info(&pf->pdev->dev, " num MDD=%lld\n",
- vf->num_mdd_events);
+ vf->mdd_tx_events.count + vf->mdd_rx_events.count);
} else {
dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index bce5b76f1e7a..8a7a83f83ee5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -459,6 +459,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0),
I40E_PRIV_FLAG("vf-vlan-pruning",
I40E_FLAG_VF_VLAN_PRUNING_ENA, 0),
+ I40E_PRIV_FLAG("mdd-auto-reset-vf",
+ I40E_FLAG_MDD_AUTO_RESET_VF, 0),
};
#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 8a333d0e2218..65a702668e21 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -11180,6 +11180,67 @@ static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
}
/**
+ * i40e_print_vf_mdd_event - print VF Tx/Rx malicious driver detect event
+ * @pf: board private structure
+ * @vf: pointer to the VF structure
+ * @is_tx: true - for Tx event, false - for Rx
+ */
+static void i40e_print_vf_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf,
+ bool is_tx)
+{
+ dev_err(&pf->pdev->dev, is_tx ?
+ "%lld Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n" :
+ "%lld Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
+ is_tx ? vf->mdd_tx_events.count : vf->mdd_rx_events.count,
+ pf->hw.pf_id,
+ vf->vf_id,
+ vf->default_lan_addr.addr,
+ str_on_off(test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)));
+}
+
+/**
+ * i40e_print_vfs_mdd_events - print VFs malicious driver detect event
+ * @pf: pointer to the PF structure
+ *
+ * Called from i40e_handle_mdd_event to rate limit and print VFs MDD events.
+ */
+static void i40e_print_vfs_mdd_events(struct i40e_pf *pf)
+{
+ unsigned int i;
+
+ /* check that there are pending MDD events to print */
+ if (!test_and_clear_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state))
+ return;
+
+ if (!__ratelimit(&pf->mdd_message_rate_limit))
+ return;
+
+ for (i = 0; i < pf->num_alloc_vfs; i++) {
+ struct i40e_vf *vf = &pf->vf[i];
+ bool is_printed = false;
+
+ /* only print Rx MDD event message if there are new events */
+ if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
+ vf->mdd_rx_events.last_printed = vf->mdd_rx_events.count;
+ i40e_print_vf_mdd_event(pf, vf, false);
+ is_printed = true;
+ }
+
+ /* only print Tx MDD event message if there are new events */
+ if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
+ vf->mdd_tx_events.last_printed = vf->mdd_tx_events.count;
+ i40e_print_vf_mdd_event(pf, vf, true);
+ is_printed = true;
+ }
+
+ if (is_printed && !test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags))
+ dev_info(&pf->pdev->dev,
+ "Use PF Control I/F to re-enable the VF #%d\n",
+ i);
+ }
+}
+
+/**
* i40e_handle_mdd_event
* @pf: pointer to the PF structure
*
@@ -11193,8 +11254,13 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
u32 reg;
int i;
- if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
+ if (!test_and_clear_bit(__I40E_MDD_EVENT_PENDING, pf->state)) {
+ /* Since the VF MDD event logging is rate limited, check if
+ * there are pending MDD events.
+ */
+ i40e_print_vfs_mdd_events(pf);
return;
+ }
/* find what triggered the MDD event */
reg = rd32(hw, I40E_GL_MDET_TX);
@@ -11238,36 +11304,48 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
/* see if one of the VFs needs its hand slapped */
for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
+ bool is_mdd_on_tx = false;
+ bool is_mdd_on_rx = false;
+
vf = &(pf->vf[i]);
reg = rd32(hw, I40E_VP_MDET_TX(i));
if (reg & I40E_VP_MDET_TX_VALID_MASK) {
+ set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
- vf->num_mdd_events++;
- dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
- i);
- dev_info(&pf->pdev->dev,
- "Use PF Control I/F to re-enable the VF\n");
+ vf->mdd_tx_events.count++;
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
+ is_mdd_on_tx = true;
}
reg = rd32(hw, I40E_VP_MDET_RX(i));
if (reg & I40E_VP_MDET_RX_VALID_MASK) {
+ set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
- vf->num_mdd_events++;
- dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
- i);
- dev_info(&pf->pdev->dev,
- "Use PF Control I/F to re-enable the VF\n");
+ vf->mdd_rx_events.count++;
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
+ is_mdd_on_rx = true;
+ }
+
+ if ((is_mdd_on_tx || is_mdd_on_rx) &&
+ test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
+ /* VF MDD event counters will be cleared by
+ * reset, so print the event prior to reset.
+ */
+ if (is_mdd_on_rx)
+ i40e_print_vf_mdd_event(pf, vf, false);
+ if (is_mdd_on_tx)
+ i40e_print_vf_mdd_event(pf, vf, true);
+
+ i40e_vc_reset_vf(vf, true);
}
}
- /* re-enable mdd interrupt cause */
- clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
reg = rd32(hw, I40E_PFINT_ICR0_ENA);
reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
wr32(hw, I40E_PFINT_ICR0_ENA, reg);
i40e_flush(hw);
+
+ i40e_print_vfs_mdd_events(pf);
}
/**
@@ -15878,6 +15956,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
ERR_PTR(err),
i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ /* VF MDD event logs are rate limited to one second intervals */
+ ratelimit_state_init(&pf->mdd_message_rate_limit, 1 * HZ, 1);
+
/* Reconfigure hardware for allowing smaller MSS in the case
* of TSO, so that we avoid the MDD being fired and causing
* a reset in the case of small MSS+TSO.
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index dfa785e39458..1120f8e4bb67 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -216,7 +216,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
* @notify_vf: notify vf about reset or not
* Reset VF handler.
**/
-static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
+void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
{
struct i40e_pf *pf = vf->pf;
int i;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 66f95e2f3146..5cf74f16f433 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -64,6 +64,12 @@ struct i40evf_channel {
u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
};
+struct i40e_mdd_vf_events {
+ u64 count; /* total count of Rx|Tx events */
+ /* count number of the last printed event */
+ u64 last_printed;
+};
+
/* VF information structure */
struct i40e_vf {
struct i40e_pf *pf;
@@ -92,7 +98,9 @@ struct i40e_vf {
u8 num_queue_pairs; /* num of qps assigned to VF vsis */
u8 num_req_queues; /* num of requested qps */
- u64 num_mdd_events; /* num of mdd events detected */
+ /* num of mdd tx and rx events detected */
+ struct i40e_mdd_vf_events mdd_rx_events;
+ struct i40e_mdd_vf_events mdd_tx_events;
unsigned long vf_caps; /* vf's adv. capabilities */
unsigned long vf_states; /* vf's runtime states */
@@ -120,6 +128,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs);
int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
u32 v_retval, u8 *msg, u16 msglen);
int i40e_vc_process_vflr_event(struct i40e_pf *pf);
+void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf);
bool i40e_reset_vf(struct i40e_vf *vf, bool flr);
bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
void i40e_vc_notify_vf_reset(struct i40e_vf *vf);
diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile
index 463c0d26b9d4..6c1b702fd992 100644
--- a/drivers/net/ethernet/intel/igb/Makefile
+++ b/drivers/net/ethernet/intel/igb/Makefile
@@ -8,4 +8,4 @@ obj-$(CONFIG_IGB) += igb.o
igb-y := igb_main.o igb_ethtool.o e1000_82575.o \
e1000_mac.o e1000_nvm.o e1000_phy.o e1000_mbx.o \
- e1000_i210.o igb_ptp.o igb_hwmon.o
+ e1000_i210.o igb_ptp.o igb_hwmon.o igb_xsk.o
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 3c2dc7bdebb5..02f340280d20 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -18,8 +18,10 @@
#include <linux/i2c-algo-bit.h>
#include <linux/pci.h>
#include <linux/mdio.h>
+#include <linux/lockdep.h>
#include <net/xdp.h>
+#include <net/xdp_sock_drv.h>
struct igb_adapter;
@@ -86,6 +88,7 @@ struct igb_adapter;
#define IGB_XDP_CONSUMED BIT(0)
#define IGB_XDP_TX BIT(1)
#define IGB_XDP_REDIR BIT(2)
+#define IGB_XDP_EXIT BIT(3)
struct vf_data_storage {
unsigned char vf_mac_addresses[ETH_ALEN];
@@ -255,6 +258,7 @@ enum igb_tx_flags {
enum igb_tx_buf_type {
IGB_TYPE_SKB = 0,
IGB_TYPE_XDP,
+ IGB_TYPE_XSK
};
/* wrapper around a pointer to a socket buffer,
@@ -320,6 +324,7 @@ struct igb_ring {
union { /* array of buffer info structs */
struct igb_tx_buffer *tx_buffer_info;
struct igb_rx_buffer *rx_buffer_info;
+ struct xdp_buff **rx_buffer_info_zc;
};
void *desc; /* descriptor ring memory */
unsigned long flags; /* ring specific flags */
@@ -357,6 +362,7 @@ struct igb_ring {
};
};
struct xdp_rxq_info xdp_rxq;
+ struct xsk_buff_pool *xsk_pool;
} ____cacheline_internodealigned_in_smp;
struct igb_q_vector {
@@ -384,7 +390,8 @@ enum e1000_ring_flags_t {
IGB_RING_FLAG_RX_SCTP_CSUM,
IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
IGB_RING_FLAG_TX_CTX_IDX,
- IGB_RING_FLAG_TX_DETECT_HANG
+ IGB_RING_FLAG_TX_DETECT_HANG,
+ IGB_RING_FLAG_TX_DISABLED
};
#define ring_uses_large_buffer(ring) \
@@ -731,12 +738,21 @@ int igb_setup_tx_resources(struct igb_ring *);
int igb_setup_rx_resources(struct igb_ring *);
void igb_free_tx_resources(struct igb_ring *);
void igb_free_rx_resources(struct igb_ring *);
+void igb_clean_tx_ring(struct igb_ring *tx_ring);
+void igb_clean_rx_ring(struct igb_ring *rx_ring);
void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *);
void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *);
+void igb_finalize_xdp(struct igb_adapter *adapter, unsigned int status);
+void igb_update_rx_stats(struct igb_q_vector *q_vector, unsigned int packets,
+ unsigned int bytes);
void igb_setup_tctl(struct igb_adapter *);
void igb_setup_rctl(struct igb_adapter *);
void igb_setup_srrctl(struct igb_adapter *, struct igb_ring *);
netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *);
+int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp);
+void igb_process_skb_fields(struct igb_ring *rx_ring,
+ union e1000_adv_rx_desc *rx_desc,
+ struct sk_buff *skb);
void igb_alloc_rx_buffers(struct igb_ring *, u16);
void igb_update_stats(struct igb_adapter *);
bool igb_has_link(struct igb_adapter *adapter);
@@ -797,6 +813,33 @@ static inline struct netdev_queue *txring_txq(const struct igb_ring *tx_ring)
return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
}
+/* This function assumes __netif_tx_lock is held by the caller. */
+static inline void igb_xdp_ring_update_tail(struct igb_ring *ring)
+{
+ lockdep_assert_held(&txring_txq(ring)->_xmit_lock);
+
+ /* Force memory writes to complete before letting h/w know there
+ * are new descriptors to fetch.
+ */
+ wmb();
+ writel(ring->next_to_use, ring->tail);
+}
+
+static inline struct igb_ring *igb_xdp_tx_queue_mapping(struct igb_adapter *adapter)
+{
+ unsigned int r_idx = smp_processor_id();
+
+ if (r_idx >= adapter->num_tx_queues)
+ r_idx = r_idx % adapter->num_tx_queues;
+
+ return adapter->tx_ring[r_idx];
+}
+
+static inline bool igb_xdp_is_enabled(struct igb_adapter *adapter)
+{
+ return !!READ_ONCE(adapter->xdp_prog);
+}
+
int igb_add_filter(struct igb_adapter *adapter,
struct igb_nfc_filter *input);
int igb_erase_filter(struct igb_adapter *adapter,
@@ -807,4 +850,17 @@ int igb_add_mac_steering_filter(struct igb_adapter *adapter,
int igb_del_mac_steering_filter(struct igb_adapter *adapter,
const u8 *addr, u8 queue, u8 flags);
+struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
+ struct igb_ring *ring);
+int igb_xsk_pool_setup(struct igb_adapter *adapter,
+ struct xsk_buff_pool *pool,
+ u16 qid);
+bool igb_alloc_rx_buffers_zc(struct igb_ring *rx_ring,
+ struct xsk_buff_pool *xsk_pool, u16 count);
+void igb_clean_rx_ring_zc(struct igb_ring *rx_ring);
+int igb_clean_rx_irq_zc(struct igb_q_vector *q_vector,
+ struct xsk_buff_pool *xsk_pool, const int budget);
+bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool);
+int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
+
#endif /* _IGB_H_ */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 288a4bb2683a..d368b753a467 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -33,7 +33,6 @@
#include <linux/bpf_trace.h>
#include <linux/pm_runtime.h>
#include <linux/etherdevice.h>
-#include <linux/lockdep.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
@@ -116,8 +115,6 @@ static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
-static void igb_clean_tx_ring(struct igb_ring *);
-static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(struct timer_list *);
static void igb_watchdog(struct timer_list *);
@@ -475,12 +472,17 @@ rx_ring_summary:
for (i = 0; i < rx_ring->count; i++) {
const char *next_desc;
- struct igb_rx_buffer *buffer_info;
- buffer_info = &rx_ring->rx_buffer_info[i];
+ dma_addr_t dma = (dma_addr_t)0;
+ struct igb_rx_buffer *buffer_info = NULL;
rx_desc = IGB_RX_DESC(rx_ring, i);
u0 = (struct my_u0 *)rx_desc;
staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+ if (!rx_ring->xsk_pool) {
+ buffer_info = &rx_ring->rx_buffer_info[i];
+ dma = buffer_info->dma;
+ }
+
if (i == rx_ring->next_to_use)
next_desc = " NTU";
else if (i == rx_ring->next_to_clean)
@@ -500,11 +502,11 @@ rx_ring_summary:
"R ", i,
le64_to_cpu(u0->a),
le64_to_cpu(u0->b),
- (u64)buffer_info->dma,
+ (u64)dma,
next_desc);
if (netif_msg_pktdata(adapter) &&
- buffer_info->dma && buffer_info->page) {
+ buffer_info && dma && buffer_info->page) {
print_hex_dump(KERN_INFO, "",
DUMP_PREFIX_ADDRESS,
16, 1,
@@ -1990,7 +1992,11 @@ static void igb_configure(struct igb_adapter *adapter)
*/
for (i = 0; i < adapter->num_rx_queues; i++) {
struct igb_ring *ring = adapter->rx_ring[i];
- igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
+ if (ring->xsk_pool)
+ igb_alloc_rx_buffers_zc(ring, ring->xsk_pool,
+ igb_desc_unused(ring));
+ else
+ igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
}
}
@@ -2911,37 +2917,20 @@ static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf)
static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
+ struct igb_adapter *adapter = netdev_priv(dev);
+
switch (xdp->command) {
case XDP_SETUP_PROG:
return igb_xdp_setup(dev, xdp);
+ case XDP_SETUP_XSK_POOL:
+ return igb_xsk_pool_setup(adapter, xdp->xsk.pool,
+ xdp->xsk.queue_id);
default:
return -EINVAL;
}
}
-/* This function assumes __netif_tx_lock is held by the caller. */
-static void igb_xdp_ring_update_tail(struct igb_ring *ring)
-{
- lockdep_assert_held(&txring_txq(ring)->_xmit_lock);
-
- /* Force memory writes to complete before letting h/w know there
- * are new descriptors to fetch.
- */
- wmb();
- writel(ring->next_to_use, ring->tail);
-}
-
-static struct igb_ring *igb_xdp_tx_queue_mapping(struct igb_adapter *adapter)
-{
- unsigned int r_idx = smp_processor_id();
-
- if (r_idx >= adapter->num_tx_queues)
- r_idx = r_idx % adapter->num_tx_queues;
-
- return adapter->tx_ring[r_idx];
-}
-
-static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp)
+int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp)
{
struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
int cpu = smp_processor_id();
@@ -2955,7 +2944,8 @@ static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp)
/* During program transitions its possible adapter->xdp_prog is assigned
* but ring has not been configured yet. In this case simply abort xmit.
*/
- tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL;
+ tx_ring = igb_xdp_is_enabled(adapter) ?
+ igb_xdp_tx_queue_mapping(adapter) : NULL;
if (unlikely(!tx_ring))
return IGB_XDP_CONSUMED;
@@ -2988,10 +2978,14 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
/* During program transitions its possible adapter->xdp_prog is assigned
* but ring has not been configured yet. In this case simply abort xmit.
*/
- tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL;
+ tx_ring = igb_xdp_is_enabled(adapter) ?
+ igb_xdp_tx_queue_mapping(adapter) : NULL;
if (unlikely(!tx_ring))
return -ENXIO;
+ if (unlikely(test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags)))
+ return -ENXIO;
+
nq = txring_txq(tx_ring);
__netif_tx_lock(nq, cpu);
@@ -3042,6 +3036,7 @@ static const struct net_device_ops igb_netdev_ops = {
.ndo_setup_tc = igb_setup_tc,
.ndo_bpf = igb_xdp,
.ndo_xdp_xmit = igb_xdp_xmit,
+ .ndo_xsk_wakeup = igb_xsk_wakeup,
};
/**
@@ -3338,7 +3333,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->priv_flags |= IFF_SUPP_NOFCS;
netdev->priv_flags |= IFF_UNICAST_FLT;
- netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;
+ netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_XSK_ZEROCOPY;
/* MTU range: 68 - 9216 */
netdev->min_mtu = ETH_MIN_MTU;
@@ -4364,6 +4360,8 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
u64 tdba = ring->dma;
int reg_idx = ring->reg_idx;
+ WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring));
+
wr32(E1000_TDLEN(reg_idx),
ring->count * sizeof(union e1000_adv_tx_desc));
wr32(E1000_TDBAL(reg_idx),
@@ -4424,7 +4422,8 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
- rx_ring->queue_index, 0);
+ rx_ring->queue_index,
+ rx_ring->q_vector->napi.napi_id);
if (res < 0) {
dev_err(dev, "Failed to register xdp_rxq index %u\n",
rx_ring->queue_index);
@@ -4720,12 +4719,17 @@ void igb_setup_srrctl(struct igb_adapter *adapter, struct igb_ring *ring)
struct e1000_hw *hw = &adapter->hw;
int reg_idx = ring->reg_idx;
u32 srrctl = 0;
+ u32 buf_size;
- srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
- if (ring_uses_large_buffer(ring))
- srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+ if (ring->xsk_pool)
+ buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
+ else if (ring_uses_large_buffer(ring))
+ buf_size = IGB_RXBUFFER_3072;
else
- srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+ buf_size = IGB_RXBUFFER_2048;
+
+ srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+ srrctl |= buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT;
srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
if (hw->mac.type >= e1000_82580)
srrctl |= E1000_SRRCTL_TIMESTAMP;
@@ -4757,8 +4761,17 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
u32 rxdctl = 0;
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
- WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
- MEM_TYPE_PAGE_SHARED, NULL));
+ WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring));
+ if (ring->xsk_pool) {
+ WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_XSK_BUFF_POOL,
+ NULL));
+ xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
+ } else {
+ WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED,
+ NULL));
+ }
/* disable the queue */
wr32(E1000_RXDCTL(reg_idx), 0);
@@ -4785,9 +4798,12 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
rxdctl |= IGB_RX_HTHRESH << 8;
rxdctl |= IGB_RX_WTHRESH << 16;
- /* initialize rx_buffer_info */
- memset(ring->rx_buffer_info, 0,
- sizeof(struct igb_rx_buffer) * ring->count);
+ if (ring->xsk_pool)
+ memset(ring->rx_buffer_info_zc, 0,
+ sizeof(*ring->rx_buffer_info_zc) * ring->count);
+ else
+ memset(ring->rx_buffer_info, 0,
+ sizeof(*ring->rx_buffer_info) * ring->count);
/* initialize Rx descriptor 0 */
rx_desc = IGB_RX_DESC(ring, 0);
@@ -4888,19 +4904,24 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter)
* igb_clean_tx_ring - Free Tx Buffers
* @tx_ring: ring to be cleaned
**/
-static void igb_clean_tx_ring(struct igb_ring *tx_ring)
+void igb_clean_tx_ring(struct igb_ring *tx_ring)
{
u16 i = tx_ring->next_to_clean;
struct igb_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
+ u32 xsk_frames = 0;
while (i != tx_ring->next_to_use) {
union e1000_adv_tx_desc *eop_desc, *tx_desc;
/* Free all the Tx ring sk_buffs or xdp frames */
- if (tx_buffer->type == IGB_TYPE_SKB)
+ if (tx_buffer->type == IGB_TYPE_SKB) {
dev_kfree_skb_any(tx_buffer->skb);
- else
+ } else if (tx_buffer->type == IGB_TYPE_XDP) {
xdp_return_frame(tx_buffer->xdpf);
+ } else if (tx_buffer->type == IGB_TYPE_XSK) {
+ xsk_frames++;
+ goto skip_for_xsk;
+ }
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -4931,6 +4952,7 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring)
DMA_TO_DEVICE);
}
+skip_for_xsk:
tx_buffer->next_to_watch = NULL;
/* move us one more past the eop_desc for start of next pkt */
@@ -4945,6 +4967,9 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring)
/* reset BQL for queue */
netdev_tx_reset_queue(txring_txq(tx_ring));
+ if (tx_ring->xsk_pool && xsk_frames)
+ xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
+
/* reset next_to_use and next_to_clean */
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
@@ -4975,8 +5000,13 @@ void igb_free_rx_resources(struct igb_ring *rx_ring)
rx_ring->xdp_prog = NULL;
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
- vfree(rx_ring->rx_buffer_info);
- rx_ring->rx_buffer_info = NULL;
+ if (rx_ring->xsk_pool) {
+ vfree(rx_ring->rx_buffer_info_zc);
+ rx_ring->rx_buffer_info_zc = NULL;
+ } else {
+ vfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+ }
/* if not set, then don't free */
if (!rx_ring->desc)
@@ -5007,13 +5037,18 @@ static void igb_free_all_rx_resources(struct igb_adapter *adapter)
* igb_clean_rx_ring - Free Rx Buffers per Queue
* @rx_ring: ring to free buffers from
**/
-static void igb_clean_rx_ring(struct igb_ring *rx_ring)
+void igb_clean_rx_ring(struct igb_ring *rx_ring)
{
u16 i = rx_ring->next_to_clean;
dev_kfree_skb(rx_ring->skb);
rx_ring->skb = NULL;
+ if (rx_ring->xsk_pool) {
+ igb_clean_rx_ring_zc(rx_ring);
+ goto skip_for_xsk;
+ }
+
/* Free all the Rx ring sk_buffs */
while (i != rx_ring->next_to_alloc) {
struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
@@ -5041,6 +5076,7 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring)
i = 0;
}
+skip_for_xsk:
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
@@ -6467,6 +6503,9 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
return NETDEV_TX_BUSY;
}
+ if (unlikely(test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags)))
+ return NETDEV_TX_BUSY;
+
/* record the location of the first descriptor for this packet */
first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
first->type = IGB_TYPE_SKB;
@@ -6622,7 +6661,7 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
struct igb_adapter *adapter = netdev_priv(netdev);
int max_frame = new_mtu + IGB_ETH_PKT_HDR_PAD;
- if (adapter->xdp_prog) {
+ if (igb_xdp_is_enabled(adapter)) {
int i;
for (i = 0; i < adapter->num_rx_queues; i++) {
@@ -8195,6 +8234,7 @@ static int igb_poll(struct napi_struct *napi, int budget)
struct igb_q_vector *q_vector = container_of(napi,
struct igb_q_vector,
napi);
+ struct xsk_buff_pool *xsk_pool;
bool clean_complete = true;
int work_done = 0;
@@ -8206,7 +8246,12 @@ static int igb_poll(struct napi_struct *napi, int budget)
clean_complete = igb_clean_tx_irq(q_vector, budget);
if (q_vector->rx.ring) {
- int cleaned = igb_clean_rx_irq(q_vector, budget);
+ int cleaned;
+
+ xsk_pool = READ_ONCE(q_vector->rx.ring->xsk_pool);
+ cleaned = xsk_pool ?
+ igb_clean_rx_irq_zc(q_vector, xsk_pool, budget) :
+ igb_clean_rx_irq(q_vector, budget);
work_done += cleaned;
if (cleaned >= budget)
@@ -8235,13 +8280,18 @@ static int igb_poll(struct napi_struct *napi, int budget)
**/
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
{
- struct igb_adapter *adapter = q_vector->adapter;
- struct igb_ring *tx_ring = q_vector->tx.ring;
- struct igb_tx_buffer *tx_buffer;
- union e1000_adv_tx_desc *tx_desc;
unsigned int total_bytes = 0, total_packets = 0;
+ struct igb_adapter *adapter = q_vector->adapter;
unsigned int budget = q_vector->tx.work_limit;
+ struct igb_ring *tx_ring = q_vector->tx.ring;
unsigned int i = tx_ring->next_to_clean;
+ union e1000_adv_tx_desc *tx_desc;
+ struct igb_tx_buffer *tx_buffer;
+ struct xsk_buff_pool *xsk_pool;
+ int cpu = smp_processor_id();
+ bool xsk_xmit_done = true;
+ struct netdev_queue *nq;
+ u32 xsk_frames = 0;
if (test_bit(__IGB_DOWN, &adapter->state))
return true;
@@ -8272,10 +8322,14 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
total_packets += tx_buffer->gso_segs;
/* free the skb */
- if (tx_buffer->type == IGB_TYPE_SKB)
+ if (tx_buffer->type == IGB_TYPE_SKB) {
napi_consume_skb(tx_buffer->skb, napi_budget);
- else
+ } else if (tx_buffer->type == IGB_TYPE_XDP) {
xdp_return_frame(tx_buffer->xdpf);
+ } else if (tx_buffer->type == IGB_TYPE_XSK) {
+ xsk_frames++;
+ goto skip_for_xsk;
+ }
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -8307,6 +8361,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
}
}
+skip_for_xsk:
/* move us one more past the eop_desc for start of next pkt */
tx_buffer++;
tx_desc++;
@@ -8335,6 +8390,21 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
q_vector->tx.total_bytes += total_bytes;
q_vector->tx.total_packets += total_packets;
+ xsk_pool = READ_ONCE(tx_ring->xsk_pool);
+ if (xsk_pool) {
+ if (xsk_frames)
+ xsk_tx_completed(xsk_pool, xsk_frames);
+ if (xsk_uses_need_wakeup(xsk_pool))
+ xsk_set_tx_need_wakeup(xsk_pool);
+
+ nq = txring_txq(tx_ring);
+ __netif_tx_lock(nq, cpu);
+ /* Avoid transmit queue timeout since we share it with the slow path */
+ txq_trans_cond_update(nq);
+ xsk_xmit_done = igb_xmit_zc(tx_ring, xsk_pool);
+ __netif_tx_unlock(nq);
+ }
+
if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
struct e1000_hw *hw = &adapter->hw;
@@ -8397,7 +8467,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
}
}
- return !!budget;
+ return !!budget && xsk_xmit_done;
}
/**
@@ -8588,9 +8658,8 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
return skb;
}
-static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
- struct igb_ring *rx_ring,
- struct xdp_buff *xdp)
+static int igb_run_xdp(struct igb_adapter *adapter, struct igb_ring *rx_ring,
+ struct xdp_buff *xdp)
{
int err, result = IGB_XDP_PASS;
struct bpf_prog *xdp_prog;
@@ -8630,7 +8699,7 @@ out_failure:
break;
}
xdp_out:
- return ERR_PTR(-result);
+ return result;
}
static unsigned int igb_rx_frame_truesize(struct igb_ring *rx_ring,
@@ -8756,10 +8825,6 @@ static bool igb_cleanup_headers(struct igb_ring *rx_ring,
union e1000_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
- /* XDP packets use error pointer so abort at this point */
- if (IS_ERR(skb))
- return true;
-
if (unlikely((igb_test_staterr(rx_desc,
E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) {
struct net_device *netdev = rx_ring->netdev;
@@ -8786,9 +8851,9 @@ static bool igb_cleanup_headers(struct igb_ring *rx_ring,
* order to populate the hash, checksum, VLAN, timestamp, protocol, and
* other fields within the skb.
**/
-static void igb_process_skb_fields(struct igb_ring *rx_ring,
- union e1000_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
+void igb_process_skb_fields(struct igb_ring *rx_ring,
+ union e1000_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
{
struct net_device *dev = rx_ring->netdev;
@@ -8870,6 +8935,38 @@ static void igb_put_rx_buffer(struct igb_ring *rx_ring,
rx_buffer->page = NULL;
}
+void igb_finalize_xdp(struct igb_adapter *adapter, unsigned int status)
+{
+ int cpu = smp_processor_id();
+ struct netdev_queue *nq;
+
+ if (status & IGB_XDP_REDIR)
+ xdp_do_flush();
+
+ if (status & IGB_XDP_TX) {
+ struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
+
+ nq = txring_txq(tx_ring);
+ __netif_tx_lock(nq, cpu);
+ igb_xdp_ring_update_tail(tx_ring);
+ __netif_tx_unlock(nq);
+ }
+}
+
+void igb_update_rx_stats(struct igb_q_vector *q_vector, unsigned int packets,
+ unsigned int bytes)
+{
+ struct igb_ring *ring = q_vector->rx.ring;
+
+ u64_stats_update_begin(&ring->rx_syncp);
+ ring->rx_stats.packets += packets;
+ ring->rx_stats.bytes += bytes;
+ u64_stats_update_end(&ring->rx_syncp);
+
+ q_vector->rx.total_packets += packets;
+ q_vector->rx.total_bytes += bytes;
+}
+
static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
{
unsigned int total_bytes = 0, total_packets = 0;
@@ -8877,12 +8974,11 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
struct igb_ring *rx_ring = q_vector->rx.ring;
u16 cleaned_count = igb_desc_unused(rx_ring);
struct sk_buff *skb = rx_ring->skb;
- int cpu = smp_processor_id();
unsigned int xdp_xmit = 0;
- struct netdev_queue *nq;
struct xdp_buff xdp;
u32 frame_sz = 0;
int rx_buf_pgcnt;
+ int xdp_res = 0;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
@@ -8940,12 +9036,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = igb_rx_frame_truesize(rx_ring, size);
#endif
- skb = igb_run_xdp(adapter, rx_ring, &xdp);
+ xdp_res = igb_run_xdp(adapter, rx_ring, &xdp);
}
- if (IS_ERR(skb)) {
- unsigned int xdp_res = -PTR_ERR(skb);
-
+ if (xdp_res) {
if (xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR)) {
xdp_xmit |= xdp_res;
igb_rx_buffer_flip(rx_ring, rx_buffer, size);
@@ -8964,7 +9058,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
&xdp, timestamp);
/* exit if we failed to retrieve a buffer */
- if (!skb) {
+ if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_failed++;
rx_buffer->pagecnt_bias++;
break;
@@ -8978,7 +9072,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
continue;
/* verify the packet layout is correct */
- if (igb_cleanup_headers(rx_ring, rx_desc, skb)) {
+ if (xdp_res || igb_cleanup_headers(rx_ring, rx_desc, skb)) {
skb = NULL;
continue;
}
@@ -9001,24 +9095,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
/* place incomplete frames back on ring for completion */
rx_ring->skb = skb;
- if (xdp_xmit & IGB_XDP_REDIR)
- xdp_do_flush();
-
- if (xdp_xmit & IGB_XDP_TX) {
- struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
-
- nq = txring_txq(tx_ring);
- __netif_tx_lock(nq, cpu);
- igb_xdp_ring_update_tail(tx_ring);
- __netif_tx_unlock(nq);
- }
+ if (xdp_xmit)
+ igb_finalize_xdp(adapter, xdp_xmit);
- u64_stats_update_begin(&rx_ring->rx_syncp);
- rx_ring->rx_stats.packets += total_packets;
- rx_ring->rx_stats.bytes += total_bytes;
- u64_stats_update_end(&rx_ring->rx_syncp);
- q_vector->rx.total_packets += total_packets;
- q_vector->rx.total_bytes += total_bytes;
+ igb_update_rx_stats(q_vector, total_packets, total_bytes);
if (cleaned_count)
igb_alloc_rx_buffers(rx_ring, cleaned_count);
diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c
new file mode 100644
index 000000000000..157d43787fa0
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
@@ -0,0 +1,562 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock_drv.h>
+#include <net/xdp.h>
+
+#include "e1000_hw.h"
+#include "igb.h"
+
+static int igb_realloc_rx_buffer_info(struct igb_ring *ring, bool pool_present)
+{
+ int size = pool_present ?
+ sizeof(*ring->rx_buffer_info_zc) * ring->count :
+ sizeof(*ring->rx_buffer_info) * ring->count;
+ void *buff_info = vmalloc(size);
+
+ if (!buff_info)
+ return -ENOMEM;
+
+ if (pool_present) {
+ vfree(ring->rx_buffer_info);
+ ring->rx_buffer_info = NULL;
+ ring->rx_buffer_info_zc = buff_info;
+ } else {
+ vfree(ring->rx_buffer_info_zc);
+ ring->rx_buffer_info_zc = NULL;
+ ring->rx_buffer_info = buff_info;
+ }
+
+ return 0;
+}
+
+static void igb_txrx_ring_disable(struct igb_adapter *adapter, u16 qid)
+{
+ struct igb_ring *tx_ring = adapter->tx_ring[qid];
+ struct igb_ring *rx_ring = adapter->rx_ring[qid];
+ struct e1000_hw *hw = &adapter->hw;
+
+ set_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
+
+ wr32(E1000_TXDCTL(tx_ring->reg_idx), 0);
+ wr32(E1000_RXDCTL(rx_ring->reg_idx), 0);
+
+ synchronize_net();
+
+ /* Rx/Tx share the same napi context. */
+ napi_disable(&rx_ring->q_vector->napi);
+
+ igb_clean_tx_ring(tx_ring);
+ igb_clean_rx_ring(rx_ring);
+
+ memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats));
+ memset(&tx_ring->tx_stats, 0, sizeof(tx_ring->tx_stats));
+}
+
+static void igb_txrx_ring_enable(struct igb_adapter *adapter, u16 qid)
+{
+ struct igb_ring *tx_ring = adapter->tx_ring[qid];
+ struct igb_ring *rx_ring = adapter->rx_ring[qid];
+
+ igb_configure_tx_ring(adapter, tx_ring);
+ igb_configure_rx_ring(adapter, rx_ring);
+
+ synchronize_net();
+
+ clear_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
+
+ /* call igb_desc_unused which always leaves
+ * at least 1 descriptor unused to make sure
+ * next_to_use != next_to_clean
+ */
+ if (rx_ring->xsk_pool)
+ igb_alloc_rx_buffers_zc(rx_ring, rx_ring->xsk_pool,
+ igb_desc_unused(rx_ring));
+ else
+ igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
+
+ /* Rx/Tx share the same napi context. */
+ napi_enable(&rx_ring->q_vector->napi);
+}
+
+struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
+ struct igb_ring *ring)
+{
+ int qid = ring->queue_index;
+ struct xsk_buff_pool *pool;
+
+ pool = xsk_get_pool_from_qid(adapter->netdev, qid);
+
+ if (!igb_xdp_is_enabled(adapter))
+ return NULL;
+
+ return (pool && pool->dev) ? pool : NULL;
+}
+
+static int igb_xsk_pool_enable(struct igb_adapter *adapter,
+ struct xsk_buff_pool *pool,
+ u16 qid)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct igb_ring *rx_ring;
+ bool if_running;
+ int err;
+
+ if (qid >= adapter->num_rx_queues)
+ return -EINVAL;
+
+ if (qid >= netdev->real_num_rx_queues ||
+ qid >= netdev->real_num_tx_queues)
+ return -EINVAL;
+
+ err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IGB_RX_DMA_ATTR);
+ if (err)
+ return err;
+
+ rx_ring = adapter->rx_ring[qid];
+ if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
+ if (if_running)
+ igb_txrx_ring_disable(adapter, qid);
+
+ if (if_running) {
+ err = igb_realloc_rx_buffer_info(rx_ring, true);
+ if (!err) {
+ igb_txrx_ring_enable(adapter, qid);
+ /* Kick start the NAPI context so that receiving will start */
+ err = igb_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
+ }
+
+ if (err) {
+ xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int igb_xsk_pool_disable(struct igb_adapter *adapter, u16 qid)
+{
+ struct xsk_buff_pool *pool;
+ struct igb_ring *rx_ring;
+ bool if_running;
+ int err;
+
+ pool = xsk_get_pool_from_qid(adapter->netdev, qid);
+ if (!pool)
+ return -EINVAL;
+
+ rx_ring = adapter->rx_ring[qid];
+ if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
+ if (if_running)
+ igb_txrx_ring_disable(adapter, qid);
+
+ xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
+
+ if (if_running) {
+ err = igb_realloc_rx_buffer_info(rx_ring, false);
+ if (err)
+ return err;
+
+ igb_txrx_ring_enable(adapter, qid);
+ }
+
+ return 0;
+}
+
+int igb_xsk_pool_setup(struct igb_adapter *adapter,
+ struct xsk_buff_pool *pool,
+ u16 qid)
+{
+ return pool ? igb_xsk_pool_enable(adapter, pool, qid) :
+ igb_xsk_pool_disable(adapter, qid);
+}
+
+static u16 igb_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
+ union e1000_adv_rx_desc *rx_desc, u16 count)
+{
+ dma_addr_t dma;
+ u16 buffs;
+ int i;
+
+ /* nothing to do */
+ if (!count)
+ return 0;
+
+ buffs = xsk_buff_alloc_batch(pool, xdp, count);
+ for (i = 0; i < buffs; i++) {
+ dma = xsk_buff_xdp_get_dma(*xdp);
+ rx_desc->read.pkt_addr = cpu_to_le64(dma);
+ rx_desc->wb.upper.length = 0;
+
+ rx_desc++;
+ xdp++;
+ }
+
+ return buffs;
+}
+
+bool igb_alloc_rx_buffers_zc(struct igb_ring *rx_ring,
+ struct xsk_buff_pool *xsk_pool, u16 count)
+{
+ u32 nb_buffs_extra = 0, nb_buffs = 0;
+ union e1000_adv_rx_desc *rx_desc;
+ u16 ntu = rx_ring->next_to_use;
+ u16 total_count = count;
+ struct xdp_buff **xdp;
+
+ rx_desc = IGB_RX_DESC(rx_ring, ntu);
+ xdp = &rx_ring->rx_buffer_info_zc[ntu];
+
+ if (ntu + count >= rx_ring->count) {
+ nb_buffs_extra = igb_fill_rx_descs(xsk_pool, xdp, rx_desc,
+ rx_ring->count - ntu);
+ if (nb_buffs_extra != rx_ring->count - ntu) {
+ ntu += nb_buffs_extra;
+ goto exit;
+ }
+ rx_desc = IGB_RX_DESC(rx_ring, 0);
+ xdp = rx_ring->rx_buffer_info_zc;
+ ntu = 0;
+ count -= nb_buffs_extra;
+ }
+
+ nb_buffs = igb_fill_rx_descs(xsk_pool, xdp, rx_desc, count);
+ ntu += nb_buffs;
+ if (ntu == rx_ring->count)
+ ntu = 0;
+
+ /* clear the length for the next_to_use descriptor */
+ rx_desc = IGB_RX_DESC(rx_ring, ntu);
+ rx_desc->wb.upper.length = 0;
+
+exit:
+ if (rx_ring->next_to_use != ntu) {
+ rx_ring->next_to_use = ntu;
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+ writel(ntu, rx_ring->tail);
+ }
+
+ return total_count == (nb_buffs + nb_buffs_extra);
+}
+
+void igb_clean_rx_ring_zc(struct igb_ring *rx_ring)
+{
+ u16 ntc = rx_ring->next_to_clean;
+ u16 ntu = rx_ring->next_to_use;
+
+ while (ntc != ntu) {
+ struct xdp_buff *xdp = rx_ring->rx_buffer_info_zc[ntc];
+
+ xsk_buff_free(xdp);
+ ntc++;
+ if (ntc >= rx_ring->count)
+ ntc = 0;
+ }
+}
+
+static struct sk_buff *igb_construct_skb_zc(struct igb_ring *rx_ring,
+ struct xdp_buff *xdp,
+ ktime_t timestamp)
+{
+ unsigned int totalsize = xdp->data_end - xdp->data_meta;
+ unsigned int metasize = xdp->data - xdp->data_meta;
+ struct sk_buff *skb;
+
+ net_prefetch(xdp->data_meta);
+
+ /* allocate a skb to store the frags */
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize);
+ if (unlikely(!skb))
+ return NULL;
+
+ if (timestamp)
+ skb_hwtstamps(skb)->hwtstamp = timestamp;
+
+ memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+ ALIGN(totalsize, sizeof(long)));
+
+ if (metasize) {
+ skb_metadata_set(skb, metasize);
+ __skb_pull(skb, metasize);
+ }
+
+ return skb;
+}
+
+static int igb_run_xdp_zc(struct igb_adapter *adapter, struct igb_ring *rx_ring,
+ struct xdp_buff *xdp, struct xsk_buff_pool *xsk_pool,
+ struct bpf_prog *xdp_prog)
+{
+ int err, result = IGB_XDP_PASS;
+ u32 act;
+
+ prefetchw(xdp->data_hard_start); /* xdp_frame write */
+
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+
+ if (likely(act == XDP_REDIRECT)) {
+ err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
+ if (!err)
+ return IGB_XDP_REDIR;
+
+ if (xsk_uses_need_wakeup(xsk_pool) &&
+ err == -ENOBUFS)
+ result = IGB_XDP_EXIT;
+ else
+ result = IGB_XDP_CONSUMED;
+ goto out_failure;
+ }
+
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ result = igb_xdp_xmit_back(adapter, xdp);
+ if (result == IGB_XDP_CONSUMED)
+ goto out_failure;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(adapter->netdev, xdp_prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+out_failure:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+ fallthrough;
+ case XDP_DROP:
+ result = IGB_XDP_CONSUMED;
+ break;
+ }
+
+ return result;
+}
+
+int igb_clean_rx_irq_zc(struct igb_q_vector *q_vector,
+ struct xsk_buff_pool *xsk_pool, const int budget)
+{
+ struct igb_adapter *adapter = q_vector->adapter;
+ unsigned int total_bytes = 0, total_packets = 0;
+ struct igb_ring *rx_ring = q_vector->rx.ring;
+ u32 ntc = rx_ring->next_to_clean;
+ struct bpf_prog *xdp_prog;
+ unsigned int xdp_xmit = 0;
+ bool failure = false;
+ u16 entries_to_alloc;
+ struct sk_buff *skb;
+
+ /* xdp_prog cannot be NULL in the ZC path */
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
+ while (likely(total_packets < budget)) {
+ union e1000_adv_rx_desc *rx_desc;
+ ktime_t timestamp = 0;
+ struct xdp_buff *xdp;
+ unsigned int size;
+ int xdp_res = 0;
+
+ rx_desc = IGB_RX_DESC(rx_ring, ntc);
+ size = le16_to_cpu(rx_desc->wb.upper.length);
+ if (!size)
+ break;
+
+ /* This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc until we know the
+ * descriptor has been written back
+ */
+ dma_rmb();
+
+ xdp = rx_ring->rx_buffer_info_zc[ntc];
+ xsk_buff_set_size(xdp, size);
+ xsk_buff_dma_sync_for_cpu(xdp);
+
+ /* pull rx packet timestamp if available and valid */
+ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+ int ts_hdr_len;
+
+ ts_hdr_len = igb_ptp_rx_pktstamp(rx_ring->q_vector,
+ xdp->data,
+ &timestamp);
+
+ xdp->data += ts_hdr_len;
+ xdp->data_meta += ts_hdr_len;
+ size -= ts_hdr_len;
+ }
+
+ xdp_res = igb_run_xdp_zc(adapter, rx_ring, xdp, xsk_pool,
+ xdp_prog);
+
+ if (xdp_res) {
+ if (likely(xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR))) {
+ xdp_xmit |= xdp_res;
+ } else if (xdp_res == IGB_XDP_EXIT) {
+ failure = true;
+ break;
+ } else if (xdp_res == IGB_XDP_CONSUMED) {
+ xsk_buff_free(xdp);
+ }
+
+ total_packets++;
+ total_bytes += size;
+ ntc++;
+ if (ntc == rx_ring->count)
+ ntc = 0;
+ continue;
+ }
+
+ skb = igb_construct_skb_zc(rx_ring, xdp, timestamp);
+
+ /* exit if we failed to retrieve a buffer */
+ if (!skb) {
+ rx_ring->rx_stats.alloc_failed++;
+ break;
+ }
+
+ xsk_buff_free(xdp);
+ ntc++;
+ if (ntc == rx_ring->count)
+ ntc = 0;
+
+ if (eth_skb_pad(skb))
+ continue;
+
+ /* probably a little skewed due to removing CRC */
+ total_bytes += skb->len;
+
+ /* populate checksum, timestamp, VLAN, and protocol */
+ igb_process_skb_fields(rx_ring, rx_desc, skb);
+
+ napi_gro_receive(&q_vector->napi, skb);
+
+ /* update budget accounting */
+ total_packets++;
+ }
+
+ rx_ring->next_to_clean = ntc;
+
+ if (xdp_xmit)
+ igb_finalize_xdp(adapter, xdp_xmit);
+
+ igb_update_rx_stats(q_vector, total_packets, total_bytes);
+
+ entries_to_alloc = igb_desc_unused(rx_ring);
+ if (entries_to_alloc >= IGB_RX_BUFFER_WRITE)
+ failure |= !igb_alloc_rx_buffers_zc(rx_ring, xsk_pool,
+ entries_to_alloc);
+
+ if (xsk_uses_need_wakeup(xsk_pool)) {
+ if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
+ xsk_set_rx_need_wakeup(xsk_pool);
+ else
+ xsk_clear_rx_need_wakeup(xsk_pool);
+
+ return (int)total_packets;
+ }
+ return failure ? budget : (int)total_packets;
+}
+
+bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool)
+{
+ unsigned int budget = igb_desc_unused(tx_ring);
+ u32 cmd_type, olinfo_status, nb_pkts, i = 0;
+ struct xdp_desc *descs = xsk_pool->tx_descs;
+ union e1000_adv_tx_desc *tx_desc = NULL;
+ struct igb_tx_buffer *tx_buffer_info;
+ unsigned int total_bytes = 0;
+ dma_addr_t dma;
+
+ if (!netif_carrier_ok(tx_ring->netdev))
+ return true;
+
+ if (test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags))
+ return true;
+
+ nb_pkts = xsk_tx_peek_release_desc_batch(xsk_pool, budget);
+ if (!nb_pkts)
+ return true;
+
+ while (nb_pkts-- > 0) {
+ dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr);
+ xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, descs[i].len);
+
+ tx_buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+ tx_buffer_info->bytecount = descs[i].len;
+ tx_buffer_info->type = IGB_TYPE_XSK;
+ tx_buffer_info->xdpf = NULL;
+ tx_buffer_info->gso_segs = 1;
+ tx_buffer_info->time_stamp = jiffies;
+
+ tx_desc = IGB_TX_DESC(tx_ring, tx_ring->next_to_use);
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+ /* put descriptor type bits */
+ cmd_type = E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_DEXT |
+ E1000_ADVTXD_DCMD_IFCS;
+ olinfo_status = descs[i].len << E1000_ADVTXD_PAYLEN_SHIFT;
+
+ /* FIXME: This sets the Report Status (RS) bit for every
+ * descriptor. One nice to have optimization would be to set it
+ * only for the last descriptor in the whole batch. See Intel
+ * ice driver for an example on how to do it.
+ */
+ cmd_type |= descs[i].len | IGB_TXD_DCMD;
+ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+ tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+
+ total_bytes += descs[i].len;
+
+ i++;
+ tx_ring->next_to_use++;
+ tx_buffer_info->next_to_watch = tx_desc;
+ if (tx_ring->next_to_use == tx_ring->count)
+ tx_ring->next_to_use = 0;
+ }
+
+ netdev_tx_sent_queue(txring_txq(tx_ring), total_bytes);
+ igb_xdp_ring_update_tail(tx_ring);
+
+ return nb_pkts < budget;
+}
+
+int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
+{
+ struct igb_adapter *adapter = netdev_priv(dev);
+ struct e1000_hw *hw = &adapter->hw;
+ struct igb_ring *ring;
+ u32 eics = 0;
+
+ if (test_bit(__IGB_DOWN, &adapter->state))
+ return -ENETDOWN;
+
+ if (!igb_xdp_is_enabled(adapter))
+ return -EINVAL;
+
+ if (qid >= adapter->num_tx_queues)
+ return -EINVAL;
+
+ ring = adapter->tx_ring[qid];
+
+ if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
+ return -ENETDOWN;
+
+ if (!READ_ONCE(ring->xsk_pool))
+ return -EINVAL;
+
+ if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
+ /* Cause software interrupt */
+ if (adapter->flags & IGB_FLAG_HAS_MSIX) {
+ eics |= ring->q_vector->eims_value;
+ wr32(E1000_EICS, eics);
+ } else {
+ wr32(E1000_ICS, E1000_ICS_RXDMT0);
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index eac0f966e0e4..b8111ad9a9a8 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -337,6 +337,8 @@ struct igc_adapter {
struct igc_led_classdev *leds;
};
+void igc_set_queue_napi(struct igc_adapter *adapter, int q_idx,
+ struct napi_struct *napi);
void igc_up(struct igc_adapter *adapter);
void igc_down(struct igc_adapter *adapter);
int igc_open(struct net_device *netdev);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index f58cd6940434..56a35d58e7a6 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2123,10 +2123,6 @@ static bool igc_cleanup_headers(struct igc_ring *rx_ring,
union igc_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
- /* XDP packets use error pointer so abort at this point */
- if (IS_ERR(skb))
- return true;
-
if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) {
struct net_device *netdev = rx_ring->netdev;
@@ -2515,8 +2511,7 @@ out_failure:
}
}
-static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
- struct xdp_buff *xdp)
+static int igc_xdp_run_prog(struct igc_adapter *adapter, struct xdp_buff *xdp)
{
struct bpf_prog *prog;
int res;
@@ -2530,7 +2525,7 @@ static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
res = __igc_xdp_run_prog(adapter, prog, xdp);
out:
- return ERR_PTR(-res);
+ return res;
}
/* This function assumes __netif_tx_lock is held by the caller. */
@@ -2585,6 +2580,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
struct sk_buff *skb = rx_ring->skb;
u16 cleaned_count = igc_desc_unused(rx_ring);
int xdp_status = 0, rx_buffer_pgcnt;
+ int xdp_res = 0;
while (likely(total_packets < budget)) {
struct igc_xdp_buff ctx = { .rx_ts = NULL };
@@ -2630,12 +2626,10 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
xdp_buff_clear_frags_flag(&ctx.xdp);
ctx.rx_desc = rx_desc;
- skb = igc_xdp_run_prog(adapter, &ctx.xdp);
+ xdp_res = igc_xdp_run_prog(adapter, &ctx.xdp);
}
- if (IS_ERR(skb)) {
- unsigned int xdp_res = -PTR_ERR(skb);
-
+ if (xdp_res) {
switch (xdp_res) {
case IGC_XDP_CONSUMED:
rx_buffer->pagecnt_bias++;
@@ -2657,7 +2651,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
skb = igc_construct_skb(rx_ring, rx_buffer, &ctx);
/* exit if we failed to retrieve a buffer */
- if (!skb) {
+ if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_failed++;
rx_buffer->pagecnt_bias++;
set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags);
@@ -2672,7 +2666,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
continue;
/* verify the packet layout is correct */
- if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
+ if (xdp_res || igc_cleanup_headers(rx_ring, rx_desc, skb)) {
skb = NULL;
continue;
}
@@ -4948,6 +4942,22 @@ static int igc_sw_init(struct igc_adapter *adapter)
return 0;
}
+void igc_set_queue_napi(struct igc_adapter *adapter, int vector,
+ struct napi_struct *napi)
+{
+ struct igc_q_vector *q_vector = adapter->q_vector[vector];
+
+ if (q_vector->rx.ring)
+ netif_queue_set_napi(adapter->netdev,
+ q_vector->rx.ring->queue_index,
+ NETDEV_QUEUE_TYPE_RX, napi);
+
+ if (q_vector->tx.ring)
+ netif_queue_set_napi(adapter->netdev,
+ q_vector->tx.ring->queue_index,
+ NETDEV_QUEUE_TYPE_TX, napi);
+}
+
/**
* igc_up - Open the interface and prepare it to handle traffic
* @adapter: board private structure
@@ -4955,6 +4965,7 @@ static int igc_sw_init(struct igc_adapter *adapter)
void igc_up(struct igc_adapter *adapter)
{
struct igc_hw *hw = &adapter->hw;
+ struct napi_struct *napi;
int i = 0;
/* hardware has been reset, we need to reload some things */
@@ -4962,8 +4973,11 @@ void igc_up(struct igc_adapter *adapter)
clear_bit(__IGC_DOWN, &adapter->state);
- for (i = 0; i < adapter->num_q_vectors; i++)
- napi_enable(&adapter->q_vector[i]->napi);
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ napi = &adapter->q_vector[i]->napi;
+ napi_enable(napi);
+ igc_set_queue_napi(adapter, i, napi);
+ }
if (adapter->msix_entries)
igc_configure_msix(adapter);
@@ -5192,6 +5206,7 @@ void igc_down(struct igc_adapter *adapter)
for (i = 0; i < adapter->num_q_vectors; i++) {
if (adapter->q_vector[i]) {
napi_synchronize(&adapter->q_vector[i]->napi);
+ igc_set_queue_napi(adapter, i, NULL);
napi_disable(&adapter->q_vector[i]->napi);
}
}
@@ -5576,6 +5591,9 @@ static int igc_request_msix(struct igc_adapter *adapter)
q_vector);
if (err)
goto err_free;
+
+ netif_napi_set_irq(&q_vector->napi,
+ adapter->msix_entries[vector].vector);
}
igc_configure_msix(adapter);
@@ -6018,6 +6036,7 @@ static int __igc_open(struct net_device *netdev, bool resuming)
struct igc_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = adapter->pdev;
struct igc_hw *hw = &adapter->hw;
+ struct napi_struct *napi;
int err = 0;
int i = 0;
@@ -6053,8 +6072,11 @@ static int __igc_open(struct net_device *netdev, bool resuming)
clear_bit(__IGC_DOWN, &adapter->state);
- for (i = 0; i < adapter->num_q_vectors; i++)
- napi_enable(&adapter->q_vector[i]->napi);
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ napi = &adapter->q_vector[i]->napi;
+ napi_enable(napi);
+ igc_set_queue_napi(adapter, i, napi);
+ }
/* Clear any pending interrupts. */
rd32(IGC_ICR);
@@ -7299,7 +7321,7 @@ static void igc_deliver_wake_packet(struct net_device *netdev)
netif_rx(skb);
}
-static int igc_resume(struct device *dev)
+static int __igc_resume(struct device *dev, bool rpm)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct net_device *netdev = pci_get_drvdata(pdev);
@@ -7342,7 +7364,11 @@ static int igc_resume(struct device *dev)
wr32(IGC_WUS, ~0);
if (netif_running(netdev)) {
+ if (!rpm)
+ rtnl_lock();
err = __igc_open(netdev, true);
+ if (!rpm)
+ rtnl_unlock();
if (!err)
netif_device_attach(netdev);
}
@@ -7350,9 +7376,14 @@ static int igc_resume(struct device *dev)
return err;
}
+static int igc_resume(struct device *dev)
+{
+ return __igc_resume(dev, false);
+}
+
static int igc_runtime_resume(struct device *dev)
{
- return igc_resume(dev);
+ return __igc_resume(dev, true);
}
static int igc_suspend(struct device *dev)
@@ -7397,14 +7428,18 @@ static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
struct net_device *netdev = pci_get_drvdata(pdev);
struct igc_adapter *adapter = netdev_priv(netdev);
+ rtnl_lock();
netif_device_detach(netdev);
- if (state == pci_channel_io_perm_failure)
+ if (state == pci_channel_io_perm_failure) {
+ rtnl_unlock();
return PCI_ERS_RESULT_DISCONNECT;
+ }
if (netif_running(netdev))
igc_down(adapter);
pci_disable_device(pdev);
+ rtnl_unlock();
/* Request a slot reset. */
return PCI_ERS_RESULT_NEED_RESET;
@@ -7415,7 +7450,7 @@ static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
* @pdev: Pointer to PCI device
*
* Restart the card from scratch, as if from a cold-boot. Implementation
- * resembles the first-half of the igc_resume routine.
+ * resembles the first-half of the __igc_resume routine.
**/
static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
{
@@ -7454,7 +7489,7 @@ static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
*
* This callback is called when the error recovery driver tells us that
* its OK to resume normal operation. Implementation resembles the
- * second-half of the igc_resume routine.
+ * second-half of the __igc_resume routine.
*/
static void igc_io_resume(struct pci_dev *pdev)
{
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c
index e27af72aada8..13bbd3346e01 100644
--- a/drivers/net/ethernet/intel/igc/igc_xdp.c
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.c
@@ -13,6 +13,7 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
struct net_device *dev = adapter->netdev;
bool if_running = netif_running(dev);
struct bpf_prog *old_prog;
+ bool need_update;
if (dev->mtu > ETH_DATA_LEN) {
/* For now, the driver doesn't support XDP functionality with
@@ -22,7 +23,8 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
return -EOPNOTSUPP;
}
- if (if_running)
+ need_update = !!adapter->xdp_prog != !!prog;
+ if (if_running && need_update)
igc_close(dev);
old_prog = xchg(&adapter->xdp_prog, prog);
@@ -34,7 +36,7 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
else
xdp_features_clear_redirect_target(dev);
- if (if_running)
+ if (if_running && need_update)
igc_open(dev);
return 0;
@@ -84,6 +86,7 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter,
napi_disable(napi);
}
+ igc_set_queue_napi(adapter, queue_id, NULL);
set_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
set_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
@@ -133,6 +136,7 @@ static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id)
xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR);
clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
+ igc_set_queue_napi(adapter, queue_id, napi);
if (needs_reset) {
napi_enable(napi);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 336e08d35f97..7236f20c9a30 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1923,10 +1923,6 @@ bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
{
struct net_device *netdev = rx_ring->netdev;
- /* XDP packets use error pointer so abort at this point */
- if (IS_ERR(skb))
- return true;
-
/* Verify netdev is present, and that packet does not have any
* errors that would be unacceptable to the netdev.
*/
@@ -2234,9 +2230,9 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
return skb;
}
-static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
- struct ixgbe_ring *rx_ring,
- struct xdp_buff *xdp)
+static int ixgbe_run_xdp(struct ixgbe_adapter *adapter,
+ struct ixgbe_ring *rx_ring,
+ struct xdp_buff *xdp)
{
int err, result = IXGBE_XDP_PASS;
struct bpf_prog *xdp_prog;
@@ -2286,7 +2282,7 @@ out_failure:
break;
}
xdp_out:
- return ERR_PTR(-result);
+ return result;
}
static unsigned int ixgbe_rx_frame_truesize(struct ixgbe_ring *rx_ring,
@@ -2344,6 +2340,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
unsigned int offset = rx_ring->rx_offset;
unsigned int xdp_xmit = 0;
struct xdp_buff xdp;
+ int xdp_res = 0;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
@@ -2389,12 +2386,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = ixgbe_rx_frame_truesize(rx_ring, size);
#endif
- skb = ixgbe_run_xdp(adapter, rx_ring, &xdp);
+ xdp_res = ixgbe_run_xdp(adapter, rx_ring, &xdp);
}
- if (IS_ERR(skb)) {
- unsigned int xdp_res = -PTR_ERR(skb);
-
+ if (xdp_res) {
if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
xdp_xmit |= xdp_res;
ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
@@ -2414,7 +2409,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
}
/* exit if we failed to retrieve a buffer */
- if (!skb) {
+ if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_rx_buff_failed++;
rx_buffer->pagecnt_bias++;
break;
@@ -2428,7 +2423,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
continue;
/* verify the packet layout is correct */
- if (ixgbe_cleanup_headers(rx_ring, rx_desc, skb))
+ if (xdp_res || ixgbe_cleanup_headers(rx_ring, rx_desc, skb))
continue;
/* probably a little skewed due to removing CRC */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 2829bac9af94..6442f115a262 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -737,10 +737,6 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring,
union ixgbe_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
- /* XDP packets use error pointer so abort at this point */
- if (IS_ERR(skb))
- return true;
-
/* verify that the packet does not have any known errors */
if (unlikely(ixgbevf_test_staterr(rx_desc,
IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) {
@@ -1049,9 +1045,9 @@ static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring,
return IXGBEVF_XDP_TX;
}
-static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
- struct ixgbevf_ring *rx_ring,
- struct xdp_buff *xdp)
+static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *rx_ring,
+ struct xdp_buff *xdp)
{
int result = IXGBEVF_XDP_PASS;
struct ixgbevf_ring *xdp_ring;
@@ -1085,7 +1081,7 @@ out_failure:
break;
}
xdp_out:
- return ERR_PTR(-result);
+ return result;
}
static unsigned int ixgbevf_rx_frame_truesize(struct ixgbevf_ring *rx_ring,
@@ -1127,6 +1123,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
struct sk_buff *skb = rx_ring->skb;
bool xdp_xmit = false;
struct xdp_buff xdp;
+ int xdp_res = 0;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
@@ -1170,11 +1167,11 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, size);
#endif
- skb = ixgbevf_run_xdp(adapter, rx_ring, &xdp);
+ xdp_res = ixgbevf_run_xdp(adapter, rx_ring, &xdp);
}
- if (IS_ERR(skb)) {
- if (PTR_ERR(skb) == -IXGBEVF_XDP_TX) {
+ if (xdp_res) {
+ if (xdp_res == IXGBEVF_XDP_TX) {
xdp_xmit = true;
ixgbevf_rx_buffer_flip(rx_ring, rx_buffer,
size);
@@ -1194,7 +1191,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
}
/* exit if we failed to retrieve a buffer */
- if (!skb) {
+ if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_rx_buff_failed++;
rx_buffer->pagecnt_bias++;
break;
@@ -1208,7 +1205,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
continue;
/* verify the packet layout is correct */
- if (ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) {
+ if (xdp_res || ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) {
skb = NULL;
continue;
}