summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/sfc/ef100_tx.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-04 02:29:08 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-04 02:29:08 +0300
commitf86d1fbbe7858884d6754534a0afbb74fc30bc26 (patch)
treef61796870edefbe77d495e9d719c68af1d14275b /drivers/net/ethernet/sfc/ef100_tx.c
parent526942b8134cc34d25d27f95dfff98b8ce2f6fcd (diff)
parent7c6327c77d509e78bff76f2a4551fcfee851682e (diff)
downloadlinux-f86d1fbbe7858884d6754534a0afbb74fc30bc26.tar.xz
Merge tag 'net-next-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking changes from Paolo Abeni: "Core: - Refactor the forward memory allocation to better cope with memory pressure with many open sockets, moving from a per socket cache to a per-CPU one - Replace rwlocks with RCU for better fairness in ping, raw sockets and IP multicast router. - Network-side support for IO uring zero-copy send. - A few skb drop reason improvements, including codegen the source file with string mapping instead of using macro magic. - Rename reference tracking helpers to a more consistent netdev_* schema. - Adapt u64_stats_t type to address load/store tearing issues. - Refine debug helper usage to reduce the log noise caused by bots. BPF: - Improve socket map performance, avoiding skb cloning on read operation. - Add support for 64 bits enum, to match types exposed by kernel. - Introduce support for sleepable uprobes program. - Introduce support for enum textual representation in libbpf. - New helpers to implement synproxy with eBPF/XDP. - Improve loop performances, inlining indirect calls when possible. - Removed all the deprecated libbpf APIs. - Implement new eBPF-based LSM flavor. - Add type match support, which allow accurate queries to the eBPF used types. - A few TCP congetsion control framework usability improvements. - Add new infrastructure to manipulate CT entries via eBPF programs. - Allow for livepatch (KLP) and BPF trampolines to attach to the same kernel function. Protocols: - Introduce per network namespace lookup tables for unix sockets, increasing scalability and reducing contention. - Preparation work for Wi-Fi 7 Multi-Link Operation (MLO) support. - Add support to forciby close TIME_WAIT TCP sockets via user-space tools. - Significant performance improvement for the TLS 1.3 receive path, both for zero-copy and not-zero-copy. - Support for changing the initial MTPCP subflow priority/backup status - Introduce virtually contingus buffers for sockets over RDMA, to cope better with memory pressure. - Extend CAN ethtool support with timestamping capabilities - Refactor CAN build infrastructure to allow building only the needed features. Driver API: - Remove devlink mutex to allow parallel commands on multiple links. - Add support for pause stats in distributed switch. - Implement devlink helpers to query and flash line cards. - New helper for phy mode to register conversion. New hardware / drivers: - Ethernet DSA driver for the rockchip mt7531 on BPI-R2 Pro. - Ethernet DSA driver for the Renesas RZ/N1 A5PSW switch. - Ethernet DSA driver for the Microchip LAN937x switch. - Ethernet PHY driver for the Aquantia AQR113C EPHY. - CAN driver for the OBD-II ELM327 interface. - CAN driver for RZ/N1 SJA1000 CAN controller. - Bluetooth: Infineon CYW55572 Wi-Fi plus Bluetooth combo device. Drivers: - Intel Ethernet NICs: - i40e: add support for vlan pruning - i40e: add support for XDP framented packets - ice: improved vlan offload support - ice: add support for PPPoE offload - Mellanox Ethernet (mlx5) - refactor packet steering offload for performance and scalability - extend support for TC offload - refactor devlink code to clean-up the locking schema - support stacked vlans for bridge offloads - use TLS objects pool to improve connection rate - Netronome Ethernet NICs (nfp): - extend support for IPv6 fields mangling offload - add support for vepa mode in HW bridge - better support for virtio data path acceleration (VDPA) - enable TSO by default - Microsoft vNIC driver (mana) - add support for XDP redirect - Others Ethernet drivers: - bonding: add per-port priority support - microchip lan743x: extend phy support - Fungible funeth: support UDP segmentation offload and XDP xmit - Solarflare EF100: add support for virtual function representors - MediaTek SoC: add XDP support - Mellanox Ethernet/IB switch (mlxsw): - dropped support for unreleased H/W (XM router). - improved stats accuracy - unified bridge model coversion improving scalability (parts 1-6) - support for PTP in Spectrum-2 asics - Broadcom PHYs - add PTP support for BCM54210E - add support for the BCM53128 internal PHY - Marvell Ethernet switches (prestera): - implement support for multicast forwarding offload - Embedded Ethernet switches: - refactor OcteonTx MAC filter for better scalability - improve TC H/W offload for the Felix driver - refactor the Microchip ksz8 and ksz9477 drivers to share the probe code (parts 1, 2), add support for phylink mac configuration - Other WiFi: - Microchip wilc1000: diable WEP support and enable WPA3 - Atheros ath10k: encapsulation offload support Old code removal: - Neterion vxge ethernet driver: this is untouched since more than 10 years" * tag 'net-next-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1890 commits) doc: sfp-phylink: Fix a broken reference wireguard: selftests: support UML wireguard: allowedips: don't corrupt stack when detecting overflow wireguard: selftests: update config fragments wireguard: ratelimiter: use hrtimer in selftest net/mlx5e: xsk: Discard unaligned XSK frames on striding RQ net: usb: ax88179_178a: Bind only to vendor-specific interface selftests: net: fix IOAM test skip return code net: usb: make USB_RTL8153_ECM non user configurable net: marvell: prestera: remove reduntant code octeontx2-pf: Reduce minimum mtu size to 60 net: devlink: Fix missing mutex_unlock() call net/tls: Remove redundant workqueue flush before destroy net: txgbe: Fix an error handling path in txgbe_probe() net: dsa: Fix spelling mistakes and cleanup code Documentation: devlink: add add devlink-selftests to the table of contents dccp: put dccp_qpolicy_full() and dccp_qpolicy_push() in the same lock net: ionic: fix error check for vlan flags in ionic_set_nic_features() net: ice: fix error NETIF_F_HW_VLAN_CTAG_FILTER check in ice_vsi_sync_fltr() nfp: flower: add support for tunnel offload without key ID ...
Diffstat (limited to 'drivers/net/ethernet/sfc/ef100_tx.c')
-rw-r--r--drivers/net/ethernet/sfc/ef100_tx.c84
1 files changed, 80 insertions, 4 deletions
diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c
index 26ef51d6b542..102ddc7e206a 100644
--- a/drivers/net/ethernet/sfc/ef100_tx.c
+++ b/drivers/net/ethernet/sfc/ef100_tx.c
@@ -254,7 +254,8 @@ static void ef100_make_tso_desc(struct efx_nic *efx,
static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
const struct sk_buff *skb,
- unsigned int segment_count)
+ unsigned int segment_count,
+ struct efx_rep *efv)
{
unsigned int old_write_count = tx_queue->write_count;
unsigned int new_write_count = old_write_count;
@@ -272,6 +273,20 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
else
next_desc_type = ESE_GZ_TX_DESC_TYPE_SEND;
+ if (unlikely(efv)) {
+ /* Create TX override descriptor */
+ write_ptr = new_write_count & tx_queue->ptr_mask;
+ txd = ef100_tx_desc(tx_queue, write_ptr);
+ ++new_write_count;
+
+ tx_queue->packet_write_count = new_write_count;
+ EFX_POPULATE_OWORD_3(*txd,
+ ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_PREFIX,
+ ESF_GZ_TX_PREFIX_EGRESS_MPORT, efv->mport,
+ ESF_GZ_TX_PREFIX_EGRESS_MPORT_EN, 1);
+ nr_descs--;
+ }
+
/* if it's a raw write (such as XDP) then always SEND single frames */
if (!skb)
nr_descs = 1;
@@ -306,6 +321,9 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
/* if it's a raw write (such as XDP) then always SEND */
next_desc_type = skb ? ESE_GZ_TX_DESC_TYPE_SEG :
ESE_GZ_TX_DESC_TYPE_SEND;
+ /* mark as an EFV buffer if applicable */
+ if (unlikely(efv))
+ buffer->flags |= EFX_TX_BUF_EFV;
} while (new_write_count != tx_queue->insert_count);
@@ -324,7 +342,7 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
void ef100_tx_write(struct efx_tx_queue *tx_queue)
{
- ef100_tx_make_descriptors(tx_queue, NULL, 0);
+ ef100_tx_make_descriptors(tx_queue, NULL, 0, NULL);
ef100_tx_push_buffers(tx_queue);
}
@@ -351,6 +369,12 @@ void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event)
*/
int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
+ return __ef100_enqueue_skb(tx_queue, skb, NULL);
+}
+
+int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+ struct efx_rep *efv)
+{
unsigned int old_insert_count = tx_queue->insert_count;
struct efx_nic *efx = tx_queue->efx;
bool xmit_more = netdev_xmit_more();
@@ -376,16 +400,64 @@ int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
return 0;
}
+ if (unlikely(efv)) {
+ struct efx_tx_buffer *buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
+
+ /* Drop representor packets if the queue is stopped.
+ * We currently don't assert backoff to representors so this is
+ * to make sure representor traffic can't starve the main
+ * net device.
+ * And, of course, if there are no TX descriptors left.
+ */
+ if (netif_tx_queue_stopped(tx_queue->core_txq) ||
+ unlikely(efx_tx_buffer_in_use(buffer))) {
+ atomic64_inc(&efv->stats.tx_errors);
+ rc = -ENOSPC;
+ goto err;
+ }
+
+ /* Also drop representor traffic if it could cause us to
+ * stop the queue. If we assert backoff and we haven't
+ * received traffic on the main net device recently then the
+ * TX watchdog can go off erroneously.
+ */
+ fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
+ fill_level += efx_tx_max_skb_descs(efx);
+ if (fill_level > efx->txq_stop_thresh) {
+ struct efx_tx_queue *txq2;
+
+ /* Refresh cached fill level and re-check */
+ efx_for_each_channel_tx_queue(txq2, tx_queue->channel)
+ txq2->old_read_count = READ_ONCE(txq2->read_count);
+
+ fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
+ fill_level += efx_tx_max_skb_descs(efx);
+ if (fill_level > efx->txq_stop_thresh) {
+ atomic64_inc(&efv->stats.tx_errors);
+ rc = -ENOSPC;
+ goto err;
+ }
+ }
+
+ buffer->flags = EFX_TX_BUF_OPTION | EFX_TX_BUF_EFV;
+ tx_queue->insert_count++;
+ }
+
/* Map for DMA and create descriptors */
rc = efx_tx_map_data(tx_queue, skb, segments);
if (rc)
goto err;
- ef100_tx_make_descriptors(tx_queue, skb, segments);
+ ef100_tx_make_descriptors(tx_queue, skb, segments, efv);
fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
if (fill_level > efx->txq_stop_thresh) {
struct efx_tx_queue *txq2;
+ /* Because of checks above, representor traffic should
+ * not be able to stop the queue.
+ */
+ WARN_ON(efv);
+
netif_tx_stop_queue(tx_queue->core_txq);
/* Re-read after a memory barrier in case we've raced with
* the completion path. Otherwise there's a danger we'll never
@@ -404,8 +476,12 @@ int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
/* If xmit_more then we don't need to push the doorbell, unless there
* are 256 descriptors already queued in which case we have to push to
* ensure we never push more than 256 at once.
+ *
+ * Always push for representor traffic, and don't account it to parent
+ * PF netdevice's BQL.
*/
- if (__netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) ||
+ if (unlikely(efv) ||
+ __netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) ||
tx_queue->write_count - tx_queue->notify_count > 255)
ef100_tx_push_buffers(tx_queue);