diff options
author | David S. Miller <davem@davemloft.net> | 2022-03-05 14:12:09 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2022-03-05 14:12:09 +0300 |
commit | 2057b8b70e86d31e9a83c48948e56add2cc5668c (patch) | |
tree | fbbf0f36b2bc8ab8cd7b3c6096e662665566d7c7 | |
parent | c409f9b91f7157b8e3de6ddfd10d5a380a26caf4 (diff) | |
parent | 0b79b8dc97b9df4f873f63161e3050bafc4c4237 (diff) | |
download | linux-2057b8b70e86d31e9a83c48948e56add2cc5668c.tar.xz |
Merge branch 'axienet-napi-gro-support'
Robert Hancock says:
====================
NAPI/GRO support for axienet driver
Add support for NAPI and GRO receive in the Xilinx AXI Ethernet driver,
and some other related cleanups.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/xilinx/xilinx_axienet.h | 18 | ||||
-rw-r--r-- | drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 498 |
2 files changed, 266 insertions, 250 deletions
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 40108968b350..0f9c88dd1a4a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -119,11 +119,11 @@ #define XAXIDMA_IRQ_ERROR_MASK 0x00004000 /* Error interrupt */ #define XAXIDMA_IRQ_ALL_MASK 0x00007000 /* All interrupts */ -/* Default TX/RX Threshold and waitbound values for SGDMA mode */ +/* Default TX/RX Threshold and delay timer values for SGDMA mode */ #define XAXIDMA_DFT_TX_THRESHOLD 24 -#define XAXIDMA_DFT_TX_WAITBOUND 254 -#define XAXIDMA_DFT_RX_THRESHOLD 24 -#define XAXIDMA_DFT_RX_WAITBOUND 254 +#define XAXIDMA_DFT_TX_USEC 50 +#define XAXIDMA_DFT_RX_THRESHOLD 1 +#define XAXIDMA_DFT_RX_USEC 50 #define XAXIDMA_BD_CTRL_TXSOF_MASK 0x08000000 /* First tx packet */ #define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */ @@ -385,6 +385,7 @@ struct axidma_bd { * @phy_node: Pointer to device node structure * @phylink: Pointer to phylink instance * @phylink_config: phylink configuration settings + * @napi: NAPI control structure * @pcs_phy: Reference to PCS/PMA PHY if used * @pcs: phylink pcs structure for PCS PHY * @switch_x_sgmii: Whether switchable 1000BaseX/SGMII mode is enabled in the core @@ -395,6 +396,7 @@ struct axidma_bd { * @regs_start: Resource start for axienet device addresses * @regs: Base address for the axienet_local device address space * @dma_regs: Base address for the axidma device address space + * @rx_dma_cr: Nominal content of RX DMA control register * @dma_err_task: Work structure to process Axi DMA errors * @tx_irq: Axidma TX IRQ number * @rx_irq: Axidma RX IRQ number @@ -423,7 +425,9 @@ struct axidma_bd { * @csum_offload_on_tx_path: Stores the checksum selection on TX side. * @csum_offload_on_rx_path: Stores the checksum selection on RX side. * @coalesce_count_rx: Store the irq coalesce on RX side. + * @coalesce_usec_rx: IRQ coalesce delay for RX * @coalesce_count_tx: Store the irq coalesce on TX side. + * @coalesce_usec_tx: IRQ coalesce delay for TX */ struct axienet_local { struct net_device *ndev; @@ -434,6 +438,8 @@ struct axienet_local { struct phylink *phylink; struct phylink_config phylink_config; + struct napi_struct napi; + struct mdio_device *pcs_phy; struct phylink_pcs pcs; @@ -449,6 +455,8 @@ struct axienet_local { void __iomem *regs; void __iomem *dma_regs; + u32 rx_dma_cr; + struct work_struct dma_err_task; int tx_irq; @@ -476,7 +484,9 @@ struct axienet_local { int csum_offload_on_rx_path; u32 coalesce_count_rx; + u32 coalesce_usec_rx; u32 coalesce_count_tx; + u32 coalesce_usec_tx; }; /** diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 6eeaab77fbe0..1d8c6e910279 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -7,7 +7,7 @@ * Copyright (c) 2008-2009 Secret Lab Technologies Ltd. * Copyright (c) 2010 - 2011 Michal Simek <monstr@monstr.eu> * Copyright (c) 2010 - 2011 PetaLogix - * Copyright (c) 2019 SED Systems, a division of Calian Ltd. + * Copyright (c) 2019 - 2022 Calian Advanced Technologies * Copyright (c) 2010 - 2012 Xilinx, Inc. All rights reserved. * * This is a driver for the Xilinx Axi Ethernet which is used in the Virtex6 @@ -33,7 +33,7 @@ #include <linux/of_irq.h> #include <linux/of_address.h> #include <linux/skbuff.h> -#include <linux/spinlock.h> +#include <linux/math64.h> #include <linux/phy.h> #include <linux/mii.h> #include <linux/ethtool.h> @@ -190,7 +190,7 @@ static void axienet_dma_bd_release(struct net_device *ndev) struct axienet_local *lp = netdev_priv(ndev); /* If we end up here, tx_bd_v must have been DMA allocated. */ - dma_free_coherent(ndev->dev.parent, + dma_free_coherent(lp->dev, sizeof(*lp->tx_bd_v) * lp->tx_bd_num, lp->tx_bd_v, lp->tx_bd_p); @@ -215,18 +215,90 @@ static void axienet_dma_bd_release(struct net_device *ndev) */ if (lp->rx_bd_v[i].cntrl) { phys = desc_get_phys_addr(lp, &lp->rx_bd_v[i]); - dma_unmap_single(ndev->dev.parent, phys, + dma_unmap_single(lp->dev, phys, lp->max_frm_size, DMA_FROM_DEVICE); } } - dma_free_coherent(ndev->dev.parent, + dma_free_coherent(lp->dev, sizeof(*lp->rx_bd_v) * lp->rx_bd_num, lp->rx_bd_v, lp->rx_bd_p); } /** + * axienet_usec_to_timer - Calculate IRQ delay timer value + * @lp: Pointer to the axienet_local structure + * @coalesce_usec: Microseconds to convert into timer value + */ +static u32 axienet_usec_to_timer(struct axienet_local *lp, u32 coalesce_usec) +{ + u32 result; + u64 clk_rate = 125000000; /* arbitrary guess if no clock rate set */ + + if (lp->axi_clk) + clk_rate = clk_get_rate(lp->axi_clk); + + /* 1 Timeout Interval = 125 * (clock period of SG clock) */ + result = DIV64_U64_ROUND_CLOSEST((u64)coalesce_usec * clk_rate, + (u64)125000000); + if (result > 255) + result = 255; + + return result; +} + +/** + * axienet_dma_start - Set up DMA registers and start DMA operation + * @lp: Pointer to the axienet_local structure + */ +static void axienet_dma_start(struct axienet_local *lp) +{ + u32 tx_cr; + + /* Start updating the Rx channel control register */ + lp->rx_dma_cr = (lp->coalesce_count_rx << XAXIDMA_COALESCE_SHIFT) | + XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_ERROR_MASK; + /* Only set interrupt delay timer if not generating an interrupt on + * the first RX packet. Otherwise leave at 0 to disable delay interrupt. + */ + if (lp->coalesce_count_rx > 1) + lp->rx_dma_cr |= (axienet_usec_to_timer(lp, lp->coalesce_usec_rx) + << XAXIDMA_DELAY_SHIFT) | + XAXIDMA_IRQ_DELAY_MASK; + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr); + + /* Start updating the Tx channel control register */ + tx_cr = (lp->coalesce_count_tx << XAXIDMA_COALESCE_SHIFT) | + XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_ERROR_MASK; + /* Only set interrupt delay timer if not generating an interrupt on + * the first TX packet. Otherwise leave at 0 to disable delay interrupt. + */ + if (lp->coalesce_count_tx > 1) + tx_cr |= (axienet_usec_to_timer(lp, lp->coalesce_usec_tx) + << XAXIDMA_DELAY_SHIFT) | + XAXIDMA_IRQ_DELAY_MASK; + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, tx_cr); + + /* Populate the tail pointer and bring the Rx Axi DMA engine out of + * halted state. This will make the Rx side ready for reception. + */ + axienet_dma_out_addr(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p); + lp->rx_dma_cr |= XAXIDMA_CR_RUNSTOP_MASK; + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr); + axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p + + (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1))); + + /* Write to the RS (Run-stop) bit in the Tx channel control register. + * Tx channel is now ready to run. But only after we write to the + * tail pointer register that the Tx channel will start transmitting. + */ + axienet_dma_out_addr(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p); + tx_cr |= XAXIDMA_CR_RUNSTOP_MASK; + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, tx_cr); +} + +/** * axienet_dma_bd_init - Setup buffer descriptor rings for Axi DMA * @ndev: Pointer to the net_device structure * @@ -238,7 +310,6 @@ static void axienet_dma_bd_release(struct net_device *ndev) */ static int axienet_dma_bd_init(struct net_device *ndev) { - u32 cr; int i; struct sk_buff *skb; struct axienet_local *lp = netdev_priv(ndev); @@ -249,13 +320,13 @@ static int axienet_dma_bd_init(struct net_device *ndev) lp->rx_bd_ci = 0; /* Allocate the Tx and Rx buffer descriptors. */ - lp->tx_bd_v = dma_alloc_coherent(ndev->dev.parent, + lp->tx_bd_v = dma_alloc_coherent(lp->dev, sizeof(*lp->tx_bd_v) * lp->tx_bd_num, &lp->tx_bd_p, GFP_KERNEL); if (!lp->tx_bd_v) return -ENOMEM; - lp->rx_bd_v = dma_alloc_coherent(ndev->dev.parent, + lp->rx_bd_v = dma_alloc_coherent(lp->dev, sizeof(*lp->rx_bd_v) * lp->rx_bd_num, &lp->rx_bd_p, GFP_KERNEL); if (!lp->rx_bd_v) @@ -285,9 +356,9 @@ static int axienet_dma_bd_init(struct net_device *ndev) goto out; lp->rx_bd_v[i].skb = skb; - addr = dma_map_single(ndev->dev.parent, skb->data, + addr = dma_map_single(lp->dev, skb->data, lp->max_frm_size, DMA_FROM_DEVICE); - if (dma_mapping_error(ndev->dev.parent, addr)) { + if (dma_mapping_error(lp->dev, addr)) { netdev_err(ndev, "DMA mapping error\n"); goto out; } @@ -296,50 +367,7 @@ static int axienet_dma_bd_init(struct net_device *ndev) lp->rx_bd_v[i].cntrl = lp->max_frm_size; } - /* Start updating the Rx channel control register */ - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - /* Update the interrupt coalesce count */ - cr = ((cr & ~XAXIDMA_COALESCE_MASK) | - ((lp->coalesce_count_rx) << XAXIDMA_COALESCE_SHIFT)); - /* Update the delay timer count */ - cr = ((cr & ~XAXIDMA_DELAY_MASK) | - (XAXIDMA_DFT_RX_WAITBOUND << XAXIDMA_DELAY_SHIFT)); - /* Enable coalesce, delay timer and error interrupts */ - cr |= XAXIDMA_IRQ_ALL_MASK; - /* Write to the Rx channel control register */ - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); - - /* Start updating the Tx channel control register */ - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - /* Update the interrupt coalesce count */ - cr = (((cr & ~XAXIDMA_COALESCE_MASK)) | - ((lp->coalesce_count_tx) << XAXIDMA_COALESCE_SHIFT)); - /* Update the delay timer count */ - cr = (((cr & ~XAXIDMA_DELAY_MASK)) | - (XAXIDMA_DFT_TX_WAITBOUND << XAXIDMA_DELAY_SHIFT)); - /* Enable coalesce, delay timer and error interrupts */ - cr |= XAXIDMA_IRQ_ALL_MASK; - /* Write to the Tx channel control register */ - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); - - /* Populate the tail pointer and bring the Rx Axi DMA engine out of - * halted state. This will make the Rx side ready for reception. - */ - axienet_dma_out_addr(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p); - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, - cr | XAXIDMA_CR_RUNSTOP_MASK); - axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p + - (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1))); - - /* Write to the RS (Run-stop) bit in the Tx channel control register. - * Tx channel is now ready to run. But only after we write to the - * tail pointer register that the Tx channel will start transmitting. - */ - axienet_dma_out_addr(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p); - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, - cr | XAXIDMA_CR_RUNSTOP_MASK); + axienet_dma_start(lp); return 0; out: @@ -531,6 +559,44 @@ static int __axienet_device_reset(struct axienet_local *lp) } /** + * axienet_dma_stop - Stop DMA operation + * @lp: Pointer to the axienet_local structure + */ +static void axienet_dma_stop(struct axienet_local *lp) +{ + int count; + u32 cr, sr; + + cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); + cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + synchronize_irq(lp->rx_irq); + + cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); + cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); + synchronize_irq(lp->tx_irq); + + /* Give DMAs a chance to halt gracefully */ + sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); + for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) { + msleep(20); + sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); + } + + sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); + for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) { + msleep(20); + sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); + } + + /* Do a reset to ensure DMA is really stopped */ + axienet_lock_mii(lp); + __axienet_device_reset(lp); + axienet_unlock_mii(lp); +} + +/** * axienet_device_reset - Reset and initialize the Axi Ethernet hardware. * @ndev: Pointer to the net_device structure * @@ -636,7 +702,7 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd, /* Ensure we see complete descriptor update */ dma_rmb(); phys = desc_get_phys_addr(lp, cur_p); - dma_unmap_single(ndev->dev.parent, phys, + dma_unmap_single(lp->dev, phys, (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK), DMA_TO_DEVICE); @@ -774,9 +840,9 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) cur_p->app0 |= 2; /* Tx Full Checksum Offload Enabled */ } - phys = dma_map_single(ndev->dev.parent, skb->data, + phys = dma_map_single(lp->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(ndev->dev.parent, phys))) { + if (unlikely(dma_mapping_error(lp->dev, phys))) { if (net_ratelimit()) netdev_err(ndev, "TX DMA mapping error\n"); ndev->stats.tx_dropped++; @@ -790,11 +856,11 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) lp->tx_bd_tail = 0; cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; frag = &skb_shinfo(skb)->frags[ii]; - phys = dma_map_single(ndev->dev.parent, + phys = dma_map_single(lp->dev, skb_frag_address(frag), skb_frag_size(frag), DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(ndev->dev.parent, phys))) { + if (unlikely(dma_mapping_error(lp->dev, phys))) { if (net_ratelimit()) netdev_err(ndev, "TX DMA mapping error\n"); ndev->stats.tx_dropped++; @@ -833,79 +899,84 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) } /** - * axienet_recv - Is called from Axi DMA Rx Isr to complete the received - * BD processing. - * @ndev: Pointer to net_device structure. + * axienet_poll - Triggered by RX ISR to complete the received BD processing. + * @napi: Pointer to NAPI structure. + * @budget: Max number of packets to process. * - * This function is invoked from the Axi DMA Rx isr to process the Rx BDs. It - * does minimal processing and invokes "netif_rx" to complete further - * processing. + * Return: Number of RX packets processed. */ -static void axienet_recv(struct net_device *ndev) +static int axienet_poll(struct napi_struct *napi, int budget) { u32 length; u32 csumstatus; u32 size = 0; - u32 packets = 0; + int packets = 0; dma_addr_t tail_p = 0; - struct axienet_local *lp = netdev_priv(ndev); - struct sk_buff *skb, *new_skb; struct axidma_bd *cur_p; + struct sk_buff *skb, *new_skb; + struct axienet_local *lp = container_of(napi, struct axienet_local, napi); cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; - while ((cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK)) { + while (packets < budget && (cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK)) { dma_addr_t phys; - tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; - /* Ensure we see complete descriptor update */ dma_rmb(); - phys = desc_get_phys_addr(lp, cur_p); - dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size, - DMA_FROM_DEVICE); skb = cur_p->skb; cur_p->skb = NULL; - length = cur_p->app4 & 0x0000FFFF; - - skb_put(skb, length); - skb->protocol = eth_type_trans(skb, ndev); - /*skb_checksum_none_assert(skb);*/ - skb->ip_summed = CHECKSUM_NONE; - - /* if we're doing Rx csum offload, set it up */ - if (lp->features & XAE_FEATURE_FULL_RX_CSUM) { - csumstatus = (cur_p->app2 & - XAE_FULL_CSUM_STATUS_MASK) >> 3; - if ((csumstatus == XAE_IP_TCP_CSUM_VALIDATED) || - (csumstatus == XAE_IP_UDP_CSUM_VALIDATED)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* skb could be NULL if a previous pass already received the + * packet for this slot in the ring, but failed to refill it + * with a newly allocated buffer. In this case, don't try to + * receive it again. + */ + if (likely(skb)) { + length = cur_p->app4 & 0x0000FFFF; + + phys = desc_get_phys_addr(lp, cur_p); + dma_unmap_single(lp->dev, phys, lp->max_frm_size, + DMA_FROM_DEVICE); + + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, lp->ndev); + /*skb_checksum_none_assert(skb);*/ + skb->ip_summed = CHECKSUM_NONE; + + /* if we're doing Rx csum offload, set it up */ + if (lp->features & XAE_FEATURE_FULL_RX_CSUM) { + csumstatus = (cur_p->app2 & + XAE_FULL_CSUM_STATUS_MASK) >> 3; + if (csumstatus == XAE_IP_TCP_CSUM_VALIDATED || + csumstatus == XAE_IP_UDP_CSUM_VALIDATED) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + } else if ((lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) != 0 && + skb->protocol == htons(ETH_P_IP) && + skb->len > 64) { + skb->csum = be32_to_cpu(cur_p->app3 & 0xFFFF); + skb->ip_summed = CHECKSUM_COMPLETE; } - } else if ((lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) != 0 && - skb->protocol == htons(ETH_P_IP) && - skb->len > 64) { - skb->csum = be32_to_cpu(cur_p->app3 & 0xFFFF); - skb->ip_summed = CHECKSUM_COMPLETE; - } - netif_rx(skb); + napi_gro_receive(napi, skb); - size += length; - packets++; + size += length; + packets++; + } - new_skb = netdev_alloc_skb_ip_align(ndev, lp->max_frm_size); + new_skb = netdev_alloc_skb_ip_align(lp->ndev, lp->max_frm_size); if (!new_skb) - return; + break; - phys = dma_map_single(ndev->dev.parent, new_skb->data, + phys = dma_map_single(lp->dev, new_skb->data, lp->max_frm_size, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(ndev->dev.parent, phys))) { + if (unlikely(dma_mapping_error(lp->dev, phys))) { if (net_ratelimit()) - netdev_err(ndev, "RX DMA mapping error\n"); + netdev_err(lp->ndev, "RX DMA mapping error\n"); dev_kfree_skb(new_skb); - return; + break; } desc_set_phys_addr(lp, phys, cur_p); @@ -913,16 +984,30 @@ static void axienet_recv(struct net_device *ndev) cur_p->status = 0; cur_p->skb = new_skb; + /* Only update tail_p to mark this slot as usable after it has + * been successfully refilled. + */ + tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; + if (++lp->rx_bd_ci >= lp->rx_bd_num) lp->rx_bd_ci = 0; cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; } - ndev->stats.rx_packets += packets; - ndev->stats.rx_bytes += size; + lp->ndev->stats.rx_packets += packets; + lp->ndev->stats.rx_bytes += size; if (tail_p) axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, tail_p); + + if (packets < budget && napi_complete_done(napi, packets)) { + /* Re-enable RX completion interrupts. This should + * cause an immediate interrupt if any RX packets are + * already pending. + */ + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr); + } + return packets; } /** @@ -937,41 +1022,27 @@ static void axienet_recv(struct net_device *ndev) */ static irqreturn_t axienet_tx_irq(int irq, void *_ndev) { - u32 cr; unsigned int status; struct net_device *ndev = _ndev; struct axienet_local *lp = netdev_priv(ndev); status = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); - if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) { - axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status); - axienet_start_xmit_done(lp->ndev); - goto out; - } + if (!(status & XAXIDMA_IRQ_ALL_MASK)) return IRQ_NONE; - if (status & XAXIDMA_IRQ_ERROR_MASK) { - dev_err(&ndev->dev, "DMA Tx error 0x%x\n", status); - dev_err(&ndev->dev, "Current BD is at: 0x%x%08x\n", - (lp->tx_bd_v[lp->tx_bd_ci]).phys_msb, - (lp->tx_bd_v[lp->tx_bd_ci]).phys); - - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - /* Disable coalesce, delay timer and error interrupts */ - cr &= (~XAXIDMA_IRQ_ALL_MASK); - /* Write to the Tx channel control register */ - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); - - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - /* Disable coalesce, delay timer and error interrupts */ - cr &= (~XAXIDMA_IRQ_ALL_MASK); - /* Write to the Rx channel control register */ - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status); + + if (unlikely(status & XAXIDMA_IRQ_ERROR_MASK)) { + netdev_err(ndev, "DMA Tx error 0x%x\n", status); + netdev_err(ndev, "Current BD is at: 0x%x%08x\n", + (lp->tx_bd_v[lp->tx_bd_ci]).phys_msb, + (lp->tx_bd_v[lp->tx_bd_ci]).phys); schedule_work(&lp->dma_err_task); - axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status); + } else { + axienet_start_xmit_done(lp->ndev); } -out: + return IRQ_HANDLED; } @@ -982,46 +1053,40 @@ out: * * Return: IRQ_HANDLED if device generated a RX interrupt, IRQ_NONE otherwise. * - * This is the Axi DMA Rx Isr. It invokes "axienet_recv" to complete the BD + * This is the Axi DMA Rx Isr. It invokes NAPI polling to complete the RX BD * processing. */ static irqreturn_t axienet_rx_irq(int irq, void *_ndev) { - u32 cr; unsigned int status; struct net_device *ndev = _ndev; struct axienet_local *lp = netdev_priv(ndev); status = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); - if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) { - axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status); - axienet_recv(lp->ndev); - goto out; - } + if (!(status & XAXIDMA_IRQ_ALL_MASK)) return IRQ_NONE; - if (status & XAXIDMA_IRQ_ERROR_MASK) { - dev_err(&ndev->dev, "DMA Rx error 0x%x\n", status); - dev_err(&ndev->dev, "Current BD is at: 0x%x%08x\n", - (lp->rx_bd_v[lp->rx_bd_ci]).phys_msb, - (lp->rx_bd_v[lp->rx_bd_ci]).phys); - - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - /* Disable coalesce, delay timer and error interrupts */ - cr &= (~XAXIDMA_IRQ_ALL_MASK); - /* Finally write to the Tx channel control register */ - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); - - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - /* Disable coalesce, delay timer and error interrupts */ - cr &= (~XAXIDMA_IRQ_ALL_MASK); - /* write to the Rx channel control register */ - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status); + + if (unlikely(status & XAXIDMA_IRQ_ERROR_MASK)) { + netdev_err(ndev, "DMA Rx error 0x%x\n", status); + netdev_err(ndev, "Current BD is at: 0x%x%08x\n", + (lp->rx_bd_v[lp->rx_bd_ci]).phys_msb, + (lp->rx_bd_v[lp->rx_bd_ci]).phys); schedule_work(&lp->dma_err_task); - axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status); + } else { + /* Disable further RX completion interrupts and schedule + * NAPI receive. + */ + u32 cr = lp->rx_dma_cr; + + cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + + napi_schedule(&lp->napi); } -out: + return IRQ_HANDLED; } @@ -1095,6 +1160,8 @@ static int axienet_open(struct net_device *ndev) /* Enable worker thread for Axi DMA error handling */ INIT_WORK(&lp->dma_err_task, axienet_dma_err_handler); + napi_enable(&lp->napi); + /* Enable interrupts for Axi DMA Tx */ ret = request_irq(lp->tx_irq, axienet_tx_irq, IRQF_SHARED, ndev->name, ndev); @@ -1120,6 +1187,7 @@ err_eth_irq: err_rx_irq: free_irq(lp->tx_irq, ndev); err_tx_irq: + napi_disable(&lp->napi); phylink_stop(lp->phylink); phylink_disconnect_phy(lp->phylink); cancel_work_sync(&lp->dma_err_task); @@ -1139,46 +1207,22 @@ err_tx_irq: */ static int axienet_stop(struct net_device *ndev) { - u32 cr, sr; - int count; struct axienet_local *lp = netdev_priv(ndev); dev_dbg(&ndev->dev, "axienet_close()\n"); + napi_disable(&lp->napi); + phylink_stop(lp->phylink); phylink_disconnect_phy(lp->phylink); axienet_setoptions(ndev, lp->options & ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); - - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); + axienet_dma_stop(lp); axienet_iow(lp, XAE_IE_OFFSET, 0); - /* Give DMAs a chance to halt gracefully */ - sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); - for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) { - msleep(20); - sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); - } - - sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); - for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) { - msleep(20); - sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); - } - - /* Do a reset to ensure DMA is really stopped */ - axienet_lock_mii(lp); - __axienet_device_reset(lp); - axienet_unlock_mii(lp); - cancel_work_sync(&lp->dma_err_task); if (lp->eth_irq > 0) @@ -1449,14 +1493,12 @@ axienet_ethtools_get_coalesce(struct net_device *ndev, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { - u32 regval = 0; struct axienet_local *lp = netdev_priv(ndev); - regval = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - ecoalesce->rx_max_coalesced_frames = (regval & XAXIDMA_COALESCE_MASK) - >> XAXIDMA_COALESCE_SHIFT; - regval = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - ecoalesce->tx_max_coalesced_frames = (regval & XAXIDMA_COALESCE_MASK) - >> XAXIDMA_COALESCE_SHIFT; + + ecoalesce->rx_max_coalesced_frames = lp->coalesce_count_rx; + ecoalesce->rx_coalesce_usecs = lp->coalesce_usec_rx; + ecoalesce->tx_max_coalesced_frames = lp->coalesce_count_tx; + ecoalesce->tx_coalesce_usecs = lp->coalesce_usec_tx; return 0; } @@ -1489,8 +1531,12 @@ axienet_ethtools_set_coalesce(struct net_device *ndev, if (ecoalesce->rx_max_coalesced_frames) lp->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames; + if (ecoalesce->rx_coalesce_usecs) + lp->coalesce_usec_rx = ecoalesce->rx_coalesce_usecs; if (ecoalesce->tx_max_coalesced_frames) lp->coalesce_count_tx = ecoalesce->tx_max_coalesced_frames; + if (ecoalesce->tx_coalesce_usecs) + lp->coalesce_usec_tx = ecoalesce->tx_coalesce_usecs; return 0; } @@ -1521,7 +1567,8 @@ static int axienet_ethtools_nway_reset(struct net_device *dev) } static const struct ethtool_ops axienet_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USECS, .get_drvinfo = axienet_ethtools_get_drvinfo, .get_regs_len = axienet_ethtools_get_regs_len, .get_regs = axienet_ethtools_get_regs, @@ -1678,29 +1725,26 @@ static const struct phylink_mac_ops axienet_phylink_ops = { */ static void axienet_dma_err_handler(struct work_struct *work) { + u32 i; u32 axienet_status; - u32 cr, i; + struct axidma_bd *cur_p; struct axienet_local *lp = container_of(work, struct axienet_local, dma_err_task); struct net_device *ndev = lp->ndev; - struct axidma_bd *cur_p; + + napi_disable(&lp->napi); axienet_setoptions(ndev, lp->options & ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); - /* When we do an Axi Ethernet reset, it resets the complete core - * including the MDIO. MDIO must be disabled before resetting. - * Hold MDIO bus lock to avoid MDIO accesses during the reset. - */ - axienet_lock_mii(lp); - __axienet_device_reset(lp); - axienet_unlock_mii(lp); + + axienet_dma_stop(lp); for (i = 0; i < lp->tx_bd_num; i++) { cur_p = &lp->tx_bd_v[i]; if (cur_p->cntrl) { dma_addr_t addr = desc_get_phys_addr(lp, cur_p); - dma_unmap_single(ndev->dev.parent, addr, + dma_unmap_single(lp->dev, addr, (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK), DMA_TO_DEVICE); @@ -1733,50 +1777,7 @@ static void axienet_dma_err_handler(struct work_struct *work) lp->tx_bd_tail = 0; lp->rx_bd_ci = 0; - /* Start updating the Rx channel control register */ - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - /* Update the interrupt coalesce count */ - cr = ((cr & ~XAXIDMA_COALESCE_MASK) | - (XAXIDMA_DFT_RX_THRESHOLD << XAXIDMA_COALESCE_SHIFT)); - /* Update the delay timer count */ - cr = ((cr & ~XAXIDMA_DELAY_MASK) | - (XAXIDMA_DFT_RX_WAITBOUND << XAXIDMA_DELAY_SHIFT)); - /* Enable coalesce, delay timer and error interrupts */ - cr |= XAXIDMA_IRQ_ALL_MASK; - /* Finally write to the Rx channel control register */ - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); - - /* Start updating the Tx channel control register */ - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - /* Update the interrupt coalesce count */ - cr = (((cr & ~XAXIDMA_COALESCE_MASK)) | - (XAXIDMA_DFT_TX_THRESHOLD << XAXIDMA_COALESCE_SHIFT)); - /* Update the delay timer count */ - cr = (((cr & ~XAXIDMA_DELAY_MASK)) | - (XAXIDMA_DFT_TX_WAITBOUND << XAXIDMA_DELAY_SHIFT)); - /* Enable coalesce, delay timer and error interrupts */ - cr |= XAXIDMA_IRQ_ALL_MASK; - /* Finally write to the Tx channel control register */ - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); - - /* Populate the tail pointer and bring the Rx Axi DMA engine out of - * halted state. This will make the Rx side ready for reception. - */ - axienet_dma_out_addr(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p); - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, - cr | XAXIDMA_CR_RUNSTOP_MASK); - axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p + - (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1))); - - /* Write to the RS (Run-stop) bit in the Tx channel control register. - * Tx channel is now ready to run. But only after we write to the - * tail pointer register that the Tx channel will start transmitting - */ - axienet_dma_out_addr(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p); - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, - cr | XAXIDMA_CR_RUNSTOP_MASK); + axienet_dma_start(lp); axienet_status = axienet_ior(lp, XAE_RCW1_OFFSET); axienet_status &= ~XAE_RCW1_RX_MASK; @@ -1797,6 +1798,7 @@ static void axienet_dma_err_handler(struct work_struct *work) axienet_set_mac_address(ndev, NULL); axienet_set_multicast_list(ndev); axienet_setoptions(ndev, lp->options); + napi_enable(&lp->napi); } /** @@ -1845,6 +1847,8 @@ static int axienet_probe(struct platform_device *pdev) lp->rx_bd_num = RX_BD_NUM_DEFAULT; lp->tx_bd_num = TX_BD_NUM_DEFAULT; + netif_napi_add(ndev, &lp->napi, axienet_poll, NAPI_POLL_WEIGHT); + lp->axi_clk = devm_clk_get_optional(&pdev->dev, "s_axi_lite_clk"); if (!lp->axi_clk) { /* For backward compatibility, if named AXI clock is not present, @@ -2051,7 +2055,9 @@ static int axienet_probe(struct platform_device *pdev) } lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; + lp->coalesce_usec_rx = XAXIDMA_DFT_RX_USEC; lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; + lp->coalesce_usec_tx = XAXIDMA_DFT_TX_USEC; /* Reset core now that clocks are enabled, prior to accessing MDIO */ ret = __axienet_device_reset(lp); |