Diffstat (limited to 'drivers/net/ethernet/chelsio/cxgb4/sge.c')
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/sge.c | 161
1 file changed, 137 insertions, 24 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index b4b9f6048fe7..0d2eddab04ef 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -46,6 +46,9 @@
 #ifdef CONFIG_NET_RX_BUSY_POLL
 #include <net/busy_poll.h>
 #endif /* CONFIG_NET_RX_BUSY_POLL */
+#ifdef CONFIG_CHELSIO_T4_FCOE
+#include <scsi/fc/fc_fcoe.h>
+#endif /* CONFIG_CHELSIO_T4_FCOE */
 #include "cxgb4.h"
 #include "t4_regs.h"
 #include "t4_values.h"
@@ -118,12 +121,6 @@
 #define NOMEM_TMR_IDX (SGE_NTIMERS - 1)
 
 /*
- * An FL with <= FL_STARVE_THRES buffers is starving and a periodic timer will
- * attempt to refill it.
- */
-#define FL_STARVE_THRES 4
-
-/*
  * Suspend an Ethernet Tx queue with fewer available descriptors than this.
  * This is the same as calc_tx_descs() for a TSO packet with
  * nr_frags == MAX_SKB_FRAGS.
@@ -141,7 +138,7 @@
  * Max Tx descriptor space we allow for an Ethernet packet to be inlined
  * into a WR.
  */
-#define MAX_IMM_TX_PKT_LEN 128
+#define MAX_IMM_TX_PKT_LEN 256
 
 /*
  * Max size of a WR sent through a control Tx queue.
@@ -245,9 +242,21 @@ static inline unsigned int fl_cap(const struct sge_fl *fl)
 	return fl->size - 8;   /* 1 descriptor = 8 buffers */
 }
 
-static inline bool fl_starving(const struct sge_fl *fl)
+/**
+ * fl_starving - return whether a Free List is starving.
+ * @adapter: pointer to the adapter
+ * @fl: the Free List
+ *
+ * Tests specified Free List to see whether the number of buffers
+ * available to the hardware has falled below our "starvation"
+ * threshold.
+ */
+static inline bool fl_starving(const struct adapter *adapter,
+			       const struct sge_fl *fl)
 {
-	return fl->avail - fl->pend_cred <= FL_STARVE_THRES;
+	const struct sge *s = &adapter->sge;
+
+	return fl->avail - fl->pend_cred <= s->fl_starve_thres;
 }
 
 static int map_skb(struct device *dev, const struct sk_buff *skb,
@@ -583,8 +592,10 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 	unsigned int cred = q->avail;
 	__be64 *d = &q->desc[q->pidx];
 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
+	int node;
 
 	gfp |= __GFP_NOWARN;
+	node = dev_to_node(adap->pdev_dev);
 
 	if (s->fl_pg_order == 0)
 		goto alloc_small_pages;
@@ -593,7 +604,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 	 * Prefer large buffers
 	 */
 	while (n) {
-		pg = __dev_alloc_pages(gfp, s->fl_pg_order);
+		pg = alloc_pages_node(node, gfp | __GFP_COMP, s->fl_pg_order);
 		if (unlikely(!pg)) {
 			q->large_alloc_failed++;
 			break;       /* fall back to single pages */
@@ -623,7 +634,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 alloc_small_pages:
 	while (n--) {
-		pg = __dev_alloc_page(gfp);
+		pg = alloc_pages_node(node, gfp, 0);
 		if (unlikely(!pg)) {
 			q->alloc_failed++;
 			break;
 		}
@@ -652,7 +663,7 @@ out:	cred = q->avail - cred;
 	q->pend_cred += cred;
 	ring_fl_db(adap, q);
 
-	if (unlikely(fl_starving(q))) {
+	if (unlikely(fl_starving(adap, q))) {
 		smp_wmb();
 		set_bit(q->cntxt_id - adap->sge.egr_start,
 			adap->sge.starving_fl);
@@ -719,6 +730,22 @@ static void *alloc_ring(struct device *dev, size_t nelem, size_t elem_size,
  */
 static inline unsigned int sgl_len(unsigned int n)
 {
+	/* A Direct Scatter Gather List uses 32-bit lengths and 64-bit PCI DMA
+	 * addresses.  The DSGL Work Request starts off with a 32-bit DSGL
+	 * ULPTX header, then Length0, then Address0, then, for 1 <= i <= N,
+	 * repeated sequences of { Length[i], Length[i+1], Address[i],
+	 * Address[i+1] } (this ensures that all addresses are on 64-bit
+	 * boundaries).  If N is even, then Length[N+1] should be set to 0 and
+	 * Address[N+1] is omitted.
+	 *
+	 * The following calculation incorporates all of the above.  It's
+	 * somewhat hard to follow but, briefly: the "+2" accounts for the
+	 * first two flits which include the DSGL header, Length0 and
+	 * Address0; the "(3*(n-1))/2" covers the main body of list entries (3
+	 * flits for every pair of the remaining N) +1 if (n-1) is odd; and
+	 * finally the "+((n-1)&1)" adds the one remaining flit needed if
+	 * (n-1) is odd ...
+	 */
 	n--;
 	return (3 * n) / 2 + (n & 1) + 2;
 }
@@ -766,12 +793,30 @@ static inline unsigned int calc_tx_flits(const struct sk_buff *skb)
 	unsigned int flits;
 	int hdrlen = is_eth_imm(skb);
 
+	/* If the skb is small enough, we can pump it out as a work request
+	 * with only immediate data.  In that case we just have to have the
+	 * TX Packet header plus the skb data in the Work Request.
+	 */
+
 	if (hdrlen)
 		return DIV_ROUND_UP(skb->len + hdrlen, sizeof(__be64));
 
+	/* Otherwise, we're going to have to construct a Scatter gather list
+	 * of the skb body and fragments.  We also include the flits necessary
+	 * for the TX Packet Work Request and CPL.  We always have a firmware
+	 * Write Header (incorporated as part of the cpl_tx_pkt_lso and
+	 * cpl_tx_pkt structures), followed by either a TX Packet Write CPL
+	 * message or, if we're doing a Large Send Offload, an LSO CPL message
+	 * with an embedded TX Packet Write CPL message.
+	 */
 	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 4;
 	if (skb_shinfo(skb)->gso_size)
-		flits += 2;
+		flits += (sizeof(struct fw_eth_tx_pkt_wr) +
+			  sizeof(struct cpl_tx_pkt_lso_core) +
+			  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
+	else
+		flits += (sizeof(struct fw_eth_tx_pkt_wr) +
+			  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
 	return flits;
 }
 
@@ -1044,6 +1089,38 @@ static inline void txq_advance(struct sge_txq *q, unsigned int n)
 		q->pidx -= q->size;
 }
 
+#ifdef CONFIG_CHELSIO_T4_FCOE
+static inline int
+cxgb_fcoe_offload(struct sk_buff *skb, struct adapter *adap,
+		  const struct port_info *pi, u64 *cntrl)
+{
+	const struct cxgb_fcoe *fcoe = &pi->fcoe;
+
+	if (!(fcoe->flags & CXGB_FCOE_ENABLED))
+		return 0;
+
+	if (skb->protocol != htons(ETH_P_FCOE))
+		return 0;
+
+	skb_reset_mac_header(skb);
+	skb->mac_len = sizeof(struct ethhdr);
+
+	skb_set_network_header(skb, skb->mac_len);
+	skb_set_transport_header(skb, skb->mac_len + sizeof(struct fcoe_hdr));
+
+	if (!cxgb_fcoe_sof_eof_supported(adap, skb))
+		return -ENOTSUPP;
+
+	/* FC CRC offload */
+	*cntrl = TXPKT_CSUM_TYPE(TX_CSUM_FCOE) |
+		 TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS |
+		 TXPKT_CSUM_START(CXGB_FCOE_TXPKT_CSUM_START) |
+		 TXPKT_CSUM_END(CXGB_FCOE_TXPKT_CSUM_END) |
+		 TXPKT_CSUM_LOC(CXGB_FCOE_TXPKT_CSUM_END);
+	return 0;
+}
+#endif /* CONFIG_CHELSIO_T4_FCOE */
+
 /**
  * t4_eth_xmit - add a packet to an Ethernet Tx queue
  * @skb: the packet
@@ -1066,6 +1143,9 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	const struct skb_shared_info *ssi;
 	dma_addr_t addr[MAX_SKB_FRAGS + 1];
 	bool immediate = false;
+#ifdef CONFIG_CHELSIO_T4_FCOE
+	int err;
+#endif /* CONFIG_CHELSIO_T4_FCOE */
 
 	/*
 	 * The chip min packet length is 10 octets but play safe and reject
@@ -1082,6 +1162,13 @@ out_free:	dev_kfree_skb_any(skb);
 	q = &adap->sge.ethtxq[qidx + pi->first_qset];
 
 	reclaim_completed_tx(adap, &q->q, true);
+	cntrl = TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS;
+
+#ifdef CONFIG_CHELSIO_T4_FCOE
+	err = cxgb_fcoe_offload(skb, adap, pi, &cntrl);
+	if (unlikely(err == -ENOTSUPP))
+		goto out_free;
+#endif /* CONFIG_CHELSIO_T4_FCOE */
 
 	flits = calc_tx_flits(skb);
 	ndesc = flits_to_desc(flits);
@@ -1153,13 +1240,17 @@ out_free:	dev_kfree_skb_any(skb);
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS;
 			q->tx_cso++;
-		} else
-			cntrl = TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS;
+		}
 	}
 
 	if (skb_vlan_tag_present(skb)) {
 		q->vlan_ins++;
 		cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(skb_vlan_tag_get(skb));
+#ifdef CONFIG_CHELSIO_T4_FCOE
+		if (skb->protocol == htons(ETH_P_FCOE))
+			cntrl |= TXPKT_VLAN(
+				 ((skb->priority & 0x7) << VLAN_PRIO_SHIFT));
+#endif /* CONFIG_CHELSIO_T4_FCOE */
 	}
 
 	cpl->ctrl0 = htonl(TXPKT_OPCODE(CPL_TX_PKT_XT) |
@@ -1759,6 +1850,9 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
 	struct sge *s = &q->adap->sge;
 	int cpl_trace_pkt = is_t4(q->adap->params.chip) ?
 			    CPL_TRACE_PKT : CPL_TRACE_PKT_T5;
+#ifdef CONFIG_CHELSIO_T4_FCOE
+	struct port_info *pi;
+#endif
 
 	if (unlikely(*(u8 *)rsp == cpl_trace_pkt))
 		return handle_trace_pkt(q->adap, si);
@@ -1799,8 +1893,24 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
 			skb->ip_summed = CHECKSUM_COMPLETE;
 			rxq->stats.rx_cso++;
 		}
-	} else
+	} else {
 		skb_checksum_none_assert(skb);
+#ifdef CONFIG_CHELSIO_T4_FCOE
+#define CPL_RX_PKT_FLAGS (RXF_PSH_F | RXF_SYN_F | RXF_UDP_F | \
+			  RXF_TCP_F | RXF_IP_F | RXF_IP6_F | RXF_LRO_F)
+
+		pi = netdev_priv(skb->dev);
+		if (!(pkt->l2info & cpu_to_be32(CPL_RX_PKT_FLAGS))) {
+			if ((pkt->l2info & cpu_to_be32(RXF_FCOE_F)) &&
+			    (pi->fcoe.flags & CXGB_FCOE_ENABLED)) {
+				if (!(pkt->err_vec & cpu_to_be16(RXERR_CSUM_F)))
+					skb->ip_summed = CHECKSUM_UNNECESSARY;
+			}
+		}
+
+#undef CPL_RX_PKT_FLAGS
+#endif /* CONFIG_CHELSIO_T4_FCOE */
+	}
 
 	if (unlikely(pkt->vlan_ex)) {
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan));
@@ -1900,7 +2010,7 @@ static int process_responses(struct sge_rspq *q, int budget)
 		if (!is_new_response(rc, q))
 			break;
 
-		rmb();
+		dma_rmb();
 		rsp_type = RSPD_TYPE(rc->type_gen);
 		if (likely(rsp_type == RSP_TYPE_FLBUF)) {
 			struct page_frag *fp;
@@ -2092,7 +2202,7 @@ static unsigned int process_intrq(struct adapter *adap)
 		if (!is_new_response(rc, q))
 			break;
 
-		rmb();
+		dma_rmb();
 		if (RSPD_TYPE(rc->type_gen) == RSP_TYPE_INTR) {
 			unsigned int qid = ntohl(rc->pldbuflen_qid);
 
@@ -2128,7 +2238,8 @@ static irqreturn_t t4_intr_msi(int irq, void *cookie)
 {
 	struct adapter *adap = cookie;
 
-	t4_slow_intr_handler(adap);
+	if (adap->flags & MASTER_PF)
+		t4_slow_intr_handler(adap);
 	process_intrq(adap);
 	return IRQ_HANDLED;
 }
@@ -2143,7 +2254,8 @@ static irqreturn_t t4_intr_intx(int irq, void *cookie)
 	struct adapter *adap = cookie;
 
 	t4_write_reg(adap, MYPF_REG(PCIE_PF_CLI_A), 0);
-	if (t4_slow_intr_handler(adap) | process_intrq(adap))
+	if (((adap->flags & MASTER_PF) && t4_slow_intr_handler(adap)) |
+	    process_intrq(adap))
 		return IRQ_HANDLED;
 	return IRQ_NONE;             /* probably shared interrupt */
 }
@@ -2171,7 +2283,7 @@ static void sge_rx_timer_cb(unsigned long data)
 	struct adapter *adap = (struct adapter *)data;
 	struct sge *s = &adap->sge;
 
-	for (i = 0; i < ARRAY_SIZE(s->starving_fl); i++)
+	for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
 		for (m = s->starving_fl[i]; m; m &= m - 1) {
 			struct sge_eth_rxq *rxq;
 			unsigned int id = __ffs(m) + i * BITS_PER_LONG;
@@ -2180,7 +2292,7 @@ static void sge_rx_timer_cb(unsigned long data)
 			clear_bit(id, s->starving_fl);
 			smp_mb__after_atomic();
 
-			if (fl_starving(fl)) {
+			if (fl_starving(adap, fl)) {
 				rxq = container_of(fl, struct sge_eth_rxq, fl);
 				if (napi_reschedule(&rxq->rspq.napi))
 					fl->starving++;
@@ -2259,7 +2371,7 @@ static void sge_tx_timer_cb(unsigned long data)
 	struct adapter *adap = (struct adapter *)data;
 	struct sge *s = &adap->sge;
 
-	for (i = 0; i < ARRAY_SIZE(s->txq_maperr); i++)
+	for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
 		for (m = s->txq_maperr[i]; m; m &= m - 1) {
 			unsigned long id = __ffs(m) + i * BITS_PER_LONG;
 			struct sge_ofld_txq *txq = s->egr_map[id];
@@ -2741,7 +2853,8 @@ void t4_free_sge_resources(struct adapter *adap)
 		free_rspq_fl(adap, &adap->sge.intrq, NULL);
 
 	/* clear the reverse egress queue map */
-	memset(adap->sge.egr_map, 0, sizeof(adap->sge.egr_map));
+	memset(adap->sge.egr_map, 0,
+	       adap->sge.egr_sz * sizeof(*adap->sge.egr_map));
 }
 
 void t4_sge_start(struct adapter *adap)
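For readers working through the new sgl_len() comment above, the sketch below is a standalone user-space illustration of that flit arithmetic, not part of the driver or of this patch. The helper names sgl_len_flits() and dsgl_words() are made up for the example; only the closed-form expression comes from the diff. dsgl_words() walks the DSGL layout the comment describes (ULPTX header, Length0 and Address0, then paired lengths and addresses with a zero-length pad when the remaining entry count is odd) so the formula can be cross-checked.

/* Standalone illustration of the DSGL flit arithmetic; compile with any C
 * compiler, e.g. "cc -o sgl sgl.c" and run it.
 */
#include <stdio.h>

/* Same arithmetic as the kernel's sgl_len(): 64-bit flits needed for a
 * Direct Scatter Gather List with n scatter/gather entries.
 */
static unsigned int sgl_len_flits(unsigned int n)
{
	n--;
	return (3 * n) / 2 + (n & 1) + 2;
}

/* Count 32-bit words by walking the layout from the comment: one word of
 * ULPTX header, one word Length0, two words Address0, then three words
 * (one length, one 64-bit address) per remaining entry, plus a zero
 * Length pad when the remaining count is odd so addresses stay 64-bit
 * aligned.
 */
static unsigned int dsgl_words(unsigned int n)
{
	unsigned int words = 1 + 1 + 2;		/* header + Length0 + Address0 */
	unsigned int rem = n - 1;		/* entries beyond the first */

	words += rem * 3;
	if (rem & 1)
		words += 1;			/* Length[N+1] = 0 pad */
	return words;
}

int main(void)
{
	unsigned int n;

	/* The two columns should agree: the formula equals the walked
	 * layout rounded up to whole flits (two 32-bit words per flit).
	 */
	for (n = 1; n <= 8; n++)
		printf("n=%u  formula=%u flits  layout=%u words (%u flits)\n",
		       n, sgl_len_flits(n), dsgl_words(n),
		       (dsgl_words(n) + 1) / 2);
	return 0;
}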