summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/intel/ixgbe
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-10-09 05:40:54 +0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-09 05:40:54 +0400
commit35a9ad8af0bb0fa3525e6d0d20e32551d226f38e (patch)
tree15b4b33206818886d9cff371fd2163e073b70568 /drivers/net/ethernet/intel/ixgbe
parentd5935b07da53f74726e2a65dd4281d0f2c70e5d4 (diff)
parent64b1f00a0830e1c53874067273a096b228d83d36 (diff)
downloadlinux-35a9ad8af0bb0fa3525e6d0d20e32551d226f38e.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Most notable changes in here: 1) By far the biggest accomplishment, thanks to a large range of contributors, is the addition of multi-send for transmit. This is the result of discussions back in Chicago, and the hard work of several individuals. Now, when the ->ndo_start_xmit() method of a driver sees skb->xmit_more as true, it can choose to defer the doorbell telling the driver to start processing the new TX queue entires. skb->xmit_more means that the generic networking is guaranteed to call the driver immediately with another SKB to send. There is logic added to the qdisc layer to dequeue multiple packets at a time, and the handling mis-predicted offloads in software is now done with no locks held. Finally, pktgen is extended to have a "burst" parameter that can be used to test a multi-send implementation. Several drivers have xmit_more support: i40e, igb, ixgbe, mlx4, virtio_net Adding support is almost trivial, so export more drivers to support this optimization soon. I want to thank, in no particular or implied order, Jesper Dangaard Brouer, Eric Dumazet, Alexander Duyck, Tom Herbert, Jamal Hadi Salim, John Fastabend, Florian Westphal, Daniel Borkmann, David Tat, Hannes Frederic Sowa, and Rusty Russell. 2) PTP and timestamping support in bnx2x, from Michal Kalderon. 3) Allow adjusting the rx_copybreak threshold for a driver via ethtool, and add rx_copybreak support to enic driver. From Govindarajulu Varadarajan. 4) Significant enhancements to the generic PHY layer and the bcm7xxx driver in particular (EEE support, auto power down, etc.) from Florian Fainelli. 5) Allow raw buffers to be used for flow dissection, allowing drivers to determine the optimal "linear pull" size for devices that DMA into pools of pages. The objective is to get exactly the necessary amount of headers into the linear SKB area pre-pulled, but no more. The new interface drivers use is eth_get_headlen(). From WANG Cong, with driver conversions (several had their own by-hand duplicated implementations) by Alexander Duyck and Eric Dumazet. 6) Support checksumming more smoothly and efficiently for encapsulations, and add "foo over UDP" facility. From Tom Herbert. 7) Add Broadcom SF2 switch driver to DSA layer, from Florian Fainelli. 8) eBPF now can load programs via a system call and has an extensive testsuite. Alexei Starovoitov and Daniel Borkmann. 9) Major overhaul of the packet scheduler to use RCU in several major areas such as the classifiers and rate estimators. From John Fastabend. 10) Add driver for Intel FM10000 Ethernet Switch, from Alexander Duyck. 11) Rearrange TCP_SKB_CB() to reduce cache line misses, from Eric Dumazet. 12) Add Datacenter TCP congestion control algorithm support, From Florian Westphal. 13) Reorganize sk_buff so that __copy_skb_header() is significantly faster. From Eric Dumazet" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1558 commits) netlabel: directly return netlbl_unlabel_genl_init() net: add netdev_txq_bql_{enqueue, complete}_prefetchw() helpers net: description of dma_cookie cause make xmldocs warning cxgb4: clean up a type issue cxgb4: potential shift wrapping bug i40e: skb->xmit_more support net: fs_enet: Add NAPI TX net: fs_enet: Remove non NAPI RX r8169:add support for RTL8168EP net_sched: copy exts->type in tcf_exts_change() wimax: convert printk to pr_foo() af_unix: remove 0 assignment on static ipv6: Do not warn for informational ICMP messages, regardless of type. Update Intel Ethernet Driver maintainers list bridge: Save frag_max_size between PRE_ROUTING and POST_ROUTING tipc: fix bug in multicast congestion handling net: better IFF_XMIT_DST_RELEASE support net/mlx4_en: remove NETDEV_TX_BUSY 3c59x: fix bad split of cpu_to_le32(pci_map_single()) net: bcmgenet: fix Tx ring priority programming ...
Diffstat (limited to 'drivers/net/ethernet/intel/ixgbe')
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h117
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c7
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c160
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c316
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c41
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c14
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_type.h7
7 files changed, 253 insertions, 409 deletions
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index ac9f2148cdc5..5032a602d5c9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -307,7 +307,6 @@ enum ixgbe_ring_f_enum {
#define MAX_RX_QUEUES (IXGBE_MAX_FDIR_INDICES + 1)
#define MAX_TX_QUEUES (IXGBE_MAX_FDIR_INDICES + 1)
#define IXGBE_MAX_L2A_QUEUES 4
-#define IXGBE_MAX_L2A_QUEUES 4
#define IXGBE_BAD_L2A_QUEUE 3
#define IXGBE_MAX_MACVLANS 31
#define IXGBE_MAX_DCBMACVLANS 8
@@ -386,119 +385,87 @@ struct ixgbe_q_vector {
char name[IFNAMSIZ + 9];
#ifdef CONFIG_NET_RX_BUSY_POLL
- unsigned int state;
-#define IXGBE_QV_STATE_IDLE 0
-#define IXGBE_QV_STATE_NAPI 1 /* NAPI owns this QV */
-#define IXGBE_QV_STATE_POLL 2 /* poll owns this QV */
-#define IXGBE_QV_STATE_DISABLED 4 /* QV is disabled */
-#define IXGBE_QV_OWNED (IXGBE_QV_STATE_NAPI | IXGBE_QV_STATE_POLL)
-#define IXGBE_QV_LOCKED (IXGBE_QV_OWNED | IXGBE_QV_STATE_DISABLED)
-#define IXGBE_QV_STATE_NAPI_YIELD 8 /* NAPI yielded this QV */
-#define IXGBE_QV_STATE_POLL_YIELD 16 /* poll yielded this QV */
-#define IXGBE_QV_YIELD (IXGBE_QV_STATE_NAPI_YIELD | IXGBE_QV_STATE_POLL_YIELD)
-#define IXGBE_QV_USER_PEND (IXGBE_QV_STATE_POLL | IXGBE_QV_STATE_POLL_YIELD)
- spinlock_t lock;
+ atomic_t state;
#endif /* CONFIG_NET_RX_BUSY_POLL */
/* for dynamic allocation of rings associated with this q_vector */
struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp;
};
+
#ifdef CONFIG_NET_RX_BUSY_POLL
+enum ixgbe_qv_state_t {
+ IXGBE_QV_STATE_IDLE = 0,
+ IXGBE_QV_STATE_NAPI,
+ IXGBE_QV_STATE_POLL,
+ IXGBE_QV_STATE_DISABLE
+};
+
static inline void ixgbe_qv_init_lock(struct ixgbe_q_vector *q_vector)
{
-
- spin_lock_init(&q_vector->lock);
- q_vector->state = IXGBE_QV_STATE_IDLE;
+ /* reset state to idle */
+ atomic_set(&q_vector->state, IXGBE_QV_STATE_IDLE);
}
/* called from the device poll routine to get ownership of a q_vector */
static inline bool ixgbe_qv_lock_napi(struct ixgbe_q_vector *q_vector)
{
- int rc = true;
- spin_lock_bh(&q_vector->lock);
- if (q_vector->state & IXGBE_QV_LOCKED) {
- WARN_ON(q_vector->state & IXGBE_QV_STATE_NAPI);
- q_vector->state |= IXGBE_QV_STATE_NAPI_YIELD;
- rc = false;
+ int rc = atomic_cmpxchg(&q_vector->state, IXGBE_QV_STATE_IDLE,
+ IXGBE_QV_STATE_NAPI);
#ifdef BP_EXTENDED_STATS
+ if (rc != IXGBE_QV_STATE_IDLE)
q_vector->tx.ring->stats.yields++;
#endif
- } else {
- /* we don't care if someone yielded */
- q_vector->state = IXGBE_QV_STATE_NAPI;
- }
- spin_unlock_bh(&q_vector->lock);
- return rc;
+
+ return rc == IXGBE_QV_STATE_IDLE;
}
/* returns true is someone tried to get the qv while napi had it */
-static inline bool ixgbe_qv_unlock_napi(struct ixgbe_q_vector *q_vector)
+static inline void ixgbe_qv_unlock_napi(struct ixgbe_q_vector *q_vector)
{
- int rc = false;
- spin_lock_bh(&q_vector->lock);
- WARN_ON(q_vector->state & (IXGBE_QV_STATE_POLL |
- IXGBE_QV_STATE_NAPI_YIELD));
-
- if (q_vector->state & IXGBE_QV_STATE_POLL_YIELD)
- rc = true;
- /* will reset state to idle, unless QV is disabled */
- q_vector->state &= IXGBE_QV_STATE_DISABLED;
- spin_unlock_bh(&q_vector->lock);
- return rc;
+ WARN_ON(atomic_read(&q_vector->state) != IXGBE_QV_STATE_NAPI);
+
+ /* flush any outstanding Rx frames */
+ if (q_vector->napi.gro_list)
+ napi_gro_flush(&q_vector->napi, false);
+
+ /* reset state to idle */
+ atomic_set(&q_vector->state, IXGBE_QV_STATE_IDLE);
}
/* called from ixgbe_low_latency_poll() */
static inline bool ixgbe_qv_lock_poll(struct ixgbe_q_vector *q_vector)
{
- int rc = true;
- spin_lock_bh(&q_vector->lock);
- if ((q_vector->state & IXGBE_QV_LOCKED)) {
- q_vector->state |= IXGBE_QV_STATE_POLL_YIELD;
- rc = false;
+ int rc = atomic_cmpxchg(&q_vector->state, IXGBE_QV_STATE_IDLE,
+ IXGBE_QV_STATE_POLL);
#ifdef BP_EXTENDED_STATS
- q_vector->rx.ring->stats.yields++;
+ if (rc != IXGBE_QV_STATE_IDLE)
+ q_vector->tx.ring->stats.yields++;
#endif
- } else {
- /* preserve yield marks */
- q_vector->state |= IXGBE_QV_STATE_POLL;
- }
- spin_unlock_bh(&q_vector->lock);
- return rc;
+ return rc == IXGBE_QV_STATE_IDLE;
}
/* returns true if someone tried to get the qv while it was locked */
-static inline bool ixgbe_qv_unlock_poll(struct ixgbe_q_vector *q_vector)
+static inline void ixgbe_qv_unlock_poll(struct ixgbe_q_vector *q_vector)
{
- int rc = false;
- spin_lock_bh(&q_vector->lock);
- WARN_ON(q_vector->state & (IXGBE_QV_STATE_NAPI));
-
- if (q_vector->state & IXGBE_QV_STATE_POLL_YIELD)
- rc = true;
- /* will reset state to idle, unless QV is disabled */
- q_vector->state &= IXGBE_QV_STATE_DISABLED;
- spin_unlock_bh(&q_vector->lock);
- return rc;
+ WARN_ON(atomic_read(&q_vector->state) != IXGBE_QV_STATE_POLL);
+
+ /* reset state to idle */
+ atomic_set(&q_vector->state, IXGBE_QV_STATE_IDLE);
}
/* true if a socket is polling, even if it did not get the lock */
static inline bool ixgbe_qv_busy_polling(struct ixgbe_q_vector *q_vector)
{
- WARN_ON(!(q_vector->state & IXGBE_QV_OWNED));
- return q_vector->state & IXGBE_QV_USER_PEND;
+ return atomic_read(&q_vector->state) == IXGBE_QV_STATE_POLL;
}
/* false if QV is currently owned */
static inline bool ixgbe_qv_disable(struct ixgbe_q_vector *q_vector)
{
- int rc = true;
- spin_lock_bh(&q_vector->lock);
- if (q_vector->state & IXGBE_QV_OWNED)
- rc = false;
- q_vector->state |= IXGBE_QV_STATE_DISABLED;
- spin_unlock_bh(&q_vector->lock);
-
- return rc;
+ int rc = atomic_cmpxchg(&q_vector->state, IXGBE_QV_STATE_IDLE,
+ IXGBE_QV_STATE_DISABLE);
+
+ return rc == IXGBE_QV_STATE_IDLE;
}
#else /* CONFIG_NET_RX_BUSY_POLL */
@@ -643,9 +610,7 @@ struct ixgbe_adapter {
* thus the additional *_CAPABLE flags.
*/
u32 flags;
-#define IXGBE_FLAG_MSI_CAPABLE (u32)(1 << 0)
#define IXGBE_FLAG_MSI_ENABLED (u32)(1 << 1)
-#define IXGBE_FLAG_MSIX_CAPABLE (u32)(1 << 2)
#define IXGBE_FLAG_MSIX_ENABLED (u32)(1 << 3)
#define IXGBE_FLAG_RX_1BUF_CAPABLE (u32)(1 << 4)
#define IXGBE_FLAG_RX_PS_CAPABLE (u32)(1 << 5)
@@ -760,8 +725,6 @@ struct ixgbe_adapter {
u8 __iomem *io_addr; /* Mainly for iounmap use */
u32 wol;
- u16 bd_number;
-
u16 eeprom_verh;
u16 eeprom_verl;
u16 eeprom_cap;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index e4100b5737b6..3ce4a258f945 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -1303,7 +1303,7 @@ static const struct ixgbe_reg_test reg_test_82599[] = {
{ IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
{ IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x8001FFFF, 0x800CFFFF },
{ IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { 0, 0, 0, 0 }
+ { .reg = 0 }
};
/* default 82598 register test */
@@ -1331,7 +1331,7 @@ static const struct ixgbe_reg_test reg_test_82598[] = {
{ IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
{ IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x800CFFFF, 0x800CFFFF },
{ IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { 0, 0, 0, 0 }
+ { .reg = 0 }
};
static bool reg_pattern_test(struct ixgbe_adapter *adapter, u64 *data, int reg,
@@ -2267,7 +2267,6 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count)
adapter->tx_itr_setting = adapter->rx_itr_setting;
-#if IS_ENABLED(CONFIG_BQL)
/* detect ITR changes that require update of TXDCTL.WTHRESH */
if ((adapter->tx_itr_setting != 1) &&
(adapter->tx_itr_setting < IXGBE_100K_ITR)) {
@@ -2279,7 +2278,7 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
(tx_itr_prev < IXGBE_100K_ITR))
need_reset = true;
}
-#endif
+
/* check the old value and enable RSC if necessary */
need_reset |= ixgbe_update_rsc(adapter);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index 2d9451e39686..ce40c77381e9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -696,46 +696,83 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
ixgbe_set_rss_queues(adapter);
}
-static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter,
- int vectors)
+/**
+ * ixgbe_acquire_msix_vectors - acquire MSI-X vectors
+ * @adapter: board private structure
+ *
+ * Attempts to acquire a suitable range of MSI-X vector interrupts. Will
+ * return a negative error code if unable to acquire MSI-X vectors for any
+ * reason.
+ */
+static int ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter)
{
- int vector_threshold;
+ struct ixgbe_hw *hw = &adapter->hw;
+ int i, vectors, vector_threshold;
+
+ /* We start by asking for one vector per queue pair */
+ vectors = max(adapter->num_rx_queues, adapter->num_tx_queues);
- /* We'll want at least 2 (vector_threshold):
- * 1) TxQ[0] + RxQ[0] handler
- * 2) Other (Link Status Change, etc.)
+ /* It is easy to be greedy for MSI-X vectors. However, it really
+ * doesn't do much good if we have a lot more vectors than CPUs. We'll
+ * be somewhat conservative and only ask for (roughly) the same number
+ * of vectors as there are CPUs.
*/
- vector_threshold = MIN_MSIX_COUNT;
+ vectors = min_t(int, vectors, num_online_cpus());
- /*
- * The more we get, the more we will assign to Tx/Rx Cleanup
- * for the separate queues...where Rx Cleanup >= Tx Cleanup.
- * Right now, we simply care about how many we'll get; we'll
- * set them up later while requesting irq's.
+ /* Some vectors are necessary for non-queue interrupts */
+ vectors += NON_Q_VECTORS;
+
+ /* Hardware can only support a maximum of hw.mac->max_msix_vectors.
+ * With features such as RSS and VMDq, we can easily surpass the
+ * number of Rx and Tx descriptor queues supported by our device.
+ * Thus, we cap the maximum in the rare cases where the CPU count also
+ * exceeds our vector limit
*/
+ vectors = min_t(int, vectors, hw->mac.max_msix_vectors);
+
+ /* We want a minimum of two MSI-X vectors for (1) a TxQ[0] + RxQ[0]
+ * handler, and (2) an Other (Link Status Change, etc.) handler.
+ */
+ vector_threshold = MIN_MSIX_COUNT;
+
+ adapter->msix_entries = kcalloc(vectors,
+ sizeof(struct msix_entry),
+ GFP_KERNEL);
+ if (!adapter->msix_entries)
+ return -ENOMEM;
+
+ for (i = 0; i < vectors; i++)
+ adapter->msix_entries[i].entry = i;
+
vectors = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
vector_threshold, vectors);
if (vectors < 0) {
- /* Can't allocate enough MSI-X interrupts? Oh well.
- * This just means we'll go with either a single MSI
- * vector or fall back to legacy interrupts.
+ /* A negative count of allocated vectors indicates an error in
+ * acquiring within the specified range of MSI-X vectors
*/
- netif_printk(adapter, hw, KERN_DEBUG, adapter->netdev,
- "Unable to allocate MSI-X interrupts\n");
+ e_dev_warn("Failed to allocate MSI-X interrupts. Err: %d\n",
+ vectors);
+
adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
kfree(adapter->msix_entries);
adapter->msix_entries = NULL;
- } else {
- adapter->flags |= IXGBE_FLAG_MSIX_ENABLED; /* Woot! */
- /*
- * Adjust for only the vectors we'll use, which is minimum
- * of max_msix_q_vectors + NON_Q_VECTORS, or the number of
- * vectors we were allocated.
- */
- vectors -= NON_Q_VECTORS;
- adapter->num_q_vectors = min(vectors, adapter->max_q_vectors);
+
+ return vectors;
}
+
+ /* we successfully allocated some number of vectors within our
+ * requested range.
+ */
+ adapter->flags |= IXGBE_FLAG_MSIX_ENABLED;
+
+ /* Adjust for only the vectors we'll use, which is minimum
+ * of max_q_vectors, or the number of vectors we were allocated.
+ */
+ vectors -= NON_Q_VECTORS;
+ adapter->num_q_vectors = min_t(int, vectors, adapter->max_q_vectors);
+
+ return 0;
}
static void ixgbe_add_ring(struct ixgbe_ring *ring,
@@ -807,6 +844,11 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
ixgbe_poll, 64);
napi_hash_add(&q_vector->napi);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ /* initialize busy poll */
+ atomic_set(&q_vector->state, IXGBE_QV_STATE_DISABLE);
+
+#endif
/* tie q_vector and adapter together */
adapter->q_vector[v_idx] = q_vector;
q_vector->adapter = adapter;
@@ -1049,46 +1091,20 @@ static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
**/
static void ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
{
- struct ixgbe_hw *hw = &adapter->hw;
- int vector, v_budget, err;
+ int err;
- /*
- * It's easy to be greedy for MSI-X vectors, but it really
- * doesn't do us much good if we have a lot more vectors
- * than CPU's. So let's be conservative and only ask for
- * (roughly) the same number of vectors as there are CPU's.
- * The default is to use pairs of vectors.
- */
- v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
- v_budget = min_t(int, v_budget, num_online_cpus());
- v_budget += NON_Q_VECTORS;
+ /* We will try to get MSI-X interrupts first */
+ if (!ixgbe_acquire_msix_vectors(adapter))
+ return;
- /*
- * At the same time, hardware can only support a maximum of
- * hw.mac->max_msix_vectors vectors. With features
- * such as RSS and VMDq, we can easily surpass the number of Rx and Tx
- * descriptor queues supported by our device. Thus, we cap it off in
- * those rare cases where the cpu count also exceeds our vector limit.
+ /* At this point, we do not have MSI-X capabilities. We need to
+ * reconfigure or disable various features which require MSI-X
+ * capability.
*/
- v_budget = min_t(int, v_budget, hw->mac.max_msix_vectors);
-
- /* A failure in MSI-X entry allocation isn't fatal, but it does
- * mean we disable MSI-X capabilities of the adapter. */
- adapter->msix_entries = kcalloc(v_budget,
- sizeof(struct msix_entry), GFP_KERNEL);
- if (adapter->msix_entries) {
- for (vector = 0; vector < v_budget; vector++)
- adapter->msix_entries[vector].entry = vector;
-
- ixgbe_acquire_msix_vectors(adapter, v_budget);
-
- if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
- return;
- }
- /* disable DCB if number of TCs exceeds 1 */
+ /* Disable DCB unless we only have a single traffic class */
if (netdev_get_num_tc(adapter->netdev) > 1) {
- e_err(probe, "num TCs exceeds number of queues - disabling DCB\n");
+ e_dev_warn("Number of DCB TCs exceeds number of available queues. Disabling DCB support.\n");
netdev_reset_tc(adapter->netdev);
if (adapter->hw.mac.type == ixgbe_mac_82598EB)
@@ -1098,26 +1114,30 @@ static void ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
adapter->temp_dcb_cfg.pfc_mode_enable = false;
adapter->dcb_cfg.pfc_mode_enable = false;
}
+
adapter->dcb_cfg.num_tcs.pg_tcs = 1;
adapter->dcb_cfg.num_tcs.pfc_tcs = 1;
- /* disable SR-IOV */
+ /* Disable SR-IOV support */
+ e_dev_warn("Disabling SR-IOV support\n");
ixgbe_disable_sriov(adapter);
- /* disable RSS */
+ /* Disable RSS */
+ e_dev_warn("Disabling RSS support\n");
adapter->ring_feature[RING_F_RSS].limit = 1;
+ /* recalculate number of queues now that many features have been
+ * changed or disabled.
+ */
ixgbe_set_num_queues(adapter);
adapter->num_q_vectors = 1;
err = pci_enable_msi(adapter->pdev);
- if (err) {
- netif_printk(adapter, hw, KERN_DEBUG, adapter->netdev,
- "Unable to allocate MSI interrupt, falling back to legacy. Error: %d\n",
- err);
- return;
- }
- adapter->flags |= IXGBE_FLAG_MSI_ENABLED;
+ if (err)
+ e_dev_warn("Failed to allocate MSI interrupt, falling back to legacy. Error: %d\n",
+ err);
+ else
+ adapter->flags |= IXGBE_FLAG_MSI_ENABLED;
}
/**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 87bd53fdd209..d677b5a23b58 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -440,7 +440,7 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = {
{IXGBE_TXDCTL(0), "TXDCTL"},
/* List Terminator */
- {}
+ { .name = NULL }
};
@@ -1094,7 +1094,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
total_packets += tx_buffer->gso_segs;
/* free the skb */
- dev_kfree_skb_any(tx_buffer->skb);
+ dev_consume_skb_any(tx_buffer->skb);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -1521,120 +1521,6 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count)
ixgbe_release_rx_desc(rx_ring, i);
}
-/**
- * ixgbe_get_headlen - determine size of header for RSC/LRO/GRO/FCOE
- * @data: pointer to the start of the headers
- * @max_len: total length of section to find headers in
- *
- * This function is meant to determine the length of headers that will
- * be recognized by hardware for LRO, GRO, and RSC offloads. The main
- * motivation of doing this is to only perform one pull for IPv4 TCP
- * packets so that we can do basic things like calculating the gso_size
- * based on the average data per packet.
- **/
-static unsigned int ixgbe_get_headlen(unsigned char *data,
- unsigned int max_len)
-{
- union {
- unsigned char *network;
- /* l2 headers */
- struct ethhdr *eth;
- struct vlan_hdr *vlan;
- /* l3 headers */
- struct iphdr *ipv4;
- struct ipv6hdr *ipv6;
- } hdr;
- __be16 protocol;
- u8 nexthdr = 0; /* default to not TCP */
- u8 hlen;
-
- /* this should never happen, but better safe than sorry */
- if (max_len < ETH_HLEN)
- return max_len;
-
- /* initialize network frame pointer */
- hdr.network = data;
-
- /* set first protocol and move network header forward */
- protocol = hdr.eth->h_proto;
- hdr.network += ETH_HLEN;
-
- /* handle any vlan tag if present */
- if (protocol == htons(ETH_P_8021Q)) {
- if ((hdr.network - data) > (max_len - VLAN_HLEN))
- return max_len;
-
- protocol = hdr.vlan->h_vlan_encapsulated_proto;
- hdr.network += VLAN_HLEN;
- }
-
- /* handle L3 protocols */
- if (protocol == htons(ETH_P_IP)) {
- if ((hdr.network - data) > (max_len - sizeof(struct iphdr)))
- return max_len;
-
- /* access ihl as a u8 to avoid unaligned access on ia64 */
- hlen = (hdr.network[0] & 0x0F) << 2;
-
- /* verify hlen meets minimum size requirements */
- if (hlen < sizeof(struct iphdr))
- return hdr.network - data;
-
- /* record next protocol if header is present */
- if (!(hdr.ipv4->frag_off & htons(IP_OFFSET)))
- nexthdr = hdr.ipv4->protocol;
- } else if (protocol == htons(ETH_P_IPV6)) {
- if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr)))
- return max_len;
-
- /* record next protocol */
- nexthdr = hdr.ipv6->nexthdr;
- hlen = sizeof(struct ipv6hdr);
-#ifdef IXGBE_FCOE
- } else if (protocol == htons(ETH_P_FCOE)) {
- if ((hdr.network - data) > (max_len - FCOE_HEADER_LEN))
- return max_len;
- hlen = FCOE_HEADER_LEN;
-#endif
- } else {
- return hdr.network - data;
- }
-
- /* relocate pointer to start of L4 header */
- hdr.network += hlen;
-
- /* finally sort out TCP/UDP */
- if (nexthdr == IPPROTO_TCP) {
- if ((hdr.network - data) > (max_len - sizeof(struct tcphdr)))
- return max_len;
-
- /* access doff as a u8 to avoid unaligned access on ia64 */
- hlen = (hdr.network[12] & 0xF0) >> 2;
-
- /* verify hlen meets minimum size requirements */
- if (hlen < sizeof(struct tcphdr))
- return hdr.network - data;
-
- hdr.network += hlen;
- } else if (nexthdr == IPPROTO_UDP) {
- if ((hdr.network - data) > (max_len - sizeof(struct udphdr)))
- return max_len;
-
- hdr.network += sizeof(struct udphdr);
- }
-
- /*
- * If everything has gone correctly hdr.network should be the
- * data section of the packet and will be the end of the header.
- * If not then it probably represents the end of the last recognized
- * header.
- */
- if ((hdr.network - data) < max_len)
- return hdr.network - data;
- else
- return max_len;
-}
-
static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring,
struct sk_buff *skb)
{
@@ -1793,7 +1679,7 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
* we need the header to contain the greater of either ETH_HLEN or
* 60 bytes if the skb->len is less than 60 for skb_pad.
*/
- pull_len = ixgbe_get_headlen(va, IXGBE_RX_HDR_SIZE);
+ pull_len = eth_get_headlen(va, IXGBE_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
@@ -2191,9 +2077,6 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
q_vector->rx.total_packets += total_rx_packets;
q_vector->rx.total_bytes += total_rx_bytes;
- if (cleaned_count)
- ixgbe_alloc_rx_buffers(rx_ring, cleaned_count);
-
return total_rx_packets;
}
@@ -3099,11 +2982,7 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
* to or less than the number of on chip descriptors, which is
* currently 40.
*/
-#if IS_ENABLED(CONFIG_BQL)
if (!ring->q_vector || (ring->q_vector->itr < IXGBE_100K_ITR))
-#else
- if (!ring->q_vector || (ring->q_vector->itr < 8))
-#endif
txdctl |= (1 << 16); /* WTHRESH = 1 */
else
txdctl |= (8 << 16); /* WTHRESH = 8 */
@@ -5300,15 +5179,15 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring)
{
struct device *dev = tx_ring->dev;
int orig_node = dev_to_node(dev);
- int numa_node = -1;
+ int ring_node = -1;
int size;
size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count;
if (tx_ring->q_vector)
- numa_node = tx_ring->q_vector->numa_node;
+ ring_node = tx_ring->q_vector->numa_node;
- tx_ring->tx_buffer_info = vzalloc_node(size, numa_node);
+ tx_ring->tx_buffer_info = vzalloc_node(size, ring_node);
if (!tx_ring->tx_buffer_info)
tx_ring->tx_buffer_info = vzalloc(size);
if (!tx_ring->tx_buffer_info)
@@ -5320,7 +5199,7 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring)
tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc);
tx_ring->size = ALIGN(tx_ring->size, 4096);
- set_dev_node(dev, numa_node);
+ set_dev_node(dev, ring_node);
tx_ring->desc = dma_alloc_coherent(dev,
tx_ring->size,
&tx_ring->dma,
@@ -5384,15 +5263,15 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
{
struct device *dev = rx_ring->dev;
int orig_node = dev_to_node(dev);
- int numa_node = -1;
+ int ring_node = -1;
int size;
size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
if (rx_ring->q_vector)
- numa_node = rx_ring->q_vector->numa_node;
+ ring_node = rx_ring->q_vector->numa_node;
- rx_ring->rx_buffer_info = vzalloc_node(size, numa_node);
+ rx_ring->rx_buffer_info = vzalloc_node(size, ring_node);
if (!rx_ring->rx_buffer_info)
rx_ring->rx_buffer_info = vzalloc(size);
if (!rx_ring->rx_buffer_info)
@@ -5404,7 +5283,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
rx_ring->size = ALIGN(rx_ring->size, 4096);
- set_dev_node(dev, numa_node);
+ set_dev_node(dev, ring_node);
rx_ring->desc = dma_alloc_coherent(dev,
rx_ring->size,
&rx_ring->dma,
@@ -6319,25 +6198,55 @@ static void ixgbe_watchdog_link_is_down(struct ixgbe_adapter *adapter)
ixgbe_ping_all_vfs(adapter);
}
+static bool ixgbe_ring_tx_pending(struct ixgbe_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
+
+ if (tx_ring->next_to_use != tx_ring->next_to_clean)
+ return true;
+ }
+
+ return false;
+}
+
+static bool ixgbe_vf_tx_pending(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
+ u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
+
+ int i, j;
+
+ if (!adapter->num_vfs)
+ return false;
+
+ for (i = 0; i < adapter->num_vfs; i++) {
+ for (j = 0; j < q_per_pool; j++) {
+ u32 h, t;
+
+ h = IXGBE_READ_REG(hw, IXGBE_PVFTDHN(q_per_pool, i, j));
+ t = IXGBE_READ_REG(hw, IXGBE_PVFTDTN(q_per_pool, i, j));
+
+ if (h != t)
+ return true;
+ }
+ }
+
+ return false;
+}
+
/**
* ixgbe_watchdog_flush_tx - flush queues on link down
* @adapter: pointer to the device adapter structure
**/
static void ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter)
{
- int i;
- int some_tx_pending = 0;
-
if (!netif_carrier_ok(adapter->netdev)) {
- for (i = 0; i < adapter->num_tx_queues; i++) {
- struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
- if (tx_ring->next_to_use != tx_ring->next_to_clean) {
- some_tx_pending = 1;
- break;
- }
- }
-
- if (some_tx_pending) {
+ if (ixgbe_ring_tx_pending(adapter) ||
+ ixgbe_vf_tx_pending(adapter)) {
/* We've lost link, so the controller stops DMA,
* but we've got queued Tx work that's never going
* to get done, so reset controller to flush Tx.
@@ -6837,6 +6746,36 @@ static void ixgbe_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc,
tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
}
+static int __ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
+{
+ netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+
+ /* Herbert's original patch had:
+ * smp_mb__after_netif_stop_queue();
+ * but since that doesn't exist yet, just open code it.
+ */
+ smp_mb();
+
+ /* We need to check again in a case another CPU has just
+ * made room available.
+ */
+ if (likely(ixgbe_desc_unused(tx_ring) < size))
+ return -EBUSY;
+
+ /* A reprieve! - use start_queue because it doesn't call schedule */
+ netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
+ ++tx_ring->tx_stats.restart_queue;
+ return 0;
+}
+
+static inline int ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
+{
+ if (likely(ixgbe_desc_unused(tx_ring) >= size))
+ return 0;
+
+ return __ixgbe_maybe_stop_tx(tx_ring, size);
+}
+
#define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \
IXGBE_TXD_CMD_RS)
@@ -6958,8 +6897,12 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring,
tx_ring->next_to_use = i;
- /* notify HW of packet */
- ixgbe_write_tail(tx_ring, i);
+ ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+ if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
+ /* notify HW of packet */
+ ixgbe_write_tail(tx_ring, i);
+ }
return;
dma_error:
@@ -7067,32 +7010,6 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
input, common, ring->queue_index);
}
-static int __ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
-{
- netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
- /* Herbert's original patch had:
- * smp_mb__after_netif_stop_queue();
- * but since that doesn't exist yet, just open code it. */
- smp_mb();
-
- /* We need to check again in a case another CPU has just
- * made room available. */
- if (likely(ixgbe_desc_unused(tx_ring) < size))
- return -EBUSY;
-
- /* A reprieve! - use start_queue because it doesn't call schedule */
- netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
- ++tx_ring->tx_stats.restart_queue;
- return 0;
-}
-
-static inline int ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
-{
- if (likely(ixgbe_desc_unused(tx_ring) >= size))
- return 0;
- return __ixgbe_maybe_stop_tx(tx_ring, size);
-}
-
static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
void *accel_priv, select_queue_fallback_t fallback)
{
@@ -7187,9 +7104,10 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
tx_flags |= IXGBE_TX_FLAGS_SW_VLAN;
}
- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
- !test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS,
- &adapter->state))) {
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+ adapter->ptp_clock &&
+ !test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS,
+ &adapter->state)) {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
tx_flags |= IXGBE_TX_FLAGS_TSTAMP;
@@ -7261,8 +7179,6 @@ xmit_fcoe:
#endif /* IXGBE_FCOE */
ixgbe_tx_map(tx_ring, first, hdr_len);
- ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
-
return NETDEV_TX_OK;
out_drop:
@@ -7735,39 +7651,13 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
const unsigned char *addr,
u16 flags)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
- int err;
-
- if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
- return ndo_dflt_fdb_add(ndm, tb, dev, addr, flags);
-
- /* Hardware does not support aging addresses so if a
- * ndm_state is given only allow permanent addresses
- */
- if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
- pr_info("%s: FDB only supports static addresses\n",
- ixgbe_driver_name);
- return -EINVAL;
- }
-
+ /* guarantee we can provide a unique filter for the unicast address */
if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
- u32 rar_uc_entries = IXGBE_MAX_PF_MACVLANS;
-
- if (netdev_uc_count(dev) < rar_uc_entries)
- err = dev_uc_add_excl(dev, addr);
- else
- err = -ENOMEM;
- } else if (is_multicast_ether_addr(addr)) {
- err = dev_mc_add_excl(dev, addr);
- } else {
- err = -EINVAL;
+ if (IXGBE_MAX_PF_MACVLANS <= netdev_uc_count(dev))
+ return -ENOMEM;
}
- /* Only return duplicate errors if NLM_F_EXCL is set */
- if (err == -EEXIST && !(flags & NLM_F_EXCL))
- err = 0;
-
- return err;
+ return ndo_dflt_fdb_add(ndm, tb, dev, addr, flags);
}
static int ixgbe_ndo_bridge_setlink(struct net_device *dev,
@@ -7830,9 +7720,17 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
{
struct ixgbe_fwd_adapter *fwd_adapter = NULL;
struct ixgbe_adapter *adapter = netdev_priv(pdev);
+ int used_pools = adapter->num_vfs + adapter->num_rx_pools;
unsigned int limit;
int pool, err;
+ /* Hardware has a limited number of available pools. Each VF, and the
+ * PF require a pool. Check to ensure we don't attempt to use more
+ * then the available number of pools.
+ */
+ if (used_pools >= IXGBE_MAX_VF_FUNCTIONS)
+ return ERR_PTR(-EINVAL);
+
#ifdef CONFIG_RPS
if (vdev->num_rx_queues != vdev->num_tx_queues) {
netdev_info(pdev, "%s: Only supports a single queue count for TX and RX\n",
@@ -8080,7 +7978,6 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct ixgbe_adapter *adapter = NULL;
struct ixgbe_hw *hw;
const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data];
- static int cards_found;
int i, err, pci_using_dac, expected_gts;
unsigned int indices = MAX_TX_QUEUES;
u8 part_str[IXGBE_PBANUM_LENGTH];
@@ -8166,8 +8063,6 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->watchdog_timeo = 5 * HZ;
strlcpy(netdev->name, pci_name(pdev), sizeof(netdev->name));
- adapter->bd_number = cards_found;
-
/* Setup hw api */
memcpy(&hw->mac.ops, ii->mac_ops, sizeof(hw->mac.ops));
hw->mac.type = ii->mac;
@@ -8451,7 +8346,6 @@ skip_sriov:
ixgbe_add_sanmac_netdev(netdev);
e_dev_info("%s\n", ixgbe_default_device_descr);
- cards_found++;
#ifdef CONFIG_IXGBE_HWMON
if (ixgbe_sysfs_init(adapter))
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index 11f02ea78c4a..d47b19f27c35 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -445,8 +445,6 @@ s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw)
{
s32 status = 0;
- u32 time_out;
- u32 max_time_out = 10;
u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
bool autoneg = false;
ixgbe_link_speed speed;
@@ -514,25 +512,6 @@ s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw)
hw->phy.ops.write_reg(hw, MDIO_CTRL1,
MDIO_MMD_AN, autoneg_reg);
- /* Wait for autonegotiation to finish */
- for (time_out = 0; time_out < max_time_out; time_out++) {
- udelay(10);
- /* Restart PHY autonegotiation and wait for completion */
- status = hw->phy.ops.read_reg(hw, MDIO_STAT1,
- MDIO_MMD_AN,
- &autoneg_reg);
-
- autoneg_reg &= MDIO_AN_STAT1_COMPLETE;
- if (autoneg_reg == MDIO_AN_STAT1_COMPLETE) {
- break;
- }
- }
-
- if (time_out == max_time_out) {
- hw_dbg(hw, "ixgbe_setup_phy_link_generic: time out\n");
- return IXGBE_ERR_LINK_SETUP;
- }
-
return status;
}
@@ -657,8 +636,6 @@ s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw)
{
s32 status;
- u32 time_out;
- u32 max_time_out = 10;
u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
bool autoneg = false;
ixgbe_link_speed speed;
@@ -724,24 +701,6 @@ s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw)
hw->phy.ops.write_reg(hw, MDIO_CTRL1,
MDIO_MMD_AN, autoneg_reg);
- /* Wait for autonegotiation to finish */
- for (time_out = 0; time_out < max_time_out; time_out++) {
- udelay(10);
- /* Restart PHY autonegotiation and wait for completion */
- status = hw->phy.ops.read_reg(hw, MDIO_STAT1,
- MDIO_MMD_AN,
- &autoneg_reg);
-
- autoneg_reg &= MDIO_AN_STAT1_COMPLETE;
- if (autoneg_reg == MDIO_AN_STAT1_COMPLETE)
- break;
- }
-
- if (time_out == max_time_out) {
- hw_dbg(hw, "ixgbe_setup_phy_link_tnx: time out\n");
- return IXGBE_ERR_LINK_SETUP;
- }
-
return status;
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index c14d4d89672f..706fc69aa0c5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -250,13 +250,15 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs)
if (err)
return err;
- /* While the SR-IOV capability structure reports total VFs to be
- * 64 we limit the actual number that can be allocated to 63 so
- * that some transmit/receive resources can be reserved to the
- * PF. The PCI bus driver already checks for other values out of
- * range.
+ /* While the SR-IOV capability structure reports total VFs to be 64,
+ * we have to limit the actual number allocated based on two factors.
+ * First, we reserve some transmit/receive resources for the PF.
+ * Second, VMDQ also uses the same pools that SR-IOV does. We need to
+ * account for this, so that we don't accidentally allocate more VFs
+ * than we have available pools. The PCI bus driver already checks for
+ * other values out of range.
*/
- if (num_vfs > IXGBE_MAX_VFS_DRV_LIMIT)
+ if ((num_vfs + adapter->num_rx_pools) > IXGBE_MAX_VF_FUNCTIONS)
return -EPERM;
adapter->num_vfs = num_vfs;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index e6b07c2a01fe..dfd55d83bc03 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -2194,6 +2194,8 @@ enum {
#define IXGBE_VFLRE(_i) ((((_i) & 1) ? 0x001C0 : 0x00600))
#define IXGBE_VFLREC(_i) (0x00700 + ((_i) * 4))
/* Translated register #defines */
+#define IXGBE_PVFTDH(P) (0x06010 + (0x40 * (P)))
+#define IXGBE_PVFTDT(P) (0x06018 + (0x40 * (P)))
#define IXGBE_PVFTDWBAL(P) (0x06038 + (0x40 * (P)))
#define IXGBE_PVFTDWBAH(P) (0x0603C + (0x40 * (P)))
@@ -2202,6 +2204,11 @@ enum {
#define IXGBE_PVFTDWBAHn(q_per_pool, vf_number, vf_q_index) \
(IXGBE_PVFTDWBAH((q_per_pool)*(vf_number) + (vf_q_index)))
+#define IXGBE_PVFTDHN(q_per_pool, vf_number, vf_q_index) \
+ (IXGBE_PVFTDH((q_per_pool)*(vf_number) + (vf_q_index)))
+#define IXGBE_PVFTDTN(q_per_pool, vf_number, vf_q_index) \
+ (IXGBE_PVFTDT((q_per_pool)*(vf_number) + (vf_q_index)))
+
enum ixgbe_fdir_pballoc_type {
IXGBE_FDIR_PBALLOC_NONE = 0,
IXGBE_FDIR_PBALLOC_64K = 1,