summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 20:41:05 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 20:41:05 +0300
commitb0f85fa11aefc4f3e03306b4cd47f113bd57dcba (patch)
tree1333d36d99fde3f97210795941fc246f0ad08a75 /drivers/net/ethernet/intel/fm10k/fm10k_pci.c
parentccc9d4a6d640cbde05d519edeb727881646cf71b (diff)
parentf32bfb9a8ca083f8d148ea90ae5ba66f4831836e (diff)
downloadlinux-b0f85fa11aefc4f3e03306b4cd47f113bd57dcba.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: Changes of note: 1) Allow to schedule ICMP packets in IPVS, from Alex Gartrell. 2) Provide FIB table ID in ipv4 route dumps just as ipv6 does, from David Ahern. 3) Allow the user to ask for the statistics to be filtered out of ipv4/ipv6 address netlink dumps. From Sowmini Varadhan. 4) More work to pass the network namespace context around deep into various packet path APIs, starting with the netfilter hooks. From Eric W Biederman. 5) Add layer 2 TX/RX checksum offloading to qeth driver, from Thomas Richter. 6) Use usec resolution for SYN/ACK RTTs in TCP, from Yuchung Cheng. 7) Support Very High Throughput in wireless MESH code, from Bob Copeland. 8) Allow setting the ageing_time in switchdev/rocker. From Scott Feldman. 9) Properly autoload L2TP type modules, from Stephen Hemminger. 10) Fix and enable offload features by default in 8139cp driver, from David Woodhouse. 11) Support both ipv4 and ipv6 sockets in a single vxlan device, from Jiri Benc. 12) Fix CWND limiting of thin streams in TCP, from Bendik Rønning Opstad. 13) Fix IPSEC flowcache overflows on large systems, from Steffen Klassert. 14) Convert bridging to track VLANs using rhashtable entries rather than a bitmap. From Nikolay Aleksandrov. 15) Make TCP listener handling completely lockless, this is a major accomplishment. Incoming request sockets now live in the established hash table just like any other socket too. From Eric Dumazet. 15) Provide more bridging attributes to netlink, from Nikolay Aleksandrov. 16) Use hash based algorithm for ipv4 multipath routing, this was very long overdue. From Peter Nørlund. 17) Several y2038 cures, mostly avoiding timespec. From Arnd Bergmann. 18) Allow non-root execution of EBPF programs, from Alexei Starovoitov. 19) Support SO_INCOMING_CPU as setsockopt, from Eric Dumazet. This influences the port binding selection logic used by SO_REUSEPORT. 20) Add ipv6 support to VRF, from David Ahern. 21) Add support for Mellanox Spectrum switch ASIC, from Jiri Pirko. 22) Add rtl8xxxu Realtek wireless driver, from Jes Sorensen. 23) Implement RACK loss recovery in TCP, from Yuchung Cheng. 24) Support multipath routes in MPLS, from Roopa Prabhu. 25) Fix POLLOUT notification for listening sockets in AF_UNIX, from Eric Dumazet. 26) Add new QED Qlogic river, from Yuval Mintz, Manish Chopra, and Sudarsana Kalluru. 27) Don't fetch timestamps on AF_UNIX sockets, from Hannes Frederic Sowa. 28) Support ipv6 geneve tunnels, from John W Linville. 29) Add flood control support to switchdev layer, from Ido Schimmel. 30) Fix CHECKSUM_PARTIAL handling of potentially fragmented frames, from Hannes Frederic Sowa. 31) Support persistent maps and progs in bpf, from Daniel Borkmann. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1790 commits) sh_eth: use DMA barriers switchdev: respect SKIP_EOPNOTSUPP flag in case there is no recursion net: sched: kill dead code in sch_choke.c irda: Delete an unnecessary check before the function call "irlmp_unregister_service" net: dsa: mv88e6xxx: include DSA ports in VLANs net: dsa: mv88e6xxx: disable SA learning for DSA and CPU ports net/core: fix for_each_netdev_feature vlan: Invoke driver vlan hooks only if device is present arcnet/com20020: add LEDS_CLASS dependency bpf, verifier: annotate verbose printer with __printf dp83640: Only wait for timestamps for packets with timestamping enabled. ptp: Change ptp_class to a proper bitmask dp83640: Prune rx timestamp list before reading from it dp83640: Delay scheduled work. dp83640: Include hash in timestamp/packet matching ipv6: fix tunnel error handling net/mlx5e: Fix LSO vlan insertion net/mlx5e: Re-eanble client vlan TX acceleration net/mlx5e: Return error in case mlx5e_set_features() fails net/mlx5e: Don't allow more than max supported channels ...
Diffstat (limited to 'drivers/net/ethernet/intel/fm10k/fm10k_pci.c')
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pci.c198
1 files changed, 164 insertions, 34 deletions
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index ce53ff25f88d..74be792f3f1b 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -170,6 +170,21 @@ static void fm10k_reinit(struct fm10k_intfc *interface)
/* reassociate interrupts */
fm10k_mbx_request_irq(interface);
+ /* update hardware address for VFs if perm_addr has changed */
+ if (hw->mac.type == fm10k_mac_vf) {
+ if (is_valid_ether_addr(hw->mac.perm_addr)) {
+ ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
+ ether_addr_copy(netdev->perm_addr, hw->mac.perm_addr);
+ ether_addr_copy(netdev->dev_addr, hw->mac.perm_addr);
+ netdev->addr_assign_type &= ~NET_ADDR_RANDOM;
+ }
+
+ if (hw->mac.vlan_override)
+ netdev->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+ else
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_RX;
+ }
+
/* reset clock */
fm10k_ts_reset(interface);
@@ -259,8 +274,6 @@ static void fm10k_watchdog_update_host_state(struct fm10k_intfc *interface)
* @interface: board private structure
*
* This function will process both the upstream and downstream mailboxes.
- * It is necessary for us to hold the rtnl_lock while doing this as the
- * mailbox accesses are protected by this lock.
**/
static void fm10k_mbx_subtask(struct fm10k_intfc *interface)
{
@@ -315,6 +328,9 @@ void fm10k_update_stats(struct fm10k_intfc *interface)
{
struct net_device_stats *net_stats = &interface->netdev->stats;
struct fm10k_hw *hw = &interface->hw;
+ u64 hw_csum_tx_good = 0, hw_csum_rx_good = 0, rx_length_errors = 0;
+ u64 rx_switch_errors = 0, rx_drops = 0, rx_pp_errors = 0;
+ u64 rx_link_errors = 0;
u64 rx_errors = 0, rx_csum_errors = 0, tx_csum_errors = 0;
u64 restart_queue = 0, tx_busy = 0, alloc_failed = 0;
u64 rx_bytes_nic = 0, rx_pkts_nic = 0, rx_drops_nic = 0;
@@ -334,6 +350,7 @@ void fm10k_update_stats(struct fm10k_intfc *interface)
tx_csum_errors += tx_ring->tx_stats.csum_err;
bytes += tx_ring->stats.bytes;
pkts += tx_ring->stats.packets;
+ hw_csum_tx_good += tx_ring->tx_stats.csum_good;
}
interface->restart_queue = restart_queue;
@@ -341,6 +358,8 @@ void fm10k_update_stats(struct fm10k_intfc *interface)
net_stats->tx_bytes = bytes;
net_stats->tx_packets = pkts;
interface->tx_csum_errors = tx_csum_errors;
+ interface->hw_csum_tx_good = hw_csum_tx_good;
+
/* gather some stats to the interface struct that are per queue */
for (bytes = 0, pkts = 0, i = 0; i < interface->num_rx_queues; i++) {
struct fm10k_ring *rx_ring = interface->rx_ring[i];
@@ -350,12 +369,24 @@ void fm10k_update_stats(struct fm10k_intfc *interface)
alloc_failed += rx_ring->rx_stats.alloc_failed;
rx_csum_errors += rx_ring->rx_stats.csum_err;
rx_errors += rx_ring->rx_stats.errors;
+ hw_csum_rx_good += rx_ring->rx_stats.csum_good;
+ rx_switch_errors += rx_ring->rx_stats.switch_errors;
+ rx_drops += rx_ring->rx_stats.drops;
+ rx_pp_errors += rx_ring->rx_stats.pp_errors;
+ rx_link_errors += rx_ring->rx_stats.link_errors;
+ rx_length_errors += rx_ring->rx_stats.length_errors;
}
net_stats->rx_bytes = bytes;
net_stats->rx_packets = pkts;
interface->alloc_failed = alloc_failed;
interface->rx_csum_errors = rx_csum_errors;
+ interface->hw_csum_rx_good = hw_csum_rx_good;
+ interface->rx_switch_errors = rx_switch_errors;
+ interface->rx_drops = rx_drops;
+ interface->rx_pp_errors = rx_pp_errors;
+ interface->rx_link_errors = rx_link_errors;
+ interface->rx_length_errors = rx_length_errors;
hw->mac.ops.update_hw_stats(hw, &interface->stats);
@@ -483,7 +514,7 @@ static void fm10k_service_task(struct work_struct *work)
interface = container_of(work, struct fm10k_intfc, service_task);
- /* tasks always capable of running, but must be rtnl protected */
+ /* tasks run even when interface is down */
fm10k_mbx_subtask(interface);
fm10k_detach_subtask(interface);
fm10k_reset_subtask(interface);
@@ -663,6 +694,10 @@ static void fm10k_configure_rx_ring(struct fm10k_intfc *interface,
/* assign default VLAN to queue */
ring->vid = hw->mac.default_vid;
+ /* if we have an active VLAN, disable default VID */
+ if (test_bit(hw->mac.default_vid, interface->active_vlans))
+ ring->vid |= FM10K_VLAN_CLEAR;
+
/* Map interrupt */
if (ring->q_vector) {
rxint = ring->q_vector->v_idx + NON_Q_VECTORS(hw);
@@ -861,10 +896,12 @@ void fm10k_netpoll(struct net_device *netdev)
#endif
#define FM10K_ERR_MSG(type) case (type): error = #type; break
-static void fm10k_print_fault(struct fm10k_intfc *interface, int type,
+static void fm10k_handle_fault(struct fm10k_intfc *interface, int type,
struct fm10k_fault *fault)
{
struct pci_dev *pdev = interface->pdev;
+ struct fm10k_hw *hw = &interface->hw;
+ struct fm10k_iov_data *iov_data = interface->iov_data;
char *error;
switch (type) {
@@ -918,6 +955,30 @@ static void fm10k_print_fault(struct fm10k_intfc *interface, int type,
"%s Address: 0x%llx SpecInfo: 0x%x Func: %02x.%0x\n",
error, fault->address, fault->specinfo,
PCI_SLOT(fault->func), PCI_FUNC(fault->func));
+
+ /* For VF faults, clear out the respective LPORT, reset the queue
+ * resources, and then reconnect to the mailbox. This allows the
+ * VF in question to resume behavior. For transient faults that are
+ * the result of non-malicious behavior this will log the fault and
+ * allow the VF to resume functionality. Obviously for malicious VFs
+ * they will be able to attempt malicious behavior again. In this
+ * case, the system administrator will need to step in and manually
+ * remove or disable the VF in question.
+ */
+ if (fault->func && iov_data) {
+ int vf = fault->func - 1;
+ struct fm10k_vf_info *vf_info = &iov_data->vf_info[vf];
+
+ hw->iov.ops.reset_lport(hw, vf_info);
+ hw->iov.ops.reset_resources(hw, vf_info);
+
+ /* reset_lport disables the VF, so re-enable it */
+ hw->iov.ops.set_lport(hw, vf_info, vf,
+ FM10K_VF_FLAG_MULTI_CAPABLE);
+
+ /* reset_resources will disconnect from the mbx */
+ vf_info->mbx.ops.connect(hw, &vf_info->mbx);
+ }
}
static void fm10k_report_fault(struct fm10k_intfc *interface, u32 eicr)
@@ -941,7 +1002,7 @@ static void fm10k_report_fault(struct fm10k_intfc *interface, u32 eicr)
continue;
}
- fm10k_print_fault(interface, type, &fault);
+ fm10k_handle_fault(interface, type, &fault);
}
}
@@ -1705,22 +1766,86 @@ static int fm10k_sw_init(struct fm10k_intfc *interface,
static void fm10k_slot_warn(struct fm10k_intfc *interface)
{
- struct device *dev = &interface->pdev->dev;
+ enum pcie_link_width width = PCIE_LNK_WIDTH_UNKNOWN;
+ enum pci_bus_speed speed = PCI_SPEED_UNKNOWN;
struct fm10k_hw *hw = &interface->hw;
+ int max_gts = 0, expected_gts = 0;
+
+ if (pcie_get_minimum_link(interface->pdev, &speed, &width) ||
+ speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN) {
+ dev_warn(&interface->pdev->dev,
+ "Unable to determine PCI Express bandwidth.\n");
+ return;
+ }
+
+ switch (speed) {
+ case PCIE_SPEED_2_5GT:
+ /* 8b/10b encoding reduces max throughput by 20% */
+ max_gts = 2 * width;
+ break;
+ case PCIE_SPEED_5_0GT:
+ /* 8b/10b encoding reduces max throughput by 20% */
+ max_gts = 4 * width;
+ break;
+ case PCIE_SPEED_8_0GT:
+ /* 128b/130b encoding has less than 2% impact on throughput */
+ max_gts = 8 * width;
+ break;
+ default:
+ dev_warn(&interface->pdev->dev,
+ "Unable to determine PCI Express bandwidth.\n");
+ return;
+ }
+
+ dev_info(&interface->pdev->dev,
+ "PCI Express bandwidth of %dGT/s available\n",
+ max_gts);
+ dev_info(&interface->pdev->dev,
+ "(Speed:%s, Width: x%d, Encoding Loss:%s, Payload:%s)\n",
+ (speed == PCIE_SPEED_8_0GT ? "8.0GT/s" :
+ speed == PCIE_SPEED_5_0GT ? "5.0GT/s" :
+ speed == PCIE_SPEED_2_5GT ? "2.5GT/s" :
+ "Unknown"),
+ hw->bus.width,
+ (speed == PCIE_SPEED_2_5GT ? "20%" :
+ speed == PCIE_SPEED_5_0GT ? "20%" :
+ speed == PCIE_SPEED_8_0GT ? "<2%" :
+ "Unknown"),
+ (hw->bus.payload == fm10k_bus_payload_128 ? "128B" :
+ hw->bus.payload == fm10k_bus_payload_256 ? "256B" :
+ hw->bus.payload == fm10k_bus_payload_512 ? "512B" :
+ "Unknown"));
- if (hw->mac.ops.is_slot_appropriate(hw))
+ switch (hw->bus_caps.speed) {
+ case fm10k_bus_speed_2500:
+ /* 8b/10b encoding reduces max throughput by 20% */
+ expected_gts = 2 * hw->bus_caps.width;
+ break;
+ case fm10k_bus_speed_5000:
+ /* 8b/10b encoding reduces max throughput by 20% */
+ expected_gts = 4 * hw->bus_caps.width;
+ break;
+ case fm10k_bus_speed_8000:
+ /* 128b/130b encoding has less than 2% impact on throughput */
+ expected_gts = 8 * hw->bus_caps.width;
+ break;
+ default:
+ dev_warn(&interface->pdev->dev,
+ "Unable to determine expected PCI Express bandwidth.\n");
return;
+ }
- dev_warn(dev,
- "For optimal performance, a %s %s slot is recommended.\n",
- (hw->bus_caps.width == fm10k_bus_width_pcie_x1 ? "x1" :
- hw->bus_caps.width == fm10k_bus_width_pcie_x4 ? "x4" :
- "x8"),
- (hw->bus_caps.speed == fm10k_bus_speed_2500 ? "2.5GT/s" :
- hw->bus_caps.speed == fm10k_bus_speed_5000 ? "5.0GT/s" :
- "8.0GT/s"));
- dev_warn(dev,
- "A slot with more lanes and/or higher speed is suggested.\n");
+ if (max_gts < expected_gts) {
+ dev_warn(&interface->pdev->dev,
+ "This device requires %dGT/s of bandwidth for optimal performance.\n",
+ expected_gts);
+ dev_warn(&interface->pdev->dev,
+ "A %sslot with x%d lanes is suggested.\n",
+ (hw->bus_caps.speed == fm10k_bus_speed_2500 ? "2.5GT/s " :
+ hw->bus_caps.speed == fm10k_bus_speed_5000 ? "5.0GT/s " :
+ hw->bus_caps.speed == fm10k_bus_speed_8000 ? "8.0GT/s " : ""),
+ hw->bus_caps.width);
+ }
}
/**
@@ -1739,7 +1864,6 @@ static int fm10k_probe(struct pci_dev *pdev,
{
struct net_device *netdev;
struct fm10k_intfc *interface;
- struct fm10k_hw *hw;
int err;
err = pci_enable_device_mem(pdev);
@@ -1783,7 +1907,6 @@ static int fm10k_probe(struct pci_dev *pdev,
interface->netdev = netdev;
interface->pdev = pdev;
- hw = &interface->hw;
interface->uc_addr = ioremap(pci_resource_start(pdev, 0),
FM10K_UC_ADDR_SIZE);
@@ -1825,24 +1948,12 @@ static int fm10k_probe(struct pci_dev *pdev,
/* Register PTP interface */
fm10k_ptp_register(interface);
- /* print bus type/speed/width info */
- dev_info(&pdev->dev, "(PCI Express:%s Width: %s Payload: %s)\n",
- (hw->bus.speed == fm10k_bus_speed_8000 ? "8.0GT/s" :
- hw->bus.speed == fm10k_bus_speed_5000 ? "5.0GT/s" :
- hw->bus.speed == fm10k_bus_speed_2500 ? "2.5GT/s" :
- "Unknown"),
- (hw->bus.width == fm10k_bus_width_pcie_x8 ? "x8" :
- hw->bus.width == fm10k_bus_width_pcie_x4 ? "x4" :
- hw->bus.width == fm10k_bus_width_pcie_x1 ? "x1" :
- "Unknown"),
- (hw->bus.payload == fm10k_bus_payload_128 ? "128B" :
- hw->bus.payload == fm10k_bus_payload_256 ? "256B" :
- hw->bus.payload == fm10k_bus_payload_512 ? "512B" :
- "Unknown"));
-
/* print warning for non-optimal configurations */
fm10k_slot_warn(interface);
+ /* report MAC address for logging */
+ dev_info(&pdev->dev, "%pM\n", netdev->dev_addr);
+
/* enable SR-IOV after registering netdev to enforce PF/VF ordering */
fm10k_iov_configure(pdev, 0);
@@ -1983,6 +2094,16 @@ static int fm10k_resume(struct pci_dev *pdev)
if (err)
return err;
+ /* assume host is not ready, to prevent race with watchdog in case we
+ * actually don't have connection to the switch
+ */
+ interface->host_ready = false;
+ fm10k_watchdog_host_not_ready(interface);
+
+ /* clear the service task disable bit to allow service task to start */
+ clear_bit(__FM10K_SERVICE_DISABLE, &interface->state);
+ fm10k_service_event_schedule(interface);
+
/* restore SR-IOV interface */
fm10k_iov_resume(pdev);
@@ -2010,6 +2131,15 @@ static int fm10k_suspend(struct pci_dev *pdev,
fm10k_iov_suspend(pdev);
+ /* the watchdog tasks may read registers, which will appear like a
+ * surprise-remove event once the PCI device is disabled. This will
+ * cause us to close the netdevice, so we don't retain the open/closed
+ * state post-resume. Prevent this by disabling the service task while
+ * suspended, until we actually resume.
+ */
+ set_bit(__FM10K_SERVICE_DISABLE, &interface->state);
+ cancel_work_sync(&interface->service_task);
+
rtnl_lock();
if (netif_running(netdev))