summaryrefslogtreecommitdiff
path: root/drivers/net/hyperv
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-04-29 21:57:23 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2021-04-29 21:57:23 +0300
commit9d31d2338950293ec19d9b095fbaa9030899dcb4 (patch)
treee688040d0557c24a2eeb9f6c9c223d949f6f7ef9 /drivers/net/hyperv
parent635de956a7f5a6ffcb04f29d70630c64c717b56b (diff)
parent4a52dd8fefb45626dace70a63c0738dbd83b7edb (diff)
downloadlinux-9d31d2338950293ec19d9b095fbaa9030899dcb4.tar.xz
Merge tag 'net-next-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - bpf: - allow bpf programs calling kernel functions (initially to reuse TCP congestion control implementations) - enable task local storage for tracing programs - remove the need to store per-task state in hash maps, and allow tracing programs access to task local storage previously added for BPF_LSM - add bpf_for_each_map_elem() helper, allowing programs to walk all map elements in a more robust and easier to verify fashion - sockmap: support UDP and cross-protocol BPF_SK_SKB_VERDICT redirection - lpm: add support for batched ops in LPM trie - add BTF_KIND_FLOAT support - mostly to allow use of BTF on s390 which has floats in its headers files - improve BPF syscall documentation and extend the use of kdoc parsing scripts we already employ for bpf-helpers - libbpf, bpftool: support static linking of BPF ELF files - improve support for encapsulation of L2 packets - xdp: restructure redirect actions to avoid a runtime lookup, improving performance by 4-8% in microbenchmarks - xsk: build skb by page (aka generic zerocopy xmit) - improve performance of software AF_XDP path by 33% for devices which don't need headers in the linear skb part (e.g. virtio) - nexthop: resilient next-hop groups - improve path stability on next-hops group changes (incl. offload for mlxsw) - ipv6: segment routing: add support for IPv4 decapsulation - icmp: add support for RFC 8335 extended PROBE messages - inet: use bigger hash table for IP ID generation - tcp: deal better with delayed TX completions - make sure we don't give up on fast TCP retransmissions only because driver is slow in reporting that it completed transmitting the original - tcp: reorder tcp_congestion_ops for better cache locality - mptcp: - add sockopt support for common TCP options - add support for common TCP msg flags - include multiple address ids in RM_ADDR - add reset option support for resetting one subflow - udp: GRO L4 improvements - improve 'forward' / 'frag_list' co-existence with UDP tunnel GRO, allowing the first to take place correctly even for encapsulated UDP traffic - micro-optimize dev_gro_receive() and flow dissection, avoid retpoline overhead on VLAN and TEB GRO - use less memory for sysctls, add a new sysctl type, to allow using u8 instead of "int" and "long" and shrink networking sysctls - veth: allow GRO without XDP - this allows aggregating UDP packets before handing them off to routing, bridge, OvS, etc. - allow specifing ifindex when device is moved to another namespace - netfilter: - nft_socket: add support for cgroupsv2 - nftables: add catch-all set element - special element used to define a default action in case normal lookup missed - use net_generic infra in many modules to avoid allocating per-ns memory unnecessarily - xps: improve the xps handling to avoid potential out-of-bound accesses and use-after-free when XPS change race with other re-configuration under traffic - add a config knob to turn off per-cpu netdev refcnt to catch underflows in testing Device APIs: - add WWAN subsystem to organize the WWAN interfaces better and hopefully start driving towards more unified and vendor- independent APIs - ethtool: - add interface for reading IEEE MIB stats (incl. mlx5 and bnxt support) - allow network drivers to dump arbitrary SFP EEPROM data, current offset+length API was a poor fit for modern SFP which define EEPROM in terms of pages (incl. mlx5 support) - act_police, flow_offload: add support for packet-per-second policing (incl. offload for nfp) - psample: add additional metadata attributes like transit delay for packets sampled from switch HW (and corresponding egress and policy-based sampling in the mlxsw driver) - dsa: improve support for sandwiched LAGs with bridge and DSA - netfilter: - flowtable: use direct xmit in topologies with IP forwarding, bridging, vlans etc. - nftables: counter hardware offload support - Bluetooth: - improvements for firmware download w/ Intel devices - add support for reading AOSP vendor capabilities - add support for virtio transport driver - mac80211: - allow concurrent monitor iface and ethernet rx decap - set priority and queue mapping for injected frames - phy: add support for Clause-45 PHY Loopback - pci/iov: add sysfs MSI-X vector assignment interface to distribute MSI-X resources to VFs (incl. mlx5 support) New hardware/drivers: - dsa: mv88e6xxx: add support for Marvell mv88e6393x - 11-port Ethernet switch with 8x 1-Gigabit Ethernet and 3x 10-Gigabit interfaces. - dsa: support for legacy Broadcom tags used on BCM5325, BCM5365 and BCM63xx switches - Microchip KSZ8863 and KSZ8873; 3x 10/100Mbps Ethernet switches - ath11k: support for QCN9074 a 802.11ax device - Bluetooth: Broadcom BCM4330 and BMC4334 - phy: Marvell 88X2222 transceiver support - mdio: add BCM6368 MDIO mux bus controller - r8152: support RTL8153 and RTL8156 (USB Ethernet) chips - mana: driver for Microsoft Azure Network Adapter (MANA) - Actions Semi Owl Ethernet MAC - can: driver for ETAS ES58X CAN/USB interfaces Pure driver changes: - add XDP support to: enetc, igc, stmmac - add AF_XDP support to: stmmac - virtio: - page_to_skb() use build_skb when there's sufficient tailroom (21% improvement for 1000B UDP frames) - support XDP even without dedicated Tx queues - share the Tx queues with the stack when necessary - mlx5: - flow rules: add support for mirroring with conntrack, matching on ICMP, GTP, flex filters and more - support packet sampling with flow offloads - persist uplink representor netdev across eswitch mode changes - allow coexistence of CQE compression and HW time-stamping - add ethtool extended link error state reporting - ice, iavf: support flow filters, UDP Segmentation Offload - dpaa2-switch: - move the driver out of staging - add spanning tree (STP) support - add rx copybreak support - add tc flower hardware offload on ingress traffic - ionic: - implement Rx page reuse - support HW PTP time-stamping - octeon: support TC hardware offloads - flower matching on ingress and egress ratelimitting. - stmmac: - add RX frame steering based on VLAN priority in tc flower - support frame preemption (FPE) - intel: add cross time-stamping freq difference adjustment - ocelot: - support forwarding of MRP frames in HW - support multiple bridges - support PTP Sync one-step timestamping - dsa: mv88e6xxx, dpaa2-switch: offload bridge port flags like learning, flooding etc. - ipa: add IPA v4.5, v4.9 and v4.11 support (Qualcomm SDX55, SM8350, SC7280 SoCs) - mt7601u: enable TDLS support - mt76: - add support for 802.3 rx frames (mt7915/mt7615) - mt7915 flash pre-calibration support - mt7921/mt7663 runtime power management fixes" * tag 'net-next-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2451 commits) net: selftest: fix build issue if INET is disabled net: netrom: nr_in: Remove redundant assignment to ns net: tun: Remove redundant assignment to ret net: phy: marvell: add downshift support for M88E1240 net: dsa: ksz: Make reg_mib_cnt a u8 as it never exceeds 255 net/sched: act_ct: Remove redundant ct get and check icmp: standardize naming of RFC 8335 PROBE constants bpf, selftests: Update array map tests for per-cpu batched ops bpf: Add batched ops support for percpu array bpf: Implement formatted output helpers with bstr_printf seq_file: Add a seq_bprintf function sfc: adjust efx->xdp_tx_queue_count with the real number of initialized queues net:nfc:digital: Fix a double free in digital_tg_recv_dep_req net: fix a concurrency bug in l2tp_tunnel_register() net/smc: Remove redundant assignment to rc mpls: Remove redundant assignment to err llc2: Remove redundant assignment to rc net/tls: Remove redundant initialization of record rds: Remove redundant assignment to nr_sig dt-bindings: net: mdio-gpio: add compatible for microchip,mdio-smi0 ...
Diffstat (limited to 'drivers/net/hyperv')
-rw-r--r--drivers/net/hyperv/hyperv_net.h6
-rw-r--r--drivers/net/hyperv/netvsc.c55
-rw-r--r--drivers/net/hyperv/netvsc_drv.c65
3 files changed, 91 insertions, 35 deletions
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 59ac04a610ad..442c520ab8f3 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -269,7 +269,7 @@ int rndis_filter_receive(struct net_device *ndev,
int rndis_filter_set_device_mac(struct netvsc_device *ndev,
const char *mac);
-void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
+int netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
#define NVSP_INVALID_PROTOCOL_VERSION ((u32)0xFFFFFFFF)
@@ -1718,4 +1718,8 @@ struct rndis_message {
#define TRANSPORT_INFO_IPV6_TCP 0x10
#define TRANSPORT_INFO_IPV6_UDP 0x20
+#define RETRY_US_LO 5000
+#define RETRY_US_HI 10000
+#define RETRY_MAX 2000 /* >10 sec */
+
#endif /* _HYPERV_NET_H */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index c64cc7639c39..9d07c9ce4be2 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -31,12 +31,13 @@
* Switch the data path from the synthetic interface to the VF
* interface.
*/
-void netvsc_switch_datapath(struct net_device *ndev, bool vf)
+int netvsc_switch_datapath(struct net_device *ndev, bool vf)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct hv_device *dev = net_device_ctx->device_ctx;
struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
+ int ret, retry = 0;
/* Block sending traffic to VF if it's about to be gone */
if (!vf)
@@ -51,15 +52,41 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
init_pkt->msg.v4_msg.active_dp.active_datapath =
NVSP_DATAPATH_SYNTHETIC;
+again:
trace_nvsp_send(ndev, init_pkt);
- vmbus_sendpacket(dev->channel, init_pkt,
+ ret = vmbus_sendpacket(dev->channel, init_pkt,
sizeof(struct nvsp_message),
- (unsigned long)init_pkt,
- VM_PKT_DATA_INBAND,
+ (unsigned long)init_pkt, VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+
+ /* If failed to switch to/from VF, let data_path_is_vf stay false,
+ * so we use synthetic path to send data.
+ */
+ if (ret) {
+ if (ret != -EAGAIN) {
+ netdev_err(ndev,
+ "Unable to send sw datapath msg, err: %d\n",
+ ret);
+ return ret;
+ }
+
+ if (retry++ < RETRY_MAX) {
+ usleep_range(RETRY_US_LO, RETRY_US_HI);
+ goto again;
+ } else {
+ netdev_err(
+ ndev,
+ "Retry failed to send sw datapath msg, err: %d\n",
+ ret);
+ return ret;
+ }
+ }
+
wait_for_completion(&nv_dev->channel_init_wait);
net_device_ctx->data_path_is_vf = vf;
+
+ return 0;
}
/* Worker to setup sub channels on initial setup
@@ -1017,6 +1044,26 @@ static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
}
/* RCU already held by caller */
+/* Batching/bouncing logic is designed to attempt to optimize
+ * performance.
+ *
+ * For small, non-LSO packets we copy the packet to a send buffer
+ * which is pre-registered with the Hyper-V side. This enables the
+ * hypervisor to avoid remapping the aperture to access the packet
+ * descriptor and data.
+ *
+ * If we already started using a buffer and the netdev is transmitting
+ * a burst of packets, keep on copying into the buffer until it is
+ * full or we are done collecting a burst. If there is an existing
+ * buffer with space for the RNDIS descriptor but not the packet, copy
+ * the RNDIS descriptor to the buffer, keeping the packet in place.
+ *
+ * If we do batching and send more than one packet using a single
+ * NetVSC message, free the SKBs of the packets copied, except for the
+ * last packet. This is done to streamline the handling of the case
+ * where the last packet only had the RNDIS descriptor copied to the
+ * send buffer, with the data pointers included in the NetVSC message.
+ */
int netvsc_send(struct net_device *ndev,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 15f262b70489..f682a5572d84 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -38,9 +38,6 @@
#include "hyperv_net.h"
#define RING_SIZE_MIN 64
-#define RETRY_US_LO 5000
-#define RETRY_US_HI 10000
-#define RETRY_MAX 2000 /* >10 sec */
#define LINKCHANGE_INT (2 * HZ)
#define VF_TAKEOVER_INT (HZ / 10)
@@ -1612,34 +1609,23 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
switch (stringset) {
case ETH_SS_STATS:
- for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) {
- memcpy(p, netvsc_stats[i].name, ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- }
+ for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
+ ethtool_sprintf(&p, netvsc_stats[i].name);
- for (i = 0; i < ARRAY_SIZE(vf_stats); i++) {
- memcpy(p, vf_stats[i].name, ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- }
+ for (i = 0; i < ARRAY_SIZE(vf_stats); i++)
+ ethtool_sprintf(&p, vf_stats[i].name);
for (i = 0; i < nvdev->num_chn; i++) {
- sprintf(p, "tx_queue_%u_packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "tx_queue_%u_bytes", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_bytes", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_xdp_drop", i);
- p += ETH_GSTRING_LEN;
+ ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+ ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
+ ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+ ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
+ ethtool_sprintf(&p, "rx_queue_%u_xdp_drop", i);
}
for_each_present_cpu(cpu) {
- for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++) {
- sprintf(p, pcpu_stats[i].name, cpu);
- p += ETH_GSTRING_LEN;
- }
+ for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++)
+ ethtool_sprintf(&p, pcpu_stats[i].name, cpu);
}
break;
@@ -2311,6 +2297,7 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
{
struct device *parent = vf_netdev->dev.parent;
struct net_device_context *ndev_ctx;
+ struct net_device *ndev;
struct pci_dev *pdev;
u32 serial;
@@ -2333,8 +2320,17 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
if (!ndev_ctx->vf_alloc)
continue;
- if (ndev_ctx->vf_serial == serial)
- return hv_get_drvdata(ndev_ctx->device_ctx);
+ if (ndev_ctx->vf_serial != serial)
+ continue;
+
+ ndev = hv_get_drvdata(ndev_ctx->device_ctx);
+ if (ndev->addr_len != vf_netdev->addr_len ||
+ memcmp(ndev->perm_addr, vf_netdev->perm_addr,
+ ndev->addr_len) != 0)
+ continue;
+
+ return ndev;
+
}
netdev_notice(vf_netdev,
@@ -2413,6 +2409,7 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event)
struct netvsc_device *netvsc_dev;
struct net_device *ndev;
bool vf_is_up = false;
+ int ret;
if (event != NETDEV_GOING_DOWN)
vf_is_up = netif_running(vf_netdev);
@@ -2429,9 +2426,17 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event)
if (net_device_ctx->data_path_is_vf == vf_is_up)
return NOTIFY_OK;
- netvsc_switch_datapath(ndev, vf_is_up);
- netdev_info(ndev, "Data path switched %s VF: %s\n",
- vf_is_up ? "to" : "from", vf_netdev->name);
+ ret = netvsc_switch_datapath(ndev, vf_is_up);
+
+ if (ret) {
+ netdev_err(ndev,
+ "Data path failed to switch %s VF: %s, err: %d\n",
+ vf_is_up ? "to" : "from", vf_netdev->name, ret);
+ return NOTIFY_DONE;
+ } else {
+ netdev_info(ndev, "Data path switched %s VF: %s\n",
+ vf_is_up ? "to" : "from", vf_netdev->name);
+ }
return NOTIFY_OK;
}