From 41a6328b2c55276f89ea3812069fd7521e348bbf Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 12 May 2025 17:06:02 -0700 Subject: hv_netvsc: Preserve contiguous PFN grouping in the page buffer array Starting with commit dca5161f9bd0 ("hv_netvsc: Check status in SEND_RNDIS_PKT completion message") in the 6.3 kernel, the Linux driver for Hyper-V synthetic networking (netvsc) occasionally reports "nvsp_rndis_pkt_complete error status: 2".[1] This error indicates that Hyper-V has rejected a network packet transmit request from the guest, and the outgoing network packet is dropped. Higher level network protocols presumably recover and resend the packet so there is no functional error, but performance is slightly impacted. Commit dca5161f9bd0 is not the cause of the error -- it only added reporting of an error that was already happening without any notice. The error has presumably been present since the netvsc driver was originally introduced into Linux. The root cause of the problem is that the netvsc driver in Linux may send an incorrectly formatted VMBus message to Hyper-V when transmitting the network packet. The incorrect formatting occurs when the rndis header of the VMBus message crosses a page boundary due to how the Linux skb head memory is aligned. In such a case, two PFNs are required to describe the location of the rndis header, even though they are contiguous in guest physical address (GPA) space. Hyper-V requires that two rndis header PFNs be in a single "GPA range" data struture, but current netvsc code puts each PFN in its own GPA range, which Hyper-V rejects as an error. The incorrect formatting occurs only for larger packets that netvsc must transmit via a VMBus "GPA Direct" message. There's no problem when netvsc transmits a smaller packet by copying it into a pre- allocated send buffer slot because the pre-allocated slots don't have page crossing issues. After commit 14ad6ed30a10 ("net: allow small head cache usage with large MAX_SKB_FRAGS values") in the 6.14-rc4 kernel, the error occurs much more frequently in VMs with 16 or more vCPUs. It may occur every few seconds, or even more frequently, in an ssh session that outputs a lot of text. Commit 14ad6ed30a10 subtly changes how skb head memory is allocated, making it much more likely that the rndis header will cross a page boundary when the vCPU count is 16 or more. The changes in commit 14ad6ed30a10 are perfectly valid -- they just had the side effect of making the netvsc bug more prominent. Current code in init_page_array() creates a separate page buffer array entry for each PFN required to identify the data to be transmitted. Contiguous PFNs get separate entries in the page buffer array, and any information about contiguity is lost. Fix the core issue by having init_page_array() construct the page buffer array to represent contiguous ranges rather than individual pages. When these ranges are subsequently passed to netvsc_build_mpb_array(), it can build GPA ranges that contain multiple PFNs, as required to avoid the error "nvsp_rndis_pkt_complete error status: 2". If instead the network packet is sent by copying into a pre-allocated send buffer slot, the copy proceeds using the contiguous ranges rather than individual pages, but the result of the copying is the same. Also fix rndis_filter_send_request() to construct a contiguous range, since it has its own page buffer array. This change has a side benefit in CoCo VMs in that netvsc_dma_map() calls dma_map_single() on each contiguous range instead of on each page. This results in fewer calls to dma_map_single() but on larger chunks of memory, which should reduce contention on the swiotlb. Since the page buffer array now contains one entry for each contiguous range instead of for each individual page, the number of entries in the array can be reduced, saving 208 bytes of stack space in netvsc_xmit() when MAX_SKG_FRAGS has the default value of 17. [1] https://bugzilla.kernel.org/show_bug.cgi?id=217503 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217503 Cc: # 6.1.x Signed-off-by: Michael Kelley Link: https://patch.msgid.link/20250513000604.1396-4-mhklinux@outlook.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc_drv.c | 63 ++++++++++------------------------------- 1 file changed, 15 insertions(+), 48 deletions(-) (limited to 'drivers/net/hyperv/netvsc_drv.c') diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c51b318b8a72..929f6b3de768 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -326,43 +326,10 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, return txq; } -static u32 fill_pg_buf(unsigned long hvpfn, u32 offset, u32 len, - struct hv_page_buffer *pb) -{ - int j = 0; - - hvpfn += offset >> HV_HYP_PAGE_SHIFT; - offset = offset & ~HV_HYP_PAGE_MASK; - - while (len > 0) { - unsigned long bytes; - - bytes = HV_HYP_PAGE_SIZE - offset; - if (bytes > len) - bytes = len; - pb[j].pfn = hvpfn; - pb[j].offset = offset; - pb[j].len = bytes; - - offset += bytes; - len -= bytes; - - if (offset == HV_HYP_PAGE_SIZE && len) { - hvpfn++; - offset = 0; - j++; - } - } - - return j + 1; -} - static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, struct hv_netvsc_packet *packet, struct hv_page_buffer *pb) { - u32 slots_used = 0; - char *data = skb->data; int frags = skb_shinfo(skb)->nr_frags; int i; @@ -371,28 +338,28 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, * 2. skb linear data * 3. skb fragment data */ - slots_used += fill_pg_buf(virt_to_hvpfn(hdr), - offset_in_hvpage(hdr), - len, - &pb[slots_used]); + pb[0].offset = offset_in_hvpage(hdr); + pb[0].len = len; + pb[0].pfn = virt_to_hvpfn(hdr); packet->rmsg_size = len; - packet->rmsg_pgcnt = slots_used; + packet->rmsg_pgcnt = 1; - slots_used += fill_pg_buf(virt_to_hvpfn(data), - offset_in_hvpage(data), - skb_headlen(skb), - &pb[slots_used]); + pb[1].offset = offset_in_hvpage(skb->data); + pb[1].len = skb_headlen(skb); + pb[1].pfn = virt_to_hvpfn(skb->data); for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; + struct hv_page_buffer *cur_pb = &pb[i + 2]; + u64 pfn = page_to_hvpfn(skb_frag_page(frag)); + u32 offset = skb_frag_off(frag); - slots_used += fill_pg_buf(page_to_hvpfn(skb_frag_page(frag)), - skb_frag_off(frag), - skb_frag_size(frag), - &pb[slots_used]); + cur_pb->offset = offset_in_hvpage(offset); + cur_pb->len = skb_frag_size(frag); + cur_pb->pfn = pfn + (offset >> HV_HYP_PAGE_SHIFT); } - return slots_used; + return frags + 2; } static int count_skb_frag_slots(struct sk_buff *skb) @@ -483,7 +450,7 @@ static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx) struct net_device *vf_netdev; u32 rndis_msg_size; u32 hash; - struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT]; + struct hv_page_buffer pb[MAX_DATA_RANGES]; /* If VF is present and up then redirect packets to it. * Skip the VF if it is marked down or has no carrier. -- cgit v1.2.3 From 5bbc644bbf4e97a05bc0cb052189004588ff8a09 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 12 May 2025 17:06:03 -0700 Subject: hv_netvsc: Remove rmsg_pgcnt init_page_array() now always creates a single page buffer array entry for the rndis message, even if the rndis message crosses a page boundary. As such, the number of page buffer array entries used for the rndis message must no longer be tracked -- it is always just 1. Remove the rmsg_pgcnt field and use "1" where the value is needed. Cc: # 6.1.x Signed-off-by: Michael Kelley Link: https://patch.msgid.link/20250513000604.1396-5-mhklinux@outlook.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/hyperv_net.h | 1 - drivers/net/hyperv/netvsc.c | 7 +++---- drivers/net/hyperv/netvsc_drv.c | 1 - 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/net/hyperv/netvsc_drv.c') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 76725f25abd5..cb6f5482d203 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -158,7 +158,6 @@ struct hv_netvsc_packet { u8 cp_partial; /* partial copy into send buffer */ u8 rmsg_size; /* RNDIS header and PPI size */ - u8 rmsg_pgcnt; /* page count of RNDIS header and PPI */ u8 page_buf_cnt; u16 q_idx; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 5882d6670200..720104661d7f 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -953,8 +953,7 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device, + pend_size; int i; u32 padding = 0; - u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt : - packet->page_buf_cnt; + u32 page_count = packet->cp_partial ? 1 : packet->page_buf_cnt; u32 remain; /* Add padding */ @@ -1137,7 +1136,7 @@ static inline int netvsc_send_pkt( u32 desc_size; if (packet->cp_partial) - pb += packet->rmsg_pgcnt; + pb++; ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb); if (ret) { @@ -1299,7 +1298,7 @@ int netvsc_send(struct net_device *ndev, packet->send_buf_index = section_index; if (packet->cp_partial) { - packet->page_buf_cnt -= packet->rmsg_pgcnt; + packet->page_buf_cnt--; packet->total_data_buflen = msd_len + packet->rmsg_size; } else { packet->page_buf_cnt = 0; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 929f6b3de768..d8b169ac0343 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -343,7 +343,6 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, pb[0].len = len; pb[0].pfn = virt_to_hvpfn(hdr); packet->rmsg_size = len; - packet->rmsg_pgcnt = 1; pb[1].offset = offset_in_hvpage(skb->data); pb[1].len = skb_headlen(skb); -- cgit v1.2.3 From 9ca6804ab7c34f65fcf2e29333a39e7807c30b60 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 May 2025 13:46:14 -0700 Subject: net: core: Convert dev_set_mac_address() to struct sockaddr_storage All users of dev_set_mac_address() are now using a struct sockaddr_storage. Convert the internal data type to struct sockaddr_storage, drop the casts, and update pointer types. Acked-by: Gustavo A. R. Silva Signed-off-by: Kees Cook Link: https://patch.msgid.link/20250521204619.2301870-6-kees@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_alb.c | 8 +++----- drivers/net/bonding/bond_main.c | 15 ++++++--------- drivers/net/hyperv/netvsc_drv.c | 6 +++--- drivers/net/macvlan.c | 18 +++++++++--------- drivers/net/team/team_core.c | 2 +- drivers/net/usb/r8152.c | 2 +- include/linux/netdevice.h | 2 +- net/core/dev.c | 1 + net/core/dev_api.c | 6 +++--- net/ieee802154/nl-phy.c | 2 +- net/ncsi/ncsi-manage.c | 2 +- 11 files changed, 30 insertions(+), 34 deletions(-) (limited to 'drivers/net/hyperv/netvsc_drv.c') diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 7edf0fd58c34..2d37b07c8215 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1035,7 +1035,7 @@ static int alb_set_slave_mac_addr(struct slave *slave, const u8 addr[], */ memcpy(ss.__data, addr, len); ss.ss_family = dev->type; - if (dev_set_mac_address(dev, (struct sockaddr *)&ss, NULL)) { + if (dev_set_mac_address(dev, &ss, NULL)) { slave_err(slave->bond->dev, dev, "dev_set_mac_address on slave failed! ALB mode requires that the base driver support setting the hw address also when the network device's interface is open\n"); return -EOPNOTSUPP; } @@ -1273,8 +1273,7 @@ unwind: break; bond_hw_addr_copy(tmp_addr, rollback_slave->dev->dev_addr, rollback_slave->dev->addr_len); - dev_set_mac_address(rollback_slave->dev, - (struct sockaddr *)&ss, NULL); + dev_set_mac_address(rollback_slave->dev, &ss, NULL); dev_addr_set(rollback_slave->dev, tmp_addr); } @@ -1763,8 +1762,7 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave bond->dev->addr_len); ss.ss_family = bond->dev->type; /* we don't care if it can't change its mac, best effort */ - dev_set_mac_address(new_slave->dev, (struct sockaddr *)&ss, - NULL); + dev_set_mac_address(new_slave->dev, &ss, NULL); dev_addr_set(new_slave->dev, tmp_addr); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 98cf4486fcee..c4d53e8e7c15 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1112,8 +1112,7 @@ static void bond_do_fail_over_mac(struct bonding *bond, ss.ss_family = bond->dev->type; } - rv = dev_set_mac_address(new_active->dev, - (struct sockaddr *)&ss, NULL); + rv = dev_set_mac_address(new_active->dev, &ss, NULL); if (rv) { slave_err(bond->dev, new_active->dev, "Error %d setting MAC of new active slave\n", -rv); @@ -1127,8 +1126,7 @@ static void bond_do_fail_over_mac(struct bonding *bond, new_active->dev->addr_len); ss.ss_family = old_active->dev->type; - rv = dev_set_mac_address(old_active->dev, - (struct sockaddr *)&ss, NULL); + rv = dev_set_mac_address(old_active->dev, &ss, NULL); if (rv) slave_err(bond->dev, old_active->dev, "Error %d setting MAC of old active slave\n", -rv); @@ -2127,7 +2125,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, } ss.ss_family = slave_dev->type; - res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, extack); + res = dev_set_mac_address(slave_dev, &ss, extack); if (res) { slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res); goto err_restore_mtu; @@ -2455,7 +2453,7 @@ err_restore_mac: bond_hw_addr_copy(ss.__data, new_slave->perm_hwaddr, new_slave->dev->addr_len); ss.ss_family = slave_dev->type; - dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL); + dev_set_mac_address(slave_dev, &ss, NULL); } err_restore_mtu: @@ -2649,7 +2647,7 @@ static int __bond_release_one(struct net_device *bond_dev, bond_hw_addr_copy(ss.__data, slave->perm_hwaddr, slave->dev->addr_len); ss.ss_family = slave_dev->type; - dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL); + dev_set_mac_address(slave_dev, &ss, NULL); } if (unregister) { @@ -4936,8 +4934,7 @@ unwind: if (rollback_slave == slave) break; - tmp_res = dev_set_mac_address(rollback_slave->dev, - (struct sockaddr *)&tmp_ss, NULL); + tmp_res = dev_set_mac_address(rollback_slave->dev, &tmp_ss, NULL); if (tmp_res) { slave_dbg(bond_dev, rollback_slave->dev, "%s: unwind err %d\n", __func__, tmp_res); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index d8b169ac0343..14a0d04e21ae 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1371,7 +1371,7 @@ static int netvsc_set_mac_addr(struct net_device *ndev, void *p) struct net_device_context *ndc = netdev_priv(ndev); struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev); struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); - struct sockaddr *addr = p; + struct sockaddr_storage *addr = p; int err; err = eth_prepare_mac_addr_change(ndev, p); @@ -1387,12 +1387,12 @@ static int netvsc_set_mac_addr(struct net_device *ndev, void *p) return err; } - err = rndis_filter_set_device_mac(nvdev, addr->sa_data); + err = rndis_filter_set_device_mac(nvdev, addr->__data); if (!err) { eth_commit_mac_addr_change(ndev, p); } else if (vf_netdev) { /* rollback change on VF */ - memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN); + memcpy(addr->__data, ndev->dev_addr, ETH_ALEN); dev_set_mac_address(vf_netdev, addr, NULL); } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 7045b1d58754..4df991e494bd 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -754,13 +754,13 @@ static int macvlan_sync_address(struct net_device *dev, static int macvlan_set_mac_address(struct net_device *dev, void *p) { struct macvlan_dev *vlan = netdev_priv(dev); - struct sockaddr *addr = p; + struct sockaddr_storage *addr = p; - if (!is_valid_ether_addr(addr->sa_data)) + if (!is_valid_ether_addr(addr->__data)) return -EADDRNOTAVAIL; /* If the addresses are the same, this is a no-op */ - if (ether_addr_equal(dev->dev_addr, addr->sa_data)) + if (ether_addr_equal(dev->dev_addr, addr->__data)) return 0; if (vlan->mode == MACVLAN_MODE_PASSTHRU) { @@ -768,10 +768,10 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p) return dev_set_mac_address(vlan->lowerdev, addr, NULL); } - if (macvlan_addr_busy(vlan->port, addr->sa_data)) + if (macvlan_addr_busy(vlan->port, addr->__data)) return -EADDRINUSE; - return macvlan_sync_address(dev, addr->sa_data); + return macvlan_sync_address(dev, addr->__data); } static void macvlan_change_rx_flags(struct net_device *dev, int change) @@ -1295,11 +1295,11 @@ static void macvlan_port_destroy(struct net_device *dev) */ if (macvlan_passthru(port) && !ether_addr_equal(port->dev->dev_addr, port->perm_addr)) { - struct sockaddr sa; + struct sockaddr_storage ss; - sa.sa_family = port->dev->type; - memcpy(&sa.sa_data, port->perm_addr, port->dev->addr_len); - dev_set_mac_address(port->dev, &sa, NULL); + ss.ss_family = port->dev->type; + memcpy(&ss.__data, port->perm_addr, port->dev->addr_len); + dev_set_mac_address(port->dev, &ss, NULL); } kfree(port); diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index b75ceb90359f..8bc56186b2a3 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -55,7 +55,7 @@ static int __set_port_dev_addr(struct net_device *port_dev, memcpy(addr.__data, dev_addr, port_dev->addr_len); addr.ss_family = port_dev->type; - return dev_set_mac_address(port_dev, (struct sockaddr *)&addr, NULL); + return dev_set_mac_address(port_dev, &addr, NULL); } static int team_port_set_orig_dev_addr(struct team_port *port) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index b18dee1b1bb3..d6589b24c68d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -8432,7 +8432,7 @@ static int rtl8152_post_reset(struct usb_interface *intf) /* reset the MAC address in case of policy change */ if (determine_ethernet_addr(tp, &ss) >= 0) - dev_set_mac_address(tp->netdev, (struct sockaddr *)&ss, NULL); + dev_set_mac_address(tp->netdev, &ss, NULL); netdev = tp->netdev; if (!netif_running(netdev)) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 47200a394a02..b4242b997373 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4214,7 +4214,7 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); -int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, +int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); diff --git a/net/core/dev.c b/net/core/dev.c index 6a0560546eeb..2b514d95c528 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9697,6 +9697,7 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, DECLARE_RWSEM(dev_addr_sem); +/* "sa" is a true struct sockaddr with limited "sa_data" member. */ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { size_t size = sizeof(sa->sa_data_min); diff --git a/net/core/dev_api.c b/net/core/dev_api.c index b5f293e637d9..6011a5ef649d 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -319,20 +319,20 @@ EXPORT_SYMBOL(dev_set_allmulti); /** * dev_set_mac_address() - change Media Access Control Address * @dev: device - * @sa: new address + * @ss: new address * @extack: netlink extended ack * * Change the hardware (MAC) address of the device * * Return: 0 on success, -errno on failure. */ -int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, +int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack) { int ret; netdev_lock_ops(dev); - ret = netif_set_mac_address(dev, (struct sockaddr_storage *)sa, extack); + ret = netif_set_mac_address(dev, ss, extack); netdev_unlock_ops(dev); return ret; diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index ee2b190e8e0d..4c07a475c567 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -234,7 +234,7 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info) * dev_set_mac_address require RTNL_LOCK */ rtnl_lock(); - rc = dev_set_mac_address(dev, (struct sockaddr *)&addr, NULL); + rc = dev_set_mac_address(dev, &addr, NULL); rtnl_unlock(); if (rc) goto dev_unregister; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 0202db2aea3e..b36947063783 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1058,7 +1058,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) break; case ncsi_dev_state_config_apply_mac: rtnl_lock(); - ret = dev_set_mac_address(dev, (struct sockaddr *)&ndp->pending_mac, NULL); + ret = dev_set_mac_address(dev, &ndp->pending_mac, NULL); rtnl_unlock(); if (ret < 0) netdev_warn(dev, "NCSI: 'Writing MAC address to device failed\n"); -- cgit v1.2.3 From 3ec523304976648b45a3eef045e97d17122ff1b2 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Thu, 29 May 2025 03:18:30 -0700 Subject: hv_netvsc: fix potential deadlock in netvsc_vf_setxdp() The MANA driver's probe registers netdevice via the following call chain: mana_probe() register_netdev() register_netdevice() register_netdevice() calls notifier callback for netvsc driver, holding the netdev mutex via netdev_lock_ops(). Further this netvsc notifier callback end up attempting to acquire the same lock again in dev_xdp_propagate() leading to deadlock. netvsc_netdev_event() netvsc_vf_setxdp() dev_xdp_propagate() This deadlock was not observed so far because net_shaper_ops was never set, and thus the lock was effectively a no-op in this case. Fix this by using netif_xdp_propagate() instead of dev_xdp_propagate() to avoid recursive locking in this path. And, since no deadlock is observed on the other path which is via netvsc_probe, add the lock exclusivly for that path. Also, clean up the unregistration path by removing the unnecessary call to netvsc_vf_setxdp(), since unregister_netdevice_many_notify() already performs this cleanup via dev_xdp_uninstall(). Fixes: 97246d6d21c2 ("net: hold netdev instance lock during ndo_bpf") Cc: stable@vger.kernel.org Signed-off-by: Saurabh Sengar Tested-by: Erni Sri Satya Vennela Reviewed-by: Haiyang Zhang Reviewed-by: Subbaraya Sundeep Link: https://patch.msgid.link/1748513910-23963-1-git-send-email-ssengar@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc_bpf.c | 2 +- drivers/net/hyperv/netvsc_drv.c | 4 ++-- net/core/dev.c | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/net/hyperv/netvsc_drv.c') diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c index e01c5997a551..1dd3755d9e6d 100644 --- a/drivers/net/hyperv/netvsc_bpf.c +++ b/drivers/net/hyperv/netvsc_bpf.c @@ -183,7 +183,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog) xdp.command = XDP_SETUP_PROG; xdp.prog = prog; - ret = dev_xdp_propagate(vf_netdev, &xdp); + ret = netif_xdp_propagate(vf_netdev, &xdp); if (ret && prog) bpf_prog_put(prog); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 14a0d04e21ae..c41a025c66f0 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2462,8 +2462,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); - netvsc_vf_setxdp(vf_netdev, NULL); - reinit_completion(&net_device_ctx->vf_add); netdev_rx_handler_unregister(vf_netdev); netdev_upper_dev_unlink(vf_netdev, ndev); @@ -2631,7 +2629,9 @@ static int netvsc_probe(struct hv_device *dev, continue; netvsc_prepare_bonding(vf_netdev); + netdev_lock_ops(vf_netdev); netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE); + netdev_unlock_ops(vf_netdev); __netvsc_vf_setup(net, vf_netdev); break; } diff --git a/net/core/dev.c b/net/core/dev.c index 2b514d95c528..a388f459a366 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9968,6 +9968,7 @@ int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) return dev->netdev_ops->ndo_bpf(dev, bpf); } +EXPORT_SYMBOL_GPL(netif_xdp_propagate); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) { -- cgit v1.2.3