diff options
73 files changed, 679 insertions, 551 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 34738d054f8d..0bf83d50aaba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2523,7 +2523,7 @@ S: Supported F: drivers/scsi/esas2r ATUSB IEEE 802.15.4 RADIO DRIVER -M: Stefan Schmidt <stefan@osg.samsung.com> +M: Stefan Schmidt <stefan@datenfreihafen.org> L: linux-wpan@vger.kernel.org S: Maintained F: drivers/net/ieee802154/atusb.c @@ -6908,7 +6908,7 @@ F: drivers/clk/clk-versaclock5.c IEEE 802.15.4 SUBSYSTEM M: Alexander Aring <alex.aring@gmail.com> -M: Stefan Schmidt <stefan@osg.samsung.com> +M: Stefan Schmidt <stefan@datenfreihafen.org> L: linux-wpan@vger.kernel.org W: http://wpan.cakelab.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan.git diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h index fc7383106946..91eb8910b1c9 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h @@ -63,8 +63,6 @@ #define AQ_CFG_NAPI_WEIGHT 64U -#define AQ_CFG_MULTICAST_ADDRESS_MAX 32U - /*#define AQ_CFG_MAC_ADDR_PERMANENT {0x30, 0x0E, 0xE3, 0x12, 0x34, 0x56}*/ #define AQ_NIC_FC_OFF 0U diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index a2d416b24ffc..2c6ebd91a9f2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -98,6 +98,8 @@ struct aq_stats_s { #define AQ_HW_MEDIA_TYPE_TP 1U #define AQ_HW_MEDIA_TYPE_FIBRE 2U +#define AQ_HW_MULTICAST_ADDRESS_MAX 32U + struct aq_hw_s { atomic_t flags; u8 rbl_enabled:1; @@ -177,7 +179,7 @@ struct aq_hw_ops { unsigned int packet_filter); int (*hw_multicast_list_set)(struct aq_hw_s *self, - u8 ar_mac[AQ_CFG_MULTICAST_ADDRESS_MAX] + u8 ar_mac[AQ_HW_MULTICAST_ADDRESS_MAX] [ETH_ALEN], u32 count); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index ba5fe8c4125d..e3ae29e523f0 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -135,17 +135,10 @@ err_exit: static void aq_ndev_set_multicast_settings(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); - int err = 0; - err = aq_nic_set_packet_filter(aq_nic, ndev->flags); - if (err < 0) - return; + aq_nic_set_packet_filter(aq_nic, ndev->flags); - if (netdev_mc_count(ndev)) { - err = aq_nic_set_multicast_list(aq_nic, ndev); - if (err < 0) - return; - } + aq_nic_set_multicast_list(aq_nic, ndev); } static const struct net_device_ops aq_ndev_ops = { diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 1a1a6380c128..7a22d0257e04 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -563,34 +563,41 @@ err_exit: int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev) { + unsigned int packet_filter = self->packet_filter; struct netdev_hw_addr *ha = NULL; unsigned int i = 0U; - self->mc_list.count = 0U; - - netdev_for_each_mc_addr(ha, ndev) { - ether_addr_copy(self->mc_list.ar[i++], ha->addr); - ++self->mc_list.count; + self->mc_list.count = 0; + if (netdev_uc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) { + packet_filter |= IFF_PROMISC; + } else { + netdev_for_each_uc_addr(ha, ndev) { + ether_addr_copy(self->mc_list.ar[i++], ha->addr); - if (i >= AQ_CFG_MULTICAST_ADDRESS_MAX) - break; + if (i >= AQ_HW_MULTICAST_ADDRESS_MAX) + break; + } } - if (i >= AQ_CFG_MULTICAST_ADDRESS_MAX) { - /* Number of filters is too big: atlantic does not support this. - * Force all multi filter to support this. - * With this we disable all UC filters and setup "all pass" - * multicast mask - */ - self->packet_filter |= IFF_ALLMULTI; - self->aq_nic_cfg.mc_list_count = 0; - return self->aq_hw_ops->hw_packet_filter_set(self->aq_hw, - self->packet_filter); + if (i + netdev_mc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) { + packet_filter |= IFF_ALLMULTI; } else { - return self->aq_hw_ops->hw_multicast_list_set(self->aq_hw, - self->mc_list.ar, - self->mc_list.count); + netdev_for_each_mc_addr(ha, ndev) { + ether_addr_copy(self->mc_list.ar[i++], ha->addr); + + if (i >= AQ_HW_MULTICAST_ADDRESS_MAX) + break; + } + } + + if (i > 0 && i < AQ_HW_MULTICAST_ADDRESS_MAX) { + packet_filter |= IFF_MULTICAST; + self->mc_list.count = i; + self->aq_hw_ops->hw_multicast_list_set(self->aq_hw, + self->mc_list.ar, + self->mc_list.count); } + return aq_nic_set_packet_filter(self, packet_filter); } int aq_nic_set_mtu(struct aq_nic_s *self, int new_mtu) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index faa533a0ec47..fecfc401f95d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -75,7 +75,7 @@ struct aq_nic_s { struct aq_hw_link_status_s link_status; struct { u32 count; - u8 ar[AQ_CFG_MULTICAST_ADDRESS_MAX][ETH_ALEN]; + u8 ar[AQ_HW_MULTICAST_ADDRESS_MAX][ETH_ALEN]; } mc_list; struct pci_dev *pdev; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index 67e2f9fb9402..8cc6abadc03b 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -765,7 +765,7 @@ static int hw_atl_a0_hw_packet_filter_set(struct aq_hw_s *self, static int hw_atl_a0_hw_multicast_list_set(struct aq_hw_s *self, u8 ar_mac - [AQ_CFG_MULTICAST_ADDRESS_MAX] + [AQ_HW_MULTICAST_ADDRESS_MAX] [ETH_ALEN], u32 count) { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 819f6bcf9b4e..956860a69797 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -784,7 +784,7 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self, static int hw_atl_b0_hw_multicast_list_set(struct aq_hw_s *self, u8 ar_mac - [AQ_CFG_MULTICAST_ADDRESS_MAX] + [AQ_HW_MULTICAST_ADDRESS_MAX] [ETH_ALEN], u32 count) { @@ -812,7 +812,7 @@ static int hw_atl_b0_hw_multicast_list_set(struct aq_hw_s *self, hw_atl_rpfl2_uc_flr_en_set(self, (self->aq_nic_cfg->is_mc_list_enabled), - HW_ATL_B0_MAC_MIN + i); + HW_ATL_B0_MAC_MIN + i); } err = aq_hw_err_from_flags(self); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 176fc9f4d7de..4394c1162be4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5712,7 +5712,9 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) } vnic->uc_filter_count = 1; - vnic->rx_mask = CFA_L2_SET_RX_MASK_REQ_MASK_BCAST; + vnic->rx_mask = 0; + if (bp->dev->flags & IFF_BROADCAST) + vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_BCAST; if ((bp->dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp)) vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; @@ -5917,7 +5919,7 @@ unsigned int bnxt_get_max_func_irqs(struct bnxt *bp) return min_t(unsigned int, hw_resc->max_irqs, hw_resc->max_cp_rings); } -void bnxt_set_max_func_irqs(struct bnxt *bp, unsigned int max_irqs) +static void bnxt_set_max_func_irqs(struct bnxt *bp, unsigned int max_irqs) { bp->hw_resc.max_irqs = max_irqs; } @@ -6888,7 +6890,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) rc = bnxt_request_irq(bp); if (rc) { netdev_err(bp->dev, "bnxt_request_irq err: %x\n", rc); - goto open_err; + goto open_err_irq; } } @@ -6928,6 +6930,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) open_err: bnxt_debug_dev_exit(bp); bnxt_disable_napi(bp); + +open_err_irq: bnxt_del_napi(bp); open_err_free_mem: @@ -7214,13 +7218,16 @@ static void bnxt_set_rx_mode(struct net_device *dev) mask &= ~(CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS | CFA_L2_SET_RX_MASK_REQ_MASK_MCAST | - CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST); + CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST | + CFA_L2_SET_RX_MASK_REQ_MASK_BCAST); if ((dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp)) mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; uc_update = bnxt_uc_list_updated(bp); + if (dev->flags & IFF_BROADCAST) + mask |= CFA_L2_SET_RX_MASK_REQ_MASK_BCAST; if (dev->flags & IFF_ALLMULTI) { mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; vnic->mc_list_count = 0; @@ -8502,11 +8509,11 @@ int bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, bool shared) int rx, tx, cp; _bnxt_get_max_rings(bp, &rx, &tx, &cp); + *max_rx = rx; + *max_tx = tx; if (!rx || !tx || !cp) return -ENOMEM; - *max_rx = rx; - *max_tx = tx; return bnxt_trim_rings(bp, max_rx, max_tx, cp, shared); } @@ -8520,8 +8527,11 @@ static int bnxt_get_dflt_rings(struct bnxt *bp, int *max_rx, int *max_tx, /* Not enough rings, try disabling agg rings. */ bp->flags &= ~BNXT_FLAG_AGG_RINGS; rc = bnxt_get_max_rings(bp, max_rx, max_tx, shared); - if (rc) + if (rc) { + /* set BNXT_FLAG_AGG_RINGS back for consistency */ + bp->flags |= BNXT_FLAG_AGG_RINGS; return rc; + } bp->flags |= BNXT_FLAG_NO_AGG_RINGS; bp->dev->hw_features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW); bp->dev->features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 9b14eb610b9f..91575ef97c8c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1470,7 +1470,6 @@ void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max); unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp); void bnxt_set_max_func_cp_rings(struct bnxt *bp, unsigned int max); unsigned int bnxt_get_max_func_irqs(struct bnxt *bp); -void bnxt_set_max_func_irqs(struct bnxt *bp, unsigned int max); int bnxt_get_avail_msix(struct bnxt *bp, int num); int bnxt_reserve_rings(struct bnxt *bp); void bnxt_tx_disable(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 795f45024c20..491bd40a254d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -27,6 +27,15 @@ #define BNXT_FID_INVALID 0xffff #define VLAN_TCI(vid, prio) ((vid) | ((prio) << VLAN_PRIO_SHIFT)) +#define is_vlan_pcp_wildcarded(vlan_tci_mask) \ + ((ntohs(vlan_tci_mask) & VLAN_PRIO_MASK) == 0x0000) +#define is_vlan_pcp_exactmatch(vlan_tci_mask) \ + ((ntohs(vlan_tci_mask) & VLAN_PRIO_MASK) == VLAN_PRIO_MASK) +#define is_vlan_pcp_zero(vlan_tci) \ + ((ntohs(vlan_tci) & VLAN_PRIO_MASK) == 0x0000) +#define is_vid_exactmatch(vlan_tci_mask) \ + ((ntohs(vlan_tci_mask) & VLAN_VID_MASK) == VLAN_VID_MASK) + /* Return the dst fid of the func for flow forwarding * For PFs: src_fid is the fid of the PF * For VF-reps: src_fid the fid of the VF @@ -389,6 +398,21 @@ static bool is_exactmatch(void *mask, int len) return true; } +static bool is_vlan_tci_allowed(__be16 vlan_tci_mask, + __be16 vlan_tci) +{ + /* VLAN priority must be either exactly zero or fully wildcarded and + * VLAN id must be exact match. + */ + if (is_vid_exactmatch(vlan_tci_mask) && + ((is_vlan_pcp_exactmatch(vlan_tci_mask) && + is_vlan_pcp_zero(vlan_tci)) || + is_vlan_pcp_wildcarded(vlan_tci_mask))) + return true; + + return false; +} + static bool bits_set(void *key, int len) { const u8 *p = key; @@ -803,9 +827,9 @@ static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow) /* Currently VLAN fields cannot be partial wildcard */ if (bits_set(&flow->l2_key.inner_vlan_tci, sizeof(flow->l2_key.inner_vlan_tci)) && - !is_exactmatch(&flow->l2_mask.inner_vlan_tci, - sizeof(flow->l2_mask.inner_vlan_tci))) { - netdev_info(bp->dev, "Wildcard match unsupported for VLAN TCI\n"); + !is_vlan_tci_allowed(flow->l2_mask.inner_vlan_tci, + flow->l2_key.inner_vlan_tci)) { + netdev_info(bp->dev, "Unsupported VLAN TCI\n"); return false; } if (bits_set(&flow->l2_key.inner_vlan_tpid, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 347e4f946eb2..840f6e505f73 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -169,7 +169,6 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id, edev->ulp_tbl[ulp_id].msix_requested = avail_msix; } bnxt_fill_msix_vecs(bp, ent); - bnxt_set_max_func_irqs(bp, bnxt_get_max_func_irqs(bp) - avail_msix); bnxt_set_max_func_cp_rings(bp, max_cp_rings - avail_msix); edev->flags |= BNXT_EN_FLAG_MSIX_REQUESTED; return avail_msix; @@ -192,7 +191,6 @@ static int bnxt_free_msix_vecs(struct bnxt_en_dev *edev, int ulp_id) msix_requested = edev->ulp_tbl[ulp_id].msix_requested; bnxt_set_max_func_cp_rings(bp, max_cp_rings + msix_requested); edev->ulp_tbl[ulp_id].msix_requested = 0; - bnxt_set_max_func_irqs(bp, bnxt_get_max_func_irqs(bp) + msix_requested); edev->flags &= ~BNXT_EN_FLAG_MSIX_REQUESTED; if (netif_running(dev)) { bnxt_close_nic(bp, true, false); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 86659823b259..3d45f4c92cf6 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -166,6 +166,7 @@ #define GEM_DCFG6 0x0294 /* Design Config 6 */ #define GEM_DCFG7 0x0298 /* Design Config 7 */ #define GEM_DCFG8 0x029C /* Design Config 8 */ +#define GEM_DCFG10 0x02A4 /* Design Config 10 */ #define GEM_TXBDCTRL 0x04cc /* TX Buffer Descriptor control register */ #define GEM_RXBDCTRL 0x04d0 /* RX Buffer Descriptor control register */ @@ -490,6 +491,12 @@ #define GEM_SCR2CMP_OFFSET 0 #define GEM_SCR2CMP_SIZE 8 +/* Bitfields in DCFG10 */ +#define GEM_TXBD_RDBUFF_OFFSET 12 +#define GEM_TXBD_RDBUFF_SIZE 4 +#define GEM_RXBD_RDBUFF_OFFSET 8 +#define GEM_RXBD_RDBUFF_SIZE 4 + /* Bitfields in TISUBN */ #define GEM_SUBNSINCR_OFFSET 0 #define GEM_SUBNSINCR_SIZE 16 @@ -635,6 +642,7 @@ #define MACB_CAPS_USRIO_DISABLED 0x00000010 #define MACB_CAPS_JUMBO 0x00000020 #define MACB_CAPS_GEM_HAS_PTP 0x00000040 +#define MACB_CAPS_BD_RD_PREFETCH 0x00000080 #define MACB_CAPS_FIFO_MODE 0x10000000 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 #define MACB_CAPS_SG_DISABLED 0x40000000 @@ -1203,6 +1211,9 @@ struct macb { unsigned int max_tuples; struct tasklet_struct hresp_err_tasklet; + + int rx_bd_rd_prefetch; + int tx_bd_rd_prefetch; }; #ifdef CONFIG_MACB_USE_HWSTAMP diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 96cc03a6d942..a6c911bb5ce2 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1811,23 +1811,25 @@ static void macb_free_consistent(struct macb *bp) { struct macb_queue *queue; unsigned int q; + int size; - queue = &bp->queues[0]; bp->macbgem_ops.mog_free_rx_buffers(bp); - if (queue->rx_ring) { - dma_free_coherent(&bp->pdev->dev, RX_RING_BYTES(bp), - queue->rx_ring, queue->rx_ring_dma); - queue->rx_ring = NULL; - } for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { kfree(queue->tx_skb); queue->tx_skb = NULL; if (queue->tx_ring) { - dma_free_coherent(&bp->pdev->dev, TX_RING_BYTES(bp), + size = TX_RING_BYTES(bp) + bp->tx_bd_rd_prefetch; + dma_free_coherent(&bp->pdev->dev, size, queue->tx_ring, queue->tx_ring_dma); queue->tx_ring = NULL; } + if (queue->rx_ring) { + size = RX_RING_BYTES(bp) + bp->rx_bd_rd_prefetch; + dma_free_coherent(&bp->pdev->dev, size, + queue->rx_ring, queue->rx_ring_dma); + queue->rx_ring = NULL; + } } } @@ -1874,7 +1876,7 @@ static int macb_alloc_consistent(struct macb *bp) int size; for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - size = TX_RING_BYTES(bp); + size = TX_RING_BYTES(bp) + bp->tx_bd_rd_prefetch; queue->tx_ring = dma_alloc_coherent(&bp->pdev->dev, size, &queue->tx_ring_dma, GFP_KERNEL); @@ -1890,7 +1892,7 @@ static int macb_alloc_consistent(struct macb *bp) if (!queue->tx_skb) goto out_err; - size = RX_RING_BYTES(bp); + size = RX_RING_BYTES(bp) + bp->rx_bd_rd_prefetch; queue->rx_ring = dma_alloc_coherent(&bp->pdev->dev, size, &queue->rx_ring_dma, GFP_KERNEL); if (!queue->rx_ring) @@ -3797,7 +3799,7 @@ static const struct macb_config np4_config = { static const struct macb_config zynqmp_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | - MACB_CAPS_GEM_HAS_PTP, + MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = macb_init, @@ -3858,7 +3860,7 @@ static int macb_probe(struct platform_device *pdev) void __iomem *mem; const char *mac; struct macb *bp; - int err; + int err, val; regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); mem = devm_ioremap_resource(&pdev->dev, regs); @@ -3947,6 +3949,18 @@ static int macb_probe(struct platform_device *pdev) else dev->max_mtu = ETH_DATA_LEN; + if (bp->caps & MACB_CAPS_BD_RD_PREFETCH) { + val = GEM_BFEXT(RXBD_RDBUFF, gem_readl(bp, DCFG10)); + if (val) + bp->rx_bd_rd_prefetch = (2 << (val - 1)) * + macb_dma_desc_get_size(bp); + + val = GEM_BFEXT(TXBD_RDBUFF, gem_readl(bp, DCFG10)); + if (val) + bp->tx_bd_rd_prefetch = (2 << (val - 1)) * + macb_dma_desc_get_size(bp); + } + mac = of_get_mac_address(np); if (mac) { ether_addr_copy(bp->dev->dev_addr, mac); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 8a815bb57177..7e8454d3b1ad 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -91,6 +91,9 @@ static int octeon_console_debug_enabled(u32 console) */ #define LIO_SYNC_OCTEON_TIME_INTERVAL_MS 60000 +/* time to wait for possible in-flight requests in milliseconds */ +#define WAIT_INFLIGHT_REQUEST msecs_to_jiffies(1000) + struct lio_trusted_vf_ctx { struct completion complete; int status; @@ -259,7 +262,7 @@ static inline void pcierror_quiesce_device(struct octeon_device *oct) force_io_queues_off(oct); /* To allow for in-flight requests */ - schedule_timeout_uninterruptible(100); + schedule_timeout_uninterruptible(WAIT_INFLIGHT_REQUEST); if (wait_for_pending_requests(oct)) dev_err(&oct->pci_dev->dev, "There were pending requests\n"); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 974a868a4824..3720c3e11ebb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -8702,7 +8702,7 @@ static int t4_get_flash_params(struct adapter *adap) }; unsigned int part, manufacturer; - unsigned int density, size; + unsigned int density, size = 0; u32 flashid = 0; int ret; @@ -8772,11 +8772,6 @@ static int t4_get_flash_params(struct adapter *adap) case 0x22: /* 256MB */ size = 1 << 28; break; - - default: - dev_err(adap->pdev_dev, "Micron Flash Part has bad size, ID = %#x, Density code = %#x\n", - flashid, density); - return -EINVAL; } break; } @@ -8792,10 +8787,6 @@ static int t4_get_flash_params(struct adapter *adap) case 0x17: /* 64MB */ size = 1 << 26; break; - default: - dev_err(adap->pdev_dev, "ISSI Flash Part has bad size, ID = %#x, Density code = %#x\n", - flashid, density); - return -EINVAL; } break; } @@ -8811,10 +8802,6 @@ static int t4_get_flash_params(struct adapter *adap) case 0x18: /* 16MB */ size = 1 << 24; break; - default: - dev_err(adap->pdev_dev, "Macronix Flash Part has bad size, ID = %#x, Density code = %#x\n", - flashid, density); - return -EINVAL; } break; } @@ -8830,17 +8817,21 @@ static int t4_get_flash_params(struct adapter *adap) case 0x18: /* 16MB */ size = 1 << 24; break; - default: - dev_err(adap->pdev_dev, "Winbond Flash Part has bad size, ID = %#x, Density code = %#x\n", - flashid, density); - return -EINVAL; } break; } - default: - dev_err(adap->pdev_dev, "Unsupported Flash Part, ID = %#x\n", - flashid); - return -EINVAL; + } + + /* If we didn't recognize the FLASH part, that's no real issue: the + * Hardware/Software contract says that Hardware will _*ALWAYS*_ + * use a FLASH part which is at least 4MB in size and has 64KB + * sectors. The unrecognized FLASH part is likely to be much larger + * than 4MB, but that's all we really need. + */ + if (size == 0) { + dev_warn(adap->pdev_dev, "Unknown Flash Part, ID = %#x, assuming 4MB\n", + flashid); + size = 1 << 22; } /* Store decoded Flash size and fall through into vetting code. */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index a14e48489029..4340c4c90bcb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -6723,7 +6723,7 @@ static enum dbg_status qed_parse_mcp_trace_buf(u8 *trace_buf, format_idx = header & MFW_TRACE_EVENTID_MASK; /* Skip message if its index doesn't exist in the meta data */ - if (format_idx > s_mcp_trace_meta.formats_num) { + if (format_idx >= s_mcp_trace_meta.formats_num) { u8 format_size = (u8)((header & MFW_TRACE_PRM_SIZE_MASK) >> MFW_TRACE_PRM_SIZE_SHIFT); diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 0cbc74d6ca8b..758a9a5127fa 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -371,7 +371,7 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev, goto err2; } - DP_INFO(cdev, "qed_probe completed successffuly\n"); + DP_INFO(cdev, "qed_probe completed successfully\n"); return cdev; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 68f122140966..0d811c02ff34 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -980,6 +980,13 @@ static void ravb_adjust_link(struct net_device *ndev) struct ravb_private *priv = netdev_priv(ndev); struct phy_device *phydev = ndev->phydev; bool new_state = false; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + /* Disable TX and RX right over here, if E-MAC change is ignored */ + if (priv->no_avb_link) + ravb_rcv_snd_disable(ndev); if (phydev->link) { if (phydev->duplex != priv->duplex) { @@ -997,18 +1004,21 @@ static void ravb_adjust_link(struct net_device *ndev) ravb_modify(ndev, ECMR, ECMR_TXF, 0); new_state = true; priv->link = phydev->link; - if (priv->no_avb_link) - ravb_rcv_snd_enable(ndev); } } else if (priv->link) { new_state = true; priv->link = 0; priv->speed = 0; priv->duplex = -1; - if (priv->no_avb_link) - ravb_rcv_snd_disable(ndev); } + /* Enable TX and RX right over here, if E-MAC change is ignored */ + if (priv->no_avb_link && phydev->link) + ravb_rcv_snd_enable(ndev); + + mmiowb(); + spin_unlock_irqrestore(&priv->lock, flags); + if (new_state && netif_msg_link(priv)) phy_print_status(phydev); } @@ -1096,75 +1106,6 @@ static int ravb_phy_start(struct net_device *ndev) return 0; } -static int ravb_get_link_ksettings(struct net_device *ndev, - struct ethtool_link_ksettings *cmd) -{ - struct ravb_private *priv = netdev_priv(ndev); - unsigned long flags; - - if (!ndev->phydev) - return -ENODEV; - - spin_lock_irqsave(&priv->lock, flags); - phy_ethtool_ksettings_get(ndev->phydev, cmd); - spin_unlock_irqrestore(&priv->lock, flags); - - return 0; -} - -static int ravb_set_link_ksettings(struct net_device *ndev, - const struct ethtool_link_ksettings *cmd) -{ - struct ravb_private *priv = netdev_priv(ndev); - unsigned long flags; - int error; - - if (!ndev->phydev) - return -ENODEV; - - spin_lock_irqsave(&priv->lock, flags); - - /* Disable TX and RX */ - ravb_rcv_snd_disable(ndev); - - error = phy_ethtool_ksettings_set(ndev->phydev, cmd); - if (error) - goto error_exit; - - if (cmd->base.duplex == DUPLEX_FULL) - priv->duplex = 1; - else - priv->duplex = 0; - - ravb_set_duplex(ndev); - -error_exit: - mdelay(1); - - /* Enable TX and RX */ - ravb_rcv_snd_enable(ndev); - - mmiowb(); - spin_unlock_irqrestore(&priv->lock, flags); - - return error; -} - -static int ravb_nway_reset(struct net_device *ndev) -{ - struct ravb_private *priv = netdev_priv(ndev); - int error = -ENODEV; - unsigned long flags; - - if (ndev->phydev) { - spin_lock_irqsave(&priv->lock, flags); - error = phy_start_aneg(ndev->phydev); - spin_unlock_irqrestore(&priv->lock, flags); - } - - return error; -} - static u32 ravb_get_msglevel(struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); @@ -1377,7 +1318,7 @@ static int ravb_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) } static const struct ethtool_ops ravb_ethtool_ops = { - .nway_reset = ravb_nway_reset, + .nway_reset = phy_ethtool_nway_reset, .get_msglevel = ravb_get_msglevel, .set_msglevel = ravb_set_msglevel, .get_link = ethtool_op_get_link, @@ -1387,8 +1328,8 @@ static const struct ethtool_ops ravb_ethtool_ops = { .get_ringparam = ravb_get_ringparam, .set_ringparam = ravb_set_ringparam, .get_ts_info = ravb_get_ts_info, - .get_link_ksettings = ravb_get_link_ksettings, - .set_link_ksettings = ravb_set_link_ksettings, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, .get_wol = ravb_get_wol, .set_wol = ravb_set_wol, }; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index e9007b613f17..5614fd231bbe 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1927,8 +1927,15 @@ static void sh_eth_adjust_link(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); struct phy_device *phydev = ndev->phydev; + unsigned long flags; int new_state = 0; + spin_lock_irqsave(&mdp->lock, flags); + + /* Disable TX and RX right over here, if E-MAC change is ignored */ + if (mdp->cd->no_psr || mdp->no_ether_link) + sh_eth_rcv_snd_disable(ndev); + if (phydev->link) { if (phydev->duplex != mdp->duplex) { new_state = 1; @@ -1947,18 +1954,21 @@ static void sh_eth_adjust_link(struct net_device *ndev) sh_eth_modify(ndev, ECMR, ECMR_TXF, 0); new_state = 1; mdp->link = phydev->link; - if (mdp->cd->no_psr || mdp->no_ether_link) - sh_eth_rcv_snd_enable(ndev); } } else if (mdp->link) { new_state = 1; mdp->link = 0; mdp->speed = 0; mdp->duplex = -1; - if (mdp->cd->no_psr || mdp->no_ether_link) - sh_eth_rcv_snd_disable(ndev); } + /* Enable TX and RX right over here, if E-MAC change is ignored */ + if ((mdp->cd->no_psr || mdp->no_ether_link) && phydev->link) + sh_eth_rcv_snd_enable(ndev); + + mmiowb(); + spin_unlock_irqrestore(&mdp->lock, flags); + if (new_state && netif_msg_link(mdp)) phy_print_status(phydev); } @@ -2030,60 +2040,6 @@ static int sh_eth_phy_start(struct net_device *ndev) return 0; } -static int sh_eth_get_link_ksettings(struct net_device *ndev, - struct ethtool_link_ksettings *cmd) -{ - struct sh_eth_private *mdp = netdev_priv(ndev); - unsigned long flags; - - if (!ndev->phydev) - return -ENODEV; - - spin_lock_irqsave(&mdp->lock, flags); - phy_ethtool_ksettings_get(ndev->phydev, cmd); - spin_unlock_irqrestore(&mdp->lock, flags); - - return 0; -} - -static int sh_eth_set_link_ksettings(struct net_device *ndev, - const struct ethtool_link_ksettings *cmd) -{ - struct sh_eth_private *mdp = netdev_priv(ndev); - unsigned long flags; - int ret; - - if (!ndev->phydev) - return -ENODEV; - - spin_lock_irqsave(&mdp->lock, flags); - - /* disable tx and rx */ - sh_eth_rcv_snd_disable(ndev); - - ret = phy_ethtool_ksettings_set(ndev->phydev, cmd); - if (ret) - goto error_exit; - - if (cmd->base.duplex == DUPLEX_FULL) - mdp->duplex = 1; - else - mdp->duplex = 0; - - if (mdp->cd->set_duplex) - mdp->cd->set_duplex(ndev); - -error_exit: - mdelay(1); - - /* enable tx and rx */ - sh_eth_rcv_snd_enable(ndev); - - spin_unlock_irqrestore(&mdp->lock, flags); - - return ret; -} - /* If it is ever necessary to increase SH_ETH_REG_DUMP_MAX_REGS, the * version must be bumped as well. Just adding registers up to that * limit is fine, as long as the existing register indices don't @@ -2263,22 +2219,6 @@ static void sh_eth_get_regs(struct net_device *ndev, struct ethtool_regs *regs, pm_runtime_put_sync(&mdp->pdev->dev); } -static int sh_eth_nway_reset(struct net_device *ndev) -{ - struct sh_eth_private *mdp = netdev_priv(ndev); - unsigned long flags; - int ret; - - if (!ndev->phydev) - return -ENODEV; - - spin_lock_irqsave(&mdp->lock, flags); - ret = phy_start_aneg(ndev->phydev); - spin_unlock_irqrestore(&mdp->lock, flags); - - return ret; -} - static u32 sh_eth_get_msglevel(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); @@ -2429,7 +2369,7 @@ static int sh_eth_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) static const struct ethtool_ops sh_eth_ethtool_ops = { .get_regs_len = sh_eth_get_regs_len, .get_regs = sh_eth_get_regs, - .nway_reset = sh_eth_nway_reset, + .nway_reset = phy_ethtool_nway_reset, .get_msglevel = sh_eth_get_msglevel, .set_msglevel = sh_eth_set_msglevel, .get_link = ethtool_op_get_link, @@ -2438,8 +2378,8 @@ static const struct ethtool_ops sh_eth_ethtool_ops = { .get_sset_count = sh_eth_get_sset_count, .get_ringparam = sh_eth_get_ringparam, .set_ringparam = sh_eth_set_ringparam, - .get_link_ksettings = sh_eth_get_link_ksettings, - .set_link_ksettings = sh_eth_set_link_ksettings, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, .get_wol = sh_eth_get_wol, .set_wol = sh_eth_set_wol, }; diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index d437f4f5ed52..740655261e5b 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -349,7 +349,6 @@ static int sfp_register_bus(struct sfp_bus *bus) } if (bus->started) bus->socket_ops->start(bus->sfp); - bus->netdev->sfp_bus = bus; bus->registered = true; return 0; } @@ -364,7 +363,6 @@ static void sfp_unregister_bus(struct sfp_bus *bus) if (bus->phydev && ops && ops->disconnect_phy) ops->disconnect_phy(bus->upstream); } - bus->netdev->sfp_bus = NULL; bus->registered = false; } @@ -436,6 +434,14 @@ void sfp_upstream_stop(struct sfp_bus *bus) } EXPORT_SYMBOL_GPL(sfp_upstream_stop); +static void sfp_upstream_clear(struct sfp_bus *bus) +{ + bus->upstream_ops = NULL; + bus->upstream = NULL; + bus->netdev->sfp_bus = NULL; + bus->netdev = NULL; +} + /** * sfp_register_upstream() - Register the neighbouring device * @fwnode: firmware node for the SFP bus @@ -461,9 +467,13 @@ struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode, bus->upstream_ops = ops; bus->upstream = upstream; bus->netdev = ndev; + ndev->sfp_bus = bus; - if (bus->sfp) + if (bus->sfp) { ret = sfp_register_bus(bus); + if (ret) + sfp_upstream_clear(bus); + } rtnl_unlock(); } @@ -488,8 +498,7 @@ void sfp_unregister_upstream(struct sfp_bus *bus) rtnl_lock(); if (bus->sfp) sfp_unregister_bus(bus); - bus->upstream = NULL; - bus->netdev = NULL; + sfp_upstream_clear(bus); rtnl_unlock(); sfp_bus_put(bus); @@ -561,6 +570,13 @@ void sfp_module_remove(struct sfp_bus *bus) } EXPORT_SYMBOL_GPL(sfp_module_remove); +static void sfp_socket_clear(struct sfp_bus *bus) +{ + bus->sfp_dev = NULL; + bus->sfp = NULL; + bus->socket_ops = NULL; +} + struct sfp_bus *sfp_register_socket(struct device *dev, struct sfp *sfp, const struct sfp_socket_ops *ops) { @@ -573,8 +589,11 @@ struct sfp_bus *sfp_register_socket(struct device *dev, struct sfp *sfp, bus->sfp = sfp; bus->socket_ops = ops; - if (bus->netdev) + if (bus->netdev) { ret = sfp_register_bus(bus); + if (ret) + sfp_socket_clear(bus); + } rtnl_unlock(); } @@ -592,9 +611,7 @@ void sfp_unregister_socket(struct sfp_bus *bus) rtnl_lock(); if (bus->netdev) sfp_unregister_bus(bus); - bus->sfp_dev = NULL; - bus->sfp = NULL; - bus->socket_ops = NULL; + sfp_socket_clear(bus); rtnl_unlock(); sfp_bus_put(bus); diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 8fac8e132c5b..38502809420b 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1253,6 +1253,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)}, /* Quectel EP06 Mini PCIe */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 300baad62c88..c73dd7396886 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -765,8 +765,8 @@ static inline bool bpf_dump_raw_ok(void) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); -static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, - struct net_device *fwd) +static inline int xdp_ok_fwd_dev(const struct net_device *fwd, + unsigned int pktlen) { unsigned int len; @@ -774,7 +774,7 @@ static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, return -ENETDOWN; len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; - if (skb->len > len) + if (pktlen > len) return -EMSGSIZE; return 0; diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 7843b98e1c6e..c20c7e197d07 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -105,13 +105,13 @@ static inline bool br_vlan_enabled(const struct net_device *dev) static inline int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) { - return -1; + return -EINVAL; } static inline int br_vlan_get_info(const struct net_device *dev, u16 vid, struct bridge_vlan_info *p_vinfo) { - return -1; + return -EINVAL; } #endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 16475c269749..d02881e4ad1f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -355,14 +355,7 @@ struct ipv6_txoptions *ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, int newtype, - struct ipv6_opt_hdr __user *newopt, - int newoptlen); -struct ipv6_txoptions * -ipv6_renew_options_kern(struct sock *sk, - struct ipv6_txoptions *opt, - int newtype, - struct ipv6_opt_hdr *newopt, - int newoptlen); + struct ipv6_opt_hdr *newopt); struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt); diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index e0c0c2558ec4..a05134507e7b 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -65,4 +65,10 @@ extern const struct nft_expr_ops nft_payload_fast_ops; extern struct static_key_false nft_counters_enabled; extern struct static_key_false nft_trace_enabled; +extern struct nft_set_type nft_set_rhash_type; +extern struct nft_set_type nft_set_hash_type; +extern struct nft_set_type nft_set_hash_fast_type; +extern struct nft_set_type nft_set_rbtree_type; +extern struct nft_set_type nft_set_bitmap_type; + #endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h index 9754a50ecde9..4cc64c8446eb 100644 --- a/include/net/netfilter/nf_tproxy.h +++ b/include/net/netfilter/nf_tproxy.h @@ -64,7 +64,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, * belonging to established connections going through that one. */ struct sock * -nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, +nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, @@ -103,7 +103,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, struct sock *sk); struct sock * -nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, +nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, const u8 protocol, const struct in6_addr *saddr, const struct in6_addr *daddr, const __be16 sport, const __be16 dport, diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index 9470fd7e4350..32d2454c0479 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -7,7 +7,6 @@ #include <linux/tc_act/tc_csum.h> struct tcf_csum_params { - int action; u32 update_flags; struct rcu_head rcu; }; diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index efef0b4b1b2b..46b8c7f1c8d5 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -18,7 +18,6 @@ struct tcf_tunnel_key_params { struct rcu_head rcu; int tcft_action; - int action; struct metadata_dst *tcft_enc_metadata; }; diff --git a/include/net/tcp.h b/include/net/tcp.h index 800582b5dd54..af3ec72d5d41 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -828,6 +828,10 @@ struct tcp_skb_cb { #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) +static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb) +{ + TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb); +} #if IS_ENABLED(CONFIG_IPV6) /* This is the variant of inet6_iif() that must be used by TCP, diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 9fe472f2ac95..7161856bcf9c 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -60,6 +60,10 @@ struct xdp_sock { bool zc; /* Protects multiple processes in the control path */ struct mutex mutex; + /* Mutual exclusion of NAPI TX thread and sendmsg error paths + * in the SKB destructor callback. + */ + spinlock_t tx_completion_lock; u64 rx_dropped; }; diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 642c97f6d1b8..d361fc1e3bf3 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -334,10 +334,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, { struct net_device *dev = dst->dev; struct xdp_frame *xdpf; + int err; if (!dev->netdev_ops->ndo_xdp_xmit) return -EOPNOTSUPP; + err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data); + if (unlikely(err)) + return err; + xdpf = convert_to_xdp_frame(xdp); if (unlikely(!xdpf)) return -EOVERFLOW; @@ -350,7 +355,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, { int err; - err = __xdp_generic_ok_fwd_dev(skb, dst->dev); + err = xdp_ok_fwd_dev(dst->dev, skb->len); if (unlikely(err)) return err; skb->dev = dst->dev; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3ca2198a6d22..513d9dfcf4ee 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -747,13 +747,15 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, * old element will be freed immediately. * Otherwise return an error */ - atomic_dec(&htab->count); - return ERR_PTR(-E2BIG); + l_new = ERR_PTR(-E2BIG); + goto dec_count; } l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN, htab->map.numa_node); - if (!l_new) - return ERR_PTR(-ENOMEM); + if (!l_new) { + l_new = ERR_PTR(-ENOMEM); + goto dec_count; + } } memcpy(l_new->key, key, key_size); @@ -766,7 +768,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, GFP_ATOMIC | __GFP_NOWARN); if (!pptr) { kfree(l_new); - return ERR_PTR(-ENOMEM); + l_new = ERR_PTR(-ENOMEM); + goto dec_count; } } @@ -780,6 +783,9 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, l_new->hash = hash; return l_new; +dec_count: + atomic_dec(&htab->count); + return l_new; } static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index cf7b6a6dbd1f..98fb7938beea 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -312,10 +312,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout) struct smap_psock *psock; struct sock *osk; + lock_sock(sk); rcu_read_lock(); psock = smap_psock_sk(sk); if (unlikely(!psock)) { rcu_read_unlock(); + release_sock(sk); return sk->sk_prot->close(sk, timeout); } @@ -371,6 +373,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout) e = psock_map_pop(sk, psock); } rcu_read_unlock(); + release_sock(sk); close_fun(sk, timeout); } @@ -568,7 +571,8 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md) while (sg[i].length) { free += sg[i].length; sk_mem_uncharge(sk, sg[i].length); - put_page(sg_page(&sg[i])); + if (!md->skb) + put_page(sg_page(&sg[i])); sg[i].length = 0; sg[i].page_link = 0; sg[i].offset = 0; @@ -577,6 +581,8 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md) if (i == MAX_SKB_FRAGS) i = 0; } + if (md->skb) + consume_skb(md->skb); return free; } @@ -1230,7 +1236,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) */ TCP_SKB_CB(skb)->bpf.sk_redir = NULL; skb->sk = psock->sock; - bpf_compute_data_pointers(skb); + bpf_compute_data_end_sk_skb(skb); preempt_disable(); rc = (*prog->bpf_func)(skb, prog->insnsi); preempt_enable(); @@ -1485,7 +1491,7 @@ static int smap_parse_func_strparser(struct strparser *strp, * any socket yet. */ skb->sk = psock->sock; - bpf_compute_data_pointers(skb); + bpf_compute_data_end_sk_skb(skb); rc = (*prog->bpf_func)(skb, prog->insnsi); skb->sk = NULL; rcu_read_unlock(); @@ -1896,7 +1902,7 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map, e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN); if (!e) { err = -ENOMEM; - goto out_progs; + goto out_free; } } @@ -2069,7 +2075,13 @@ static int sock_map_update_elem(struct bpf_map *map, return -EOPNOTSUPP; } + lock_sock(skops.sk); + preempt_disable(); + rcu_read_lock(); err = sock_map_ctx_update_elem(&skops, map, key, flags); + rcu_read_unlock(); + preempt_enable(); + release_sock(skops.sk); fput(socket->file); return err; } @@ -2342,7 +2354,10 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops, if (err) goto err; - /* bpf_map_update_elem() can be called in_irq() */ + /* psock is valid here because otherwise above *ctx_update_elem would + * have thrown an error. It is safe to skip error check. + */ + psock = smap_psock_sk(sock); raw_spin_lock_bh(&b->lock); l_old = lookup_elem_raw(head, hash, key, key_size); if (l_old && map_flags == BPF_NOEXIST) { @@ -2360,12 +2375,6 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops, goto bucket_err; } - psock = smap_psock_sk(sock); - if (unlikely(!psock)) { - err = -EINVAL; - goto bucket_err; - } - rcu_assign_pointer(e->hash_link, l_new); rcu_assign_pointer(e->htab, container_of(map, struct bpf_htab, map)); @@ -2388,12 +2397,10 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops, raw_spin_unlock_bh(&b->lock); return 0; bucket_err: + smap_release_sock(psock, sock); raw_spin_unlock_bh(&b->lock); err: kfree(e); - psock = smap_psock_sk(sock); - if (psock) - smap_release_sock(psock, sock); return err; } @@ -2415,7 +2422,13 @@ static int sock_hash_update_elem(struct bpf_map *map, return -EINVAL; } + lock_sock(skops.sk); + preempt_disable(); + rcu_read_lock(); err = sock_hash_ctx_update_elem(&skops, map, key, flags); + rcu_read_unlock(); + preempt_enable(); + release_sock(skops.sk); fput(socket->file); return err; } @@ -2472,10 +2485,8 @@ struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key) b = __select_bucket(htab, hash); head = &b->head; - raw_spin_lock_bh(&b->lock); l = lookup_elem_raw(head, hash, key, key_size); sk = l ? l->sk : NULL; - raw_spin_unlock_bh(&b->lock); return sk; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d10ecd78105f..a31a1ba0f8ea 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -735,7 +735,9 @@ static int map_update_elem(union bpf_attr *attr) if (bpf_map_is_dev_bound(map)) { err = bpf_map_offload_update_elem(map, key, value, attr->flags); goto out; - } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) { + } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || + map->map_type == BPF_MAP_TYPE_SOCKHASH || + map->map_type == BPF_MAP_TYPE_SOCKMAP) { err = map->ops->map_update_elem(map, key, value, attr->flags); goto out; } diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 3109b2e1d552..0183d07a9b4d 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -1143,13 +1143,14 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), void *arg) { - struct bucket_table *tbl; + struct bucket_table *tbl, *next_tbl; unsigned int i; cancel_work_sync(&ht->run_work); mutex_lock(&ht->mutex); tbl = rht_dereference(ht->tbl, ht); +restart: if (free_fn) { for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; @@ -1166,7 +1167,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, } } + next_tbl = rht_dereference(tbl->future_tbl, ht); bucket_table_free(tbl); + if (next_tbl) { + tbl = next_tbl; + goto restart; + } mutex_unlock(&ht->mutex); } EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy); diff --git a/net/core/filter.c b/net/core/filter.c index 0ca6907d7efe..5fa66a33927f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1762,6 +1762,37 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = { .arg2_type = ARG_ANYTHING, }; +static inline int sk_skb_try_make_writable(struct sk_buff *skb, + unsigned int write_len) +{ + int err = __bpf_try_make_writable(skb, write_len); + + bpf_compute_data_end_sk_skb(skb); + return err; +} + +BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len) +{ + /* Idea is the following: should the needed direct read/write + * test fail during runtime, we can pull in more data and redo + * again, since implicitly, we invalidate previous checks here. + * + * Or, since we know how much we need to make read/writeable, + * this can be done once at the program beginning for direct + * access case. By this we overcome limitations of only current + * headroom being accessible. + */ + return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb)); +} + +static const struct bpf_func_proto sk_skb_pull_data_proto = { + .func = sk_skb_pull_data, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset, u64, from, u64, to, u64, flags) { @@ -2779,7 +2810,8 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) static u32 __bpf_skb_max_len(const struct sk_buff *skb) { - return skb->dev->mtu + skb->dev->hard_header_len; + return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len : + SKB_MAX_ALLOC; } static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) @@ -2863,8 +2895,8 @@ static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len) return __skb_trim_rcsum(skb, new_len); } -BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, - u64, flags) +static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len, + u64 flags) { u32 max_len = __bpf_skb_max_len(skb); u32 min_len = __bpf_skb_min_len(skb); @@ -2900,6 +2932,13 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, if (!ret && skb_is_gso(skb)) skb_gso_reset(skb); } + return ret; +} + +BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, + u64, flags) +{ + int ret = __bpf_skb_change_tail(skb, new_len, flags); bpf_compute_data_pointers(skb); return ret; @@ -2914,9 +2953,27 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = { .arg3_type = ARG_ANYTHING, }; -BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, +BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len, u64, flags) { + int ret = __bpf_skb_change_tail(skb, new_len, flags); + + bpf_compute_data_end_sk_skb(skb); + return ret; +} + +static const struct bpf_func_proto sk_skb_change_tail_proto = { + .func = sk_skb_change_tail, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + +static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, + u64 flags) +{ u32 max_len = __bpf_skb_max_len(skb); u32 new_len = skb->len + head_room; int ret; @@ -2941,8 +2998,16 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, skb_reset_mac_header(skb); } + return ret; +} + +BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, + u64, flags) +{ + int ret = __bpf_skb_change_head(skb, head_room, flags); + bpf_compute_data_pointers(skb); - return 0; + return ret; } static const struct bpf_func_proto bpf_skb_change_head_proto = { @@ -2954,6 +3019,23 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = { .arg3_type = ARG_ANYTHING, }; +BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room, + u64, flags) +{ + int ret = __bpf_skb_change_head(skb, head_room, flags); + + bpf_compute_data_end_sk_skb(skb); + return ret; +} + +static const struct bpf_func_proto sk_skb_change_head_proto = { + .func = sk_skb_change_head, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) { return xdp_data_meta_unsupported(xdp) ? 0 : @@ -3046,12 +3128,16 @@ static int __bpf_tx_xdp(struct net_device *dev, u32 index) { struct xdp_frame *xdpf; - int sent; + int err, sent; if (!dev->netdev_ops->ndo_xdp_xmit) { return -EOPNOTSUPP; } + err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data); + if (unlikely(err)) + return err; + xdpf = convert_to_xdp_frame(xdp); if (unlikely(!xdpf)) return -EOVERFLOW; @@ -3285,7 +3371,8 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, goto err; } - if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) + err = xdp_ok_fwd_dev(fwd, skb->len); + if (unlikely(err)) goto err; skb->dev = fwd; @@ -4617,9 +4704,12 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_skb_store_bytes || func == bpf_skb_change_proto || func == bpf_skb_change_head || + func == sk_skb_change_head || func == bpf_skb_change_tail || + func == sk_skb_change_tail || func == bpf_skb_adjust_room || func == bpf_skb_pull_data || + func == sk_skb_pull_data || func == bpf_clone_redirect || func == bpf_l3_csum_replace || func == bpf_l4_csum_replace || @@ -4871,11 +4961,11 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_load_bytes: return &bpf_skb_load_bytes_proto; case BPF_FUNC_skb_pull_data: - return &bpf_skb_pull_data_proto; + return &sk_skb_pull_data_proto; case BPF_FUNC_skb_change_tail: - return &bpf_skb_change_tail_proto; + return &sk_skb_change_tail_proto; case BPF_FUNC_skb_change_head: - return &bpf_skb_change_head_proto; + return &sk_skb_change_head_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_cookie_proto; case BPF_FUNC_get_socket_uid: diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b21833651394..e46cdd310e5f 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -300,6 +300,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { struct flowi4 fl4 = { .flowi4_iif = LOOPBACK_IFINDEX, + .flowi4_oif = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), .flowi4_scope = scope, diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index c9e35b81d093..1e4cf3ab560f 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -90,7 +90,7 @@ static void inet_frags_free_cb(void *ptr, void *arg) void inet_frags_exit_net(struct netns_frags *nf) { - nf->low_thresh = 0; /* prevent creation of new frags */ + nf->high_thresh = 0; /* prevent creation of new frags */ rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL); } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ca0dad90803a..e77872c93c20 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1898,6 +1898,7 @@ static struct xt_match ipt_builtin_mt[] __read_mostly = { .checkentry = icmp_checkentry, .proto = IPPROTO_ICMP, .family = NFPROTO_IPV4, + .me = THIS_MODULE, }, }; diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c index 805e83ec3ad9..164714104965 100644 --- a/net/ipv4/netfilter/nf_tproxy_ipv4.c +++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c @@ -37,7 +37,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, + sk2 = nf_tproxy_get_sock_v4(net, skb, iph->protocol, iph->saddr, laddr ? laddr : iph->daddr, hp->source, lport ? lport : hp->dest, skb->dev, NF_TPROXY_LOOKUP_LISTENER); @@ -71,7 +71,7 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) EXPORT_SYMBOL_GPL(nf_tproxy_laddr4); struct sock * -nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, +nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, @@ -79,16 +79,21 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, const enum nf_tproxy_lookup_t lookup_type) { struct sock *sk; - struct tcphdr *tcph; switch (protocol) { - case IPPROTO_TCP: + case IPPROTO_TCP: { + struct tcphdr _hdr, *hp; + + hp = skb_header_pointer(skb, ip_hdrlen(skb), + sizeof(struct tcphdr), &_hdr); + if (hp == NULL) + return NULL; + switch (lookup_type) { case NF_TPROXY_LOOKUP_LISTENER: - tcph = hp; sk = inet_lookup_listener(net, &tcp_hashinfo, skb, ip_hdrlen(skb) + - __tcp_hdrlen(tcph), + __tcp_hdrlen(hp), saddr, sport, daddr, dport, in->ifindex, 0); @@ -110,6 +115,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, BUG(); } break; + } case IPPROTO_UDP: sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, in->ifindex); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index af0a857d8352..5fa335fd3852 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -189,8 +189,9 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, if (write && ret == 0) { low = make_kgid(user_ns, urange[0]); high = make_kgid(user_ns, urange[1]); - if (!gid_valid(low) || !gid_valid(high) || - (urange[1] < urange[0]) || gid_lt(high, low)) { + if (!gid_valid(low) || !gid_valid(high)) + return -EINVAL; + if (urange[1] < urange[0] || gid_lt(high, low)) { low = make_kgid(&init_user_ns, 1); high = make_kgid(&init_user_ns, 0); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e7b53d2a971f..0d43705dd001 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2562,6 +2562,8 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); + tp->copied_seq = tp->rcv_nxt; + tp->urg_data = 0; tcp_write_queue_purge(sk); tcp_fastopen_active_disable_ofo_check(sk); skb_rbtree_purge(&tp->out_of_order_queue); @@ -3720,8 +3722,7 @@ int tcp_abort(struct sock *sk, int err) struct request_sock *req = inet_reqsk(sk); local_bh_disable(); - inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, - req); + inet_csk_reqsk_queue_drop(req->rsk_listener, req); local_bh_enable(); return 0; } diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 1323b9679cf7..1c0bb9fb76e6 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -799,8 +799,7 @@ static int calipso_opt_update(struct sock *sk, struct ipv6_opt_hdr *hop) { struct ipv6_txoptions *old = txopt_get(inet6_sk(sk)), *txopts; - txopts = ipv6_renew_options_kern(sk, old, IPV6_HOPOPTS, - hop, hop ? ipv6_optlen(hop) : 0); + txopts = ipv6_renew_options(sk, old, IPV6_HOPOPTS, hop); txopt_put(old); if (IS_ERR(txopts)) return PTR_ERR(txopts); @@ -1222,8 +1221,7 @@ static int calipso_req_setattr(struct request_sock *req, if (IS_ERR(new)) return PTR_ERR(new); - txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, - new, new ? ipv6_optlen(new) : 0); + txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new); kfree(new); @@ -1260,8 +1258,7 @@ static void calipso_req_delattr(struct request_sock *req) if (calipso_opt_del(req_inet->ipv6_opt->hopopt, &new)) return; /* Nothing to do */ - txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, - new, new ? ipv6_optlen(new) : 0); + txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new); if (!IS_ERR(txopts)) { txopts = xchg(&req_inet->ipv6_opt, txopts); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 5bc2bf3733ab..20291c2036fc 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -1015,29 +1015,21 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) } EXPORT_SYMBOL_GPL(ipv6_dup_options); -static int ipv6_renew_option(void *ohdr, - struct ipv6_opt_hdr __user *newopt, int newoptlen, - int inherit, - struct ipv6_opt_hdr **hdr, - char **p) +static void ipv6_renew_option(int renewtype, + struct ipv6_opt_hdr **dest, + struct ipv6_opt_hdr *old, + struct ipv6_opt_hdr *new, + int newtype, char **p) { - if (inherit) { - if (ohdr) { - memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr)); - *hdr = (struct ipv6_opt_hdr *)*p; - *p += CMSG_ALIGN(ipv6_optlen(*hdr)); - } - } else { - if (newopt) { - if (copy_from_user(*p, newopt, newoptlen)) - return -EFAULT; - *hdr = (struct ipv6_opt_hdr *)*p; - if (ipv6_optlen(*hdr) > newoptlen) - return -EINVAL; - *p += CMSG_ALIGN(newoptlen); - } - } - return 0; + struct ipv6_opt_hdr *src; + + src = (renewtype == newtype ? new : old); + if (!src) + return; + + memcpy(*p, src, ipv6_optlen(src)); + *dest = (struct ipv6_opt_hdr *)*p; + *p += CMSG_ALIGN(ipv6_optlen(*dest)); } /** @@ -1063,13 +1055,11 @@ static int ipv6_renew_option(void *ohdr, */ struct ipv6_txoptions * ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, - int newtype, - struct ipv6_opt_hdr __user *newopt, int newoptlen) + int newtype, struct ipv6_opt_hdr *newopt) { int tot_len = 0; char *p; struct ipv6_txoptions *opt2; - int err; if (opt) { if (newtype != IPV6_HOPOPTS && opt->hopopt) @@ -1082,8 +1072,8 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt)); } - if (newopt && newoptlen) - tot_len += CMSG_ALIGN(newoptlen); + if (newopt) + tot_len += CMSG_ALIGN(ipv6_optlen(newopt)); if (!tot_len) return NULL; @@ -1098,29 +1088,19 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, opt2->tot_len = tot_len; p = (char *)(opt2 + 1); - err = ipv6_renew_option(opt ? opt->hopopt : NULL, newopt, newoptlen, - newtype != IPV6_HOPOPTS, - &opt2->hopopt, &p); - if (err) - goto out; - - err = ipv6_renew_option(opt ? opt->dst0opt : NULL, newopt, newoptlen, - newtype != IPV6_RTHDRDSTOPTS, - &opt2->dst0opt, &p); - if (err) - goto out; - - err = ipv6_renew_option(opt ? opt->srcrt : NULL, newopt, newoptlen, - newtype != IPV6_RTHDR, - (struct ipv6_opt_hdr **)&opt2->srcrt, &p); - if (err) - goto out; - - err = ipv6_renew_option(opt ? opt->dst1opt : NULL, newopt, newoptlen, - newtype != IPV6_DSTOPTS, - &opt2->dst1opt, &p); - if (err) - goto out; + ipv6_renew_option(IPV6_HOPOPTS, &opt2->hopopt, + (opt ? opt->hopopt : NULL), + newopt, newtype, &p); + ipv6_renew_option(IPV6_RTHDRDSTOPTS, &opt2->dst0opt, + (opt ? opt->dst0opt : NULL), + newopt, newtype, &p); + ipv6_renew_option(IPV6_RTHDR, + (struct ipv6_opt_hdr **)&opt2->srcrt, + (opt ? (struct ipv6_opt_hdr *)opt->srcrt : NULL), + newopt, newtype, &p); + ipv6_renew_option(IPV6_DSTOPTS, &opt2->dst1opt, + (opt ? opt->dst1opt : NULL), + newopt, newtype, &p); opt2->opt_nflen = (opt2->hopopt ? ipv6_optlen(opt2->hopopt) : 0) + (opt2->dst0opt ? ipv6_optlen(opt2->dst0opt) : 0) + @@ -1128,37 +1108,6 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, opt2->opt_flen = (opt2->dst1opt ? ipv6_optlen(opt2->dst1opt) : 0); return opt2; -out: - sock_kfree_s(sk, opt2, opt2->tot_len); - return ERR_PTR(err); -} - -/** - * ipv6_renew_options_kern - replace a specific ext hdr with a new one. - * - * @sk: sock from which to allocate memory - * @opt: original options - * @newtype: option type to replace in @opt - * @newopt: new option of type @newtype to replace (kernel-mem) - * @newoptlen: length of @newopt - * - * See ipv6_renew_options(). The difference is that @newopt is - * kernel memory, rather than user memory. - */ -struct ipv6_txoptions * -ipv6_renew_options_kern(struct sock *sk, struct ipv6_txoptions *opt, - int newtype, struct ipv6_opt_hdr *newopt, - int newoptlen) -{ - struct ipv6_txoptions *ret_val; - const mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - ret_val = ipv6_renew_options(sk, opt, newtype, - (struct ipv6_opt_hdr __user *)newopt, - newoptlen); - set_fs(old_fs); - return ret_val; } struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4d780c7f0130..c95c3486d904 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -398,6 +398,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, case IPV6_DSTOPTS: { struct ipv6_txoptions *opt; + struct ipv6_opt_hdr *new = NULL; + + /* hop-by-hop / destination options are privileged option */ + retv = -EPERM; + if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) + break; /* remove any sticky options header with a zero option * length, per RFC3542. @@ -409,17 +415,22 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, else if (optlen < sizeof(struct ipv6_opt_hdr) || optlen & 0x7 || optlen > 8 * 255) goto e_inval; - - /* hop-by-hop / destination options are privileged option */ - retv = -EPERM; - if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) - break; + else { + new = memdup_user(optval, optlen); + if (IS_ERR(new)) { + retv = PTR_ERR(new); + break; + } + if (unlikely(ipv6_optlen(new) > optlen)) { + kfree(new); + goto e_inval; + } + } opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); - opt = ipv6_renew_options(sk, opt, optname, - (struct ipv6_opt_hdr __user *)optval, - optlen); + opt = ipv6_renew_options(sk, opt, optname, new); + kfree(new); if (IS_ERR(opt)) { retv = PTR_ERR(opt); break; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 7eab959734bc..daf2e9e9193d 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1909,6 +1909,7 @@ static struct xt_match ip6t_builtin_mt[] __read_mostly = { .checkentry = icmp6_checkentry, .proto = IPPROTO_ICMPV6, .family = NFPROTO_IPV6, + .me = THIS_MODULE, }, }; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index a452d99c9f52..e4d9e6976d3c 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -585,6 +585,8 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) fq->q.meat == fq->q.len && nf_ct_frag6_reasm(fq, skb, dev)) ret = 0; + else + skb_dst_drop(skb); out_unlock: spin_unlock_bh(&fq->q.lock); diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c index bf1d6c421e3b..5dfd33af6451 100644 --- a/net/ipv6/netfilter/nf_tproxy_ipv6.c +++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c @@ -55,7 +55,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v6(net, skb, thoff, hp, tproto, + sk2 = nf_tproxy_get_sock_v6(net, skb, thoff, tproto, &iph->saddr, nf_tproxy_laddr6(skb, laddr, &iph->daddr), hp->source, @@ -72,7 +72,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait6); struct sock * -nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, +nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, const u8 protocol, const struct in6_addr *saddr, const struct in6_addr *daddr, const __be16 sport, const __be16 dport, @@ -80,15 +80,20 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, const enum nf_tproxy_lookup_t lookup_type) { struct sock *sk; - struct tcphdr *tcph; switch (protocol) { - case IPPROTO_TCP: + case IPPROTO_TCP: { + struct tcphdr _hdr, *hp; + + hp = skb_header_pointer(skb, thoff, + sizeof(struct tcphdr), &_hdr); + if (hp == NULL) + return NULL; + switch (lookup_type) { case NF_TPROXY_LOOKUP_LISTENER: - tcph = hp; sk = inet6_lookup_listener(net, &tcp_hashinfo, skb, - thoff + __tcp_hdrlen(tcph), + thoff + __tcp_hdrlen(hp), saddr, sport, daddr, ntohs(dport), in->ifindex, 0); @@ -110,6 +115,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, BUG(); } break; + } case IPPROTO_UDP: sk = udp6_lib_lookup(net, saddr, sport, daddr, dport, in->ifindex); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index dbd7d1fad277..f0a1c536ef15 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -460,6 +460,13 @@ config NF_TABLES if NF_TABLES +config NF_TABLES_SET + tristate "Netfilter nf_tables set infrastructure" + help + This option enables the nf_tables set infrastructure that allows to + look up for elements in a set and to build one-way mappings between + matchings and actions. + config NF_TABLES_INET depends on IPV6 select NF_TABLES_IPV4 @@ -493,24 +500,6 @@ config NFT_FLOW_OFFLOAD This option adds the "flow_offload" expression that you can use to choose what flows are placed into the hardware. -config NFT_SET_RBTREE - tristate "Netfilter nf_tables rbtree set module" - help - This option adds the "rbtree" set type (Red Black tree) that is used - to build interval-based sets. - -config NFT_SET_HASH - tristate "Netfilter nf_tables hash set module" - help - This option adds the "hash" set type that is used to build one-way - mappings between matchings and actions. - -config NFT_SET_BITMAP - tristate "Netfilter nf_tables bitmap set module" - help - This option adds the "bitmap" set type that is used to build sets - whose keys are smaller or equal to 16 bits. - config NFT_COUNTER tristate "Netfilter nf_tables counter module" help diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 44449389e527..8a76dced974d 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -78,7 +78,11 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \ nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \ nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o +nf_tables_set-objs := nf_tables_set_core.o \ + nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o + obj-$(CONFIG_NF_TABLES) += nf_tables.o +obj-$(CONFIG_NF_TABLES_SET) += nf_tables_set.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o @@ -91,9 +95,6 @@ obj-$(CONFIG_NFT_QUEUE) += nft_queue.o obj-$(CONFIG_NFT_QUOTA) += nft_quota.o obj-$(CONFIG_NFT_REJECT) += nft_reject.o obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o -obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o -obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o -obj-$(CONFIG_NFT_SET_BITMAP) += nft_set_bitmap.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3465da2a98bd..3d5280425027 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2043,7 +2043,7 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp) return -EOPNOTSUPP; /* On boot, we can set this without any fancy locking. */ - if (!nf_conntrack_htable_size) + if (!nf_conntrack_hash) return param_set_uint(val, kp); rc = kstrtouint(val, 0, &hashsize); diff --git a/net/netfilter/nf_tables_set_core.c b/net/netfilter/nf_tables_set_core.c new file mode 100644 index 000000000000..814789644bd3 --- /dev/null +++ b/net/netfilter/nf_tables_set_core.c @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <net/netfilter/nf_tables_core.h> + +static int __init nf_tables_set_module_init(void) +{ + nft_register_set(&nft_set_hash_fast_type); + nft_register_set(&nft_set_hash_type); + nft_register_set(&nft_set_rhash_type); + nft_register_set(&nft_set_bitmap_type); + nft_register_set(&nft_set_rbtree_type); + + return 0; +} + +static void __exit nf_tables_set_module_exit(void) +{ + nft_unregister_set(&nft_set_rbtree_type); + nft_unregister_set(&nft_set_bitmap_type); + nft_unregister_set(&nft_set_rhash_type); + nft_unregister_set(&nft_set_hash_type); + nft_unregister_set(&nft_set_hash_fast_type); +} + +module_init(nf_tables_set_module_init); +module_exit(nf_tables_set_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 8d1ff654e5af..32535eea51b2 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -832,10 +832,18 @@ nft_target_select_ops(const struct nft_ctx *ctx, rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV])); family = ctx->family; + if (strcmp(tg_name, XT_ERROR_TARGET) == 0 || + strcmp(tg_name, XT_STANDARD_TARGET) == 0 || + strcmp(tg_name, "standard") == 0) + return ERR_PTR(-EINVAL); + /* Re-use the existing target if it's already loaded. */ list_for_each_entry(nft_target, &nft_target_list, head) { struct xt_target *target = nft_target->ops.data; + if (!target->target) + continue; + if (nft_target_cmp(target, tg_name, rev, family)) return &nft_target->ops; } @@ -844,6 +852,11 @@ nft_target_select_ops(const struct nft_ctx *ctx, if (IS_ERR(target)) return ERR_PTR(-ENOENT); + if (!target->target) { + err = -EINVAL; + goto err; + } + if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO])) { err = -EINVAL; goto err; diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index d6626e01c7ee..128bc16f52dd 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -296,7 +296,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, return true; } -static struct nft_set_type nft_bitmap_type __read_mostly = { +struct nft_set_type nft_set_bitmap_type __read_mostly = { .owner = THIS_MODULE, .ops = { .privsize = nft_bitmap_privsize, @@ -314,20 +314,3 @@ static struct nft_set_type nft_bitmap_type __read_mostly = { .get = nft_bitmap_get, }, }; - -static int __init nft_bitmap_module_init(void) -{ - return nft_register_set(&nft_bitmap_type); -} - -static void __exit nft_bitmap_module_exit(void) -{ - nft_unregister_set(&nft_bitmap_type); -} - -module_init(nft_bitmap_module_init); -module_exit(nft_bitmap_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); -MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 6f9a1365a09f..72ef35b51cac 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -654,7 +654,7 @@ static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features return true; } -static struct nft_set_type nft_rhash_type __read_mostly = { +struct nft_set_type nft_set_rhash_type __read_mostly = { .owner = THIS_MODULE, .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT | NFT_SET_EVAL, @@ -677,7 +677,7 @@ static struct nft_set_type nft_rhash_type __read_mostly = { }, }; -static struct nft_set_type nft_hash_type __read_mostly = { +struct nft_set_type nft_set_hash_type __read_mostly = { .owner = THIS_MODULE, .features = NFT_SET_MAP | NFT_SET_OBJECT, .ops = { @@ -697,7 +697,7 @@ static struct nft_set_type nft_hash_type __read_mostly = { }, }; -static struct nft_set_type nft_hash_fast_type __read_mostly = { +struct nft_set_type nft_set_hash_fast_type __read_mostly = { .owner = THIS_MODULE, .features = NFT_SET_MAP | NFT_SET_OBJECT, .ops = { @@ -716,26 +716,3 @@ static struct nft_set_type nft_hash_fast_type __read_mostly = { .get = nft_hash_get, }, }; - -static int __init nft_hash_module_init(void) -{ - if (nft_register_set(&nft_hash_fast_type) || - nft_register_set(&nft_hash_type) || - nft_register_set(&nft_rhash_type)) - return 1; - return 0; -} - -static void __exit nft_hash_module_exit(void) -{ - nft_unregister_set(&nft_rhash_type); - nft_unregister_set(&nft_hash_type); - nft_unregister_set(&nft_hash_fast_type); -} - -module_init(nft_hash_module_init); -module_exit(nft_hash_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 7f3a9a211034..1f8f257cb518 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -462,7 +462,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, return true; } -static struct nft_set_type nft_rbtree_type __read_mostly = { +struct nft_set_type nft_set_rbtree_type __read_mostly = { .owner = THIS_MODULE, .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, .ops = { @@ -481,20 +481,3 @@ static struct nft_set_type nft_rbtree_type __read_mostly = { .get = nft_rbtree_get, }, }; - -static int __init nft_rbtree_module_init(void) -{ - return nft_register_set(&nft_rbtree_type); -} - -static void __exit nft_rbtree_module_exit(void) -{ - nft_unregister_set(&nft_rbtree_type); -} - -module_init(nft_rbtree_module_init); -module_exit(nft_rbtree_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 58fce4e749a9..d76550a8b642 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -61,7 +61,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, + sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol, iph->saddr, iph->daddr, hp->source, hp->dest, skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED); @@ -77,7 +77,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, else if (!sk) /* no, there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, + sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol, iph->saddr, laddr, hp->source, lport, skb->dev, NF_TPROXY_LOOKUP_LISTENER); @@ -150,7 +150,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, + sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, tproto, &iph->saddr, &iph->daddr, hp->source, hp->dest, xt_in(par), NF_TPROXY_LOOKUP_ESTABLISHED); @@ -171,7 +171,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) else if (!sk) /* no there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, + sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, tproto, &iph->saddr, laddr, hp->source, lport, xt_in(par), NF_TPROXY_LOOKUP_LISTENER); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 2aa07b547b16..86e1e37eb4e8 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -191,8 +191,13 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, hdr->type = cpu_to_le32(type); hdr->src_node_id = cpu_to_le32(from->sq_node); hdr->src_port_id = cpu_to_le32(from->sq_port); - hdr->dst_node_id = cpu_to_le32(to->sq_node); - hdr->dst_port_id = cpu_to_le32(to->sq_port); + if (to->sq_port == QRTR_PORT_CTRL) { + hdr->dst_node_id = cpu_to_le32(node->nid); + hdr->dst_port_id = cpu_to_le32(QRTR_NODE_BCAST); + } else { + hdr->dst_node_id = cpu_to_le32(to->sq_node); + hdr->dst_port_id = cpu_to_le32(to->sq_port); + } hdr->size = cpu_to_le32(len); hdr->confirm_rx = 0; @@ -764,6 +769,10 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) node = NULL; if (addr->sq_node == QRTR_NODE_BCAST) { enqueue_fn = qrtr_bcast_enqueue; + if (addr->sq_port != QRTR_PORT_CTRL) { + release_sock(sk); + return -ENOTCONN; + } } else if (addr->sq_node == ipc->us.sq_node) { enqueue_fn = qrtr_local_enqueue; } else { diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 526a8e491626..6e7124e57918 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -91,7 +91,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, } params_old = rtnl_dereference(p->params); - params_new->action = parm->action; + p->tcf_action = parm->action; params_new->update_flags = parm->update_flags; rcu_assign_pointer(p->params, params_new); if (params_old) @@ -561,7 +561,7 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, tcf_lastuse_update(&p->tcf_tm); bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb); - action = params->action; + action = READ_ONCE(p->tcf_action); if (unlikely(action == TC_ACT_SHOT)) goto drop_stats; @@ -599,11 +599,11 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, .index = p->tcf_index, .refcnt = p->tcf_refcnt - ref, .bindcnt = p->tcf_bindcnt - bind, + .action = p->tcf_action, }; struct tcf_t t; params = rtnl_dereference(p->params); - opt.action = params->action; opt.update_flags = params->update_flags; if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt)) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 626dac81a48a..9bc6c2ae98a5 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -36,7 +36,7 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, tcf_lastuse_update(&t->tcf_tm); bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb); - action = params->action; + action = READ_ONCE(t->tcf_action); switch (params->tcft_action) { case TCA_TUNNEL_KEY_ACT_RELEASE: @@ -182,7 +182,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, params_old = rtnl_dereference(t->params); - params_new->action = parm->action; + t->tcf_action = parm->action; params_new->tcft_action = parm->t_action; params_new->tcft_enc_metadata = metadata; @@ -254,13 +254,13 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, .index = t->tcf_index, .refcnt = t->tcf_refcnt - ref, .bindcnt = t->tcf_bindcnt - bind, + .action = t->tcf_action, }; struct tcf_t tm; params = rtnl_dereference(t->params); opt.t_action = params->tcft_action; - opt.action = params->action; if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt)) goto nla_put_failure; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index e017b6a4452b..5334157f5065 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -147,7 +147,8 @@ static int smc_release(struct socket *sock) smc->clcsock = NULL; } if (smc->use_fallback) { - sock_put(sk); /* passive closing */ + if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) + sock_put(sk); /* passive closing */ sk->sk_state = SMC_CLOSED; sk->sk_state_change(sk); } @@ -417,12 +418,18 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code) { int rc; - if (reason_code < 0) /* error, fallback is not possible */ + if (reason_code < 0) { /* error, fallback is not possible */ + if (smc->sk.sk_state == SMC_INIT) + sock_put(&smc->sk); /* passive closing */ return reason_code; + } if (reason_code != SMC_CLC_DECL_REPLY) { rc = smc_clc_send_decline(smc, reason_code); - if (rc < 0) + if (rc < 0) { + if (smc->sk.sk_state == SMC_INIT) + sock_put(&smc->sk); /* passive closing */ return rc; + } } return smc_connect_fallback(smc); } @@ -435,8 +442,6 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code, smc_lgr_forget(smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); smc_conn_free(&smc->conn); - if (reason_code < 0 && smc->sk.sk_state == SMC_INIT) - sock_put(&smc->sk); /* passive closing */ return reason_code; } diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index fa41d9881741..ac961dfb1ea1 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -107,6 +107,8 @@ static void smc_close_active_abort(struct smc_sock *smc) } switch (sk->sk_state) { case SMC_INIT: + sk->sk_state = SMC_PEERABORTWAIT; + break; case SMC_ACTIVE: sk->sk_state = SMC_PEERABORTWAIT; release_sock(sk); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 9f666e0650e2..2830709957bd 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -133,6 +133,8 @@ static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr, } /* tipc_disc_addr_trial(): - handle an address uniqueness trial from peer + * Returns true if message should be dropped by caller, i.e., if it is a + * trial message or we are inside trial period. Otherwise false. */ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d, struct tipc_media_addr *maddr, @@ -168,8 +170,9 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d, msg_set_type(buf_msg(d->skb), DSC_REQ_MSG); } + /* Accept regular link requests/responses only after trial period */ if (mtyp != DSC_TRIAL_MSG) - return false; + return trial; sugg_addr = tipc_node_try_addr(net, peer_id, src); if (sugg_addr) @@ -284,7 +287,6 @@ static void tipc_disc_timeout(struct timer_list *t) { struct tipc_discoverer *d = from_timer(d, t, timer); struct tipc_net *tn = tipc_net(d->net); - u32 self = tipc_own_addr(d->net); struct tipc_media_addr maddr; struct sk_buff *skb = NULL; struct net *net = d->net; @@ -298,12 +300,14 @@ static void tipc_disc_timeout(struct timer_list *t) goto exit; } - /* Did we just leave the address trial period ? */ - if (!self && !time_before(jiffies, tn->addr_trial_end)) { - self = tn->trial_addr; - tipc_net_finalize(net, self); - msg_set_prevnode(buf_msg(d->skb), self); + /* Trial period over ? */ + if (!time_before(jiffies, tn->addr_trial_end)) { + /* Did we just leave it ? */ + if (!tipc_own_addr(net)) + tipc_net_finalize(net, tn->trial_addr); + msg_set_type(buf_msg(d->skb), DSC_REQ_MSG); + msg_set_prevnode(buf_msg(d->skb), tipc_own_addr(net)); } /* Adjust timeout interval according to discovery phase */ diff --git a/net/tipc/net.c b/net/tipc/net.c index 4fbaa0464405..a7f6964c3a4b 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -121,12 +121,17 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr) void tipc_net_finalize(struct net *net, u32 addr) { - tipc_set_node_addr(net, addr); - smp_mb(); - tipc_named_reinit(net); - tipc_sk_reinit(net); - tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr, - TIPC_CLUSTER_SCOPE, 0, addr); + struct tipc_net *tn = tipc_net(net); + + spin_lock_bh(&tn->node_list_lock); + if (!tipc_own_addr(net)) { + tipc_set_node_addr(net, addr); + tipc_named_reinit(net); + tipc_sk_reinit(net); + tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr, + TIPC_CLUSTER_SCOPE, 0, addr); + } + spin_unlock_bh(&tn->node_list_lock); } void tipc_net_stop(struct net *net) diff --git a/net/tipc/node.c b/net/tipc/node.c index 6a44eb812baf..0453bd451ce8 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -797,6 +797,7 @@ static u32 tipc_node_suggest_addr(struct net *net, u32 addr) } /* tipc_node_try_addr(): Check if addr can be used by peer, suggest other if not + * Returns suggested address if any, otherwise 0 */ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr) { @@ -819,12 +820,14 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr) if (n) { addr = n->addr; tipc_node_put(n); + return addr; } - /* Even this node may be in trial phase */ + + /* Even this node may be in conflict */ if (tn->trial_addr == addr) return tipc_node_suggest_addr(net, addr); - return addr; + return 0; } void tipc_node_check_dest(struct net *net, u32 addr, diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 59fb7d3c36a3..7d220cbd09b6 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -199,8 +199,11 @@ static void xsk_destruct_skb(struct sk_buff *skb) { u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg; struct xdp_sock *xs = xdp_sk(skb->sk); + unsigned long flags; + spin_lock_irqsave(&xs->tx_completion_lock, flags); WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr)); + spin_unlock_irqrestore(&xs->tx_completion_lock, flags); sock_wfree(skb); } @@ -268,15 +271,15 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, skb->destructor = xsk_destruct_skb; err = dev_direct_xmit(skb, xs->queue_id); + xskq_discard_desc(xs->tx); /* Ignore NET_XMIT_CN as packet might have been sent */ if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) { - err = -EAGAIN; - /* SKB consumed by dev_direct_xmit() */ + /* SKB completed but not sent */ + err = -EBUSY; goto out; } sent_frame = true; - xskq_discard_desc(xs->tx); } out: @@ -755,6 +758,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, xs = xdp_sk(sk); mutex_init(&xs->mutex); + spin_lock_init(&xs->tx_completion_lock); local_bh_disable(); sock_prot_inuse_add(net, &xsk_proto, 1); diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index ef6a6f0ec949..52ecaf770642 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -62,14 +62,9 @@ static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt) return (entries > dcnt) ? dcnt : entries; } -static inline u32 xskq_nb_free_lazy(struct xsk_queue *q, u32 producer) -{ - return q->nentries - (producer - q->cons_tail); -} - static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt) { - u32 free_entries = xskq_nb_free_lazy(q, producer); + u32 free_entries = q->nentries - (producer - q->cons_tail); if (free_entries >= dcnt) return free_entries; @@ -129,7 +124,7 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; - if (xskq_nb_free(q, q->prod_tail, LAZY_UPDATE_THRESHOLD) == 0) + if (xskq_nb_free(q, q->prod_tail, 1) == 0) return -ENOSPC; ring->desc[q->prod_tail++ & q->ring_mask] = addr; diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore new file mode 100644 index 000000000000..8ae4940025f8 --- /dev/null +++ b/samples/bpf/.gitignore @@ -0,0 +1,49 @@ +cpustat +fds_example +lathist +load_sock_ops +lwt_len_hist +map_perf_test +offwaketime +per_socket_stats_example +sampleip +sock_example +sockex1 +sockex2 +sockex3 +spintest +syscall_nrs.h +syscall_tp +task_fd_query +tc_l2_redirect +test_cgrp2_array_pin +test_cgrp2_attach +test_cgrp2_attach2 +test_cgrp2_sock +test_cgrp2_sock2 +test_current_task_under_cgroup +test_lru_dist +test_map_in_map +test_overhead +test_probe_write_user +trace_event +trace_output +tracex1 +tracex2 +tracex3 +tracex4 +tracex5 +tracex6 +tracex7 +xdp1 +xdp2 +xdp_adjust_tail +xdp_fwd +xdp_monitor +xdp_redirect +xdp_redirect_cpu +xdp_redirect_map +xdp_router_ipv4 +xdp_rxq_info +xdp_tx_iptunnel +xdpsock diff --git a/samples/bpf/parse_varlen.c b/samples/bpf/parse_varlen.c index 95c16324760c..0b6f22feb2c9 100644 --- a/samples/bpf/parse_varlen.c +++ b/samples/bpf/parse_varlen.c @@ -6,6 +6,7 @@ */ #define KBUILD_MODNAME "foo" #include <linux/if_ether.h> +#include <linux/if_vlan.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/in.h> @@ -108,11 +109,6 @@ static int parse_ipv6(void *data, uint64_t nh_off, void *data_end) return 0; } -struct vlan_hdr { - uint16_t h_vlan_TCI; - uint16_t h_vlan_encapsulated_proto; -}; - SEC("varlen") int handle_ingress(struct __sk_buff *skb) { diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c index 6caf47afa635..9d6dcaa9db92 100644 --- a/samples/bpf/test_overhead_user.c +++ b/samples/bpf/test_overhead_user.c @@ -6,6 +6,7 @@ */ #define _GNU_SOURCE #include <sched.h> +#include <errno.h> #include <stdio.h> #include <sys/types.h> #include <asm/unistd.h> @@ -44,8 +45,13 @@ static void test_task_rename(int cpu) exit(1); } start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) - write(fd, buf, sizeof(buf)); + for (i = 0; i < MAX_CNT; i++) { + if (write(fd, buf, sizeof(buf)) < 0) { + printf("task rename failed: %s\n", strerror(errno)); + close(fd); + return; + } + } printf("task_rename:%d: %lld events per sec\n", cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); close(fd); @@ -63,8 +69,13 @@ static void test_urandom_read(int cpu) exit(1); } start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) - read(fd, buf, sizeof(buf)); + for (i = 0; i < MAX_CNT; i++) { + if (read(fd, buf, sizeof(buf)) < 0) { + printf("failed to read from /dev/urandom: %s\n", strerror(errno)); + close(fd); + return; + } + } printf("urandom_read:%d: %lld events per sec\n", cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); close(fd); diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index 1fa1becfa641..d08046ab81f0 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -122,6 +122,16 @@ static void print_stacks(void) } } +static inline int generate_load(void) +{ + if (system("dd if=/dev/zero of=/dev/null count=5000k status=none") < 0) { + printf("failed to generate some load with dd: %s\n", strerror(errno)); + return -1; + } + + return 0; +} + static void test_perf_event_all_cpu(struct perf_event_attr *attr) { int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); @@ -142,7 +152,11 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr) assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0); } - system("dd if=/dev/zero of=/dev/null count=5000k status=none"); + + if (generate_load() < 0) { + error = 1; + goto all_cpu_err; + } print_stacks(); all_cpu_err: for (i--; i >= 0; i--) { @@ -156,7 +170,7 @@ all_cpu_err: static void test_perf_event_task(struct perf_event_attr *attr) { - int pmu_fd; + int pmu_fd, error = 0; /* per task perf event, enable inherit so the "dd ..." command can be traced properly. * Enabling inherit will cause bpf_perf_prog_read_time helper failure. @@ -171,10 +185,17 @@ static void test_perf_event_task(struct perf_event_attr *attr) } assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0); - system("dd if=/dev/zero of=/dev/null count=5000k status=none"); + + if (generate_load() < 0) { + error = 1; + goto err; + } print_stacks(); +err: ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); close(pmu_fd); + if (error) + int_exit(0); } static void test_bpf_perf_event(void) diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index d69c8d78d3fd..5904b1543831 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -729,7 +729,7 @@ static void kick_tx(int fd) int ret; ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0); - if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN) + if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY) return; lassert(0); } |