diff options
127 files changed, 1802 insertions, 544 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 8c68de3cfd80..825dc2b7453d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2848,6 +2848,9 @@ F:	include/uapi/linux/if_bonding.h  BPF (Safe dynamic programs and tools)  M:	Alexei Starovoitov <ast@kernel.org>  M:	Daniel Borkmann <daniel@iogearbox.net> +R:	Martin KaFai Lau <kafai@fb.com> +R:	Song Liu <songliubraving@fb.com> +R:	Yonghong Song <yhs@fb.com>  L:	netdev@vger.kernel.org  L:	linux-kernel@vger.kernel.org  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git @@ -2873,6 +2876,8 @@ F:	samples/bpf/  F:	tools/bpf/  F:	tools/lib/bpf/  F:	tools/testing/selftests/bpf/ +K:	bpf +N:	bpf  BPF JIT for ARM  M:	Shubham Bansal <illusionist.neo@gmail.com> @@ -12868,6 +12873,13 @@ F:	Documentation/devicetree/bindings/net/dsa/realtek-smi.txt  F:	drivers/net/dsa/realtek-smi*  F:	drivers/net/dsa/rtl83* +REDPINE WIRELESS DRIVER +M:	Amitkumar Karwar <amitkarwar@gmail.com> +M:	Siva Rebbagondla <siva8118@gmail.com> +L:	linux-wireless@vger.kernel.org +S:	Maintained +F:	drivers/net/wireless/rsi/ +  REGISTER MAP ABSTRACTION  M:	Mark Brown <broonie@kernel.org>  L:	linux-kernel@vger.kernel.org @@ -13696,6 +13708,15 @@ L:	netdev@vger.kernel.org  S:	Supported  F:	drivers/net/ethernet/sfc/ +SFF/SFP/SFP+ MODULE SUPPORT +M:	Russell King <linux@armlinux.org.uk> +L:	netdev@vger.kernel.org +S:	Maintained +F:	drivers/net/phy/phylink.c +F:	drivers/net/phy/sfp* +F:	include/linux/phylink.h +F:	include/linux/sfp.h +  SGI GRU DRIVER  M:	Dimitri Sivanich <sivanich@sgi.com>  S:	Maintained diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c index 211ed6cffd10..578978711887 100644 --- a/drivers/isdn/mISDN/timerdev.c +++ b/drivers/isdn/mISDN/timerdev.c @@ -170,8 +170,8 @@ dev_expire_timer(struct timer_list *t)  	spin_lock_irqsave(&timer->dev->lock, flags);  	if (timer->id >= 0)  		list_move_tail(&timer->list, &timer->dev->expired); -	spin_unlock_irqrestore(&timer->dev->lock, flags);  	wake_up_interruptible(&timer->dev->wait); +	
spin_unlock_irqrestore(&timer->dev->lock, flags);  }  static int diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c index 90f514252987..d9c56a779c08 100644 --- a/drivers/net/dsa/b53/b53_srab.c +++ b/drivers/net/dsa/b53/b53_srab.c @@ -511,9 +511,6 @@ static void b53_srab_prepare_irq(struct platform_device *pdev)  	/* Clear all pending interrupts */  	writel(0xffffffff, priv->regs + B53_SRAB_INTR); -	if (dev->pdata && dev->pdata->chip_id != BCM58XX_DEVICE_ID) -		return; -  	for (i = 0; i < B53_N_PORTS; i++) {  		port = &priv->port_intrs[i]; diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c index 5200e4bdce93..ea243840ee0f 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_atu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c @@ -314,6 +314,7 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)  {  	struct mv88e6xxx_chip *chip = dev_id;  	struct mv88e6xxx_atu_entry entry; +	int spid;  	int err;  	u16 val; @@ -336,6 +337,8 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)  	if (err)  		goto out; +	spid = entry.state; +  	if (val & MV88E6XXX_G1_ATU_OP_AGE_OUT_VIOLATION) {  		dev_err_ratelimited(chip->dev,  				    "ATU age out violation for %pM\n", @@ -344,23 +347,23 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)  	if (val & MV88E6XXX_G1_ATU_OP_MEMBER_VIOLATION) {  		dev_err_ratelimited(chip->dev, -				    "ATU member violation for %pM portvec %x\n", -				    entry.mac, entry.portvec); -		chip->ports[entry.portvec].atu_member_violation++; +				    "ATU member violation for %pM portvec %x spid %d\n", +				    entry.mac, entry.portvec, spid); +		chip->ports[spid].atu_member_violation++;  	}  	if (val & MV88E6XXX_G1_ATU_OP_MISS_VIOLATION) {  		dev_err_ratelimited(chip->dev, -				    "ATU miss violation for %pM portvec %x\n", -				    entry.mac, entry.portvec); -		chip->ports[entry.portvec].atu_miss_violation++; +				  
  "ATU miss violation for %pM portvec %x spid %d\n", +				    entry.mac, entry.portvec, spid); +		chip->ports[spid].atu_miss_violation++;  	}  	if (val & MV88E6XXX_G1_ATU_OP_FULL_VIOLATION) {  		dev_err_ratelimited(chip->dev, -				    "ATU full violation for %pM portvec %x\n", -				    entry.mac, entry.portvec); -		chip->ports[entry.portvec].atu_full_violation++; +				    "ATU full violation for %pM portvec %x spid %d\n", +				    entry.mac, entry.portvec, spid); +		chip->ports[spid].atu_full_violation++;  	}  	mutex_unlock(&chip->reg_lock); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index f9521d0274b7..28c9b0bdf2f6 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -520,7 +520,6 @@ static void bcm_sysport_get_wol(struct net_device *dev,  				struct ethtool_wolinfo *wol)  {  	struct bcm_sysport_priv *priv = netdev_priv(dev); -	u32 reg;  	wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER;  	wol->wolopts = priv->wolopts; @@ -528,11 +527,7 @@ static void bcm_sysport_get_wol(struct net_device *dev,  	if (!(priv->wolopts & WAKE_MAGICSECURE))  		return; -	/* Return the programmed SecureOn password */ -	reg = umac_readl(priv, UMAC_PSW_MS); -	put_unaligned_be16(reg, &wol->sopass[0]); -	reg = umac_readl(priv, UMAC_PSW_LS); -	put_unaligned_be32(reg, &wol->sopass[2]); +	memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass));  }  static int bcm_sysport_set_wol(struct net_device *dev, @@ -548,13 +543,8 @@ static int bcm_sysport_set_wol(struct net_device *dev,  	if (wol->wolopts & ~supported)  		return -EINVAL; -	/* Program the SecureOn password */ -	if (wol->wolopts & WAKE_MAGICSECURE) { -		umac_writel(priv, get_unaligned_be16(&wol->sopass[0]), -			    UMAC_PSW_MS); -		umac_writel(priv, get_unaligned_be32(&wol->sopass[2]), -			    UMAC_PSW_LS); -	} +	if (wol->wolopts & WAKE_MAGICSECURE) +		memcpy(priv->sopass, wol->sopass, sizeof(priv->sopass));  	/* 
Flag the device and relevant IRQ as wakeup capable */  	if (wol->wolopts) { @@ -2649,13 +2639,18 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv)  	unsigned int index, i = 0;  	u32 reg; -	/* Password has already been programmed */  	reg = umac_readl(priv, UMAC_MPD_CTRL);  	if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE))  		reg |= MPD_EN;  	reg &= ~PSW_EN; -	if (priv->wolopts & WAKE_MAGICSECURE) +	if (priv->wolopts & WAKE_MAGICSECURE) { +		/* Program the SecureOn password */ +		umac_writel(priv, get_unaligned_be16(&priv->sopass[0]), +			    UMAC_PSW_MS); +		umac_writel(priv, get_unaligned_be32(&priv->sopass[2]), +			    UMAC_PSW_LS);  		reg |= PSW_EN; +	}  	umac_writel(priv, reg, UMAC_MPD_CTRL);  	if (priv->wolopts & WAKE_FILTER) { diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 0887e6356649..0b192fea9c5d 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -12,6 +12,7 @@  #define __BCM_SYSPORT_H  #include <linux/bitmap.h> +#include <linux/ethtool.h>  #include <linux/if_vlan.h>  #include <linux/net_dim.h> @@ -778,6 +779,7 @@ struct bcm_sysport_priv {  	unsigned int		crc_fwd:1;  	u16			rev;  	u32			wolopts; +	u8			sopass[SOPASS_MAX];  	unsigned int		wol_irq_disabled:1;  	/* MIB related fields */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6a512871176b..8bc7e495b027 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4973,12 +4973,18 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp)  		struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;  		struct bnxt_ring_struct *ring = &cpr->cp_ring_struct;  		u32 map_idx = ring->map_idx; +		unsigned int vector; +		vector = bp->irq_tbl[map_idx].vector; +		disable_irq_nosync(vector);  		rc = hwrm_ring_alloc_send_msg(bp, ring, type, map_idx); -		if (rc) +		if (rc) { +			
enable_irq(vector);  			goto err_out; +		}  		bnxt_set_db(bp, &cpr->cp_db, type, map_idx, ring->fw_ring_id);  		bnxt_db_nq(bp, &cpr->cp_db, cpr->cp_raw_cons); +		enable_irq(vector);  		bp->grp_info[i].cp_fw_ring_id = ring->fw_ring_id;  		if (!i) { diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 5db9f4158e62..134ae2862efa 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -1288,7 +1288,7 @@ static void sbdma_tx_process(struct sbmac_softc *sc, struct sbmacdma *d,  		 * for transmits, we just free buffers.  		 */ -		dev_kfree_skb_irq(sb); +		dev_consume_skb_irq(sb);  		/*  		 * .. and advance to the next buffer. diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index 5f03199a3acf..05f4a3b21e29 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -54,7 +54,6 @@ config CAVIUM_PTP  	tristate "Cavium PTP coprocessor as PTP clock"  	depends on 64BIT && PCI  	imply PTP_1588_CLOCK -	default y  	---help---  	  This driver adds support for the Precision Time Protocol Clocks and  	  Timestamping coprocessor (PTP) found on Cavium processors. diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 60641e202534..9a7f70db20c7 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1434,7 +1434,8 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,  		 * csum is correct or is zero.  		 
*/  		if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc && -		    tcp_udp_csum_ok && ipv4_csum_ok && outer_csum_ok) { +		    tcp_udp_csum_ok && outer_csum_ok && +		    (ipv4_csum_ok || ipv6)) {  			skb->ip_summed = CHECKSUM_UNNECESSARY;  			skb->csum_level = encap;  		} diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index 13430f75496c..f1a2da15dd0a 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -585,7 +585,7 @@ static void de_tx (struct de_private *de)  				netif_dbg(de, tx_done, de->dev,  					  "tx done, slot %d\n", tx_tail);  			} -			dev_kfree_skb_irq(skb); +			dev_consume_skb_irq(skb);  		}  next: diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index b90bab72efdb..c1968b3ecec8 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -369,7 +369,7 @@ static irqreturn_t mpc52xx_fec_tx_interrupt(int irq, void *dev_id)  		dma_unmap_single(dev->dev.parent, bd->skb_pa, skb->len,  				 DMA_TO_DEVICE); -		dev_kfree_skb_irq(skb); +		dev_consume_skb_irq(skb);  	}  	spin_unlock(&priv->lock); diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index c3d539e209ed..eb3e65e8868f 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1879,6 +1879,8 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth)  	u16 i, j;  	u8 __iomem *bd; +	netdev_reset_queue(ugeth->ndev); +  	ug_info = ugeth->ug_info;  	uf_info = &ug_info->uf_info; diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 04fd1f135011..654ac534b10e 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -152,8 +152,10 @@ static void skge_get_regs(struct net_device *dev, struct ethtool_regs *regs,  	memset(p, 0, 
regs->len);  	memcpy_fromio(p, io, B3_RAM_ADDR); -	memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1, -		      regs->len - B3_RI_WTO_R1); +	if (regs->len > B3_RI_WTO_R1) { +		memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1, +			      regs->len - B3_RI_WTO_R1); +	}  }  /* Wake on Lan only supported on Yukon chips with rev 1 or above */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 046948ead152..f3c7ab6faea5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -256,6 +256,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,  	e->m_neigh.family = n->ops->family;  	memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);  	e->out_dev = out_dev; +	e->route_dev = route_dev;  	/* It's important to add the neigh to the hash table before checking  	 * the neigh validity state. So if we'll get a notification, in case the @@ -369,6 +370,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,  	e->m_neigh.family = n->ops->family;  	memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);  	e->out_dev = out_dev; +	e->route_dev = route_dev;  	/* It's importent to add the neigh to the hash table before checking  	 * the neigh validity state. 
So if we'll get a notification, in case the @@ -612,16 +614,18 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,  		       struct mlx5_flow_spec *spec,  		       struct tc_cls_flower_offload *f,  		       void *headers_c, -		       void *headers_v) +		       void *headers_v, u8 *match_level)  {  	int tunnel_type;  	int err = 0;  	tunnel_type = mlx5e_tc_tun_get_type(filter_dev);  	if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { +		*match_level = MLX5_MATCH_L4;  		err = mlx5e_tc_tun_parse_vxlan(priv, spec, f,  					       headers_c, headers_v);  	} else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { +		*match_level = MLX5_MATCH_L3;  		err = mlx5e_tc_tun_parse_gretap(priv, spec, f,  						headers_c, headers_v);  	} else { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 706ce7bf15e7..b63f15de899d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -39,6 +39,6 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,  		       struct mlx5_flow_spec *spec,  		       struct tc_cls_flower_offload *f,  		       void *headers_c, -		       void *headers_v); +		       void *headers_v, u8 *match_level);  #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index f2573c2d2b5c..ef9e472daffb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -596,6 +596,10 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,  	if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {  		ether_addr_copy(e->h_dest, ha);  		ether_addr_copy(eth->h_dest, ha); +		/* Update the encap source mac, in case that we delete +		 * the flows when encap source mac changed. 
+		 */ +		ether_addr_copy(eth->h_source, e->route_dev->dev_addr);  		mlx5e_tc_encap_flows_add(priv, e);  	} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index edd722824697..36eafc877e6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -148,6 +148,7 @@ struct mlx5e_encap_entry {  	unsigned char h_dest[ETH_ALEN];	/* destination eth addr	*/  	struct net_device *out_dev; +	struct net_device *route_dev;  	int tunnel_type;  	int tunnel_hlen;  	int reformat_type; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cae6c6d48984..b5c1b039375a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -128,6 +128,7 @@ struct mlx5e_tc_flow_parse_attr {  	struct net_device *filter_dev;  	struct mlx5_flow_spec spec;  	int num_mod_hdr_actions; +	int max_mod_hdr_actions;  	void *mod_hdr_actions;  	int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];  }; @@ -1302,7 +1303,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,  static int parse_tunnel_attr(struct mlx5e_priv *priv,  			     struct mlx5_flow_spec *spec,  			     struct tc_cls_flower_offload *f, -			     struct net_device *filter_dev) +			     struct net_device *filter_dev, u8 *match_level)  {  	struct netlink_ext_ack *extack = f->common.extack;  	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -1317,7 +1318,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,  	int err = 0;  	err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, -				 headers_c, headers_v); +				 headers_c, headers_v, match_level);  	if (err) {  		NL_SET_ERR_MSG_MOD(extack,  				   "failed to parse tunnel attributes"); @@ -1426,7 +1427,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,  			      struct mlx5_flow_spec *spec,  			      struct 
tc_cls_flower_offload *f,  			      struct net_device *filter_dev, -			      u8 *match_level) +			      u8 *match_level, u8 *tunnel_match_level)  {  	struct netlink_ext_ack *extack = f->common.extack;  	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -1477,7 +1478,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,  		switch (key->addr_type) {  		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:  		case FLOW_DISSECTOR_KEY_IPV6_ADDRS: -			if (parse_tunnel_attr(priv, spec, f, filter_dev)) +			if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level))  				return -EOPNOTSUPP;  			break;  		default: @@ -1826,11 +1827,11 @@ static int parse_cls_flower(struct mlx5e_priv *priv,  	struct mlx5_core_dev *dev = priv->mdev;  	struct mlx5_eswitch *esw = dev->priv.eswitch;  	struct mlx5e_rep_priv *rpriv = priv->ppriv; +	u8 match_level, tunnel_match_level = MLX5_MATCH_NONE;  	struct mlx5_eswitch_rep *rep; -	u8 match_level;  	int err; -	err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level); +	err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level);  	if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {  		rep = rpriv->rep; @@ -1846,10 +1847,12 @@ static int parse_cls_flower(struct mlx5e_priv *priv,  		}  	} -	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) +	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {  		flow->esw_attr->match_level = match_level; -	else +		flow->esw_attr->tunnel_match_level = tunnel_match_level; +	} else {  		flow->nic_attr->match_level = match_level; +	}  	return err;  } @@ -1934,9 +1937,9 @@ static struct mlx5_fields fields[] = {  	OFFLOAD(UDP_DPORT, 2, udp.dest,   0),  }; -/* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at - * max from the SW pedit action. On success, it says how many HW actions were - * actually parsed. +/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at + * max from the SW pedit action. 
On success, attr->num_mod_hdr_actions + * says how many HW actions were actually parsed.   */  static int offload_pedit_fields(struct pedit_headers *masks,  				struct pedit_headers *vals, @@ -1960,9 +1963,11 @@ static int offload_pedit_fields(struct pedit_headers *masks,  	add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD];  	action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); -	action = parse_attr->mod_hdr_actions; -	max_actions = parse_attr->num_mod_hdr_actions; -	nactions = 0; +	action = parse_attr->mod_hdr_actions + +		 parse_attr->num_mod_hdr_actions * action_size; + +	max_actions = parse_attr->max_mod_hdr_actions; +	nactions = parse_attr->num_mod_hdr_actions;  	for (i = 0; i < ARRAY_SIZE(fields); i++) {  		f = &fields[i]; @@ -2073,7 +2078,7 @@ static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,  	if (!parse_attr->mod_hdr_actions)  		return -ENOMEM; -	parse_attr->num_mod_hdr_actions = max_actions; +	parse_attr->max_mod_hdr_actions = max_actions;  	return 0;  } @@ -2119,9 +2124,11 @@ static int parse_tc_pedit_action(struct mlx5e_priv *priv,  			goto out_err;  	} -	err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); -	if (err) -		goto out_err; +	if (!parse_attr->mod_hdr_actions) { +		err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); +		if (err) +			goto out_err; +	}  	err = offload_pedit_fields(masks, vals, parse_attr, extack);  	if (err < 0) @@ -2179,6 +2186,7 @@ static bool csum_offload_supported(struct mlx5e_priv *priv,  static bool modify_header_match_supported(struct mlx5_flow_spec *spec,  					  struct tcf_exts *exts, +					  u32 actions,  					  struct netlink_ext_ack *extack)  {  	const struct tc_action *a; @@ -2188,7 +2196,11 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,  	u16 ethertype;  	int nkeys, i; -	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); +	if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) +		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 
inner_headers); +	else +		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); +  	ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);  	/* for non-IP we only re-write MACs, so we're okay */ @@ -2245,7 +2257,7 @@ static bool actions_match_supported(struct mlx5e_priv *priv,  	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)  		return modify_header_match_supported(&parse_attr->spec, exts, -						     extack); +						     actions, extack);  	return true;  } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 598ad7e4d5c9..0e55cd1f2e98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -387,8 +387,14 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,  	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);  	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);  	if (unlikely(contig_wqebbs_room < num_wqebbs)) { +#ifdef CONFIG_MLX5_EN_IPSEC +		struct mlx5_wqe_eth_seg cur_eth = wqe->eth; +#endif  		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);  		mlx5e_sq_fetch_wqe(sq, &wqe, &pi); +#ifdef CONFIG_MLX5_EN_IPSEC +		wqe->eth = cur_eth; +#endif  	}  	/* fill wqe */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 9c89eea9b2c3..748ff178a1d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -312,6 +312,7 @@ struct mlx5_esw_flow_attr {  	} dests[MLX5_MAX_FLOW_FWD_VPORTS];  	u32	mod_hdr_id;  	u8	match_level; +	u8	tunnel_match_level;  	struct mlx5_fc *counter;  	u32	chain;  	u16	prio; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 53065b6ae593..d4e6fe5b9300 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -160,14 +160,15 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,  		MLX5_SET_TO_ONES(fte_match_set_misc, misc,  				 source_eswitch_owner_vhca_id); -	if (attr->match_level == MLX5_MATCH_NONE) -		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; -	else -		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | -					      MLX5_MATCH_MISC_PARAMETERS; - -	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) -		spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; +	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; +	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { +		if (attr->tunnel_match_level != MLX5_MATCH_NONE) +			spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; +		if (attr->match_level != MLX5_MATCH_NONE) +			spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; +	} else if (attr->match_level != MLX5_MATCH_NONE) { +		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; +	}  	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)  		flow_act.modify_id = attr->mod_hdr_id; diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 24a90163775e..2d8a77cc156b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -53,7 +53,7 @@  extern const struct qed_common_ops qed_common_ops_pass;  #define QED_MAJOR_VERSION		8 -#define QED_MINOR_VERSION		33 +#define QED_MINOR_VERSION		37  #define QED_REVISION_VERSION		0  #define QED_ENGINEERING_VERSION		20 diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index e68ca83ae915..58be1c4c6668 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2216,7 +2216,7 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,  			u16 num_queues = 0;  			/* Since the feature controls only queue-zones, -			 * make sure we have the contexts [rx, tx, xdp] 
to +			 * make sure we have the contexts [rx, xdp, tcs] to  			 * match.  			 */  			for_each_hwfn(cdev, i) { @@ -2226,7 +2226,8 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,  				u16 cids;  				cids = hwfn->pf_params.eth_pf_params.num_cons; -				num_queues += min_t(u16, l2_queues, cids / 3); +				cids /= (2 + info->num_tc); +				num_queues += min_t(u16, l2_queues, cids);  			}  			/* queues might theoretically be >256, but interrupts' @@ -2870,7 +2871,8 @@ static int qed_get_coalesce(struct qed_dev *cdev, u16 *coal, void *handle)  	p_hwfn = p_cid->p_owner;  	rc = qed_get_queue_coalesce(p_hwfn, coal, handle);  	if (rc) -		DP_NOTICE(p_hwfn, "Unable to read queue coalescing\n"); +		DP_VERBOSE(cdev, QED_MSG_DEBUG, +			   "Unable to read queue coalescing\n");  	return rc;  } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 4179c9013fc6..96ab77ae6af5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -382,6 +382,7 @@ void qed_consq_setup(struct qed_hwfn *p_hwfn);   * @param p_hwfn   */  void qed_consq_free(struct qed_hwfn *p_hwfn); +int qed_spq_pend_post(struct qed_hwfn *p_hwfn);  /**   * @file diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 888274fa208b..5a495fda9e9d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -604,6 +604,9 @@ int qed_sp_pf_update_stag(struct qed_hwfn *p_hwfn)  	p_ent->ramrod.pf_update.update_mf_vlan_flag = true;  	p_ent->ramrod.pf_update.mf_vlan = cpu_to_le16(p_hwfn->hw_info.ovlan); +	if (test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits)) +		p_ent->ramrod.pf_update.mf_vlan |= +			cpu_to_le16(((u16)p_hwfn->ufp_info.tc << 13));  	return qed_spq_post(p_hwfn, p_ent, NULL);  } diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 
eb88bbc6b193..ba64ff9bedbd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -397,6 +397,11 @@ int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie)  	qed_eq_prod_update(p_hwfn, qed_chain_get_prod_idx(p_chain)); +	/* Attempt to post pending requests */ +	spin_lock_bh(&p_hwfn->p_spq->lock); +	rc = qed_spq_pend_post(p_hwfn); +	spin_unlock_bh(&p_hwfn->p_spq->lock); +  	return rc;  } @@ -767,7 +772,7 @@ static int qed_spq_post_list(struct qed_hwfn *p_hwfn,  	return 0;  } -static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) +int qed_spq_pend_post(struct qed_hwfn *p_hwfn)  {  	struct qed_spq *p_spq = p_hwfn->p_spq;  	struct qed_spq_entry *p_ent = NULL; @@ -905,7 +910,6 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,  	struct qed_spq_entry	*p_ent = NULL;  	struct qed_spq_entry	*tmp;  	struct qed_spq_entry	*found = NULL; -	int			rc;  	if (!p_hwfn)  		return -EINVAL; @@ -963,12 +967,7 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,  		 */  		qed_spq_return_entry(p_hwfn, found); -	/* Attempt to post pending requests */ -	spin_lock_bh(&p_spq->lock); -	rc = qed_spq_pend_post(p_hwfn); -	spin_unlock_bh(&p_spq->lock); - -	return rc; +	return 0;  }  int qed_consq_alloc(struct qed_hwfn *p_hwfn) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 613249d1e967..730997b13747 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -56,7 +56,7 @@  #include <net/tc_act/tc_gact.h>  #define QEDE_MAJOR_VERSION		8 -#define QEDE_MINOR_VERSION		33 +#define QEDE_MINOR_VERSION		37  #define QEDE_REVISION_VERSION		0  #define QEDE_ENGINEERING_VERSION	20  #define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "."	
\ @@ -494,6 +494,9 @@ struct qede_reload_args {  /* Datapath functions definition */  netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev); +u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, +		      struct net_device *sb_dev, +		      select_queue_fallback_t fallback);  netdev_features_t qede_features_check(struct sk_buff *skb,  				      struct net_device *dev,  				      netdev_features_t features); diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index bdf816fe5a16..31b046e24565 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1695,6 +1695,19 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev)  	return NETDEV_TX_OK;  } +u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, +		      struct net_device *sb_dev, +		      select_queue_fallback_t fallback) +{ +	struct qede_dev *edev = netdev_priv(dev); +	int total_txq; + +	total_txq = QEDE_TSS_COUNT(edev) * edev->dev_info.num_tc; + +	return QEDE_TSS_COUNT(edev) ? 
+		fallback(dev, skb, NULL) % total_txq :  0; +} +  /* 8B udp header + 8B base tunnel header + 32B option length */  #define QEDE_MAX_TUN_HDR_LEN 48 diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 5a74fcbdbc2b..9790f26d17c4 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -631,6 +631,7 @@ static const struct net_device_ops qede_netdev_ops = {  	.ndo_open = qede_open,  	.ndo_stop = qede_close,  	.ndo_start_xmit = qede_start_xmit, +	.ndo_select_queue = qede_select_queue,  	.ndo_set_rx_mode = qede_set_rx_mode,  	.ndo_set_mac_address = qede_set_mac_addr,  	.ndo_validate_addr = eth_validate_addr, @@ -666,6 +667,7 @@ static const struct net_device_ops qede_netdev_vf_ops = {  	.ndo_open = qede_open,  	.ndo_stop = qede_close,  	.ndo_start_xmit = qede_start_xmit, +	.ndo_select_queue = qede_select_queue,  	.ndo_set_rx_mode = qede_set_rx_mode,  	.ndo_set_mac_address = qede_set_mac_addr,  	.ndo_validate_addr = eth_validate_addr, @@ -684,6 +686,7 @@ static const struct net_device_ops qede_netdev_vf_xdp_ops = {  	.ndo_open = qede_open,  	.ndo_stop = qede_close,  	.ndo_start_xmit = qede_start_xmit, +	.ndo_select_queue = qede_select_queue,  	.ndo_set_rx_mode = qede_set_rx_mode,  	.ndo_set_mac_address = qede_set_mac_addr,  	.ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 15c62c160953..be47d864f8b9 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -1037,7 +1037,7 @@ static void epic_tx(struct net_device *dev, struct epic_private *ep)  		skb = ep->tx_skbuff[entry];  		pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr,  				 skb->len, PCI_DMA_TODEVICE); -		dev_kfree_skb_irq(skb); +		dev_consume_skb_irq(skb);  		ep->tx_skbuff[entry] = NULL;  	} diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index d1f61c25d82b..5d85742a2be0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -721,8 +721,11 @@ static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv)  {  	unsigned long clk = clk_get_rate(priv->plat->stmmac_clk); -	if (!clk) -		return 0; +	if (!clk) { +		clk = priv->plat->clk_ref_rate; +		if (!clk) +			return 0; +	}  	return (usec * (clk / 1000000)) / 256;  } @@ -731,8 +734,11 @@ static u32 stmmac_riwt2usec(u32 riwt, struct stmmac_priv *priv)  {  	unsigned long clk = clk_get_rate(priv->plat->stmmac_clk); -	if (!clk) -		return 0; +	if (!clk) { +		clk = priv->plat->clk_ref_rate; +		if (!clk) +			return 0; +	}  	return (riwt * 256) / (clk / 1000000);  } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5afba69981cf..685d20472358 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3023,10 +3023,22 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)  	tx_q = &priv->tx_queue[queue]; +	if (priv->tx_path_in_lpi_mode) +		stmmac_disable_eee_mode(priv); +  	/* Manage oversized TCP frames for GMAC4 device */  	if (skb_is_gso(skb) && priv->tso) { -		if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) +		if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { +			/* +			 * There is no way to determine the number of TSO +			 * capable Queues. Let's use always the Queue 0 +			 * because if TSO is supported then at least this +			 * one will be capable. 
+			 */ +			skb_set_queue_mapping(skb, 0); +  			return stmmac_tso_xmit(skb, dev); +		}  	}  	if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) { @@ -3041,9 +3053,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)  		return NETDEV_TX_BUSY;  	} -	if (priv->tx_path_in_lpi_mode) -		stmmac_disable_eee_mode(priv); -  	entry = tx_q->cur_tx;  	first_entry = entry;  	WARN_ON(tx_q->tx_skbuff[first_entry]); diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 7ec4eb74fe21..6fc05c106afc 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -1898,7 +1898,7 @@ static inline void cas_tx_ringN(struct cas *cp, int ring, int limit)  		cp->net_stats[ring].tx_packets++;  		cp->net_stats[ring].tx_bytes += skb->len;  		spin_unlock(&cp->stat_lock[ring]); -		dev_kfree_skb_irq(skb); +		dev_consume_skb_irq(skb);  	}  	cp->tx_old[ring] = entry; diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index 720b7ac77f3b..e9b757b03b56 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -781,7 +781,7 @@ static void bigmac_tx(struct bigmac *bp)  		DTX(("skb(%p) ", skb));  		bp->tx_skbs[elem] = NULL; -		dev_kfree_skb_irq(skb); +		dev_consume_skb_irq(skb);  		elem = NEXT_TX(elem);  	} diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index ff641cf30a4e..d007dfeba5c3 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -1962,7 +1962,7 @@ static void happy_meal_tx(struct happy_meal *hp)  			this = &txbase[elem];  		} -		dev_kfree_skb_irq(skb); +		dev_consume_skb_irq(skb);  		dev->stats.tx_packets++;  	}  	hp->tx_old = elem; diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index dc966ddb6d81..b24c11187017 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -1739,7 
+1739,7 @@ static void bdx_tx_cleanup(struct bdx_priv *priv)  		tx_level -= db->rptr->len;	/* '-' koz len is negative */  		/* now should come skb pointer - free it */ -		dev_kfree_skb_irq(db->rptr->addr.skb); +		dev_consume_skb_irq(db->rptr->addr.skb);  		bdx_tx_db_inc_rptr(db);  	} diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 82412691ee66..27f6cf140845 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -1740,7 +1740,7 @@ static void velocity_free_tx_buf(struct velocity_info *vptr,  		dma_unmap_single(vptr->dev, tdinfo->skb_dma[i],  				 le16_to_cpu(pktlen), DMA_TO_DEVICE);  	} -	dev_kfree_skb_irq(skb); +	dev_consume_skb_irq(skb);  	tdinfo->skb = NULL;  } diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c index 38ac8ef41f5f..56b7791911bf 100644 --- a/drivers/net/fddi/defxx.c +++ b/drivers/net/fddi/defxx.c @@ -3512,7 +3512,7 @@ static int dfx_xmt_done(DFX_board_t *bp)  				 bp->descr_block_virt->xmt_data[comp].long_1,  				 p_xmt_drv_descr->p_skb->len,  				 DMA_TO_DEVICE); -		dev_kfree_skb_irq(p_xmt_drv_descr->p_skb); +		dev_consume_skb_irq(p_xmt_drv_descr->p_skb);  		/*  		 * Move to start of next packet by updating completion index diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 58bbba8582b0..3377ac66a347 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1512,9 +1512,13 @@ static void geneve_link_config(struct net_device *dev,  	}  #if IS_ENABLED(CONFIG_IPV6)  	case AF_INET6: { -		struct rt6_info *rt = rt6_lookup(geneve->net, -						 &info->key.u.ipv6.dst, NULL, 0, -						 NULL, 0); +		struct rt6_info *rt; + +		if (!__in6_dev_get(dev)) +			break; + +		rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0, +				NULL, 0);  		if (rt && rt->dst.dev)  			ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN; diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 44de81e5f140..c589f5ae75bb 
100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -905,9 +905,9 @@ mcr20a_irq_clean_complete(void *context)  		}  		break;  	case (DAR_IRQSTS1_RXIRQ | DAR_IRQSTS1_SEQIRQ): -			/* rx is starting */ -			dev_dbg(printdev(lp), "RX is starting\n"); -			mcr20a_handle_rx(lp); +		/* rx is starting */ +		dev_dbg(printdev(lp), "RX is starting\n"); +		mcr20a_handle_rx(lp);  		break;  	case (DAR_IRQSTS1_RXIRQ | DAR_IRQSTS1_TXIRQ | DAR_IRQSTS1_SEQIRQ):  		if (lp->is_tx) { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 19bdde60680c..7cdac77d0c68 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -100,12 +100,12 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval,  			err = ipvlan_register_nf_hook(read_pnet(&port->pnet));  			if (!err) {  				mdev->l3mdev_ops = &ipvl_l3mdev_ops; -				mdev->priv_flags |= IFF_L3MDEV_MASTER; +				mdev->priv_flags |= IFF_L3MDEV_RX_HANDLER;  			} else  				goto fail;  		} else if (port->mode == IPVLAN_MODE_L3S) {  			/* Old mode was L3S */ -			mdev->priv_flags &= ~IFF_L3MDEV_MASTER; +			mdev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER;  			ipvlan_unregister_nf_hook(read_pnet(&port->pnet));  			mdev->l3mdev_ops = NULL;  		} @@ -167,7 +167,7 @@ static void ipvlan_port_destroy(struct net_device *dev)  	struct sk_buff *skb;  	if (port->mode == IPVLAN_MODE_L3S) { -		dev->priv_flags &= ~IFF_L3MDEV_MASTER; +		dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER;  		ipvlan_unregister_nf_hook(dev_net(dev));  		dev->l3mdev_ops = NULL;  	} diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 18b41bc345ab..6e8807212aa3 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -898,14 +898,14 @@ static void decode_txts(struct dp83640_private *dp83640,  			struct phy_txts *phy_txts)  {  	struct skb_shared_hwtstamps shhwtstamps; +	struct dp83640_skb_info *skb_info;  	struct sk_buff *skb; -	u64 ns;  	u8 overflow; +	u64 
ns;  	/* We must already have the skb that triggered this. */ - +again:  	skb = skb_dequeue(&dp83640->tx_queue); -  	if (!skb) {  		pr_debug("have timestamp but tx_queue empty\n");  		return; @@ -920,6 +920,11 @@ static void decode_txts(struct dp83640_private *dp83640,  		}  		return;  	} +	skb_info = (struct dp83640_skb_info *)skb->cb; +	if (time_after(jiffies, skb_info->tmo)) { +		kfree_skb(skb); +		goto again; +	}  	ns = phy2txts(phy_txts);  	memset(&shhwtstamps, 0, sizeof(shhwtstamps)); @@ -1472,6 +1477,7 @@ static bool dp83640_rxtstamp(struct phy_device *phydev,  static void dp83640_txtstamp(struct phy_device *phydev,  			     struct sk_buff *skb, int type)  { +	struct dp83640_skb_info *skb_info = (struct dp83640_skb_info *)skb->cb;  	struct dp83640_private *dp83640 = phydev->priv;  	switch (dp83640->hwts_tx_en) { @@ -1484,6 +1490,7 @@ static void dp83640_txtstamp(struct phy_device *phydev,  		/* fall through */  	case HWTSTAMP_TX_ON:  		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; +		skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT;  		skb_queue_tail(&dp83640->tx_queue, skb);  		break; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 2e12f982534f..abb7876a8776 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -847,7 +847,6 @@ static int m88e1510_config_init(struct phy_device *phydev)  	/* SGMII-to-Copper mode initialization */  	if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { -  		/* Select page 18 */  		err = marvell_set_page(phydev, 18);  		if (err < 0) @@ -870,21 +869,6 @@ static int m88e1510_config_init(struct phy_device *phydev)  		err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);  		if (err < 0)  			return err; - -		/* There appears to be a bug in the 88e1512 when used in -		 * SGMII to copper mode, where the AN advertisement register -		 * clears the pause bits each time a negotiation occurs. -		 * This means we can never be truely sure what was advertised, -		 * so disable Pause support. 
-		 */ -		linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, -				   phydev->supported); -		linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, -				   phydev->supported); -		linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, -				   phydev->advertising); -		linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, -				   phydev->advertising);  	}  	return m88e1318_config_init(phydev); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 18656c4094b3..fed298c0cb39 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -866,8 +866,6 @@ static int tun_attach(struct tun_struct *tun, struct file *file,  	if (rtnl_dereference(tun->xdp_prog))  		sock_set_flag(&tfile->sk, SOCK_XDP); -	tun_set_real_num_queues(tun); -  	/* device is allowed to go away first, so no need to hold extra  	 * refcnt.  	 */ @@ -879,6 +877,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file,  	rcu_assign_pointer(tfile->tun, tun);  	rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);  	tun->numqueues++; +	tun_set_real_num_queues(tun);  out:  	return err;  } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8fadd8eaf601..4cfceb789eea 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -57,6 +57,8 @@ module_param(napi_tx, bool, 0644);  #define VIRTIO_XDP_TX		BIT(0)  #define VIRTIO_XDP_REDIR	BIT(1) +#define VIRTIO_XDP_FLAG	BIT(0) +  /* RX packet size EWMA. The average packet size is used to determine the packet   * buffer size when refilling RX rings. 
As the entire RX ring may be refilled   * at once, the weight is chosen so that the EWMA will be insensitive to short- @@ -252,6 +254,21 @@ struct padded_vnet_hdr {  	char padding[4];  }; +static bool is_xdp_frame(void *ptr) +{ +	return (unsigned long)ptr & VIRTIO_XDP_FLAG; +} + +static void *xdp_to_ptr(struct xdp_frame *ptr) +{ +	return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); +} + +static struct xdp_frame *ptr_to_xdp(void *ptr) +{ +	return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); +} +  /* Converting between virtqueue no. and kernel tx/rx queue no.   * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq   */ @@ -462,7 +479,8 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,  	sg_init_one(sq->sg, xdpf->data, xdpf->len); -	err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC); +	err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf), +				   GFP_ATOMIC);  	if (unlikely(err))  		return -ENOSPC; /* Caller handle free/refcnt */ @@ -482,36 +500,47 @@ static int virtnet_xdp_xmit(struct net_device *dev,  {  	struct virtnet_info *vi = netdev_priv(dev);  	struct receive_queue *rq = vi->rq; -	struct xdp_frame *xdpf_sent;  	struct bpf_prog *xdp_prog;  	struct send_queue *sq;  	unsigned int len; +	int packets = 0; +	int bytes = 0;  	int drops = 0;  	int kicks = 0;  	int ret, err; +	void *ptr;  	int i; -	sq = virtnet_xdp_sq(vi); - -	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { -		ret = -EINVAL; -		drops = n; -		goto out; -	} -  	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this  	 * indicate XDP resources have been successfully allocated.  	 */  	xdp_prog = rcu_dereference(rq->xdp_prog); -	if (!xdp_prog) { -		ret = -ENXIO; +	if (!xdp_prog) +		return -ENXIO; + +	sq = virtnet_xdp_sq(vi); + +	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { +		ret = -EINVAL;  		drops = n;  		goto out;  	}  	/* Free up any pending old buffers before queueing new ones. 
*/ -	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) -		xdp_return_frame(xdpf_sent); +	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { +		if (likely(is_xdp_frame(ptr))) { +			struct xdp_frame *frame = ptr_to_xdp(ptr); + +			bytes += frame->len; +			xdp_return_frame(frame); +		} else { +			struct sk_buff *skb = ptr; + +			bytes += skb->len; +			napi_consume_skb(skb, false); +		} +		packets++; +	}  	for (i = 0; i < n; i++) {  		struct xdp_frame *xdpf = frames[i]; @@ -530,6 +559,8 @@ static int virtnet_xdp_xmit(struct net_device *dev,  	}  out:  	u64_stats_update_begin(&sq->stats.syncp); +	sq->stats.bytes += bytes; +	sq->stats.packets += packets;  	sq->stats.xdp_tx += n;  	sq->stats.xdp_tx_drops += drops;  	sq->stats.kicks += kicks; @@ -1332,18 +1363,26 @@ static int virtnet_receive(struct receive_queue *rq, int budget,  static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)  { -	struct sk_buff *skb;  	unsigned int len;  	unsigned int packets = 0;  	unsigned int bytes = 0; +	void *ptr; -	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) { -		pr_debug("Sent skb %p\n", skb); +	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { +		if (likely(!is_xdp_frame(ptr))) { +			struct sk_buff *skb = ptr; -		bytes += skb->len; -		packets++; +			pr_debug("Sent skb %p\n", skb); + +			bytes += skb->len; +			napi_consume_skb(skb, in_napi); +		} else { +			struct xdp_frame *frame = ptr_to_xdp(ptr); -		napi_consume_skb(skb, in_napi); +			bytes += frame->len; +			xdp_return_frame(frame); +		} +		packets++;  	}  	/* Avoid overhead when no packets have been processed @@ -1358,6 +1397,16 @@ static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)  	u64_stats_update_end(&sq->stats.syncp);  } +static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) +{ +	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) +		return false; +	else if (q < vi->curr_queue_pairs) +		return true; +	else +		return false; +} +  static void 
virtnet_poll_cleantx(struct receive_queue *rq)  {  	struct virtnet_info *vi = rq->vq->vdev->priv; @@ -1365,7 +1414,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)  	struct send_queue *sq = &vi->sq[index];  	struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); -	if (!sq->napi.weight) +	if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))  		return;  	if (__netif_tx_trylock(txq)) { @@ -1442,8 +1491,16 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)  {  	struct send_queue *sq = container_of(napi, struct send_queue, napi);  	struct virtnet_info *vi = sq->vq->vdev->priv; -	struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq)); +	unsigned int index = vq2txq(sq->vq); +	struct netdev_queue *txq; +	if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { +		/* We don't need to enable cb for XDP */ +		napi_complete_done(napi, 0); +		return 0; +	} + +	txq = netdev_get_tx_queue(vi->dev, index);  	__netif_tx_lock(txq, raw_smp_processor_id());  	free_old_xmit_skbs(sq, true);  	__netif_tx_unlock(txq); @@ -2395,6 +2452,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,  		return -ENOMEM;  	} +	old_prog = rtnl_dereference(vi->rq[0].xdp_prog); +	if (!prog && !old_prog) +		return 0; +  	if (prog) {  		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);  		if (IS_ERR(prog)) @@ -2402,36 +2463,62 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,  	}  	/* Make sure NAPI is not using any XDP TX queues for RX. 
*/ -	if (netif_running(dev)) -		for (i = 0; i < vi->max_queue_pairs; i++) +	if (netif_running(dev)) { +		for (i = 0; i < vi->max_queue_pairs; i++) {  			napi_disable(&vi->rq[i].napi); +			virtnet_napi_tx_disable(&vi->sq[i].napi); +		} +	} + +	if (!prog) { +		for (i = 0; i < vi->max_queue_pairs; i++) { +			rcu_assign_pointer(vi->rq[i].xdp_prog, prog); +			if (i == 0) +				virtnet_restore_guest_offloads(vi); +		} +		synchronize_net(); +	} -	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);  	err = _virtnet_set_queues(vi, curr_qp + xdp_qp);  	if (err)  		goto err; +	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);  	vi->xdp_queue_pairs = xdp_qp; -	for (i = 0; i < vi->max_queue_pairs; i++) { -		old_prog = rtnl_dereference(vi->rq[i].xdp_prog); -		rcu_assign_pointer(vi->rq[i].xdp_prog, prog); -		if (i == 0) { -			if (!old_prog) +	if (prog) { +		for (i = 0; i < vi->max_queue_pairs; i++) { +			rcu_assign_pointer(vi->rq[i].xdp_prog, prog); +			if (i == 0 && !old_prog)  				virtnet_clear_guest_offloads(vi); -			if (!prog) -				virtnet_restore_guest_offloads(vi);  		} +	} + +	for (i = 0; i < vi->max_queue_pairs; i++) {  		if (old_prog)  			bpf_prog_put(old_prog); -		if (netif_running(dev)) +		if (netif_running(dev)) {  			virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); +			virtnet_napi_tx_enable(vi, vi->sq[i].vq, +					       &vi->sq[i].napi); +		}  	}  	return 0;  err: -	for (i = 0; i < vi->max_queue_pairs; i++) -		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); +	if (!prog) { +		virtnet_clear_guest_offloads(vi); +		for (i = 0; i < vi->max_queue_pairs; i++) +			rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); +	} + +	if (netif_running(dev)) { +		for (i = 0; i < vi->max_queue_pairs; i++) { +			virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); +			virtnet_napi_tx_enable(vi, vi->sq[i].vq, +					       &vi->sq[i].napi); +		} +	}  	if (prog)  		bpf_prog_sub(prog, vi->max_queue_pairs - 1);  	return err; @@ -2613,16 +2700,6 @@ static void 
free_receive_page_frags(struct virtnet_info *vi)  			put_page(vi->rq[i].alloc_frag.page);  } -static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) -{ -	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) -		return false; -	else if (q < vi->curr_queue_pairs) -		return true; -	else -		return false; -} -  static void free_unused_bufs(struct virtnet_info *vi)  {  	void *buf; @@ -2631,10 +2708,10 @@ static void free_unused_bufs(struct virtnet_info *vi)  	for (i = 0; i < vi->max_queue_pairs; i++) {  		struct virtqueue *vq = vi->sq[i].vq;  		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { -			if (!is_xdp_raw_buffer_queue(vi, i)) +			if (!is_xdp_frame(buf))  				dev_kfree_skb(buf);  			else -				put_page(virt_to_head_page(buf)); +				xdp_return_frame(ptr_to_xdp(buf));  		}  	} diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index c0b0f525c87c..27decf8ae840 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -1575,7 +1575,7 @@ try:  					dev->stats.tx_packets++;  					dev->stats.tx_bytes += skb->len;  				} -				dev_kfree_skb_irq(skb); +				dev_consume_skb_irq(skb);  				dpriv->tx_skbuff[cur] = NULL;  				++dpriv->tx_dirty;  			} else { diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 66d889d54e58..a08f04c3f644 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -482,7 +482,7 @@ static int hdlc_tx_done(struct ucc_hdlc_private *priv)  		memset(priv->tx_buffer +  		       (be32_to_cpu(bd->buf) - priv->dma_tx_addr),  		       0, skb->len); -		dev_kfree_skb_irq(skb); +		dev_consume_skb_irq(skb);  		priv->tx_skbuff[priv->skb_dirtytx] = NULL;  		priv->skb_dirtytx = diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 399b501f3c3c..e8891f5fc83a 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -548,7 +548,7 @@ static const struct ath10k_hw_params 
ath10k_hw_params_list[] = {  	{  		.id = WCN3990_HW_1_0_DEV_VERSION,  		.dev_id = 0, -		.bus = ATH10K_BUS_PCI, +		.bus = ATH10K_BUS_SNOC,  		.name = "wcn3990 hw1.0",  		.continuous_frag_desc = true,  		.tx_chain_mask = 0x7, diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig index 491ca3c8b43c..83d5bceea08f 100644 --- a/drivers/net/wireless/intel/iwlwifi/Kconfig +++ b/drivers/net/wireless/intel/iwlwifi/Kconfig @@ -1,6 +1,6 @@  config IWLWIFI  	tristate "Intel Wireless WiFi Next Gen AGN - Wireless-N/Advanced-N/Ultimate-N (iwlwifi) " -	depends on PCI && HAS_IOMEM +	depends on PCI && HAS_IOMEM && CFG80211  	select FW_LOADER  	---help---  	  Select to build the driver supporting the: @@ -47,6 +47,7 @@ if IWLWIFI  config IWLWIFI_LEDS  	bool  	depends on LEDS_CLASS=y || LEDS_CLASS=IWLWIFI +	depends on IWLMVM || IWLDVM  	select LEDS_TRIGGERS  	select MAC80211_LEDS  	default y diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c index 497e762978cc..b2cabce1d74d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c @@ -212,24 +212,24 @@ void mt76x0_get_tx_power_per_rate(struct mt76x02_dev *dev)  	mt76x02_add_rate_power_offset(t, delta);  } -void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info) +void mt76x0_get_power_info(struct mt76x02_dev *dev, s8 *tp)  {  	struct mt76x0_chan_map {  		u8 chan;  		u8 offset;  	} chan_map[] = { -		{   2,  0 }, {   4,  1 }, {   6,  2 }, {   8,  3 }, -		{  10,  4 }, {  12,  5 }, {  14,  6 }, {  38,  0 }, -		{  44,  1 }, {  48,  2 }, {  54,  3 }, {  60,  4 }, -		{  64,  5 }, { 102,  6 }, { 108,  7 }, { 112,  8 }, -		{ 118,  9 }, { 124, 10 }, { 128, 11 }, { 134, 12 }, -		{ 140, 13 }, { 151, 14 }, { 157, 15 }, { 161, 16 }, -		{ 167, 17 }, { 171, 18 }, { 173, 19 }, +		{   2,  0 }, {   4,  2 }, {   6,  4 }, {   8,  6 }, +		{  10,  8 }, {  12, 10 }, {  14, 12 }, {  
38,  0 }, +		{  44,  2 }, {  48,  4 }, {  54,  6 }, {  60,  8 }, +		{  64, 10 }, { 102, 12 }, { 108, 14 }, { 112, 16 }, +		{ 118, 18 }, { 124, 20 }, { 128, 22 }, { 134, 24 }, +		{ 140, 26 }, { 151, 28 }, { 157, 30 }, { 161, 32 }, +		{ 167, 34 }, { 171, 36 }, { 175, 38 },  	};  	struct ieee80211_channel *chan = dev->mt76.chandef.chan;  	u8 offset, addr; +	int i, idx = 0;  	u16 data; -	int i;  	if (mt76x0_tssi_enabled(dev)) {  		s8 target_power; @@ -239,14 +239,14 @@ void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info)  		else  			data = mt76x02_eeprom_get(dev, MT_EE_2G_TARGET_POWER);  		target_power = (data & 0xff) - dev->mt76.rate_power.ofdm[7]; -		info[0] = target_power + mt76x0_get_delta(dev); -		info[1] = 0; +		*tp = target_power + mt76x0_get_delta(dev);  		return;  	}  	for (i = 0; i < ARRAY_SIZE(chan_map); i++) { -		if (chan_map[i].chan <= chan->hw_value) { +		if (chan->hw_value <= chan_map[i].chan) { +			idx = (chan->hw_value == chan_map[i].chan);  			offset = chan_map[i].offset;  			break;  		} @@ -258,13 +258,16 @@ void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info)  		addr = MT_EE_TX_POWER_DELTA_BW80 + offset;  	} else {  		switch (chan->hw_value) { +		case 42: +			offset = 2; +			break;  		case 58:  			offset = 8;  			break;  		case 106:  			offset = 14;  			break; -		case 112: +		case 122:  			offset = 20;  			break;  		case 155: @@ -277,14 +280,9 @@ void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info)  	}  	data = mt76x02_eeprom_get(dev, addr); - -	info[0] = data; -	if (!info[0] || info[0] > 0x3f) -		info[0] = 5; - -	info[1] = data >> 8; -	if (!info[1] || info[1] > 0x3f) -		info[1] = 5; +	*tp = data >> (8 * idx); +	if (*tp < 0 || *tp > 0x3f) +		*tp = 5;  }  static int mt76x0_check_eeprom(struct mt76x02_dev *dev) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.h index ee9ade9f3c8b..42b259f90b6d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.h +++ 
b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.h @@ -26,7 +26,7 @@ struct mt76x02_dev;  int mt76x0_eeprom_init(struct mt76x02_dev *dev);  void mt76x0_read_rx_gain(struct mt76x02_dev *dev);  void mt76x0_get_tx_power_per_rate(struct mt76x02_dev *dev); -void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info); +void mt76x0_get_power_info(struct mt76x02_dev *dev, s8 *tp);  static inline s8 s6_to_s8(u32 val)  { diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c b/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c index 1eb1a802ed20..b6166703ad76 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c @@ -845,17 +845,17 @@ static void mt76x0_phy_tssi_calibrate(struct mt76x02_dev *dev)  void mt76x0_phy_set_txpower(struct mt76x02_dev *dev)  {  	struct mt76_rate_power *t = &dev->mt76.rate_power; -	u8 info[2]; +	s8 info;  	mt76x0_get_tx_power_per_rate(dev); -	mt76x0_get_power_info(dev, info); +	mt76x0_get_power_info(dev, &info); -	mt76x02_add_rate_power_offset(t, info[0]); +	mt76x02_add_rate_power_offset(t, info);  	mt76x02_limit_rate_power(t, dev->mt76.txpower_conf);  	dev->mt76.txpower_cur = mt76x02_get_max_rate_power(t); -	mt76x02_add_rate_power_offset(t, -info[0]); +	mt76x02_add_rate_power_offset(t, -info); -	mt76x02_phy_set_txpower(dev, info[0], info[1]); +	mt76x02_phy_set_txpower(dev, info, info);  }  void mt76x0_phy_calibrate(struct mt76x02_dev *dev, bool power_on) diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index bd10165d7eec..4d4b07701149 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -164,6 +164,12 @@ static int wl12xx_sdio_power_on(struct wl12xx_sdio_glue *glue)  	}  	sdio_claim_host(func); +	/* +	 * To guarantee that the SDIO card is power cycled, as required to make +	 * the FW programming to succeed, let's do a brute force HW reset. 
+	 */ +	mmc_hw_reset(card->host); +  	sdio_enable_func(func);  	sdio_release_host(func); @@ -174,20 +180,13 @@ static int wl12xx_sdio_power_off(struct wl12xx_sdio_glue *glue)  {  	struct sdio_func *func = dev_to_sdio_func(glue->dev);  	struct mmc_card *card = func->card; -	int error;  	sdio_claim_host(func);  	sdio_disable_func(func);  	sdio_release_host(func);  	/* Let runtime PM know the card is powered off */ -	error = pm_runtime_put(&card->dev); -	if (error < 0 && error != -EBUSY) { -		dev_err(&card->dev, "%s failed: %i\n", __func__, error); - -		return error; -	} - +	pm_runtime_put(&card->dev);  	return 0;  } diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 0ee026947f20..122059ecad84 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -22,6 +22,7 @@  #include <linux/hashtable.h>  #include <linux/ip.h>  #include <linux/refcount.h> +#include <linux/workqueue.h>  #include <net/ipv6.h>  #include <net/if_inet6.h> @@ -789,6 +790,7 @@ struct qeth_card {  	struct qeth_seqno seqno;  	struct qeth_card_options options; +	struct workqueue_struct *event_wq;  	wait_queue_head_t wait_q;  	spinlock_t mclock;  	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; @@ -962,7 +964,6 @@ extern const struct attribute_group *qeth_osn_attr_groups[];  extern const struct attribute_group qeth_device_attr_group;  extern const struct attribute_group qeth_device_blkt_group;  extern const struct device_type qeth_generic_devtype; -extern struct workqueue_struct *qeth_wq;  int qeth_card_hw_is_reachable(struct qeth_card *);  const char *qeth_get_cardname_short(struct qeth_card *); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index e63e03143ca7..89f912213e62 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -74,8 +74,7 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *queue,  static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf);  static int 
qeth_init_qdio_out_buf(struct qeth_qdio_out_q *, int); -struct workqueue_struct *qeth_wq; -EXPORT_SYMBOL_GPL(qeth_wq); +static struct workqueue_struct *qeth_wq;  int qeth_card_hw_is_reachable(struct qeth_card *card)  { @@ -566,6 +565,7 @@ static int __qeth_issue_next_read(struct qeth_card *card)  		QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n",  				 rc, CARD_DEVID(card));  		atomic_set(&channel->irq_pending, 0); +		qeth_release_buffer(channel, iob);  		card->read_or_write_problem = 1;  		qeth_schedule_recovery(card);  		wake_up(&card->wait_q); @@ -1127,6 +1127,8 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,  		rc = qeth_get_problem(card, cdev, irb);  		if (rc) {  			card->read_or_write_problem = 1; +			if (iob) +				qeth_release_buffer(iob->channel, iob);  			qeth_clear_ipacmd_list(card);  			qeth_schedule_recovery(card);  			goto out; @@ -1466,6 +1468,10 @@ static struct qeth_card *qeth_alloc_card(struct ccwgroup_device *gdev)  	CARD_RDEV(card) = gdev->cdev[0];  	CARD_WDEV(card) = gdev->cdev[1];  	CARD_DDEV(card) = gdev->cdev[2]; + +	card->event_wq = alloc_ordered_workqueue("%s", 0, dev_name(&gdev->dev)); +	if (!card->event_wq) +		goto out_wq;  	if (qeth_setup_channel(&card->read, true))  		goto out_ip;  	if (qeth_setup_channel(&card->write, true)) @@ -1481,6 +1487,8 @@ out_data:  out_channel:  	qeth_clean_channel(&card->read);  out_ip: +	destroy_workqueue(card->event_wq); +out_wq:  	dev_set_drvdata(&gdev->dev, NULL);  	kfree(card);  out: @@ -1809,6 +1817,7 @@ static int qeth_idx_activate_get_answer(struct qeth_card *card,  		QETH_DBF_MESSAGE(2, "Error2 in activating channel rc=%d\n", rc);  		QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc);  		atomic_set(&channel->irq_pending, 0); +		qeth_release_buffer(channel, iob);  		wake_up(&card->wait_q);  		return rc;  	} @@ -1878,6 +1887,7 @@ static int qeth_idx_activate_channel(struct qeth_card *card,  			rc);  		QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc);  		
atomic_set(&channel->irq_pending, 0); +		qeth_release_buffer(channel, iob);  		wake_up(&card->wait_q);  		return rc;  	} @@ -2058,6 +2068,7 @@ int qeth_send_control_data(struct qeth_card *card, int len,  	}  	reply = qeth_alloc_reply(card);  	if (!reply) { +		qeth_release_buffer(channel, iob);  		return -ENOMEM;  	}  	reply->callback = reply_cb; @@ -2389,11 +2400,12 @@ static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx)  	return 0;  } -static void qeth_free_qdio_out_buf(struct qeth_qdio_out_q *q) +static void qeth_free_output_queue(struct qeth_qdio_out_q *q)  {  	if (!q)  		return; +	qeth_clear_outq_buffers(q, 1);  	qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q);  	kfree(q);  } @@ -2467,10 +2479,8 @@ out_freeoutqbufs:  		card->qdio.out_qs[i]->bufs[j] = NULL;  	}  out_freeoutq: -	while (i > 0) { -		qeth_free_qdio_out_buf(card->qdio.out_qs[--i]); -		qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); -	} +	while (i > 0) +		qeth_free_output_queue(card->qdio.out_qs[--i]);  	kfree(card->qdio.out_qs);  	card->qdio.out_qs = NULL;  out_freepool: @@ -2503,10 +2513,8 @@ static void qeth_free_qdio_buffers(struct qeth_card *card)  	qeth_free_buffer_pool(card);  	/* free outbound qdio_qs */  	if (card->qdio.out_qs) { -		for (i = 0; i < card->qdio.no_out_queues; ++i) { -			qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); -			qeth_free_qdio_out_buf(card->qdio.out_qs[i]); -		} +		for (i = 0; i < card->qdio.no_out_queues; i++) +			qeth_free_output_queue(card->qdio.out_qs[i]);  		kfree(card->qdio.out_qs);  		card->qdio.out_qs = NULL;  	} @@ -5028,6 +5036,7 @@ static void qeth_core_free_card(struct qeth_card *card)  	qeth_clean_channel(&card->read);  	qeth_clean_channel(&card->write);  	qeth_clean_channel(&card->data); +	destroy_workqueue(card->event_wq);  	qeth_free_qdio_buffers(card);  	unregister_service_level(&card->qeth_service_level);  	dev_set_drvdata(&card->gdev->dev, NULL); diff --git a/drivers/s390/net/qeth_l2_main.c 
b/drivers/s390/net/qeth_l2_main.c index f108d4b44605..a43de2f9bcac 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -369,6 +369,8 @@ static void qeth_l2_stop_card(struct qeth_card *card, int recovery_mode)  		qeth_clear_cmd_buffers(&card->read);  		qeth_clear_cmd_buffers(&card->write);  	} + +	flush_workqueue(card->event_wq);  }  static int qeth_l2_process_inbound_buffer(struct qeth_card *card, @@ -801,6 +803,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)  	if (cgdev->state == CCWGROUP_ONLINE)  		qeth_l2_set_offline(cgdev); + +	cancel_work_sync(&card->close_dev_work);  	if (qeth_netdev_is_registered(card->dev))  		unregister_netdev(card->dev);  } @@ -1434,7 +1438,7 @@ static void qeth_bridge_state_change(struct qeth_card *card,  	data->card = card;  	memcpy(&data->qports, qports,  			sizeof(struct qeth_sbp_state_change) + extrasize); -	queue_work(qeth_wq, &data->worker); +	queue_work(card->event_wq, &data->worker);  }  struct qeth_bridge_host_data { @@ -1506,7 +1510,7 @@ static void qeth_bridge_host_event(struct qeth_card *card,  	data->card = card;  	memcpy(&data->hostevs, hostevs,  			sizeof(struct qeth_ipacmd_addr_change) + extrasize); -	queue_work(qeth_wq, &data->worker); +	queue_work(card->event_wq, &data->worker);  }  /* SETBRIDGEPORT support; sending commands */ diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 42a7cdc59b76..df34bff4ac31 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1433,6 +1433,8 @@ static void qeth_l3_stop_card(struct qeth_card *card, int recovery_mode)  		qeth_clear_cmd_buffers(&card->read);  		qeth_clear_cmd_buffers(&card->write);  	} + +	flush_workqueue(card->event_wq);  }  /* @@ -2338,6 +2340,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)  	if (cgdev->state == CCWGROUP_ONLINE)  		qeth_l3_set_offline(cgdev); +	cancel_work_sync(&card->close_dev_work);  	if 
(qeth_netdev_is_registered(card->dev))  		unregister_netdev(card->dev);  	qeth_l3_clear_ip_htable(card, 0); diff --git a/include/linux/filter.h b/include/linux/filter.h index ad106d845b22..e532fcc6e4b5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -591,8 +591,8 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)  	return qdisc_skb_cb(skb)->data;  } -static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, -				       struct sk_buff *skb) +static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog, +					 struct sk_buff *skb)  {  	u8 *cb_data = bpf_skb_cb(skb);  	u8 cb_saved[BPF_SKB_CB_LEN]; @@ -611,15 +611,30 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,  	return res;  } +static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, +				       struct sk_buff *skb) +{ +	u32 res; + +	preempt_disable(); +	res = __bpf_prog_run_save_cb(prog, skb); +	preempt_enable(); +	return res; +} +  static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,  					struct sk_buff *skb)  {  	u8 *cb_data = bpf_skb_cb(skb); +	u32 res;  	if (unlikely(prog->cb_access))  		memset(cb_data, 0, BPF_SKB_CB_LEN); -	return BPF_PROG_RUN(prog, skb); +	preempt_disable(); +	res = BPF_PROG_RUN(prog, skb); +	preempt_enable(); +	return res;  }  static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1377d085ef99..86dbb3e29139 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1483,6 +1483,7 @@ struct net_device_ops {   * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook   * @IFF_FAILOVER: device is a failover master device   * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device + * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device   */  enum netdev_priv_flags {  	IFF_802_1Q_VLAN			= 1<<0, @@ -1514,6 +1515,7 @@ enum netdev_priv_flags {  	IFF_NO_RX_HANDLER		= 
1<<26,  	IFF_FAILOVER			= 1<<27,  	IFF_FAILOVER_SLAVE		= 1<<28, +	IFF_L3MDEV_RX_HANDLER		= 1<<29,  };  #define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN @@ -1544,6 +1546,7 @@ enum netdev_priv_flags {  #define IFF_NO_RX_HANDLER		IFF_NO_RX_HANDLER  #define IFF_FAILOVER			IFF_FAILOVER  #define IFF_FAILOVER_SLAVE		IFF_FAILOVER_SLAVE +#define IFF_L3MDEV_RX_HANDLER		IFF_L3MDEV_RX_HANDLER  /**   *	struct net_device - The DEVICE structure. @@ -4549,6 +4552,11 @@ static inline bool netif_supports_nofcs(struct net_device *dev)  	return dev->priv_flags & IFF_SUPP_NOFCS;  } +static inline bool netif_has_l3_rx_handler(const struct net_device *dev) +{ +	return dev->priv_flags & IFF_L3MDEV_RX_HANDLER; +} +  static inline bool netif_is_l3_master(const struct net_device *dev)  {  	return dev->priv_flags & IFF_L3MDEV_MASTER; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 7ddfc65586b0..4335bd771ce5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -184,6 +184,7 @@ struct plat_stmmacenet_data {  	struct clk *pclk;  	struct clk *clk_ptp_ref;  	unsigned int clk_ptp_rate; +	unsigned int clk_ref_rate;  	struct reset_control *stmmac_rst;  	struct stmmac_axi *axi;  	int has_gmac4; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 78fa0ac4613c..5175fd63cd82 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -153,7 +153,8 @@ struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)  	if (netif_is_l3_slave(skb->dev))  		master = netdev_master_upper_dev_get_rcu(skb->dev); -	else if (netif_is_l3_master(skb->dev)) +	else if (netif_is_l3_master(skb->dev) || +		 netif_has_l3_rx_handler(skb->dev))  		master = skb->dev;  	if (master && master->l3mdev_ops->l3mdev_l3_rcv) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 841835a387e1..b4984bbbe157 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -469,9 +469,7 @@ struct nft_set_binding {  int 
nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,  		       struct nft_set_binding *binding);  void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, -			  struct nft_set_binding *binding); -void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, -			  struct nft_set_binding *binding); +			  struct nft_set_binding *binding, bool commit);  void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set);  /** @@ -721,6 +719,13 @@ struct nft_expr_type {  #define NFT_EXPR_STATEFUL		0x1  #define NFT_EXPR_GC			0x2 +enum nft_trans_phase { +	NFT_TRANS_PREPARE, +	NFT_TRANS_ABORT, +	NFT_TRANS_COMMIT, +	NFT_TRANS_RELEASE +}; +  /**   *	struct nft_expr_ops - nf_tables expression operations   * @@ -750,7 +755,8 @@ struct nft_expr_ops {  	void				(*activate)(const struct nft_ctx *ctx,  						    const struct nft_expr *expr);  	void				(*deactivate)(const struct nft_ctx *ctx, -						      const struct nft_expr *expr); +						      const struct nft_expr *expr, +						      enum nft_trans_phase phase);  	void				(*destroy)(const struct nft_ctx *ctx,  						   const struct nft_expr *expr);  	void				(*destroy_clone)(const struct nft_ctx *ctx, @@ -1323,12 +1329,15 @@ struct nft_trans_rule {  struct nft_trans_set {  	struct nft_set			*set;  	u32				set_id; +	bool				bound;  };  #define nft_trans_set(trans)	\  	(((struct nft_trans_set *)trans->data)->set)  #define nft_trans_set_id(trans)	\  	(((struct nft_trans_set *)trans->data)->set_id) +#define nft_trans_set_bound(trans)	\ +	(((struct nft_trans_set *)trans->data)->bound)  struct nft_trans_chain {  	bool				update; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index befe570be5ba..c57bd10340ed 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1459,7 +1459,8 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,  		/* "typedef void new_void", "const void"...etc */  		if (!btf_type_is_void(next_type) && -		    !btf_type_is_fwd(next_type)) { +		    
!btf_type_is_fwd(next_type) && +		    !btf_type_is_func_proto(next_type)) {  			btf_verifier_log_type(env, v->t, "Invalid type_id");  			return -EINVAL;  		} diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ab612fe9862f..d17d05570a3f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -572,7 +572,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,  	bpf_compute_and_save_data_end(skb, &saved_data_end);  	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, -				 bpf_prog_run_save_cb); +				 __bpf_prog_run_save_cb);  	bpf_restore_data_end(skb, saved_data_end);  	__skb_pull(skb, offset);  	skb->sk = save_sk; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 4b7c76765d9d..f9274114c88d 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -686,7 +686,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)  	}  	if (htab_is_prealloc(htab)) { -		pcpu_freelist_push(&htab->freelist, &l->fnode); +		__pcpu_freelist_push(&htab->freelist, &l->fnode);  	} else {  		atomic_dec(&htab->count);  		l->htab = htab; @@ -748,7 +748,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,  		} else {  			struct pcpu_freelist_node *l; -			l = pcpu_freelist_pop(&htab->freelist); +			l = __pcpu_freelist_pop(&htab->freelist);  			if (!l)  				return ERR_PTR(-E2BIG);  			l_new = container_of(l, struct htab_elem, fnode); diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 673fa6fe2d73..0c1b4ba9e90e 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -28,8 +28,8 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s)  	free_percpu(s->freelist);  } -static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, -					struct pcpu_freelist_node *node) +static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head, +					 struct pcpu_freelist_node *node)  {  	raw_spin_lock(&head->lock);  	node->next = head->first; @@ -37,12 +37,22 
@@ static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,  	raw_spin_unlock(&head->lock);  } -void pcpu_freelist_push(struct pcpu_freelist *s, +void __pcpu_freelist_push(struct pcpu_freelist *s,  			struct pcpu_freelist_node *node)  {  	struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist); -	__pcpu_freelist_push(head, node); +	___pcpu_freelist_push(head, node); +} + +void pcpu_freelist_push(struct pcpu_freelist *s, +			struct pcpu_freelist_node *node) +{ +	unsigned long flags; + +	local_irq_save(flags); +	__pcpu_freelist_push(s, node); +	local_irq_restore(flags);  }  void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, @@ -63,7 +73,7 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,  	for_each_possible_cpu(cpu) {  again:  		head = per_cpu_ptr(s->freelist, cpu); -		__pcpu_freelist_push(head, buf); +		___pcpu_freelist_push(head, buf);  		i++;  		buf += elem_size;  		if (i == nr_elems) @@ -74,14 +84,12 @@ again:  	local_irq_restore(flags);  } -struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) +struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)  {  	struct pcpu_freelist_head *head;  	struct pcpu_freelist_node *node; -	unsigned long flags;  	int orig_cpu, cpu; -	local_irq_save(flags);  	orig_cpu = cpu = raw_smp_processor_id();  	while (1) {  		head = per_cpu_ptr(s->freelist, cpu); @@ -89,16 +97,25 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)  		node = head->first;  		if (node) {  			head->first = node->next; -			raw_spin_unlock_irqrestore(&head->lock, flags); +			raw_spin_unlock(&head->lock);  			return node;  		}  		raw_spin_unlock(&head->lock);  		cpu = cpumask_next(cpu, cpu_possible_mask);  		if (cpu >= nr_cpu_ids)  			cpu = 0; -		if (cpu == orig_cpu) { -			local_irq_restore(flags); +		if (cpu == orig_cpu)  			return NULL; -		}  	}  } + +struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) +{ +	struct 
pcpu_freelist_node *ret; +	unsigned long flags; + +	local_irq_save(flags); +	ret = __pcpu_freelist_pop(s); +	local_irq_restore(flags); +	return ret; +} diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h index 3049aae8ea1e..c3960118e617 100644 --- a/kernel/bpf/percpu_freelist.h +++ b/kernel/bpf/percpu_freelist.h @@ -22,8 +22,12 @@ struct pcpu_freelist_node {  	struct pcpu_freelist_node *next;  }; +/* pcpu_freelist_* do spin_lock_irqsave. */  void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);  struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *); +/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */ +void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); +struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *);  void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,  			    u32 nr_elems);  int pcpu_freelist_init(struct pcpu_freelist *); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b155cd17c1bd..8577bb7f8be6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -713,8 +713,13 @@ static int map_lookup_elem(union bpf_attr *attr)  	if (bpf_map_is_dev_bound(map)) {  		err = bpf_map_offload_lookup_elem(map, key, value); -	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || -		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { +		goto done; +	} + +	preempt_disable(); +	this_cpu_inc(bpf_prog_active); +	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || +	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {  		err = bpf_percpu_hash_copy(map, key, value);  	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {  		err = bpf_percpu_array_copy(map, key, value); @@ -744,7 +749,10 @@ static int map_lookup_elem(union bpf_attr *attr)  		}  		rcu_read_unlock();  	} +	this_cpu_dec(bpf_prog_active); +	preempt_enable(); +done:  	if (err)  		goto free_value; diff --git a/kernel/trace/bpf_trace.c 
b/kernel/trace/bpf_trace.c index 8b068adb9da1..f1a86a0d881d 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1204,22 +1204,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *  int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)  { -	int err; - -	mutex_lock(&bpf_event_mutex); -	err = __bpf_probe_register(btp, prog); -	mutex_unlock(&bpf_event_mutex); -	return err; +	return __bpf_probe_register(btp, prog);  }  int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)  { -	int err; - -	mutex_lock(&bpf_event_mutex); -	err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); -	mutex_unlock(&bpf_event_mutex); -	return err; +	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);  }  int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 6a8ac7626797..e52f8cafe227 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -541,38 +541,45 @@ static unsigned int __init print_ht(struct rhltable *rhlt)  static int __init test_insert_dup(struct test_obj_rhl *rhl_test_objects,  				  int cnt, bool slow)  { -	struct rhltable rhlt; +	struct rhltable *rhlt;  	unsigned int i, ret;  	const char *key;  	int err = 0; -	err = rhltable_init(&rhlt, &test_rht_params_dup); -	if (WARN_ON(err)) +	rhlt = kmalloc(sizeof(*rhlt), GFP_KERNEL); +	if (WARN_ON(!rhlt)) +		return -EINVAL; + +	err = rhltable_init(rhlt, &test_rht_params_dup); +	if (WARN_ON(err)) { +		kfree(rhlt);  		return err; +	}  	for (i = 0; i < cnt; i++) {  		rhl_test_objects[i].value.tid = i; -		key = rht_obj(&rhlt.ht, &rhl_test_objects[i].list_node.rhead); +		key = rht_obj(&rhlt->ht, &rhl_test_objects[i].list_node.rhead);  		key += test_rht_params_dup.key_offset;  		if (slow) { -			err = PTR_ERR(rhashtable_insert_slow(&rhlt.ht, key, +			err = PTR_ERR(rhashtable_insert_slow(&rhlt->ht, key,  							     
&rhl_test_objects[i].list_node.rhead));  			if (err == -EAGAIN)  				err = 0;  		} else -			err = rhltable_insert(&rhlt, +			err = rhltable_insert(rhlt,  					      &rhl_test_objects[i].list_node,  					      test_rht_params_dup);  		if (WARN(err, "error %d on element %d/%d (%s)\n", err, i, cnt, slow? "slow" : "fast"))  			goto skip_print;  	} -	ret = print_ht(&rhlt); +	ret = print_ht(rhlt);  	WARN(ret != cnt, "missing rhltable elements (%d != %d, %s)\n", ret, cnt, slow? "slow" : "fast");  skip_print: -	rhltable_destroy(&rhlt); +	rhltable_destroy(rhlt); +	kfree(rhlt);  	return 0;  } diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index e8090f099eb8..ef0dec20c7d8 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -104,6 +104,9 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)  		ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo); +		/* free the TID stats immediately */ +		cfg80211_sinfo_release_content(&sinfo); +  		dev_put(real_netdev);  		if (ret == -ENOENT) {  			/* Node is not associated anymore! It would be diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 508f4416dfc9..415d494cbe22 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -20,7 +20,6 @@  #include "main.h"  #include <linux/atomic.h> -#include <linux/bug.h>  #include <linux/byteorder/generic.h>  #include <linux/errno.h>  #include <linux/gfp.h> @@ -179,8 +178,10 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)  	parent_dev = __dev_get_by_index((struct net *)parent_net,  					dev_get_iflink(net_dev));  	/* if we got a NULL parent_dev there is something broken.. 
*/ -	if (WARN(!parent_dev, "Cannot find parent device")) +	if (!parent_dev) { +		pr_err("Cannot find parent device\n");  		return false; +	}  	if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net))  		return false; diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 5db5a0a4c959..b85ca809e509 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -221,6 +221,8 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,  	netif_trans_update(soft_iface);  	vid = batadv_get_vid(skb, 0); + +	skb_reset_mac_header(skb);  	ethhdr = eth_hdr(skb);  	switch (ntohs(ethhdr->h_proto)) { diff --git a/net/core/filter.c b/net/core/filter.c index 7559d6835ecb..7a54dc11ac2d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4112,10 +4112,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,  		/* Only some socketops are supported */  		switch (optname) {  		case SO_RCVBUF: +			val = min_t(u32, val, sysctl_rmem_max);  			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;  			sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);  			break;  		case SO_SNDBUF: +			val = min_t(u32, val, sysctl_wmem_max);  			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;  			sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);  			break; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index d6d5c20d7044..8c826603bf36 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -545,8 +545,7 @@ static void sk_psock_destroy_deferred(struct work_struct *gc)  	struct sk_psock *psock = container_of(gc, struct sk_psock, gc);  	/* No sk_callback_lock since already detached. 
*/ -	if (psock->parser.enabled) -		strp_done(&psock->parser.strp); +	strp_done(&psock->parser.strp);  	cancel_work_sync(&psock->work); diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 6eb837a47b5c..baaaeb2b2c42 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -202,7 +202,7 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,  static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,  					   u8 pkt, u8 opt, u8 *val, u8 len)  { -	if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL) +	if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options)  		return 0;  	return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);  } @@ -214,7 +214,7 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,  static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,  					   u8 pkt, u8 opt, u8 *val, u8 len)  { -	if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL) +	if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options)  		return 0;  	return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);  } diff --git a/net/dsa/master.c b/net/dsa/master.c index 71bb15f491c8..54f5551fb799 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -205,6 +205,8 @@ static void dsa_master_reset_mtu(struct net_device *dev)  	rtnl_unlock();  } +static struct lock_class_key dsa_master_addr_list_lock_key; +  int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)  {  	int ret; @@ -218,6 +220,8 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)  	wmb();  	dev->dsa_ptr = cpu_dp; +	lockdep_set_class(&dev->addr_list_lock, +			  &dsa_master_addr_list_lock_key);  	ret = dsa_master_ethtool_setup(dev);  	if (ret) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index a3fcc1d01615..a1c9fe155057 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -140,11 +140,14 @@ static int dsa_slave_close(struct net_device *dev)  static void 
dsa_slave_change_rx_flags(struct net_device *dev, int change)  {  	struct net_device *master = dsa_slave_to_master(dev); - -	if (change & IFF_ALLMULTI) -		dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); -	if (change & IFF_PROMISC) -		dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1); +	if (dev->flags & IFF_UP) { +		if (change & IFF_ALLMULTI) +			dev_set_allmulti(master, +					 dev->flags & IFF_ALLMULTI ? 1 : -1); +		if (change & IFF_PROMISC) +			dev_set_promiscuity(master, +					    dev->flags & IFF_PROMISC ? 1 : -1); +	}  }  static void dsa_slave_set_rx_mode(struct net_device *dev) @@ -639,7 +642,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)  	int ret;  	/* Port's PHY and MAC both need to be EEE capable */ -	if (!dev->phydev && !dp->pl) +	if (!dev->phydev || !dp->pl)  		return -ENODEV;  	if (!ds->ops->set_mac_eee) @@ -659,7 +662,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)  	int ret;  	/* Port's PHY and MAC both need to be EEE capable */ -	if (!dev->phydev && !dp->pl) +	if (!dev->phydev || !dp->pl)  		return -ENODEV;  	if (!ds->ops->get_mac_eee) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 20a64fe6254b..3978f807fa8b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1455,12 +1455,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)  {  	struct ip_tunnel *t = netdev_priv(dev);  	struct ip_tunnel_parm *p = &t->parms; +	__be16 o_flags = p->o_flags; + +	if ((t->erspan_ver == 1 || t->erspan_ver == 2) && +	    !t->collect_md) +		o_flags |= TUNNEL_KEY;  	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||  	    nla_put_be16(skb, IFLA_GRE_IFLAGS,  			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||  	    nla_put_be16(skb, IFLA_GRE_OFLAGS, -			 gre_tnl_flags_to_gre_flags(p->o_flags)) || +			 gre_tnl_flags_to_gre_flags(o_flags)) ||  	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||  	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||  	 
   nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4416368dbd49..801a9a0c217e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2098,12 +2098,17 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)  {  	struct ip6_tnl *t = netdev_priv(dev);  	struct __ip6_tnl_parm *p = &t->parms; +	__be16 o_flags = p->o_flags; + +	if ((p->erspan_ver == 1 || p->erspan_ver == 2) && +	    !p->collect_md) +		o_flags |= TUNNEL_KEY;  	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||  	    nla_put_be16(skb, IFLA_GRE_IFLAGS,  			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||  	    nla_put_be16(skb, IFLA_GRE_OFLAGS, -			 gre_tnl_flags_to_gre_flags(p->o_flags)) || +			 gre_tnl_flags_to_gre_flags(o_flags)) ||  	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||  	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||  	    nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 8b075f0bc351..6d0b1f3e927b 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -23,9 +23,11 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)  	struct sock *sk = sk_to_full_sk(skb->sk);  	unsigned int hh_len;  	struct dst_entry *dst; +	int strict = (ipv6_addr_type(&iph->daddr) & +		      (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));  	struct flowi6 fl6 = {  		.flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if : -			rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0, +			strict ? 
skb_dst(skb)->dev->ifindex : 0,  		.flowi6_mark = skb->mark,  		.flowi6_uid = sock_net_uid(net, sk),  		.daddr = iph->daddr, diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 8181ee7e1e27..ee5403cbe655 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -146,6 +146,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)  	} else {  		ip6_flow_hdr(hdr, 0, flowlabel);  		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); + +		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));  	}  	hdr->nexthdr = NEXTHDR_ROUTING; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1e03305c0549..e8a1dabef803 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -546,7 +546,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info)  	}  	err = 0; -	if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) +	if (__in6_dev_get(skb->dev) && +	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))  		goto out;  	if (t->parms.iph.daddr == 0) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 26f1d435696a..fed6becc5daf 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -83,8 +83,7 @@  #define L2TP_SLFLAG_S	   0x40000000  #define L2TP_SL_SEQ_MASK   0x00ffffff -#define L2TP_HDR_SIZE_SEQ		10 -#define L2TP_HDR_SIZE_NOSEQ		6 +#define L2TP_HDR_SIZE_MAX		14  /* Default trace flags */  #define L2TP_DEFAULT_DEBUG_FLAGS	0 @@ -808,7 +807,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)  	__skb_pull(skb, sizeof(struct udphdr));  	/* Short packet? 
*/ -	if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) { +	if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {  		l2tp_info(tunnel, L2TP_MSG_DATA,  			  "%s: recv short packet (len=%d)\n",  			  tunnel->name, skb->len); @@ -884,6 +883,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)  		goto error;  	} +	if (tunnel->version == L2TP_HDR_VER_3 && +	    l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) +		goto error; +  	l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);  	l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 9c9afe94d389..b2ce90260c35 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -301,6 +301,26 @@ static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel)  }  #endif +static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb, +					       unsigned char **ptr, unsigned char **optr) +{ +	int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session); + +	if (opt_len > 0) { +		int off = *ptr - *optr; + +		if (!pskb_may_pull(skb, off + opt_len)) +			return -1; + +		if (skb->data != *optr) { +			*optr = skb->data; +			*ptr = skb->data + off; +		} +	} + +	return 0; +} +  #define l2tp_printk(ptr, type, func, fmt, ...)				
\  do {									\  	if (((ptr)->debug) & (type))					\ diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 35f6f86d4dcc..d4c60523c549 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -165,6 +165,9 @@ static int l2tp_ip_recv(struct sk_buff *skb)  		print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);  	} +	if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) +		goto discard_sess; +  	l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);  	l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 237f1a4a0b0c..0ae6899edac0 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -178,6 +178,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb)  		print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);  	} +	if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) +		goto discard_sess; +  	l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);  	l2tp_session_dec_refcount(session); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f170d6c6629a..928f13a208b0 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1938,9 +1938,16 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,  				int head_need, bool may_encrypt)  {  	struct ieee80211_local *local = sdata->local; +	struct ieee80211_hdr *hdr; +	bool enc_tailroom;  	int tail_need = 0; -	if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) { +	hdr = (struct ieee80211_hdr *) skb->data; +	enc_tailroom = may_encrypt && +		       (sdata->crypto_tx_tailroom_needed_cnt || +			ieee80211_is_mgmt(hdr->frame_control)); + +	if (enc_tailroom) {  		tail_need = IEEE80211_ENCRYPT_TAILROOM;  		tail_need -= skb_tailroom(skb);  		tail_need = max_t(int, tail_need, 0); @@ -1948,8 +1955,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,  	if (skb_cloned(skb) &&  	    (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) || -	     !skb_clone_writable(skb, ETH_HLEN) || -	     (may_encrypt && 
sdata->crypto_tx_tailroom_needed_cnt))) +	     !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom))  		I802_DEBUG_INC(local->tx_expand_skb_head_cloned);  	else if (head_need || tail_need)  		I802_DEBUG_INC(local->tx_expand_skb_head); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 741b533148ba..db4d46332e86 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1007,6 +1007,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,  		}  		if (nf_ct_key_equal(h, tuple, zone, net)) { +			/* Tuple is taken already, so caller will need to find +			 * a new source port to use. +			 * +			 * Only exception: +			 * If the *original tuples* are identical, then both +			 * conntracks refer to the same flow. +			 * This is a rare situation, it can occur e.g. when +			 * more than one UDP packet is sent from same socket +			 * in different threads. +			 * +			 * Let nf_ct_resolve_clash() deal with this later. 
+			 */ +			if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, +					      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) +				continue; +  			NF_CT_STAT_INC_ATOMIC(net, found);  			rcu_read_unlock();  			return 1; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fb07f6cfc719..5a92f23f179f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -116,6 +116,23 @@ static void nft_trans_destroy(struct nft_trans *trans)  	kfree(trans);  } +static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +{ +	struct net *net = ctx->net; +	struct nft_trans *trans; + +	if (!nft_set_is_anonymous(set)) +		return; + +	list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { +		if (trans->msg_type == NFT_MSG_NEWSET && +		    nft_trans_set(trans) == set) { +			nft_trans_set_bound(trans) = true; +			break; +		} +	} +} +  static int nf_tables_register_hook(struct net *net,  				   const struct nft_table *table,  				   struct nft_chain *chain) @@ -211,18 +228,6 @@ static int nft_delchain(struct nft_ctx *ctx)  	return err;  } -/* either expr ops provide both activate/deactivate, or neither */ -static bool nft_expr_check_ops(const struct nft_expr_ops *ops) -{ -	if (!ops) -		return true; - -	if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate))) -		return false; - -	return true; -} -  static void nft_rule_expr_activate(const struct nft_ctx *ctx,  				   struct nft_rule *rule)  { @@ -238,14 +243,15 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx,  }  static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, -				     struct nft_rule *rule) +				     struct nft_rule *rule, +				     enum nft_trans_phase phase)  {  	struct nft_expr *expr;  	expr = nft_expr_first(rule);  	while (expr != nft_expr_last(rule) && expr->ops) {  		if (expr->ops->deactivate) -			expr->ops->deactivate(ctx, expr); +			expr->ops->deactivate(ctx, expr, phase);  		expr = 
nft_expr_next(expr);  	} @@ -296,7 +302,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)  		nft_trans_destroy(trans);  		return err;  	} -	nft_rule_expr_deactivate(ctx, rule); +	nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE);  	return 0;  } @@ -1929,9 +1935,6 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,   */  int nft_register_expr(struct nft_expr_type *type)  { -	if (!nft_expr_check_ops(type->ops)) -		return -EINVAL; -  	nfnl_lock(NFNL_SUBSYS_NFTABLES);  	if (type->family == NFPROTO_UNSPEC)  		list_add_tail_rcu(&type->list, &nf_tables_expressions); @@ -2079,10 +2082,6 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,  			err = PTR_ERR(ops);  			goto err1;  		} -		if (!nft_expr_check_ops(ops)) { -			err = -EINVAL; -			goto err1; -		}  	} else  		ops = type->ops; @@ -2511,7 +2510,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,  static void nf_tables_rule_release(const struct nft_ctx *ctx,  				   struct nft_rule *rule)  { -	nft_rule_expr_deactivate(ctx, rule); +	nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);  	nf_tables_rule_destroy(ctx, rule);  } @@ -3708,39 +3707,30 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,  bind:  	binding->chain = ctx->chain;  	list_add_tail_rcu(&binding->list, &set->bindings); +	nft_set_trans_bind(ctx, set); +  	return 0;  }  EXPORT_SYMBOL_GPL(nf_tables_bind_set); -void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, -			  struct nft_set_binding *binding) -{ -	if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && -	    nft_is_active(ctx->net, set)) -		list_add_tail_rcu(&set->list, &ctx->table->sets); - -	list_add_tail_rcu(&binding->list, &set->bindings); -} -EXPORT_SYMBOL_GPL(nf_tables_rebind_set); -  void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, -		          struct nft_set_binding *binding) +			  struct nft_set_binding *binding, bool event)  {  	
list_del_rcu(&binding->list); -	if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && -	    nft_is_active(ctx->net, set)) +	if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {  		list_del_rcu(&set->list); +		if (event) +			nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, +					     GFP_KERNEL); +	}  }  EXPORT_SYMBOL_GPL(nf_tables_unbind_set);  void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)  { -	if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && -	    nft_is_active(ctx->net, set)) { -		nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC); +	if (list_empty(&set->bindings) && nft_set_is_anonymous(set))  		nft_set_destroy(set); -	}  }  EXPORT_SYMBOL_GPL(nf_tables_destroy_set); @@ -6535,6 +6525,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)  			nf_tables_rule_notify(&trans->ctx,  					      nft_trans_rule(trans),  					      NFT_MSG_DELRULE); +			nft_rule_expr_deactivate(&trans->ctx, +						 nft_trans_rule(trans), +						 NFT_TRANS_COMMIT);  			break;  		case NFT_MSG_NEWSET:  			nft_clear(net, nft_trans_set(trans)); @@ -6621,7 +6614,8 @@ static void nf_tables_abort_release(struct nft_trans *trans)  		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));  		break;  	case NFT_MSG_NEWSET: -		nft_set_destroy(nft_trans_set(trans)); +		if (!nft_trans_set_bound(trans)) +			nft_set_destroy(nft_trans_set(trans));  		break;  	case NFT_MSG_NEWSETELEM:  		nft_set_elem_destroy(nft_trans_elem_set(trans), @@ -6682,7 +6676,9 @@ static int __nf_tables_abort(struct net *net)  		case NFT_MSG_NEWRULE:  			trans->ctx.chain->use--;  			list_del_rcu(&nft_trans_rule(trans)->list); -			nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans)); +			nft_rule_expr_deactivate(&trans->ctx, +						 nft_trans_rule(trans), +						 NFT_TRANS_ABORT);  			break;  		case NFT_MSG_DELRULE:  			trans->ctx.chain->use++; @@ -6692,7 +6688,8 @@ static int __nf_tables_abort(struct net *net)  			break;  		case 
NFT_MSG_NEWSET:  			trans->ctx.table->use--; -			list_del_rcu(&nft_trans_set(trans)->list); +			if (!nft_trans_set_bound(trans)) +				list_del_rcu(&nft_trans_set(trans)->list);  			break;  		case NFT_MSG_DELSET:  			trans->ctx.table->use++; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 5eb269428832..fe64df848365 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -61,6 +61,21 @@ static struct nft_compat_net *nft_compat_pernet(struct net *net)  	return net_generic(net, nft_compat_net_id);  } +static void nft_xt_get(struct nft_xt *xt) +{ +	/* refcount_inc() warns on 0 -> 1 transition, but we can't +	 * init the reference count to 1 in .select_ops -- we can't +	 * undo such an increase when another expression inside the same +	 * rule fails afterwards. +	 */ +	if (xt->listcnt == 0) +		refcount_set(&xt->refcnt, 1); +	else +		refcount_inc(&xt->refcnt); + +	xt->listcnt++; +} +  static bool nft_xt_put(struct nft_xt *xt)  {  	if (refcount_dec_and_test(&xt->refcnt)) { @@ -291,7 +306,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,  		return -EINVAL;  	nft_xt = container_of(expr->ops, struct nft_xt, ops); -	refcount_inc(&nft_xt->refcnt); +	nft_xt_get(nft_xt);  	return 0;  } @@ -504,7 +519,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,  		return ret;  	nft_xt = container_of(expr->ops, struct nft_xt, ops); -	refcount_inc(&nft_xt->refcnt); +	nft_xt_get(nft_xt);  	return 0;  } @@ -558,41 +573,16 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)  	__nft_match_destroy(ctx, expr, nft_expr_priv(expr));  } -static void nft_compat_activate(const struct nft_ctx *ctx, -				const struct nft_expr *expr, -				struct list_head *h) -{ -	struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); - -	if (xt->listcnt == 0) -		list_add(&xt->head, h); - -	xt->listcnt++; -} - -static void nft_compat_activate_mt(const struct nft_ctx *ctx, -				   const 
struct nft_expr *expr) -{ -	struct nft_compat_net *cn = nft_compat_pernet(ctx->net); - -	nft_compat_activate(ctx, expr, &cn->nft_match_list); -} - -static void nft_compat_activate_tg(const struct nft_ctx *ctx, -				   const struct nft_expr *expr) -{ -	struct nft_compat_net *cn = nft_compat_pernet(ctx->net); - -	nft_compat_activate(ctx, expr, &cn->nft_target_list); -} -  static void nft_compat_deactivate(const struct nft_ctx *ctx, -				  const struct nft_expr *expr) +				  const struct nft_expr *expr, +				  enum nft_trans_phase phase)  {  	struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); -	if (--xt->listcnt == 0) -		list_del_init(&xt->head); +	if (phase == NFT_TRANS_ABORT || phase == NFT_TRANS_COMMIT) { +		if (--xt->listcnt == 0) +			list_del_init(&xt->head); +	}  }  static void @@ -848,7 +838,6 @@ nft_match_select_ops(const struct nft_ctx *ctx,  	nft_match->ops.eval = nft_match_eval;  	nft_match->ops.init = nft_match_init;  	nft_match->ops.destroy = nft_match_destroy; -	nft_match->ops.activate = nft_compat_activate_mt;  	nft_match->ops.deactivate = nft_compat_deactivate;  	nft_match->ops.dump = nft_match_dump;  	nft_match->ops.validate = nft_match_validate; @@ -866,7 +855,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,  	nft_match->ops.size = matchsize; -	nft_match->listcnt = 1; +	nft_match->listcnt = 0;  	list_add(&nft_match->head, &cn->nft_match_list);  	return &nft_match->ops; @@ -953,7 +942,6 @@ nft_target_select_ops(const struct nft_ctx *ctx,  	nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));  	nft_target->ops.init = nft_target_init;  	nft_target->ops.destroy = nft_target_destroy; -	nft_target->ops.activate = nft_compat_activate_tg;  	nft_target->ops.deactivate = nft_compat_deactivate;  	nft_target->ops.dump = nft_target_dump;  	nft_target->ops.validate = nft_target_validate; @@ -964,7 +952,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,  	else  		nft_target->ops.eval = nft_target_eval_xt; -	nft_target->listcnt = 
1; +	nft_target->listcnt = 0;  	list_add(&nft_target->head, &cn->nft_target_list);  	return &nft_target->ops; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 07d4efd3d851..f1172f99752b 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -235,20 +235,17 @@ err1:  	return err;  } -static void nft_dynset_activate(const struct nft_ctx *ctx, -				const struct nft_expr *expr) -{ -	struct nft_dynset *priv = nft_expr_priv(expr); - -	nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} -  static void nft_dynset_deactivate(const struct nft_ctx *ctx, -				  const struct nft_expr *expr) +				  const struct nft_expr *expr, +				  enum nft_trans_phase phase)  {  	struct nft_dynset *priv = nft_expr_priv(expr); -	nf_tables_unbind_set(ctx, priv->set, &priv->binding); +	if (phase == NFT_TRANS_PREPARE) +		return; + +	nf_tables_unbind_set(ctx, priv->set, &priv->binding, +			     phase == NFT_TRANS_COMMIT);  }  static void nft_dynset_destroy(const struct nft_ctx *ctx, @@ -296,7 +293,6 @@ static const struct nft_expr_ops nft_dynset_ops = {  	.eval		= nft_dynset_eval,  	.init		= nft_dynset_init,  	.destroy	= nft_dynset_destroy, -	.activate	= nft_dynset_activate,  	.deactivate	= nft_dynset_deactivate,  	.dump		= nft_dynset_dump,  }; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 0777a93211e2..3f6d1d2a6281 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -72,10 +72,14 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,  }  static void nft_immediate_deactivate(const struct nft_ctx *ctx, -				     const struct nft_expr *expr) +				     const struct nft_expr *expr, +				     enum nft_trans_phase phase)  {  	const struct nft_immediate_expr *priv = nft_expr_priv(expr); +	if (phase == NFT_TRANS_COMMIT) +		return; +  	return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg));  } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 
227b2b15a19c..14496da5141d 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -121,20 +121,17 @@ static int nft_lookup_init(const struct nft_ctx *ctx,  	return 0;  } -static void nft_lookup_activate(const struct nft_ctx *ctx, -				const struct nft_expr *expr) -{ -	struct nft_lookup *priv = nft_expr_priv(expr); - -	nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} -  static void nft_lookup_deactivate(const struct nft_ctx *ctx, -				  const struct nft_expr *expr) +				  const struct nft_expr *expr, +				  enum nft_trans_phase phase)  {  	struct nft_lookup *priv = nft_expr_priv(expr); -	nf_tables_unbind_set(ctx, priv->set, &priv->binding); +	if (phase == NFT_TRANS_PREPARE) +		return; + +	nf_tables_unbind_set(ctx, priv->set, &priv->binding, +			     phase == NFT_TRANS_COMMIT);  }  static void nft_lookup_destroy(const struct nft_ctx *ctx, @@ -225,7 +222,6 @@ static const struct nft_expr_ops nft_lookup_ops = {  	.size		= NFT_EXPR_SIZE(sizeof(struct nft_lookup)),  	.eval		= nft_lookup_eval,  	.init		= nft_lookup_init, -	.activate	= nft_lookup_activate,  	.deactivate	= nft_lookup_deactivate,  	.destroy	= nft_lookup_destroy,  	.dump		= nft_lookup_dump, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index a3185ca2a3a9..ae178e914486 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -155,20 +155,17 @@ nla_put_failure:  	return -1;  } -static void nft_objref_map_activate(const struct nft_ctx *ctx, -				    const struct nft_expr *expr) -{ -	struct nft_objref_map *priv = nft_expr_priv(expr); - -	nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} -  static void nft_objref_map_deactivate(const struct nft_ctx *ctx, -				      const struct nft_expr *expr) +				      const struct nft_expr *expr, +				      enum nft_trans_phase phase)  {  	struct nft_objref_map *priv = nft_expr_priv(expr); -	nf_tables_unbind_set(ctx, priv->set, &priv->binding); +	if (phase == NFT_TRANS_PREPARE) +		return; + +	
nf_tables_unbind_set(ctx, priv->set, &priv->binding, +			     phase == NFT_TRANS_COMMIT);  }  static void nft_objref_map_destroy(const struct nft_ctx *ctx, @@ -185,7 +182,6 @@ static const struct nft_expr_ops nft_objref_map_ops = {  	.size		= NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),  	.eval		= nft_objref_map_eval,  	.init		= nft_objref_map_init, -	.activate	= nft_objref_map_activate,  	.deactivate	= nft_objref_map_deactivate,  	.destroy	= nft_objref_map_destroy,  	.dump		= nft_objref_map_dump, diff --git a/net/rds/bind.c b/net/rds/bind.c index 762d2c6788a3..17c9d9f0c848 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -78,10 +78,10 @@ struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port,  	__rds_create_bind_key(key, addr, port, scope_id);  	rcu_read_lock();  	rs = rhashtable_lookup(&bind_hash_table, key, ht_parms); -	if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) -		rds_sock_addref(rs); -	else +	if (rs && (sock_flag(rds_rs_to_sk(rs), SOCK_DEAD) || +		   !refcount_inc_not_zero(&rds_rs_to_sk(rs)->sk_refcnt)))  		rs = NULL; +  	rcu_read_unlock();  	rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr, diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index eaf19ebaa964..3f7bb11f3290 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -596,6 +596,7 @@ error_requeue_call:  	}  error_no_call:  	release_sock(&rx->sk); +error_trace:  	trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);  	return ret; @@ -604,7 +605,7 @@ wait_interrupted:  wait_error:  	finish_wait(sk_sleep(&rx->sk), &wait);  	call = NULL; -	goto error_no_call; +	goto error_trace;  }  /** diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index f6aa57fbbbaf..12ca9d13db83 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1371,7 +1371,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,  	if (!tc_skip_hw(fnew->flags)) {  		err = fl_hw_replace_filter(tp, fnew, extack);  		if (err) -			goto errout_mask; +			goto 
errout_mask_ht;  	}  	if (!tc_in_hw(fnew->flags)) @@ -1401,6 +1401,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,  	kfree(mask);  	return 0; +errout_mask_ht: +	rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node, +			       fnew->mask->filter_ht_params); +  errout_mask:  	fl_mask_put(head, fnew->mask, false); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f93c3cf9e567..65d6d04546ae 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2027,7 +2027,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)  	struct sctp_endpoint *ep = sctp_sk(sk)->ep;  	struct sctp_transport *transport = NULL;  	struct sctp_sndrcvinfo _sinfo, *sinfo; -	struct sctp_association *asoc; +	struct sctp_association *asoc, *tmp;  	struct sctp_cmsgs cmsgs;  	union sctp_addr *daddr;  	bool new = false; @@ -2053,7 +2053,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)  	/* SCTP_SENDALL process */  	if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) { -		list_for_each_entry(asoc, &ep->asocs, asocs) { +		list_for_each_entry_safe(asoc, tmp, &ep->asocs, asocs) {  			err = sctp_sendmsg_check_sflags(asoc, sflags, msg,  							msg_len);  			if (err == 0) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 80e0ae5534ec..f24633114dfd 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -84,6 +84,19 @@ static void fa_zero(struct flex_array *fa, size_t index, size_t count)  	}  } +static size_t fa_index(struct flex_array *fa, void *elem, size_t count) +{ +	size_t index = 0; + +	while (count--) { +		if (elem == flex_array_get(fa, index)) +			break; +		index++; +	} + +	return index; +} +  /* Migrates chunks from stream queues to new stream queues if needed,   * but not across associations. Also, removes those chunks to streams   * higher than the new max. 
@@ -147,6 +160,13 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,  	if (stream->out) {  		fa_copy(out, stream->out, 0, min(outcnt, stream->outcnt)); +		if (stream->out_curr) { +			size_t index = fa_index(stream->out, stream->out_curr, +						stream->outcnt); + +			BUG_ON(index == stream->outcnt); +			stream->out_curr = flex_array_get(out, index); +		}  		fa_free(stream->out);  	} diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c4e56602e0c6..b04a813fc865 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1505,6 +1505,11 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,  	smc = smc_sk(sk);  	lock_sock(sk); +	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { +		/* socket was connected before, no more data to read */ +		rc = 0; +		goto out; +	}  	if ((sk->sk_state == SMC_INIT) ||  	    (sk->sk_state == SMC_LISTEN) ||  	    (sk->sk_state == SMC_CLOSED)) @@ -1840,7 +1845,11 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,  	smc = smc_sk(sk);  	lock_sock(sk); - +	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { +		/* socket was connected before, no more data to read */ +		rc = 0; +		goto out; +	}  	if (sk->sk_state == SMC_INIT ||  	    sk->sk_state == SMC_LISTEN ||  	    sk->sk_state == SMC_CLOSED) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index db83332ac1c8..a712c9f8699b 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -21,13 +21,6 @@  /********************************** send *************************************/ -struct smc_cdc_tx_pend { -	struct smc_connection	*conn;		/* socket connection */ -	union smc_host_cursor	cursor;	/* tx sndbuf cursor sent */ -	union smc_host_cursor	p_cursor;	/* rx RMBE cursor produced */ -	u16			ctrl_seq;	/* conn. 
tx sequence # */ -}; -  /* handler for send/transmission completion of a CDC msg */  static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,  			       struct smc_link *link, @@ -61,12 +54,14 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,  int smc_cdc_get_free_slot(struct smc_connection *conn,  			  struct smc_wr_buf **wr_buf, +			  struct smc_rdma_wr **wr_rdma_buf,  			  struct smc_cdc_tx_pend **pend)  {  	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];  	int rc;  	rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, +				     wr_rdma_buf,  				     (struct smc_wr_tx_pend_priv **)pend);  	if (!conn->alert_token_local)  		/* abnormal termination */ @@ -96,6 +91,7 @@ int smc_cdc_msg_send(struct smc_connection *conn,  		     struct smc_wr_buf *wr_buf,  		     struct smc_cdc_tx_pend *pend)  { +	union smc_host_cursor cfed;  	struct smc_link *link;  	int rc; @@ -107,10 +103,10 @@ int smc_cdc_msg_send(struct smc_connection *conn,  	conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;  	smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf,  			    &conn->local_tx_ctrl, conn); +	smc_curs_copy(&cfed, &((struct smc_host_cdc_msg *)wr_buf)->cons, conn);  	rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);  	if (!rc) -		smc_curs_copy(&conn->rx_curs_confirmed, -			      &conn->local_tx_ctrl.cons, conn); +		smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn);  	return rc;  } @@ -121,11 +117,14 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)  	struct smc_wr_buf *wr_buf;  	int rc; -	rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); +	rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend);  	if (rc)  		return rc; -	return smc_cdc_msg_send(conn, wr_buf, pend); +	spin_lock_bh(&conn->send_lock); +	rc = smc_cdc_msg_send(conn, wr_buf, pend); +	spin_unlock_bh(&conn->send_lock); +	return rc;  }  int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h 
index b5bfe38c7f9b..271e2524dc8f 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -160,7 +160,9 @@ static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt,  #endif  } -/* calculate cursor difference between old and new, where old <= new */ +/* calculate cursor difference between old and new, where old <= new and + * difference cannot exceed size + */  static inline int smc_curs_diff(unsigned int size,  				union smc_host_cursor *old,  				union smc_host_cursor *new) @@ -185,6 +187,28 @@ static inline int smc_curs_comp(unsigned int size,  	return smc_curs_diff(size, old, new);  } +/* calculate cursor difference between old and new, where old <= new and + * difference may exceed size + */ +static inline int smc_curs_diff_large(unsigned int size, +				      union smc_host_cursor *old, +				      union smc_host_cursor *new) +{ +	if (old->wrap < new->wrap) +		return min_t(int, +			     (size - old->count) + new->count + +			     (new->wrap - old->wrap - 1) * size, +			     size); + +	if (old->wrap > new->wrap) /* wrap has switched from 0xffff to 0x0000 */ +		return min_t(int, +			     (size - old->count) + new->count + +			     (new->wrap + 0xffff - old->wrap) * size, +			     size); + +	return max_t(int, 0, (new->count - old->count)); +} +  static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer,  					  union smc_host_cursor *local,  					  struct smc_connection *conn) @@ -270,10 +294,16 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,  		smcr_cdc_msg_to_host(local, peer, conn);  } -struct smc_cdc_tx_pend; +struct smc_cdc_tx_pend { +	struct smc_connection	*conn;		/* socket connection */ +	union smc_host_cursor	cursor;		/* tx sndbuf cursor sent */ +	union smc_host_cursor	p_cursor;	/* rx RMBE cursor produced */ +	u16			ctrl_seq;	/* conn. 
tx sequence # */ +};  int smc_cdc_get_free_slot(struct smc_connection *conn,  			  struct smc_wr_buf **wr_buf, +			  struct smc_rdma_wr **wr_rdma_buf,  			  struct smc_cdc_tx_pend **pend);  void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);  int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 776e9dfc915d..d53fd588d1f5 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -378,7 +378,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)  	vec.iov_len = sizeof(struct smc_clc_msg_decline);  	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,  			     sizeof(struct smc_clc_msg_decline)); -	if (len < sizeof(struct smc_clc_msg_decline)) +	if (len < 0 || len < sizeof(struct smc_clc_msg_decline))  		len = -EPROTO;  	return len > 0 ? 0 : len;  } diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index ea2b87f29469..e39cadda1bf5 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -345,14 +345,7 @@ static void smc_close_passive_work(struct work_struct *work)  	switch (sk->sk_state) {  	case SMC_INIT: -		if (atomic_read(&conn->bytes_to_rcv) || -		    (rxflags->peer_done_writing && -		     !smc_cdc_rxed_any_close(conn))) { -			sk->sk_state = SMC_APPCLOSEWAIT1; -		} else { -			sk->sk_state = SMC_CLOSED; -			sock_put(sk); /* passive closing */ -		} +		sk->sk_state = SMC_APPCLOSEWAIT1;  		break;  	case SMC_ACTIVE:  		sk->sk_state = SMC_APPCLOSEWAIT1; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 35c1cdc93e1c..aa1c551cee81 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -128,6 +128,8 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)  {  	struct smc_link_group *lgr = conn->lgr; +	if (!lgr) +		return;  	write_lock_bh(&lgr->conns_lock);  	if (conn->alert_token_local) {  		__smc_lgr_unregister_conn(conn); @@ -300,13 +302,13 @@ static void smc_buf_unuse(struct smc_connection *conn,  		conn->sndbuf_desc->used = 0;  	if 
(conn->rmb_desc) {  		if (!conn->rmb_desc->regerr) { -			conn->rmb_desc->used = 0;  			if (!lgr->is_smcd) {  				/* unregister rmb with peer */  				smc_llc_do_delete_rkey(  						&lgr->lnk[SMC_SINGLE_LINK],  						conn->rmb_desc);  			} +			conn->rmb_desc->used = 0;  		} else {  			/* buf registration failed, reuse not possible */  			write_lock_bh(&lgr->rmbs_lock); @@ -628,6 +630,8 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,  			local_contact = SMC_REUSE_CONTACT;  			conn->lgr = lgr;  			smc_lgr_register_conn(conn); /* add smc conn to lgr */ +			if (delayed_work_pending(&lgr->free_work)) +				cancel_delayed_work(&lgr->free_work);  			write_unlock_bh(&lgr->conns_lock);  			break;  		} diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index b00287989a3d..8806d2afa6ed 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -52,6 +52,24 @@ enum smc_wr_reg_state {  	FAILED		/* ib_wr_reg_mr response: failure */  }; +struct smc_rdma_sge {				/* sges for RDMA writes */ +	struct ib_sge		wr_tx_rdma_sge[SMC_IB_MAX_SEND_SGE]; +}; + +#define SMC_MAX_RDMA_WRITES	2		/* max. 
# of RDMA writes per +						 * message send +						 */ + +struct smc_rdma_sges {				/* sges per message send */ +	struct smc_rdma_sge	tx_rdma_sge[SMC_MAX_RDMA_WRITES]; +}; + +struct smc_rdma_wr {				/* work requests per message +						 * send +						 */ +	struct ib_rdma_wr	wr_tx_rdma[SMC_MAX_RDMA_WRITES]; +}; +  struct smc_link {  	struct smc_ib_device	*smcibdev;	/* ib-device */  	u8			ibport;		/* port - values 1 | 2 */ @@ -64,6 +82,8 @@ struct smc_link {  	struct smc_wr_buf	*wr_tx_bufs;	/* WR send payload buffers */  	struct ib_send_wr	*wr_tx_ibs;	/* WR send meta data */  	struct ib_sge		*wr_tx_sges;	/* WR send gather meta data */ +	struct smc_rdma_sges	*wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/ +	struct smc_rdma_wr	*wr_tx_rdmas;	/* WR RDMA WRITE */  	struct smc_wr_tx_pend	*wr_tx_pends;	/* WR send waiting for CQE */  	/* above four vectors have wr_tx_cnt elements and use the same index */  	dma_addr_t		wr_tx_dma_addr;	/* DMA address of wr_tx_bufs */ diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index e519ef29c0ff..76487a16934e 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -289,8 +289,8 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)  static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)  { -	struct smc_ib_device *smcibdev = -		(struct smc_ib_device *)ibevent->device; +	struct smc_link *lnk = (struct smc_link *)priv; +	struct smc_ib_device *smcibdev = lnk->smcibdev;  	u8 port_idx;  	switch (ibevent->event) { @@ -298,7 +298,7 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)  	case IB_EVENT_GID_CHANGE:  	case IB_EVENT_PORT_ERR:  	case IB_EVENT_QP_ACCESS_ERR: -		port_idx = ibevent->element.port_num - 1; +		port_idx = ibevent->element.qp->port - 1;  		set_bit(port_idx, &smcibdev->port_event_mask);  		schedule_work(&smcibdev->port_event_work);  		break; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index a6d3623d06f4..4fd60c522802 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c 
@@ -166,7 +166,8 @@ static int smc_llc_add_pending_send(struct smc_link *link,  {  	int rc; -	rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, pend); +	rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL, +				     pend);  	if (rc < 0)  		return rc;  	BUILD_BUG_ON_MSG( diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 7cb3e4f07c10..632c3109dee5 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -27,7 +27,7 @@  static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {  	[SMC_PNETID_NAME] = {  		.type = NLA_NUL_STRING, -		.len = SMC_MAX_PNETID_LEN - 1 +		.len = SMC_MAX_PNETID_LEN  	},  	[SMC_PNETID_ETHNAME] = {  		.type = NLA_NUL_STRING, diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index d8366ed51757..f93f3580c100 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -165,12 +165,11 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)  			conn->local_tx_ctrl.prod_flags.urg_data_pending = 1;  		if (!atomic_read(&conn->sndbuf_space) || conn->urg_tx_pend) { +			if (send_done) +				return send_done;  			rc = smc_tx_wait(smc, msg->msg_flags); -			if (rc) { -				if (send_done) -					return send_done; +			if (rc)  				goto out_err; -			}  			continue;  		} @@ -267,27 +266,23 @@ int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,  /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */  static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, -			     int num_sges, struct ib_sge sges[]) +			     int num_sges, struct ib_rdma_wr *rdma_wr)  {  	struct smc_link_group *lgr = conn->lgr; -	struct ib_rdma_wr rdma_wr;  	struct smc_link *link;  	int rc; -	memset(&rdma_wr, 0, sizeof(rdma_wr));  	link = &lgr->lnk[SMC_SINGLE_LINK]; -	rdma_wr.wr.wr_id = smc_wr_tx_get_next_wr_id(link); -	rdma_wr.wr.sg_list = sges; -	rdma_wr.wr.num_sge = num_sges; -	rdma_wr.wr.opcode = IB_WR_RDMA_WRITE; -	rdma_wr.remote_addr = +	rdma_wr->wr.wr_id = 
smc_wr_tx_get_next_wr_id(link); +	rdma_wr->wr.num_sge = num_sges; +	rdma_wr->remote_addr =  		lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +  		/* RMBE within RMB */  		conn->tx_off +  		/* offset within RMBE */  		peer_rmbe_offset; -	rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; -	rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL); +	rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; +	rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);  	if (rc) {  		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;  		smc_lgr_terminate(lgr); @@ -314,24 +309,25 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn,  /* SMC-R helper for smc_tx_rdma_writes() */  static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,  			       size_t src_off, size_t src_len, -			       size_t dst_off, size_t dst_len) +			       size_t dst_off, size_t dst_len, +			       struct smc_rdma_wr *wr_rdma_buf)  {  	dma_addr_t dma_addr =  		sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); -	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];  	int src_len_sum = src_len, dst_len_sum = dst_len; -	struct ib_sge sges[SMC_IB_MAX_SEND_SGE];  	int sent_count = src_off;  	int srcchunk, dstchunk;  	int num_sges;  	int rc;  	for (dstchunk = 0; dstchunk < 2; dstchunk++) { +		struct ib_sge *sge = +			wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list; +  		num_sges = 0;  		for (srcchunk = 0; srcchunk < 2; srcchunk++) { -			sges[srcchunk].addr = dma_addr + src_off; -			sges[srcchunk].length = src_len; -			sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; +			sge[srcchunk].addr = dma_addr + src_off; +			sge[srcchunk].length = src_len;  			num_sges++;  			src_off += src_len; @@ -344,7 +340,8 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,  			src_len = dst_len - src_len; /* remainder */  			src_len_sum += src_len;  		} -		rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges); +		rc 
= smc_tx_rdma_write(conn, dst_off, num_sges, +				       &wr_rdma_buf->wr_tx_rdma[dstchunk]);  		if (rc)  			return rc;  		if (dst_len_sum == len) @@ -403,7 +400,8 @@ static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len,  /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;   * usable snd_wnd as max transmit   */ -static int smc_tx_rdma_writes(struct smc_connection *conn) +static int smc_tx_rdma_writes(struct smc_connection *conn, +			      struct smc_rdma_wr *wr_rdma_buf)  {  	size_t len, src_len, dst_off, dst_len; /* current chunk values */  	union smc_host_cursor sent, prep, prod, cons; @@ -464,7 +462,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)  					 dst_off, dst_len);  	else  		rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len, -					 dst_off, dst_len); +					 dst_off, dst_len, wr_rdma_buf);  	if (rc)  		return rc; @@ -485,31 +483,30 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)  static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)  {  	struct smc_cdc_producer_flags *pflags; +	struct smc_rdma_wr *wr_rdma_buf;  	struct smc_cdc_tx_pend *pend;  	struct smc_wr_buf *wr_buf;  	int rc; -	spin_lock_bh(&conn->send_lock); -	rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); +	rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend);  	if (rc < 0) {  		if (rc == -EBUSY) {  			struct smc_sock *smc =  				container_of(conn, struct smc_sock, conn); -			if (smc->sk.sk_err == ECONNABORTED) { -				rc = sock_error(&smc->sk); -				goto out_unlock; -			} +			if (smc->sk.sk_err == ECONNABORTED) +				return sock_error(&smc->sk);  			rc = 0;  			if (conn->alert_token_local) /* connection healthy */  				mod_delayed_work(system_wq, &conn->tx_work,  						 SMC_TX_WORK_DELAY);  		} -		goto out_unlock; +		return rc;  	} +	spin_lock_bh(&conn->send_lock);  	if (!conn->local_tx_ctrl.prod_flags.urg_data_present) { -		rc = smc_tx_rdma_writes(conn); +		rc = smc_tx_rdma_writes(conn, 
wr_rdma_buf);  		if (rc) {  			smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],  					   (struct smc_wr_tx_pend_priv *)pend); @@ -536,7 +533,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)  	spin_lock_bh(&conn->send_lock);  	if (!pflags->urg_data_present) -		rc = smc_tx_rdma_writes(conn); +		rc = smc_tx_rdma_writes(conn, NULL);  	if (!rc)  		rc = smcd_cdc_msg_send(conn); @@ -598,7 +595,8 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)  	if (to_confirm > conn->rmbe_update_limit) {  		smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn);  		sender_free = conn->rmb_desc->len - -			      smc_curs_diff(conn->rmb_desc->len, &prod, &cfed); +			      smc_curs_diff_large(conn->rmb_desc->len, +						  &cfed, &prod);  	}  	if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index c2694750a6a8..253aa75dc2b6 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -160,6 +160,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)   * @link:		Pointer to smc_link used to later send the message.   * @handler:		Send completion handler function pointer.   * @wr_buf:		Out value returns pointer to message buffer. + * @wr_rdma_buf:	Out value returns pointer to rdma work request.   * @wr_pend_priv:	Out value returns pointer serving as handler context.   *   * Return: 0 on success, or -errno on error. 
@@ -167,6 +168,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)  int smc_wr_tx_get_free_slot(struct smc_link *link,  			    smc_wr_tx_handler handler,  			    struct smc_wr_buf **wr_buf, +			    struct smc_rdma_wr **wr_rdma_buf,  			    struct smc_wr_tx_pend_priv **wr_pend_priv)  {  	struct smc_wr_tx_pend *wr_pend; @@ -204,6 +206,8 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,  	wr_ib = &link->wr_tx_ibs[idx];  	wr_ib->wr_id = wr_id;  	*wr_buf = &link->wr_tx_bufs[idx]; +	if (wr_rdma_buf) +		*wr_rdma_buf = &link->wr_tx_rdmas[idx];  	*wr_pend_priv = &wr_pend->priv;  	return 0;  } @@ -218,10 +222,10 @@ int smc_wr_tx_put_slot(struct smc_link *link,  		u32 idx = pend->idx;  		/* clear the full struct smc_wr_tx_pend including .priv */ -		memset(&link->wr_tx_pends[pend->idx], 0, -		       sizeof(link->wr_tx_pends[pend->idx])); -		memset(&link->wr_tx_bufs[pend->idx], 0, -		       sizeof(link->wr_tx_bufs[pend->idx])); +		memset(&link->wr_tx_pends[idx], 0, +		       sizeof(link->wr_tx_pends[idx])); +		memset(&link->wr_tx_bufs[idx], 0, +		       sizeof(link->wr_tx_bufs[idx]));  		test_and_clear_bit(idx, link->wr_tx_mask);  		return 1;  	} @@ -465,12 +469,26 @@ static void smc_wr_init_sge(struct smc_link *lnk)  			lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;  		lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;  		lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey; +		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey = +			lnk->roce_pd->local_dma_lkey; +		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey = +			lnk->roce_pd->local_dma_lkey; +		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey = +			lnk->roce_pd->local_dma_lkey; +		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey = +			lnk->roce_pd->local_dma_lkey;  		lnk->wr_tx_ibs[i].next = NULL;  		lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];  		lnk->wr_tx_ibs[i].num_sge = 1;  		lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;  		
lnk->wr_tx_ibs[i].send_flags =  			IB_SEND_SIGNALED | IB_SEND_SOLICITED; +		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE; +		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE; +		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list = +			lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge; +		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list = +			lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;  	}  	for (i = 0; i < lnk->wr_rx_cnt; i++) {  		lnk->wr_rx_sges[i].addr = @@ -521,8 +539,12 @@ void smc_wr_free_link_mem(struct smc_link *lnk)  	lnk->wr_tx_mask = NULL;  	kfree(lnk->wr_tx_sges);  	lnk->wr_tx_sges = NULL; +	kfree(lnk->wr_tx_rdma_sges); +	lnk->wr_tx_rdma_sges = NULL;  	kfree(lnk->wr_rx_sges);  	lnk->wr_rx_sges = NULL; +	kfree(lnk->wr_tx_rdmas); +	lnk->wr_tx_rdmas = NULL;  	kfree(lnk->wr_rx_ibs);  	lnk->wr_rx_ibs = NULL;  	kfree(lnk->wr_tx_ibs); @@ -552,10 +574,20 @@ int smc_wr_alloc_link_mem(struct smc_link *link)  				  GFP_KERNEL);  	if (!link->wr_rx_ibs)  		goto no_mem_wr_tx_ibs; +	link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT, +				    sizeof(link->wr_tx_rdmas[0]), +				    GFP_KERNEL); +	if (!link->wr_tx_rdmas) +		goto no_mem_wr_rx_ibs; +	link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT, +					sizeof(link->wr_tx_rdma_sges[0]), +					GFP_KERNEL); +	if (!link->wr_tx_rdma_sges) +		goto no_mem_wr_tx_rdmas;  	link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),  				   GFP_KERNEL);  	if (!link->wr_tx_sges) -		goto no_mem_wr_rx_ibs; +		goto no_mem_wr_tx_rdma_sges;  	link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,  				   sizeof(link->wr_rx_sges[0]),  				   GFP_KERNEL); @@ -579,6 +611,10 @@ no_mem_wr_rx_sges:  	kfree(link->wr_rx_sges);  no_mem_wr_tx_sges:  	kfree(link->wr_tx_sges); +no_mem_wr_tx_rdma_sges: +	kfree(link->wr_tx_rdma_sges); +no_mem_wr_tx_rdmas: +	kfree(link->wr_tx_rdmas);  no_mem_wr_rx_ibs:  	kfree(link->wr_rx_ibs);  no_mem_wr_tx_ibs: diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 
1d85bb14fd6f..09bf32fd3959 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -85,6 +85,7 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev);  int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler,  			    struct smc_wr_buf **wr_buf, +			    struct smc_rdma_wr **wrs,  			    struct smc_wr_tx_pend_priv **wr_pend_priv);  int smc_wr_tx_put_slot(struct smc_link *link,  		       struct smc_wr_tx_pend_priv *wr_pend_priv); diff --git a/net/socket.c b/net/socket.c index e89884e2197b..d80d87a395ea 100644 --- a/net/socket.c +++ b/net/socket.c @@ -941,8 +941,7 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))  EXPORT_SYMBOL(dlci_ioctl_set);  static long sock_do_ioctl(struct net *net, struct socket *sock, -			  unsigned int cmd, unsigned long arg, -			  unsigned int ifreq_size) +			  unsigned int cmd, unsigned long arg)  {  	int err;  	void __user *argp = (void __user *)arg; @@ -968,11 +967,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,  	} else {  		struct ifreq ifr;  		bool need_copyout; -		if (copy_from_user(&ifr, argp, ifreq_size)) +		if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))  			return -EFAULT;  		err = dev_ioctl(net, cmd, &ifr, &need_copyout);  		if (!err && need_copyout) -			if (copy_to_user(argp, &ifr, ifreq_size)) +			if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))  				return -EFAULT;  	}  	return err; @@ -1071,8 +1070,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)  			err = open_related_ns(&net->ns, get_net_ns);  			break;  		default: -			err = sock_do_ioctl(net, sock, cmd, arg, -					    sizeof(struct ifreq)); +			err = sock_do_ioctl(net, sock, cmd, arg);  			break;  		}  	return err; @@ -2780,8 +2778,7 @@ static int do_siocgstamp(struct net *net, struct socket *sock,  	int err;  	set_fs(KERNEL_DS); -	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv, -			    sizeof(struct compat_ifreq)); +	err = sock_do_ioctl(net, sock, cmd, (unsigned 
long)&ktv);  	set_fs(old_fs);  	if (!err)  		err = compat_put_timeval(&ktv, up); @@ -2797,8 +2794,7 @@ static int do_siocgstampns(struct net *net, struct socket *sock,  	int err;  	set_fs(KERNEL_DS); -	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts, -			    sizeof(struct compat_ifreq)); +	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);  	set_fs(old_fs);  	if (!err)  		err = compat_put_timespec(&kts, up); @@ -2994,6 +2990,54 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,  	return dev_ioctl(net, cmd, &ifreq, NULL);  } +static int compat_ifreq_ioctl(struct net *net, struct socket *sock, +			      unsigned int cmd, +			      struct compat_ifreq __user *uifr32) +{ +	struct ifreq __user *uifr; +	int err; + +	/* Handle the fact that while struct ifreq has the same *layout* on +	 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, +	 * which are handled elsewhere, it still has different *size* due to +	 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, +	 * resulting in struct ifreq being 32 and 40 bytes respectively). +	 * As a result, if the struct happens to be at the end of a page and +	 * the next page isn't readable/writable, we get a fault. To prevent +	 * that, copy back and forth to the full size. 
+	 */ + +	uifr = compat_alloc_user_space(sizeof(*uifr)); +	if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) +		return -EFAULT; + +	err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); + +	if (!err) { +		switch (cmd) { +		case SIOCGIFFLAGS: +		case SIOCGIFMETRIC: +		case SIOCGIFMTU: +		case SIOCGIFMEM: +		case SIOCGIFHWADDR: +		case SIOCGIFINDEX: +		case SIOCGIFADDR: +		case SIOCGIFBRDADDR: +		case SIOCGIFDSTADDR: +		case SIOCGIFNETMASK: +		case SIOCGIFPFLAGS: +		case SIOCGIFTXQLEN: +		case SIOCGMIIPHY: +		case SIOCGMIIREG: +		case SIOCGIFNAME: +			if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) +				err = -EFAULT; +			break; +		} +	} +	return err; +} +  static int compat_sioc_ifmap(struct net *net, unsigned int cmd,  			struct compat_ifreq __user *uifr32)  { @@ -3109,8 +3153,7 @@ static int routing_ioctl(struct net *net, struct socket *sock,  	}  	set_fs(KERNEL_DS); -	ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r, -			    sizeof(struct compat_ifreq)); +	ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);  	set_fs(old_fs);  out: @@ -3210,21 +3253,22 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,  	case SIOCSIFTXQLEN:  	case SIOCBRADDIF:  	case SIOCBRDELIF: +	case SIOCGIFNAME:  	case SIOCSIFNAME:  	case SIOCGMIIPHY:  	case SIOCGMIIREG:  	case SIOCSMIIREG: -	case SIOCSARP: -	case SIOCGARP: -	case SIOCDARP: -	case SIOCATMARK:  	case SIOCBONDENSLAVE:  	case SIOCBONDRELEASE:  	case SIOCBONDSETHWADDR:  	case SIOCBONDCHANGEACTIVE: -	case SIOCGIFNAME: -		return sock_do_ioctl(net, sock, cmd, arg, -				     sizeof(struct compat_ifreq)); +		return compat_ifreq_ioctl(net, sock, cmd, argp); + +	case SIOCSARP: +	case SIOCGARP: +	case SIOCDARP: +	case SIOCATMARK: +		return sock_do_ioctl(net, sock, cmd, arg);  	}  	return -ENOIOCTLCMD; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 5d3cce9e8744..15eb5d3d4750 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ 
-75,6 +75,9 @@ static u32 virtio_transport_get_local_cid(void)  {  	struct virtio_vsock *vsock = virtio_vsock_get(); +	if (!vsock) +		return VMADDR_CID_ANY; +  	return vsock->guest_cid;  } @@ -584,10 +587,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)  	virtio_vsock_update_guest_cid(vsock); -	ret = vsock_core_init(&virtio_transport.transport); -	if (ret < 0) -		goto out_vqs; -  	vsock->rx_buf_nr = 0;  	vsock->rx_buf_max_nr = 0;  	atomic_set(&vsock->queued_replies, 0); @@ -618,8 +617,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)  	mutex_unlock(&the_virtio_vsock_mutex);  	return 0; -out_vqs: -	vsock->vdev->config->del_vqs(vsock->vdev);  out:  	kfree(vsock);  	mutex_unlock(&the_virtio_vsock_mutex); @@ -637,6 +634,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev)  	flush_work(&vsock->event_work);  	flush_work(&vsock->send_pkt_work); +	/* Reset all connected sockets when the device disappear */ +	vsock_for_each_connected_socket(virtio_vsock_reset_sock); +  	vdev->config->reset(vdev);  	mutex_lock(&vsock->rx_lock); @@ -669,7 +669,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev)  	mutex_lock(&the_virtio_vsock_mutex);  	the_virtio_vsock = NULL; -	vsock_core_exit();  	mutex_unlock(&the_virtio_vsock_mutex);  	vdev->config->del_vqs(vdev); @@ -702,14 +701,28 @@ static int __init virtio_vsock_init(void)  	virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0);  	if (!virtio_vsock_workqueue)  		return -ENOMEM; +  	ret = register_virtio_driver(&virtio_vsock_driver);  	if (ret) -		destroy_workqueue(virtio_vsock_workqueue); +		goto out_wq; + +	ret = vsock_core_init(&virtio_transport.transport); +	if (ret) +		goto out_vdr; + +	return 0; + +out_vdr: +	unregister_virtio_driver(&virtio_vsock_driver); +out_wq: +	destroy_workqueue(virtio_vsock_workqueue);  	return ret; +  }  static void __exit virtio_vsock_exit(void)  { +	vsock_core_exit();  	unregister_virtio_driver(&virtio_vsock_driver);  	
destroy_workqueue(virtio_vsock_workqueue);  } diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 882d97bdc6bf..550ac9d827fe 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -41,6 +41,8 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,  		cfg80211_sched_dfs_chan_update(rdev);  	} +	schedule_work(&cfg80211_disconnect_work); +  	return err;  } diff --git a/net/wireless/core.h b/net/wireless/core.h index c5d6f3418601..f6b40563dc63 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -445,6 +445,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev);  bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range,  				u32 center_freq_khz, u32 bw_khz); +extern struct work_struct cfg80211_disconnect_work; +  /**   * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable   * @wiphy: the wiphy to validate against diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f741d8376a46..7d34cb884840 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -667,7 +667,7 @@ static void disconnect_work(struct work_struct *work)  	rtnl_unlock();  } -static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); +DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);  /* diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 897483457bf0..f7261fad45c1 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -297,10 +297,8 @@ char *get_fdinfo(int fd, const char *key)  	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);  	fdi = fopen(path, "r"); -	if (!fdi) { -		p_err("can't open fdinfo: %s", strerror(errno)); +	if (!fdi)  		return NULL; -	}  	while ((n = getline(&line, &line_n, fdi)) > 0) {  		char *value; @@ -313,7 +311,6 @@ char *get_fdinfo(int fd, const char *key)  		value = strchr(line, '\t');  		if (!value || !value[1]) { -			p_err("malformed fdinfo!?");  			free(line);  			return NULL;  		} @@ -326,7 +323,6 @@ char *get_fdinfo(int fd, const char *key) 
 		return line;  	} -	p_err("key '%s' not found in fdinfo", key);  	free(line);  	fclose(fdi);  	return NULL; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 2037e3dc864b..1ef1ee2280a2 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -347,6 +347,20 @@ static char **parse_bytes(char **argv, const char *name, unsigned char *val,  	return argv + i;  } +/* on per cpu maps we must copy the provided value on all value instances */ +static void fill_per_cpu_value(struct bpf_map_info *info, void *value) +{ +	unsigned int i, n, step; + +	if (!map_is_per_cpu(info->type)) +		return; + +	n = get_possible_cpus(); +	step = round_up(info->value_size, 8); +	for (i = 1; i < n; i++) +		memcpy(value + i * step, value, info->value_size); +} +  static int parse_elem(char **argv, struct bpf_map_info *info,  		      void *key, void *value, __u32 key_size, __u32 value_size,  		      __u32 *flags, __u32 **value_fd) @@ -426,6 +440,8 @@ static int parse_elem(char **argv, struct bpf_map_info *info,  			argv = parse_bytes(argv, "value", value, value_size);  			if (!argv)  				return -1; + +			fill_per_cpu_value(info, value);  		}  		return parse_elem(argv, info, key, NULL, key_size, value_size, @@ -497,10 +513,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)  				jsonw_uint_field(json_wtr, "owner_prog_type",  						 prog_type);  		} -		if (atoi(owner_jited)) -			jsonw_bool_field(json_wtr, "owner_jited", true); -		else -			jsonw_bool_field(json_wtr, "owner_jited", false); +		if (owner_jited) +			jsonw_bool_field(json_wtr, "owner_jited", +					 !!atoi(owner_jited));  		free(owner_prog_type);  		free(owner_jited); @@ -553,7 +568,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)  		char *owner_prog_type = get_fdinfo(fd, "owner_prog_type");  		char *owner_jited = get_fdinfo(fd, "owner_jited"); -		printf("\n\t"); +		if (owner_prog_type || owner_jited) +			printf("\n\t");  		if (owner_prog_type) {  			unsigned int 
prog_type = atoi(owner_prog_type); @@ -563,10 +579,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)  			else  				printf("owner_prog_type %d  ", prog_type);  		} -		if (atoi(owner_jited)) -			printf("owner jited"); -		else -			printf("owner not jited"); +		if (owner_jited) +			printf("owner%s jited", +			       atoi(owner_jited) ? "" : " not");  		free(owner_prog_type);  		free(owner_jited); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2d1bb7d6ff51..b54ed82b9589 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -78,13 +78,14 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)  static int prog_fd_by_tag(unsigned char *tag)  { -	struct bpf_prog_info info = {}; -	__u32 len = sizeof(info);  	unsigned int id = 0;  	int err;  	int fd;  	while (true) { +		struct bpf_prog_info info = {}; +		__u32 len = sizeof(info); +  		err = bpf_prog_get_next_id(id, &id);  		if (err) {  			p_err("%s", strerror(errno)); diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 315a44fa32af..84fd6f1bf33e 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -13,7 +13,7 @@ static inline unsigned int bpf_num_possible_cpus(void)  	unsigned int start, end, possible_cpus = 0;  	char buff[128];  	FILE *fp; -	int n; +	int len, n, i, j = 0;  	fp = fopen(fcpu, "r");  	if (!fp) { @@ -21,17 +21,27 @@ static inline unsigned int bpf_num_possible_cpus(void)  		exit(1);  	} -	while (fgets(buff, sizeof(buff), fp)) { -		n = sscanf(buff, "%u-%u", &start, &end); -		if (n == 0) { -			printf("Failed to retrieve # possible CPUs!\n"); -			exit(1); -		} else if (n == 1) { -			end = start; +	if (!fgets(buff, sizeof(buff), fp)) { +		printf("Failed to read %s!\n", fcpu); +		exit(1); +	} + +	len = strlen(buff); +	for (i = 0; i <= len; i++) { +		if (buff[i] == ',' || buff[i] == '\0') { +			buff[i] = '\0'; +			n = sscanf(&buff[j], "%u-%u", 
&start, &end); +			if (n <= 0) { +				printf("Failed to retrieve # possible CPUs!\n"); +				exit(1); +			} else if (n == 1) { +				end = start; +			} +			possible_cpus += end - start + 1; +			j = i + 1;  		} -		possible_cpus = start == 0 ? end + 1 : 0; -		break;  	} +  	fclose(fp);  	return possible_cpus; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index a0bd04befe87..91420fa83b08 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -1881,13 +1881,12 @@ static struct btf_raw_test raw_tests[] = {  },  { -	.descr = "func proto (CONST=>TYPEDEF=>FUNC_PROTO)", +	.descr = "func proto (TYPEDEF=>FUNC_PROTO)",  	.raw_types = {  		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */  		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */ -		BTF_CONST_ENC(4),				/* [3] */ -		BTF_TYPEDEF_ENC(NAME_TBD, 5),			/* [4] */ -		BTF_FUNC_PROTO_ENC(0, 2),			/* [5] */ +		BTF_TYPEDEF_ENC(NAME_TBD, 4),			/* [3] */ +		BTF_FUNC_PROTO_ENC(0, 2),			/* [4] */  			BTF_FUNC_PROTO_ARG_ENC(0, 1),  			BTF_FUNC_PROTO_ARG_ENC(0, 2),  		BTF_END_RAW, @@ -1901,8 +1900,6 @@ static struct btf_raw_test raw_tests[] = {  	.key_type_id = 1,  	.value_type_id = 1,  	.max_entries = 4, -	.btf_load_err = true, -	.err_str = "Invalid type_id",  },  { diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 47ed6cef93fb..c9ff2b47bd1c 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,6 @@  # SPDX-License-Identifier: GPL-2.0  # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh  include ../lib.mk diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 1017313e41a8..59caa8f71cd8 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -1,2 +1,2 @@  
CONFIG_NET_NS=y -NF_TABLES_INET=y +CONFIG_NF_TABLES_INET=y diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh new file mode 100755 index 000000000000..8ec76681605c --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -0,0 +1,762 @@ +#!/bin/bash +# +# This test is for basic NAT functionality: snat, dnat, redirect, masquerade. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then +	echo "SKIP: Could not run test without nft tool" +	exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then +	echo "SKIP: Could not run test without ip tool" +	exit $ksft_skip +fi + +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 +ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 + +ip -net ns0 link set lo up +ip -net ns0 link set veth0 up +ip -net ns0 addr add 10.0.1.1/24 dev veth0 +ip -net ns0 addr add dead:1::1/64 dev veth0 + +ip -net ns0 link set veth1 up +ip -net ns0 addr add 10.0.2.1/24 dev veth1 +ip -net ns0 addr add dead:2::1/64 dev veth1 + +for i in 1 2; do +  ip -net ns$i link set lo up +  ip -net ns$i link set eth0 up +  ip -net ns$i addr add 10.0.$i.99/24 dev eth0 +  ip -net ns$i route add default via 10.0.$i.1 +  ip -net ns$i addr add dead:$i::99/64 dev eth0 +  ip -net ns$i route add default via dead:$i::1 +done + +bad_counter() +{ +	local ns=$1 +	local counter=$2 +	local expect=$3 + +	echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2 +	ip netns exec $ns nft list counter inet filter $counter 1>&2 +} + +check_counters() +{ +	ns=$1 +	local lret=0 + +	cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84") +	if [ $? 
-ne 0 ]; then +		bad_counter $ns ns0in "packets 1 bytes 84" +		lret=1 +	fi +	cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84") +	if [ $? -ne 0 ]; then +		bad_counter $ns ns0out "packets 1 bytes 84" +		lret=1 +	fi + +	expect="packets 1 bytes 104" +	cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect") +	if [ $? -ne 0 ]; then +		bad_counter $ns ns0in6 "$expect" +		lret=1 +	fi +	cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect") +	if [ $? -ne 0 ]; then +		bad_counter $ns ns0out6 "$expect" +		lret=1 +	fi + +	return $lret +} + +check_ns0_counters() +{ +	local ns=$1 +	local lret=0 + +	cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0") +	if [ $? -ne 0 ]; then +		bad_counter ns0 ns0in "packets 0 bytes 0" +		lret=1 +	fi + +	cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0") +	if [ $? -ne 0 ]; then +		bad_counter ns0 ns0in6 "packets 0 bytes 0" +		lret=1 +	fi + +	cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0") +	if [ $? -ne 0 ]; then +		bad_counter ns0 ns0out "packets 0 bytes 0" +		lret=1 +	fi +	cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0") +	if [ $? -ne 0 ]; then +		bad_counter ns0 ns0out6 "packets 0 bytes 0" +		lret=1 +	fi + +	for dir in "in" "out" ; do +		expect="packets 1 bytes 84" +		cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns0 $ns$dir "$expect" +			lret=1 +		fi + +		expect="packets 1 bytes 104" +		cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect") +		if [ $? 
-ne 0 ]; then +			bad_counter ns0 $ns$dir6 "$expect" +			lret=1 +		fi +	done + +	return $lret +} + +reset_counters() +{ +	for i in 0 1 2;do +		ip netns exec ns$i nft reset counters inet > /dev/null +	done +} + +test_local_dnat6() +{ +	local lret=0 +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { +	chain output { +		type nat hook output priority 0; policy accept; +		ip6 daddr dead:1::99 dnat to dead:2::99 +	} +} +EOF +	if [ $? -ne 0 ]; then +		echo "SKIP: Could not add ip6 dnat hook" +		return $ksft_skip +	fi + +	# ping netns1, expect rewrite to netns2 +	ip netns exec ns0 ping -q -c 1 dead:1::99 > /dev/null +	if [ $? -ne 0 ]; then +		lret=1 +		echo "ERROR: ping6 failed" +		return $lret +	fi + +	expect="packets 0 bytes 0" +	for dir in "in6" "out6" ; do +		cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns0 ns1$dir "$expect" +			lret=1 +		fi +	done + +	expect="packets 1 bytes 104" +	for dir in "in6" "out6" ; do +		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns0 ns2$dir "$expect" +			lret=1 +		fi +	done + +	# expect 0 count in ns1 +	expect="packets 0 bytes 0" +	for dir in "in6" "out6" ; do +		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns0$dir "$expect" +			lret=1 +		fi +	done + +	# expect 1 packet in ns2 +	expect="packets 1 bytes 104" +	for dir in "in6" "out6" ; do +		cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") +		if [ $?
-ne 0 ]; then +			bad_counter ns2 ns0$dir "$expect" +			lret=1 +		fi +	done + +	test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2" +	ip netns exec ns0 nft flush chain ip6 nat output + +	return $lret +} + +test_local_dnat() +{ +	local lret=0 +ip netns exec ns0 nft -f - <<EOF +table ip nat { +	chain output { +		type nat hook output priority 0; policy accept; +		ip daddr 10.0.1.99 dnat to 10.0.2.99 +	} +} +EOF +	# ping netns1, expect rewrite to netns2 +	ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null +	if [ $? -ne 0 ]; then +		lret=1 +		echo "ERROR: ping failed" +		return $lret +	fi + +	expect="packets 0 bytes 0" +	for dir in "in" "out" ; do +		cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns0 ns1$dir "$expect" +			lret=1 +		fi +	done + +	expect="packets 1 bytes 84" +	for dir in "in" "out" ; do +		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns0 ns2$dir "$expect" +			lret=1 +		fi +	done + +	# expect 0 count in ns1 +	expect="packets 0 bytes 0" +	for dir in "in" "out" ; do +		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns0$dir "$expect" +			lret=1 +		fi +	done + +	# expect 1 packet in ns2 +	expect="packets 1 bytes 84" +	for dir in "in" "out" ; do +		cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns2 ns0$dir "$expect" +			lret=1 +		fi +	done + +	test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2" + +	ip netns exec ns0 nft flush chain ip nat output + +	reset_counters +	ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null +	if [ $? 
-ne 0 ]; then +		lret=1 +		echo "ERROR: ping failed" +		return $lret +	fi + +	expect="packets 1 bytes 84" +	for dir in "in" "out" ; do +		cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns1$dir "$expect" +			lret=1 +		fi +	done +	expect="packets 0 bytes 0" +	for dir in "in" "out" ; do +		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns0 ns2$dir "$expect" +			lret=1 +		fi +	done + +	# expect 1 count in ns1 +	expect="packets 1 bytes 84" +	for dir in "in" "out" ; do +		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns0 ns0$dir "$expect" +			lret=1 +		fi +	done + +	# expect 0 packet in ns2 +	expect="packets 0 bytes 0" +	for dir in "in" "out" ; do +		cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns2 ns2$dir "$expect" +			lret=1 +		fi +	done + +	test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush" + +	return $lret +} + + +test_masquerade6() +{ +	local lret=0 + +	ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + +	ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 +	if [ $? -ne 0 ] ; then +		echo "ERROR: cannot ping ns1 from ns2 via ipv6" +		return 1 +		lret=1 +	fi + +	expect="packets 1 bytes 104" +	for dir in "in6" "out6" ; do +		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns2$dir "$expect" +			lret=1 +		fi + +		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") +		if [ $? 
-ne 0 ]; then +			bad_counter ns2 ns1$dir "$expect" +			lret=1 +		fi +	done + +	reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { +	chain postrouting { +		type nat hook postrouting priority 0; policy accept; +		meta oif veth0 masquerade +	} +} +EOF +	ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 +	if [ $? -ne 0 ] ; then +		echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading" +		lret=1 +	fi + +	# ns1 should have seen packets from ns0, due to masquerade +	expect="packets 1 bytes 104" +	for dir in "in6" "out6" ; do + +		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns0$dir "$expect" +			lret=1 +		fi + +		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns2 ns1$dir "$expect" +			lret=1 +		fi +	done + +	# ns1 should not have seen packets from ns2, due to masquerade +	expect="packets 0 bytes 0" +	for dir in "in6" "out6" ; do +		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns0$dir "$expect" +			lret=1 +		fi + +		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns2 ns1$dir "$expect" +			lret=1 +		fi +	done + +	ip netns exec ns0 nft flush chain ip6 nat postrouting +	if [ $? -ne 0 ]; then +		echo "ERROR: Could not flush ip6 nat postrouting" 1>&2 +		lret=1 +	fi + +	test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2" + +	return $lret +} + +test_masquerade() +{ +	local lret=0 + +	ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +	ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + +	ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 +	if [ $? 
-ne 0 ] ; then +		echo "ERROR: cannot ping ns1 from ns2" +		lret=1 +	fi + +	expect="packets 1 bytes 84" +	for dir in "in" "out" ; do +		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns2$dir "$expect" +			lret=1 +		fi + +		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns2 ns1$dir "$expect" +			lret=1 +		fi +	done + +	reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - <<EOF +table ip nat { +	chain postrouting { +		type nat hook postrouting priority 0; policy accept; +		meta oif veth0 masquerade +	} +} +EOF +	ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 +	if [ $? -ne 0 ] ; then +		echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading" +		lret=1 +	fi + +	# ns1 should have seen packets from ns0, due to masquerade +	expect="packets 1 bytes 84" +	for dir in "in" "out" ; do +		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns0$dir "$expect" +			lret=1 +		fi + +		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns2 ns1$dir "$expect" +			lret=1 +		fi +	done + +	# ns1 should not have seen packets from ns2, due to masquerade +	expect="packets 0 bytes 0" +	for dir in "in" "out" ; do +		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns0$dir "$expect" +			lret=1 +		fi + +		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns2 ns1$dir "$expect" +			lret=1 +		fi +	done + +	ip netns exec ns0 nft flush chain ip nat postrouting +	if [ $?
-ne 0 ]; then +		echo "ERROR: Could not flush nat postrouting" 1>&2 +		lret=1 +	fi + +	test $lret -eq 0 && echo "PASS: IP masquerade for ns2" + +	return $lret +} + +test_redirect6() +{ +	local lret=0 + +	ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + +	ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 +	if [ $? -ne 0 ] ; then +		echo "ERROR: cannot ping ns1 from ns2 via ipv6" +		lret=1 +	fi + +	expect="packets 1 bytes 104" +	for dir in "in6" "out6" ; do +		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns2$dir "$expect" +			lret=1 +		fi + +		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns2 ns1$dir "$expect" +			lret=1 +		fi +	done + +	reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { +	chain prerouting { +		type nat hook prerouting priority 0; policy accept; +		meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect +	} +} +EOF +	ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 +	if [ $? -ne 0 ] ; then +		echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect" +		lret=1 +	fi + +	# ns1 should have seen no packets from ns2, due to redirection +	expect="packets 0 bytes 0" +	for dir in "in6" "out6" ; do +		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns0$dir "$expect" +			lret=1 +		fi +	done + +	# ns0 should have seen packets from ns2, due to masquerade +	expect="packets 1 bytes 104" +	for dir in "in6" "out6" ; do +		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns0$dir "$expect" +			lret=1 +		fi +	done + +	ip netns exec ns0 nft delete table ip6 nat +	if [ $?
-ne 0 ]; then +		echo "ERROR: Could not delete ip6 nat table" 1>&2 +		lret=1 +	fi + +	test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2" + +	return $lret +} + +test_redirect() +{ +	local lret=0 + +	ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +	ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + +	ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 +	if [ $? -ne 0 ] ; then +		echo "ERROR: cannot ping ns1 from ns2" +		lret=1 +	fi + +	expect="packets 1 bytes 84" +	for dir in "in" "out" ; do +		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns2$dir "$expect" +			lret=1 +		fi + +		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns2 ns1$dir "$expect" +			lret=1 +		fi +	done + +	reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - <<EOF +table ip nat { +	chain prerouting { +		type nat hook prerouting priority 0; policy accept; +		meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect +	} +} +EOF +	ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 +	if [ $? -ne 0 ] ; then +		echo "ERROR: cannot ping ns1 from ns2 with active ip redirect" +		lret=1 +	fi + +	# ns1 should have seen no packets from ns2, due to redirection +	expect="packets 0 bytes 0" +	for dir in "in" "out" ; do + +		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns0$dir "$expect" +			lret=1 +		fi +	done + +	# ns0 should have seen packets from ns2, due to masquerade +	expect="packets 1 bytes 84" +	for dir in "in" "out" ; do +		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") +		if [ $? -ne 0 ]; then +			bad_counter ns1 ns0$dir "$expect" +			lret=1 +		fi +	done + +	ip netns exec ns0 nft delete table ip nat +	if [ $? 
-ne 0 ]; then +		echo "ERROR: Could not delete nat table" 1>&2 +		lret=1 +	fi + +	test $lret -eq 0 && echo "PASS: IP redirection for ns2" + +	return $lret +} + + +# ip netns exec ns0 ping -c 1 -q 10.0.$i.99 +for i in 0 1 2; do +ip netns exec ns$i nft -f - <<EOF +table inet filter { +	counter ns0in {} +	counter ns1in {} +	counter ns2in {} + +	counter ns0out {} +	counter ns1out {} +	counter ns2out {} + +	counter ns0in6 {} +	counter ns1in6 {} +	counter ns2in6 {} + +	counter ns0out6 {} +	counter ns1out6 {} +	counter ns2out6 {} + +	map nsincounter { +		type ipv4_addr : counter +		elements = { 10.0.1.1 : "ns0in", +			     10.0.2.1 : "ns0in", +			     10.0.1.99 : "ns1in", +			     10.0.2.99 : "ns2in" } +	} + +	map nsincounter6 { +		type ipv6_addr : counter +		elements = { dead:1::1 : "ns0in6", +			     dead:2::1 : "ns0in6", +			     dead:1::99 : "ns1in6", +			     dead:2::99 : "ns2in6" } +	} + +	map nsoutcounter { +		type ipv4_addr : counter +		elements = { 10.0.1.1 : "ns0out", +			     10.0.2.1 : "ns0out", +			     10.0.1.99: "ns1out", +			     10.0.2.99: "ns2out" } +	} + +	map nsoutcounter6 { +		type ipv6_addr : counter +		elements = { dead:1::1 : "ns0out6", +			     dead:2::1 : "ns0out6", +			     dead:1::99 : "ns1out6", +			     dead:2::99 : "ns2out6" } +	} + +	chain input { +		type filter hook input priority 0; policy accept; +		counter name ip saddr map @nsincounter +		icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6 +	} +	chain output { +		type filter hook output priority 0; policy accept; +		counter name ip daddr map @nsoutcounter +		icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6 +	} +} +EOF +done + +sleep 3 +# test basic connectivity +for i in 1 2; do +  ip netns exec ns0 ping -c 1 -q 10.0.$i.99 > /dev/null +  if [ $? -ne 0 ];then +  	echo "ERROR: Could not reach other namespace(s)" 1>&2 +	ret=1 +  fi + +  ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null +  if [ $? 
-ne 0 ];then +	echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2 +	ret=1 +  fi +  check_counters ns$i +  if [ $? -ne 0 ]; then +	ret=1 +  fi + +  check_ns0_counters ns$i +  if [ $? -ne 0 ]; then +	ret=1 +  fi +  reset_counters +done + +if [ $ret -eq 0 ];then +	echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2" +fi + +reset_counters +test_local_dnat +test_local_dnat6 + +reset_counters +test_masquerade +test_masquerade6 + +reset_counters +test_redirect +test_redirect6 + +for i in 0 1 2; do ip netns del ns$i;done + +exit $ret  | 
