Diffstat (limited to 'net/core/dev.c')
-rw-r--r--   net/core/dev.c   206
1 file changed, 156 insertions(+), 50 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 1be7cb73a602..be97c440ecd5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -462,7 +462,9 @@ EXPORT_PER_CPU_SYMBOL(softnet_data);
  * PP consumers must pay attention to run APIs in the appropriate context
  * (e.g. NAPI context).
  */
-DEFINE_PER_CPU(struct page_pool *, system_page_pool);
+DEFINE_PER_CPU(struct page_pool_bh, system_page_pool) = {
+	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
 
 #ifdef CONFIG_LOCKDEP
 /*
@@ -828,7 +830,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id)
 	dev_hold(dev);
 	rcu_read_unlock();
 
-	dev = __netdev_put_lock(dev);
+	dev = __netdev_put_lock(dev, net);
 	if (!dev)
 		return NULL;
 
@@ -1039,10 +1041,11 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
  * This helper is intended for locking net_device after it has been looked up
  * using a lockless lookup helper. Lock prevents the instance from going away.
  */
-struct net_device *__netdev_put_lock(struct net_device *dev)
+struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net)
 {
 	netdev_lock(dev);
-	if (dev->reg_state > NETREG_REGISTERED) {
+	if (dev->reg_state > NETREG_REGISTERED ||
+	    dev->moving_ns || !net_eq(dev_net(dev), net)) {
 		netdev_unlock(dev);
 		dev_put(dev);
 		return NULL;
@@ -1051,6 +1054,20 @@ struct net_device *__netdev_put_lock(struct net_device *dev)
 	return dev;
 }
 
+static struct net_device *
+__netdev_put_lock_ops_compat(struct net_device *dev, struct net *net)
+{
+	netdev_lock_ops_compat(dev);
+	if (dev->reg_state > NETREG_REGISTERED ||
+	    dev->moving_ns || !net_eq(dev_net(dev), net)) {
+		netdev_unlock_ops_compat(dev);
+		dev_put(dev);
+		return NULL;
+	}
+	dev_put(dev);
+	return dev;
+}
+
 /**
  *	netdev_get_by_index_lock() - find a device by its ifindex
  *	@net: the applicable net namespace
@@ -1070,7 +1087,19 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex)
 	if (!dev)
 		return NULL;
 
-	return __netdev_put_lock(dev);
+	return __netdev_put_lock(dev, net);
+}
+
+struct net_device *
+netdev_get_by_index_lock_ops_compat(struct net *net, int ifindex)
+{
+	struct net_device *dev;
+
+	dev = dev_get_by_index(net, ifindex);
+	if (!dev)
+		return NULL;
+
+	return __netdev_put_lock_ops_compat(dev, net);
 }
 
 struct net_device *
@@ -1090,7 +1119,32 @@ netdev_xa_find_lock(struct net *net, struct net_device *dev,
 		dev_hold(dev);
 		rcu_read_unlock();
 
-		dev = __netdev_put_lock(dev);
+		dev = __netdev_put_lock(dev, net);
+		if (dev)
+			return dev;
+
+		(*index)++;
+	} while (true);
+}
+
+struct net_device *
+netdev_xa_find_lock_ops_compat(struct net *net, struct net_device *dev,
+			       unsigned long *index)
+{
+	if (dev)
+		netdev_unlock_ops_compat(dev);
+
+	do {
+		rcu_read_lock();
+		dev = xa_find(&net->dev_by_index, index, ULONG_MAX, XA_PRESENT);
+		if (!dev) {
+			rcu_read_unlock();
+			return NULL;
+		}
+		dev_hold(dev);
+		rcu_read_unlock();
+
+		dev = __netdev_put_lock_ops_compat(dev, net);
 		if (dev)
 			return dev;
 
@@ -3542,9 +3596,10 @@ out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
+#ifdef CONFIG_NET_CRC32C
 int skb_crc32c_csum_help(struct sk_buff *skb)
 {
-	__le32 crc32c_csum;
+	u32 crc;
 	int ret = 0, offset, start;
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -3572,15 +3627,14 @@ int skb_crc32c_csum_help(struct sk_buff *skb)
 	if (ret)
 		goto out;
 
-	crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
-						  skb->len - start, ~(__u32)0,
-						  crc32c_csum_stub));
-	*(__le32 *)(skb->data + offset) = crc32c_csum;
+	crc = ~skb_crc32c(skb, start, skb->len - start, ~0);
+	*(__le32 *)(skb->data + offset) = cpu_to_le32(crc);
 	skb_reset_csum_not_inet(skb);
 out:
 	return ret;
 }
 EXPORT_SYMBOL(skb_crc32c_csum_help);
+#endif /* CONFIG_NET_CRC32C */
 
 __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
 {
@@ -3844,12 +3898,42 @@ sw_checksum:
 }
 EXPORT_SYMBOL(skb_csum_hwoffload_help);
 
+static struct sk_buff *validate_xmit_unreadable_skb(struct sk_buff *skb,
+						    struct net_device *dev)
+{
+	struct skb_shared_info *shinfo;
+	struct net_iov *niov;
+
+	if (likely(skb_frags_readable(skb)))
+		goto out;
+
+	if (!dev->netmem_tx)
+		goto out_free;
+
+	shinfo = skb_shinfo(skb);
+
+	if (shinfo->nr_frags > 0) {
+		niov = netmem_to_net_iov(skb_frag_netmem(&shinfo->frags[0]));
+		if (net_is_devmem_iov(niov) &&
+		    net_devmem_iov_binding(niov)->dev != dev)
+			goto out_free;
+	}
+
+out:
+	return skb;
+
+out_free:
+	kfree_skb(skb);
+	return NULL;
+}
+
 static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again)
 {
 	netdev_features_t features;
 
-	if (!skb_frags_readable(skb))
-		goto out_kfree_skb;
+	skb = validate_xmit_unreadable_skb(skb, dev);
+	if (unlikely(!skb))
+		goto out_null;
 
 	features = netif_skb_features(skb);
 	skb = validate_xmit_vlan(skb, features);
@@ -4731,6 +4815,7 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 	}
 
 use_local_napi:
+	DEBUG_NET_WARN_ON_ONCE(!list_empty(&napi->poll_list));
 	list_add_tail(&napi->poll_list, &sd->poll_list);
 	WRITE_ONCE(napi->list_owner, smp_processor_id());
 	/* If not called from net_rx_action()
@@ -4946,7 +5031,8 @@ static void rps_trigger_softirq(void *data)
 	struct softnet_data *sd = data;
 
 	____napi_schedule(sd, &sd->backlog);
-	sd->received_rps++;
+	/* Pairs with READ_ONCE() in softnet_seq_show() */
+	WRITE_ONCE(sd->received_rps, sd->received_rps + 1);
 }
 
 #endif /* CONFIG_RPS */
@@ -5031,7 +5117,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
 	rcu_read_lock();
 	fl = rcu_dereference(sd->flow_limit);
 	if (fl) {
-		new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
+		new_flow = hash_32(skb_get_hash(skb), fl->log_buckets);
 		old_flow = fl->history[fl->history_head];
 		fl->history[fl->history_head] = new_flow;
 
@@ -5042,7 +5128,8 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
 			fl->buckets[old_flow]--;
 
 		if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
-			fl->count++;
+			/* Pairs with READ_ONCE() in softnet_seq_show() */
+			WRITE_ONCE(fl->count, fl->count + 1);
 			rcu_read_unlock();
 			return true;
 		}
@@ -5238,7 +5325,10 @@ netif_skb_check_for_xdp(struct sk_buff **pskb, const struct bpf_prog *prog)
 	struct sk_buff *skb = *pskb;
 	int err, hroom, troom;
 
-	if (!skb_cow_data_for_xdp(this_cpu_read(system_page_pool), pskb, prog))
+	local_lock_nested_bh(&system_page_pool.bh_lock);
+	err = skb_cow_data_for_xdp(this_cpu_read(system_page_pool.pool), pskb, prog);
+	local_unlock_nested_bh(&system_page_pool.bh_lock);
+	if (!err)
 		return 0;
 
 	/* In case we have to go down the path and also linearize,
@@ -7387,9 +7477,14 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 
 	work = __napi_poll(n, &do_repoll);
 
-	if (do_repoll)
+	if (do_repoll) {
+#if defined(CONFIG_DEBUG_NET)
+		if (unlikely(!napi_is_scheduled(n)))
+			pr_crit("repoll requested for device %s %ps but napi is not scheduled.\n",
+				n->dev->name, n->poll);
+#endif
 		list_add_tail(&n->poll_list, repoll);
-
+	}
 	netpoll_poll_unlock(have);
 
 	return work;
@@ -7515,7 +7610,8 @@ start:
 		 */
 		if (unlikely(budget <= 0 ||
			     time_after_eq(jiffies, time_limit))) {
-			sd->time_squeeze++;
+			/* Pairs with READ_ONCE() in softnet_seq_show() */
+			WRITE_ONCE(sd->time_squeeze, sd->time_squeeze + 1);
 			break;
 		}
 	}
@@ -9188,23 +9284,20 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
 
 		dev_change_rx_flags(dev, IFF_PROMISC);
 	}
-	if (notify)
+	if (notify) {
+		/* The ops lock is only required to ensure consistent locking
+		 * for `NETDEV_CHANGE` notifiers. This function is sometimes
+		 * called without the lock, even for devices that are ops
+		 * locked, such as in `dev_uc_sync_multiple` when using
+		 * bonding or teaming.
+		 */
+		netdev_ops_assert_locked(dev);
 		__dev_notify_flags(dev, old_flags, IFF_PROMISC, 0, NULL);
+	}
 	return 0;
 }
 
-/**
- *	dev_set_promiscuity	- update promiscuity count on a device
- *	@dev: device
- *	@inc: modifier
- *
- *	Add or remove promiscuity from a device. While the count in the device
- *	remains above zero the interface remains promiscuous. Once it hits zero
- *	the device reverts back to normal filtering operation. A negative inc
- *	value is used to drop promiscuity on the device.
- *	Return 0 if successful or a negative errno code on error.
- */
-int dev_set_promiscuity(struct net_device *dev, int inc)
+int netif_set_promiscuity(struct net_device *dev, int inc)
 {
 	unsigned int old_flags = dev->flags;
 	int err;
@@ -9216,7 +9309,6 @@ int dev_set_promiscuity(struct net_device *dev, int inc)
 		dev_set_rx_mode(dev);
 	return err;
 }
-EXPORT_SYMBOL(dev_set_promiscuity);
 
 int netif_set_allmulti(struct net_device *dev, int inc, bool notify)
 {
@@ -9577,7 +9669,7 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
 }
 EXPORT_SYMBOL(dev_pre_changeaddr_notify);
 
-int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa,
+int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
 			  struct netlink_ext_ack *extack)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
@@ -9585,15 +9677,15 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa,
 
 	if (!ops->ndo_set_mac_address)
 		return -EOPNOTSUPP;
-	if (sa->sa_family != dev->type)
+	if (ss->ss_family != dev->type)
 		return -EINVAL;
 	if (!netif_device_present(dev))
 		return -ENODEV;
-	err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
+	err = dev_pre_changeaddr_notify(dev, ss->__data, extack);
 	if (err)
 		return err;
-	if (memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) {
-		err = ops->ndo_set_mac_address(dev, sa);
+	if (memcmp(dev->dev_addr, ss->__data, dev->addr_len)) {
+		err = ops->ndo_set_mac_address(dev, ss);
 		if (err)
 			return err;
 	}
@@ -9605,6 +9697,7 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa,
 
 DECLARE_RWSEM(dev_addr_sem);
 
+/* "sa" is a true struct sockaddr with limited "sa_data" member. */
 int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
 {
 	size_t size = sizeof(sa->sa_data_min);
@@ -9875,6 +9968,7 @@ int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
 
 	return dev->netdev_ops->ndo_bpf(dev, bpf);
 }
+EXPORT_SYMBOL_GPL(netif_xdp_propagate);
 
 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
 {
@@ -10405,7 +10499,7 @@ static void dev_index_release(struct net *net, int ifindex)
 static bool from_cleanup_net(void)
 {
 #ifdef CONFIG_NET_NS
-	return current == cleanup_net_task;
+	return current == READ_ONCE(cleanup_net_task);
#else
 	return false;
 #endif
@@ -10453,6 +10547,7 @@ static void netdev_sync_lower_features(struct net_device *upper,
 		if (!(features & feature) && (lower->features & feature)) {
 			netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
 				   &feature, lower->name);
+			netdev_lock_ops(lower);
 			lower->wanted_features &= ~feature;
 			__netdev_update_features(lower);
 
@@ -10461,6 +10556,7 @@ static void netdev_sync_lower_features(struct net_device *upper,
 					    &feature, lower->name);
 			else
 				netdev_features_change(lower);
+			netdev_unlock_ops(lower);
 		}
 	}
 }
@@ -11057,8 +11153,7 @@ int register_netdevice(struct net_device *dev)
 	 *	Prevent userspace races by waiting until the network
 	 *	device is fully setup before sending notifications.
 	 */
-	if (!dev->rtnl_link_ops ||
-	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+	if (!(dev->rtnl_link_ops && dev->rtnl_link_initializing))
 		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL);
 
 out:
@@ -11966,9 +12061,9 @@ void unregister_netdevice_many_notify(struct list_head *head,
 		struct sk_buff *skb = NULL;
 
 		/* Shutdown queueing discipline. */
+		netdev_lock_ops(dev);
 		dev_shutdown(dev);
 		dev_tcx_uninstall(dev);
-		netdev_lock_ops(dev);
 		dev_xdp_uninstall(dev);
 		dev_memory_provider_uninstall(dev);
 		netdev_unlock_ops(dev);
@@ -11981,8 +12076,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
 		 */
 		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 
-		if (!dev->rtnl_link_ops ||
-		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+		if (!(dev->rtnl_link_ops && dev->rtnl_link_initializing))
 			skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
 						     GFP_KERNEL, NULL, 0,
 						     portid, nlh);
@@ -12156,12 +12250,18 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
 	netif_close(dev);
 	/* And unlink it from device chain */
 	unlist_netdevice(dev);
-	netdev_unlock_ops(dev);
+
+	if (!netdev_need_ops_lock(dev))
+		netdev_lock(dev);
+	dev->moving_ns = true;
+	netdev_unlock(dev);
 
 	synchronize_net();
 
 	/* Shutdown queueing discipline. */
+	netdev_lock_ops(dev);
 	dev_shutdown(dev);
+	netdev_unlock_ops(dev);
 
 	/* Notify protocols, that we are about to destroy
 	 * this device. They should clean all the things.
@@ -12192,7 +12292,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
 	move_netdevice_notifiers_dev_net(dev, net);
 
 	/* Actually switch the network namespace */
+	netdev_lock(dev);
 	dev_net_set(dev, net);
+	netdev_unlock(dev);
 	dev->ifindex = new_ifindex;
 
 	if (new_name[0]) {
@@ -12218,7 +12320,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
 
 	err = netdev_change_owner(dev, net_old, net);
 	WARN_ON(err);
-	netdev_lock_ops(dev);
+	netdev_lock(dev);
+	dev->moving_ns = false;
+	if (!netdev_need_ops_lock(dev))
+		netdev_unlock(dev);
+
 	/* Add the device back in the hashes */
 	list_netdevice(dev);
 	/* Notify protocols, that a new device appeared. */
@@ -12629,7 +12735,7 @@ static int net_page_pool_create(int cpuid)
 		return err;
 	}
 
-	per_cpu(system_page_pool, cpuid) = pp_ptr;
+	per_cpu(system_page_pool.pool, cpuid) = pp_ptr;
 #endif
 	return 0;
 }
@@ -12759,13 +12865,13 @@ out:
 		for_each_possible_cpu(i) {
 			struct page_pool *pp_ptr;
 
-			pp_ptr = per_cpu(system_page_pool, i);
+			pp_ptr = per_cpu(system_page_pool.pool, i);
 			if (!pp_ptr)
 				continue;
 
 			xdp_unreg_page_pool(pp_ptr);
 			page_pool_destroy(pp_ptr);
-			per_cpu(system_page_pool, i) = NULL;
+			per_cpu(system_page_pool.pool, i) = NULL;
 		}
 	}
 
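Note on the system_page_pool change above: the bare per-CPU page_pool pointer is now wrapped together with a local_lock_t (bh_lock), and BH-context users are expected to take that lock around every access to the .pool member, as netif_skb_check_for_xdp does in this diff. A minimal sketch of that access pattern follows; the helper function name is hypothetical and not part of the patch, while page_pool_alloc_pages() is the regular page_pool allocation API.

/* Hypothetical consumer, shown only to illustrate the locking pattern. */
static struct page *system_pool_alloc_sketch(void)
{
	struct page *page;

	/*
	 * bh_lock serializes per-CPU pool users: on PREEMPT_RT it is a real
	 * per-CPU lock, elsewhere it is mostly a lockdep annotation since
	 * BHs are already disabled in this context.
	 */
	local_lock_nested_bh(&system_page_pool.bh_lock);
	page = page_pool_alloc_pages(this_cpu_read(system_page_pool.pool),
				     GFP_ATOMIC);
	local_unlock_nested_bh(&system_page_pool.bh_lock);

	return page;
}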
