diff options
Diffstat (limited to 'net/core/dev.c')
| -rw-r--r-- | net/core/dev.c | 149 | 
1 files changed, 109 insertions, 40 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index cd0981977f5c..a39354ee1432 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1055,6 +1055,8 @@ rollback:   */  int dev_set_alias(struct net_device *dev, const char *alias, size_t len)  { +	char *new_ifalias; +  	ASSERT_RTNL();  	if (len >= IFALIASZ) @@ -1068,9 +1070,10 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)  		return 0;  	} -	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); -	if (!dev->ifalias) +	new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); +	if (!new_ifalias)  		return -ENOMEM; +	dev->ifalias = new_ifalias;  	strlcpy(dev->ifalias, alias, len+1);  	return len; @@ -1136,8 +1139,8 @@ void dev_load(struct net *net, const char *name)  		no_module = request_module("netdev-%s", name);  	if (no_module && capable(CAP_SYS_MODULE)) {  		if (!request_module("%s", name)) -			pr_err("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated).  Use CAP_NET_ADMIN and alias netdev-%s instead.\n", -			       name); +			pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated).  Use CAP_NET_ADMIN and alias netdev-%s instead.\n", +				name);  	}  }  EXPORT_SYMBOL(dev_load); @@ -1172,6 +1175,7 @@ static int __dev_open(struct net_device *dev)  		net_dmaengine_get();  		dev_set_rx_mode(dev);  		dev_activate(dev); +		add_device_randomness(dev->dev_addr, dev->addr_len);  	}  	return ret; @@ -1632,6 +1636,8 @@ static inline int deliver_skb(struct sk_buff *skb,  			      struct packet_type *pt_prev,  			      struct net_device *orig_dev)  { +	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) +		return -ENOMEM;  	atomic_inc(&skb->users);  	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);  } @@ -1691,7 +1697,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)  	rcu_read_unlock();  } -/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change +/** + * netif_setup_tc - Handle tc mappings on real_num_tx_queues change   * @dev: Network device   * @txq: number of queues available   * @@ -1793,6 +1800,18 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)  EXPORT_SYMBOL(netif_set_real_num_rx_queues);  #endif +/** + * netif_get_num_default_rss_queues - default number of RSS queues + * + * This routine should set an upper limit on the number of RSS queues + * used by default by multiqueue devices. + */ +int netif_get_num_default_rss_queues(void) +{ +	return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus()); +} +EXPORT_SYMBOL(netif_get_num_default_rss_queues); +  static inline void __netif_reschedule(struct Qdisc *q)  {  	struct softnet_data *sd; @@ -2089,25 +2108,6 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)  	return 0;  } -/* - * Try to orphan skb early, right before transmission by the device. - * We cannot orphan skb if tx timestamp is requested or the sk-reference - * is needed on driver level for other reasons, e.g. see net/can/raw.c - */ -static inline void skb_orphan_try(struct sk_buff *skb) -{ -	struct sock *sk = skb->sk; - -	if (sk && !skb_shinfo(skb)->tx_flags) { -		/* skb_tx_hash() wont be able to get sk. -		 * We copy sk_hash into skb->rxhash -		 */ -		if (!skb->rxhash) -			skb->rxhash = sk->sk_hash; -		skb_orphan(skb); -	} -} -  static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)  {  	return ((features & NETIF_F_GEN_CSUM) || @@ -2137,6 +2137,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)  	__be16 protocol = skb->protocol;  	netdev_features_t features = skb->dev->features; +	if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) +		features &= ~NETIF_F_GSO_MASK; +  	if (protocol == htons(ETH_P_8021Q)) {  		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;  		protocol = veh->h_vlan_encapsulated_proto; @@ -2193,8 +2196,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,  		if (!list_empty(&ptype_all))  			dev_queue_xmit_nit(skb, dev); -		skb_orphan_try(skb); -  		features = netif_skb_features(skb);  		if (vlan_tx_tag_present(skb) && @@ -2304,7 +2305,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,  	if (skb->sk && skb->sk->sk_hash)  		hash = skb->sk->sk_hash;  	else -		hash = (__force u16) skb->protocol ^ skb->rxhash; +		hash = (__force u16) skb->protocol;  	hash = jhash_1word(hash, hashrnd);  	return (u16) (((u64) hash * qcount) >> 32) + qoffset; @@ -2465,8 +2466,12 @@ static void skb_update_prio(struct sk_buff *skb)  {  	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); -	if ((!skb->priority) && (skb->sk) && map) -		skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx]; +	if (!skb->priority && skb->sk && map) { +		unsigned int prioidx = skb->sk->sk_cgrp_prioidx; + +		if (prioidx < map->priomap_len) +			skb->priority = map->priomap[prioidx]; +	}  }  #else  #define skb_update_prio(skb) @@ -2476,6 +2481,23 @@ static DEFINE_PER_CPU(int, xmit_recursion);  #define RECURSION_LIMIT 10  /** + *	dev_loopback_xmit - loop back @skb + *	@skb: buffer to transmit + */ +int dev_loopback_xmit(struct sk_buff *skb) +{ +	skb_reset_mac_header(skb); +	__skb_pull(skb, skb_network_offset(skb)); +	skb->pkt_type = PACKET_LOOPBACK; +	skb->ip_summed = CHECKSUM_UNNECESSARY; +	WARN_ON(!skb_dst(skb)); +	skb_dst_force(skb); +	netif_rx_ni(skb); +	return 0; +} +EXPORT_SYMBOL(dev_loopback_xmit); + +/**   *	dev_queue_xmit - transmit a buffer   *	@skb: buffer to transmit   * @@ -3140,6 +3162,23 @@ void netdev_rx_handler_unregister(struct net_device *dev)  }  EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); +/* + * Limit the use of PFMEMALLOC reserves to those protocols that implement + * the special handling of PFMEMALLOC skbs. + */ +static bool skb_pfmemalloc_protocol(struct sk_buff *skb) +{ +	switch (skb->protocol) { +	case __constant_htons(ETH_P_ARP): +	case __constant_htons(ETH_P_IP): +	case __constant_htons(ETH_P_IPV6): +	case __constant_htons(ETH_P_8021Q): +		return true; +	default: +		return false; +	} +} +  static int __netif_receive_skb(struct sk_buff *skb)  {  	struct packet_type *ptype, *pt_prev; @@ -3149,17 +3188,28 @@ static int __netif_receive_skb(struct sk_buff *skb)  	bool deliver_exact = false;  	int ret = NET_RX_DROP;  	__be16 type; +	unsigned long pflags = current->flags;  	net_timestamp_check(!netdev_tstamp_prequeue, skb);  	trace_netif_receive_skb(skb); +	/* +	 * PFMEMALLOC skbs are special, they should +	 * - be delivered to SOCK_MEMALLOC sockets only +	 * - stay away from userspace +	 * - have bounded memory usage +	 * +	 * Use PF_MEMALLOC as this saves us from propagating the allocation +	 * context down to all allocation sites. +	 */ +	if (sk_memalloc_socks() && skb_pfmemalloc(skb)) +		current->flags |= PF_MEMALLOC; +  	/* if we've gotten here through NAPI, check netpoll */  	if (netpoll_receive_skb(skb)) -		return NET_RX_DROP; +		goto out; -	if (!skb->skb_iif) -		skb->skb_iif = skb->dev->ifindex;  	orig_dev = skb->dev;  	skb_reset_network_header(skb); @@ -3171,13 +3221,14 @@ static int __netif_receive_skb(struct sk_buff *skb)  	rcu_read_lock();  another_round: +	skb->skb_iif = skb->dev->ifindex;  	__this_cpu_inc(softnet_data.processed);  	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {  		skb = vlan_untag(skb);  		if (unlikely(!skb)) -			goto out; +			goto unlock;  	}  #ifdef CONFIG_NET_CLS_ACT @@ -3187,6 +3238,9 @@ another_round:  	}  #endif +	if (sk_memalloc_socks() && skb_pfmemalloc(skb)) +		goto skip_taps; +  	list_for_each_entry_rcu(ptype, &ptype_all, list) {  		if (!ptype->dev || ptype->dev == skb->dev) {  			if (pt_prev) @@ -3195,13 +3249,18 @@ another_round:  		}  	} +skip_taps:  #ifdef CONFIG_NET_CLS_ACT  	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);  	if (!skb) -		goto out; +		goto unlock;  ncls:  #endif +	if (sk_memalloc_socks() && skb_pfmemalloc(skb) +				&& !skb_pfmemalloc_protocol(skb)) +		goto drop; +  	rx_handler = rcu_dereference(skb->dev->rx_handler);  	if (vlan_tx_tag_present(skb)) {  		if (pt_prev) { @@ -3211,7 +3270,7 @@ ncls:  		if (vlan_do_receive(&skb, !rx_handler))  			goto another_round;  		else if (unlikely(!skb)) -			goto out; +			goto unlock;  	}  	if (rx_handler) { @@ -3221,7 +3280,7 @@ ncls:  		}  		switch (rx_handler(&skb)) {  		case RX_HANDLER_CONSUMED: -			goto out; +			goto unlock;  		case RX_HANDLER_ANOTHER:  			goto another_round;  		case RX_HANDLER_EXACT: @@ -3249,8 +3308,12 @@ ncls:  	}  	if (pt_prev) { -		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); +		if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) +			ret = -ENOMEM; +		else +			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);  	} else { +drop:  		atomic_long_inc(&skb->dev->rx_dropped);  		kfree_skb(skb);  		/* Jamal, now you will not able to escape explaining @@ -3259,8 +3322,10 @@ ncls:  		ret = NET_RX_DROP;  	} -out: +unlock:  	rcu_read_unlock(); +out: +	tsk_restore_flags(current, pflags, PF_MEMALLOC);  	return ret;  } @@ -4784,6 +4849,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)  	err = ops->ndo_set_mac_address(dev, sa);  	if (!err)  		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); +	add_device_randomness(dev->dev_addr, dev->addr_len);  	return err;  }  EXPORT_SYMBOL(dev_set_mac_address); @@ -5562,6 +5628,7 @@ int register_netdevice(struct net_device *dev)  	dev_init_scheduler(dev);  	dev_hold(dev);  	list_netdevice(dev); +	add_device_randomness(dev->dev_addr, dev->addr_len);  	/* Notify protocols, that a new device appeared. */  	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); @@ -5663,7 +5730,7 @@ int netdev_refcnt_read(const struct net_device *dev)  }  EXPORT_SYMBOL(netdev_refcnt_read); -/* +/**   * netdev_wait_allrefs - wait until all references are gone.   *   * This is called when unregistering network devices. @@ -5925,6 +5992,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,  	dev_net_set(dev, &init_net);  	dev->gso_max_size = GSO_MAX_SIZE; +	dev->gso_max_segs = GSO_MAX_SEGS;  	INIT_LIST_HEAD(&dev->napi_list);  	INIT_LIST_HEAD(&dev->unreg_list); @@ -6300,7 +6368,8 @@ static struct hlist_head *netdev_create_hash(void)  /* Initialize per network namespace state */  static int __net_init netdev_init(struct net *net)  { -	INIT_LIST_HEAD(&net->dev_base_head); +	if (net != &init_net) +		INIT_LIST_HEAD(&net->dev_base_head);  	net->dev_name_head = netdev_create_hash();  	if (net->dev_name_head == NULL)  | 
