summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c485
-rw-r--r--net/core/dev_mcast.c24
-rw-r--r--net/core/ethtool.c37
-rw-r--r--net/core/fib_rules.c6
-rw-r--r--net/core/filter.c1
-rw-r--r--net/core/flow.c2
-rw-r--r--net/core/iovec.c2
-rw-r--r--net/core/link_watch.c11
-rw-r--r--net/core/neighbour.c17
-rw-r--r--net/core/net-sysfs.c26
-rw-r--r--net/core/net_namespace.c3
-rw-r--r--net/core/netpoll.c24
-rw-r--r--net/core/pktgen.c71
-rw-r--r--net/core/rtnetlink.c29
-rw-r--r--net/core/skbuff.c162
-rw-r--r--net/core/sock.c8
-rw-r--r--net/core/sysctl_net_core.c39
-rw-r--r--net/core/user_dma.c2
18 files changed, 630 insertions, 319 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 582963077877..6bf217da9d8f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -90,6 +90,7 @@
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
@@ -119,6 +120,12 @@
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
#include "net-sysfs.h"
@@ -254,9 +261,9 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
DEFINE_PER_CPU(struct softnet_data, softnet_data);
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#ifdef CONFIG_LOCKDEP
/*
- * register_netdevice() inits dev->_xmit_lock and sets lockdep class
+ * register_netdevice() inits txq->_xmit_lock and sets lockdep class
* according to dev->type
*/
static const unsigned short netdev_lock_type[] =
@@ -294,6 +301,7 @@ static const char *netdev_lock_name[] =
"_xmit_NONE"};
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
+static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
@@ -306,8 +314,8 @@ static inline unsigned short netdev_lock_pos(unsigned short dev_type)
return ARRAY_SIZE(netdev_lock_type) - 1;
}
-static inline void netdev_set_lockdep_class(spinlock_t *lock,
- unsigned short dev_type)
+static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
+ unsigned short dev_type)
{
int i;
@@ -315,9 +323,22 @@ static inline void netdev_set_lockdep_class(spinlock_t *lock,
lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
netdev_lock_name[i]);
}
+
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
+{
+ int i;
+
+ i = netdev_lock_pos(dev->type);
+ lockdep_set_class_and_name(&dev->addr_list_lock,
+ &netdev_addr_lock_key[i],
+ netdev_lock_name[i]);
+}
#else
-static inline void netdev_set_lockdep_class(spinlock_t *lock,
- unsigned short dev_type)
+static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
+ unsigned short dev_type)
+{
+}
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif
@@ -453,7 +474,7 @@ static int netdev_boot_setup_add(char *name, struct ifmap *map)
for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
memset(s[i].name, 0, sizeof(s[i].name));
- strcpy(s[i].name, name);
+ strlcpy(s[i].name, name, IFNAMSIZ);
memcpy(&s[i].map, map, sizeof(s[i].map));
break;
}
@@ -478,7 +499,7 @@ int netdev_boot_setup_check(struct net_device *dev)
for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
- !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
+ !strcmp(dev->name, s[i].name)) {
dev->irq = s[i].map.irq;
dev->base_addr = s[i].map.base_addr;
dev->mem_start = s[i].map.mem_start;
@@ -960,6 +981,12 @@ void netdev_state_change(struct net_device *dev)
}
}
+void netdev_bonding_change(struct net_device *dev)
+{
+ call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
+}
+EXPORT_SYMBOL(netdev_bonding_change);
+
/**
* dev_load - load a network module
* @net: the applicable net namespace
@@ -1116,6 +1143,29 @@ int dev_close(struct net_device *dev)
}
+/**
+ * dev_disable_lro - disable Large Receive Offload on a device
+ * @dev: device
+ *
+ * Disable Large Receive Offload (LRO) on a net device. Must be
+ * called under RTNL. This is needed if received packets may be
+ * forwarded to another interface.
+ */
+void dev_disable_lro(struct net_device *dev)
+{
+ if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
+ dev->ethtool_ops->set_flags) {
+ u32 flags = dev->ethtool_ops->get_flags(dev);
+ if (flags & ETH_FLAG_LRO) {
+ flags &= ~ETH_FLAG_LRO;
+ dev->ethtool_ops->set_flags(dev, flags);
+ }
+ }
+ WARN_ON(dev->features & NETIF_F_LRO);
+}
+EXPORT_SYMBOL(dev_disable_lro);
+
+
static int dev_boot_phase = 1;
/*
@@ -1289,16 +1339,19 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
}
-void __netif_schedule(struct net_device *dev)
+void __netif_schedule(struct Qdisc *q)
{
- if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
- unsigned long flags;
+ if (WARN_ON_ONCE(q == &noop_qdisc))
+ return;
+
+ if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
struct softnet_data *sd;
+ unsigned long flags;
local_irq_save(flags);
sd = &__get_cpu_var(softnet_data);
- dev->next_sched = sd->output_queue;
- sd->output_queue = dev;
+ q->next_sched = sd->output_queue;
+ sd->output_queue = q;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
}
@@ -1362,6 +1415,29 @@ void netif_device_attach(struct net_device *dev)
}
EXPORT_SYMBOL(netif_device_attach);
+static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+{
+ return ((features & NETIF_F_GEN_CSUM) ||
+ ((features & NETIF_F_IP_CSUM) &&
+ protocol == htons(ETH_P_IP)) ||
+ ((features & NETIF_F_IPV6_CSUM) &&
+ protocol == htons(ETH_P_IPV6)));
+}
+
+static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
+{
+ if (can_checksum_protocol(dev->features, skb->protocol))
+ return true;
+
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+ if (can_checksum_protocol(dev->features & dev->vlan_features,
+ veh->h_vlan_encapsulated_proto))
+ return true;
+ }
+
+ return false;
+}
/*
* Invalidate hardware checksum when packet is to be mangled, and
@@ -1542,7 +1618,8 @@ static int dev_gso_segment(struct sk_buff *skb)
return 0;
}
-int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+ struct netdev_queue *txq)
{
if (likely(!skb->next)) {
if (!list_empty(&ptype_all))
@@ -1571,9 +1648,7 @@ gso:
skb->next = nskb;
return rc;
}
- if (unlikely((netif_queue_stopped(dev) ||
- netif_subqueue_stopped(dev, skb)) &&
- skb->next))
+ if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);
@@ -1584,6 +1659,73 @@ out_kfree_skb:
return 0;
}
+static u32 simple_tx_hashrnd;
+static int simple_tx_hashrnd_initialized = 0;
+
+static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
+{
+ u32 addr1, addr2, ports;
+ u32 hash, ihl;
+ u8 ip_proto;
+
+ if (unlikely(!simple_tx_hashrnd_initialized)) {
+ get_random_bytes(&simple_tx_hashrnd, 4);
+ simple_tx_hashrnd_initialized = 1;
+ }
+
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_IP):
+ ip_proto = ip_hdr(skb)->protocol;
+ addr1 = ip_hdr(skb)->saddr;
+ addr2 = ip_hdr(skb)->daddr;
+ ihl = ip_hdr(skb)->ihl;
+ break;
+ case __constant_htons(ETH_P_IPV6):
+ ip_proto = ipv6_hdr(skb)->nexthdr;
+ addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
+ addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
+ ihl = (40 >> 2);
+ break;
+ default:
+ return 0;
+ }
+
+
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_DCCP:
+ case IPPROTO_ESP:
+ case IPPROTO_AH:
+ case IPPROTO_SCTP:
+ case IPPROTO_UDPLITE:
+ ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
+ break;
+
+ default:
+ ports = 0;
+ break;
+ }
+
+ hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
+
+ return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
+}
+
+static struct netdev_queue *dev_pick_tx(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ u16 queue_index = 0;
+
+ if (dev->select_queue)
+ queue_index = dev->select_queue(dev, skb);
+ else if (dev->real_num_tx_queues > 1)
+ queue_index = simple_tx_hash(dev, skb);
+
+ skb_set_queue_mapping(skb, queue_index);
+ return netdev_get_tx_queue(dev, queue_index);
+}
+
/**
* dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
@@ -1609,10 +1751,10 @@ out_kfree_skb:
* the BH enable code must have IRQs enabled so that it will not deadlock.
* --BLG
*/
-
int dev_queue_xmit(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
+ struct netdev_queue *txq;
struct Qdisc *q;
int rc = -ENOMEM;
@@ -1640,55 +1782,34 @@ int dev_queue_xmit(struct sk_buff *skb)
if (skb->ip_summed == CHECKSUM_PARTIAL) {
skb_set_transport_header(skb, skb->csum_start -
skb_headroom(skb));
-
- if (!(dev->features & NETIF_F_GEN_CSUM) &&
- !((dev->features & NETIF_F_IP_CSUM) &&
- skb->protocol == htons(ETH_P_IP)) &&
- !((dev->features & NETIF_F_IPV6_CSUM) &&
- skb->protocol == htons(ETH_P_IPV6)))
- if (skb_checksum_help(skb))
- goto out_kfree_skb;
+ if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
+ goto out_kfree_skb;
}
gso:
- spin_lock_prefetch(&dev->queue_lock);
-
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
rcu_read_lock_bh();
- /* Updates of qdisc are serialized by queue_lock.
- * The struct Qdisc which is pointed to by qdisc is now a
- * rcu structure - it may be accessed without acquiring
- * a lock (but the structure may be stale.) The freeing of the
- * qdisc will be deferred until it's known that there are no
- * more references to it.
- *
- * If the qdisc has an enqueue function, we still need to
- * hold the queue_lock before calling it, since queue_lock
- * also serializes access to the device queue.
- */
+ txq = dev_pick_tx(dev, skb);
+ q = rcu_dereference(txq->qdisc);
- q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
if (q->enqueue) {
- /* Grab device queue */
- spin_lock(&dev->queue_lock);
- q = dev->qdisc;
- if (q->enqueue) {
- /* reset queue_mapping to zero */
- skb_set_queue_mapping(skb, 0);
- rc = q->enqueue(skb, q);
- qdisc_run(dev);
- spin_unlock(&dev->queue_lock);
-
- rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
- goto out;
- }
- spin_unlock(&dev->queue_lock);
+ spinlock_t *root_lock = qdisc_root_lock(q);
+
+ spin_lock(root_lock);
+
+ rc = qdisc_enqueue_root(skb, q);
+ qdisc_run(q);
+
+ spin_unlock(root_lock);
+
+ rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
+ goto out;
}
/* The device has no queue. Common case for software devices:
@@ -1706,19 +1827,18 @@ gso:
if (dev->flags & IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */
- if (dev->xmit_lock_owner != cpu) {
+ if (txq->xmit_lock_owner != cpu) {
- HARD_TX_LOCK(dev, cpu);
+ HARD_TX_LOCK(dev, txq, cpu);
- if (!netif_queue_stopped(dev) &&
- !netif_subqueue_stopped(dev, skb)) {
+ if (!netif_tx_queue_stopped(txq)) {
rc = 0;
- if (!dev_hard_start_xmit(skb, dev)) {
- HARD_TX_UNLOCK(dev);
+ if (!dev_hard_start_xmit(skb, dev, txq)) {
+ HARD_TX_UNLOCK(dev, txq);
goto out;
}
}
- HARD_TX_UNLOCK(dev);
+ HARD_TX_UNLOCK(dev, txq);
if (net_ratelimit())
printk(KERN_CRIT "Virtual device %s asks to "
"queue packet!\n", dev->name);
@@ -1862,7 +1982,7 @@ static void net_tx_action(struct softirq_action *h)
}
if (sd->output_queue) {
- struct net_device *head;
+ struct Qdisc *head;
local_irq_disable();
head = sd->output_queue;
@@ -1870,17 +1990,20 @@ static void net_tx_action(struct softirq_action *h)
local_irq_enable();
while (head) {
- struct net_device *dev = head;
+ struct Qdisc *q = head;
+ spinlock_t *root_lock;
+
head = head->next_sched;
smp_mb__before_clear_bit();
- clear_bit(__LINK_STATE_SCHED, &dev->state);
+ clear_bit(__QDISC_STATE_SCHED, &q->state);
- if (spin_trylock(&dev->queue_lock)) {
- qdisc_run(dev);
- spin_unlock(&dev->queue_lock);
+ root_lock = qdisc_root_lock(q);
+ if (spin_trylock(root_lock)) {
+ qdisc_run(q);
+ spin_unlock(root_lock);
} else {
- netif_schedule(dev);
+ __netif_schedule(q);
}
}
}
@@ -1961,10 +2084,11 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
*/
static int ing_filter(struct sk_buff *skb)
{
- struct Qdisc *q;
struct net_device *dev = skb->dev;
- int result = TC_ACT_OK;
u32 ttl = G_TC_RTTL(skb->tc_verd);
+ struct netdev_queue *rxq;
+ int result = TC_ACT_OK;
+ struct Qdisc *q;
if (MAX_RED_LOOP < ttl++) {
printk(KERN_WARNING
@@ -1976,10 +2100,14 @@ static int ing_filter(struct sk_buff *skb)
skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
- spin_lock(&dev->ingress_lock);
- if ((q = dev->qdisc_ingress) != NULL)
- result = q->enqueue(skb, q);
- spin_unlock(&dev->ingress_lock);
+ rxq = &dev->rx_queue;
+
+ q = rxq->qdisc;
+ if (q) {
+ spin_lock(qdisc_lock(q));
+ result = qdisc_enqueue_root(skb, q);
+ spin_unlock(qdisc_lock(q));
+ }
return result;
}
@@ -1988,7 +2116,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
- if (!skb->dev->qdisc_ingress)
+ if (!skb->dev->rx_queue.qdisc)
goto out;
if (*pt_prev) {
@@ -2012,6 +2140,33 @@ out:
}
#endif
+/*
+ * netif_nit_deliver - deliver received packets to network taps
+ * @skb: buffer
+ *
+ * This function is used to deliver incoming packets to network
+ * taps. It should be used when the normal netif_receive_skb path
+ * is bypassed, for example because of VLAN acceleration.
+ */
+void netif_nit_deliver(struct sk_buff *skb)
+{
+ struct packet_type *ptype;
+
+ if (list_empty(&ptype_all))
+ return;
+
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb->mac_len = skb->network_header - skb->mac_header;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &ptype_all, list) {
+ if (!ptype->dev || ptype->dev == skb->dev)
+ deliver_skb(skb, ptype, skb->dev);
+ }
+ rcu_read_unlock();
+}
+
/**
* netif_receive_skb - process receive buffer from network
* @skb: buffer to process
@@ -2059,6 +2214,10 @@ int netif_receive_skb(struct sk_buff *skb)
rcu_read_lock();
+ /* Don't receive packets in an exiting network namespace */
+ if (!net_alive(dev_net(skb->dev)))
+ goto out;
+
#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
@@ -2747,16 +2906,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
return 0;
}
-static void __dev_set_promiscuity(struct net_device *dev, int inc)
+static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
ASSERT_RTNL();
- if ((dev->promiscuity += inc) == 0)
- dev->flags &= ~IFF_PROMISC;
- else
- dev->flags |= IFF_PROMISC;
+ dev->flags |= IFF_PROMISC;
+ dev->promiscuity += inc;
+ if (dev->promiscuity == 0) {
+ /*
+ * Avoid overflow.
+ * If inc causes overflow, untouch promisc and return error.
+ */
+ if (inc < 0)
+ dev->flags &= ~IFF_PROMISC;
+ else {
+ dev->promiscuity -= inc;
+ printk(KERN_WARNING "%s: promiscuity touches roof, "
+ "set promiscuity failed, promiscuity feature "
+ "of device might be broken.\n", dev->name);
+ return -EOVERFLOW;
+ }
+ }
if (dev->flags != old_flags) {
printk(KERN_INFO "device %s %s promiscuous mode\n",
dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
@@ -2774,6 +2946,7 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc)
if (dev->change_rx_flags)
dev->change_rx_flags(dev, IFF_PROMISC);
}
+ return 0;
}
/**
@@ -2785,14 +2958,19 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc)
* remains above zero the interface remains promiscuous. Once it hits zero
* the device reverts back to normal filtering operation. A negative inc
* value is used to drop promiscuity on the device.
+ * Return 0 if successful or a negative errno code on error.
*/
-void dev_set_promiscuity(struct net_device *dev, int inc)
+int dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
+ int err;
- __dev_set_promiscuity(dev, inc);
+ err = __dev_set_promiscuity(dev, inc);
+ if (err < 0)
+ return err;
if (dev->flags != old_flags)
dev_set_rx_mode(dev);
+ return err;
}
/**
@@ -2805,22 +2983,38 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
* to all interfaces. Once it hits zero the device reverts back to normal
* filtering operation. A negative @inc value is used to drop the counter
* when releasing a resource needing all multicasts.
+ * Return 0 if successful or a negative errno code on error.
*/
-void dev_set_allmulti(struct net_device *dev, int inc)
+int dev_set_allmulti(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
ASSERT_RTNL();
dev->flags |= IFF_ALLMULTI;
- if ((dev->allmulti += inc) == 0)
- dev->flags &= ~IFF_ALLMULTI;
+ dev->allmulti += inc;
+ if (dev->allmulti == 0) {
+ /*
+ * Avoid overflow.
+ * If inc causes overflow, untouch allmulti and return error.
+ */
+ if (inc < 0)
+ dev->flags &= ~IFF_ALLMULTI;
+ else {
+ dev->allmulti -= inc;
+ printk(KERN_WARNING "%s: allmulti touches roof, "
+ "set allmulti failed, allmulti feature of "
+ "device might be broken.\n", dev->name);
+ return -EOVERFLOW;
+ }
+ }
if (dev->flags ^ old_flags) {
if (dev->change_rx_flags)
dev->change_rx_flags(dev, IFF_ALLMULTI);
dev_set_rx_mode(dev);
}
+ return 0;
}
/*
@@ -2859,9 +3053,9 @@ void __dev_set_rx_mode(struct net_device *dev)
void dev_set_rx_mode(struct net_device *dev)
{
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
__dev_set_rx_mode(dev);
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
}
int __dev_addr_delete(struct dev_addr_list **list, int *count,
@@ -2939,11 +3133,11 @@ int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
ASSERT_RTNL();
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
if (!err)
__dev_set_rx_mode(dev);
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
return err;
}
EXPORT_SYMBOL(dev_unicast_delete);
@@ -2951,7 +3145,7 @@ EXPORT_SYMBOL(dev_unicast_delete);
/**
* dev_unicast_add - add a secondary unicast address
* @dev: device
- * @addr: address to delete
+ * @addr: address to add
* @alen: length of @addr
*
* Add a secondary unicast address to the device or increase
@@ -2965,11 +3159,11 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen)
ASSERT_RTNL();
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
if (!err)
__dev_set_rx_mode(dev);
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
return err;
}
EXPORT_SYMBOL(dev_unicast_add);
@@ -3036,12 +3230,12 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from)
{
int err = 0;
- netif_tx_lock_bh(to);
+ netif_addr_lock_bh(to);
err = __dev_addr_sync(&to->uc_list, &to->uc_count,
&from->uc_list, &from->uc_count);
if (!err)
__dev_set_rx_mode(to);
- netif_tx_unlock_bh(to);
+ netif_addr_unlock_bh(to);
return err;
}
EXPORT_SYMBOL(dev_unicast_sync);
@@ -3057,15 +3251,15 @@ EXPORT_SYMBOL(dev_unicast_sync);
*/
void dev_unicast_unsync(struct net_device *to, struct net_device *from)
{
- netif_tx_lock_bh(from);
- netif_tx_lock_bh(to);
+ netif_addr_lock_bh(from);
+ netif_addr_lock(to);
__dev_addr_unsync(&to->uc_list, &to->uc_count,
&from->uc_list, &from->uc_count);
__dev_set_rx_mode(to);
- netif_tx_unlock_bh(to);
- netif_tx_unlock_bh(from);
+ netif_addr_unlock(to);
+ netif_addr_unlock_bh(from);
}
EXPORT_SYMBOL(dev_unicast_unsync);
@@ -3085,7 +3279,7 @@ static void __dev_addr_discard(struct dev_addr_list **list)
static void dev_addr_discard(struct net_device *dev)
{
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
__dev_addr_discard(&dev->uc_list);
dev->uc_count = 0;
@@ -3093,7 +3287,7 @@ static void dev_addr_discard(struct net_device *dev)
__dev_addr_discard(&dev->mc_list);
dev->mc_count = 0;
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
}
unsigned dev_get_flags(const struct net_device *dev)
@@ -3666,6 +3860,21 @@ static void rollback_registered(struct net_device *dev)
dev_put(dev);
}
+static void __netdev_init_queue_locks_one(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_unused)
+{
+ spin_lock_init(&dev_queue->_xmit_lock);
+ netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
+ dev_queue->xmit_lock_owner = -1;
+}
+
+static void netdev_init_queue_locks(struct net_device *dev)
+{
+ netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
+ __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
+}
+
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -3700,11 +3909,9 @@ int register_netdevice(struct net_device *dev)
BUG_ON(!dev_net(dev));
net = dev_net(dev);
- spin_lock_init(&dev->queue_lock);
- spin_lock_init(&dev->_xmit_lock);
- netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
- dev->xmit_lock_owner = -1;
- spin_lock_init(&dev->ingress_lock);
+ spin_lock_init(&dev->addr_list_lock);
+ netdev_set_addr_lockdep_class(dev);
+ netdev_init_queue_locks(dev);
dev->iflink = -1;
@@ -3985,6 +4192,19 @@ static struct net_device_stats *internal_stats(struct net_device *dev)
return &dev->stats;
}
+static void netdev_init_one_queue(struct net_device *dev,
+ struct netdev_queue *queue,
+ void *_unused)
+{
+ queue->dev = dev;
+}
+
+static void netdev_init_queues(struct net_device *dev)
+{
+ netdev_init_one_queue(dev, &dev->rx_queue, NULL);
+ netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+}
+
/**
* alloc_netdev_mq - allocate network device
* @sizeof_priv: size of private data to allocate space for
@@ -3999,14 +4219,14 @@ static struct net_device_stats *internal_stats(struct net_device *dev)
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *), unsigned int queue_count)
{
- void *p;
+ struct netdev_queue *tx;
struct net_device *dev;
- int alloc_size;
+ size_t alloc_size;
+ void *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
- alloc_size = sizeof(struct net_device) +
- sizeof(struct net_device_subqueue) * (queue_count - 1);
+ alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
/* ensure 32-byte alignment of private area */
alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
@@ -4021,22 +4241,33 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
return NULL;
}
+ tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
+ if (!tx) {
+ printk(KERN_ERR "alloc_netdev: Unable to allocate "
+ "tx qdiscs.\n");
+ kfree(p);
+ return NULL;
+ }
+
dev = (struct net_device *)
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
dev->padded = (char *)dev - (char *)p;
dev_net_set(dev, &init_net);
+ dev->_tx = tx;
+ dev->num_tx_queues = queue_count;
+ dev->real_num_tx_queues = queue_count;
+
if (sizeof_priv) {
dev->priv = ((char *)dev +
- ((sizeof(struct net_device) +
- (sizeof(struct net_device_subqueue) *
- (queue_count - 1)) + NETDEV_ALIGN_CONST)
+ ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
& ~NETDEV_ALIGN_CONST));
}
- dev->egress_subqueue_count = queue_count;
dev->gso_max_size = GSO_MAX_SIZE;
+ netdev_init_queues(dev);
+
dev->get_stats = internal_stats;
netpoll_netdev_init(dev);
setup(dev);
@@ -4057,6 +4288,8 @@ void free_netdev(struct net_device *dev)
{
release_net(dev_net(dev));
+ kfree(dev->_tx);
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
@@ -4238,7 +4471,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
void *ocpu)
{
struct sk_buff **list_skb;
- struct net_device **list_net;
+ struct Qdisc **list_net;
struct sk_buff *skb;
unsigned int cpu, oldcpu = (unsigned long)ocpu;
struct softnet_data *sd, *oldsd;
@@ -4467,6 +4700,26 @@ err_name:
return -ENOMEM;
}
+char *netdev_drivername(struct net_device *dev, char *buffer, int len)
+{
+ struct device_driver *driver;
+ struct device *parent;
+
+ if (len <= 0 || !buffer)
+ return buffer;
+ buffer[0] = 0;
+
+ parent = dev->dev.parent;
+
+ if (!parent)
+ return buffer;
+
+ driver = parent->driver;
+ if (driver && driver->name)
+ strlcpy(buffer, driver->name, len);
+ return buffer;
+}
+
static void __net_exit netdev_exit(struct net *net)
{
kfree(net->dev_name_head);
@@ -4563,8 +4816,8 @@ static int __init net_dev_init(void)
dev_boot_phase = 0;
- open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
- open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
+ open_softirq(NET_TX_SOFTIRQ, net_tx_action);
+ open_softirq(NET_RX_SOFTIRQ, net_rx_action);
hotcpu_notifier(dev_cpu_callback, 0);
dst_init();
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index f8a3455f4493..5402b3b38e0d 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -72,7 +72,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
{
int err;
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
err = __dev_addr_delete(&dev->mc_list, &dev->mc_count,
addr, alen, glbl);
if (!err) {
@@ -83,7 +83,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
__dev_set_rx_mode(dev);
}
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
return err;
}
@@ -95,11 +95,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
{
int err;
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
if (!err)
__dev_set_rx_mode(dev);
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
return err;
}
@@ -119,12 +119,12 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
{
int err = 0;
- netif_tx_lock_bh(to);
+ netif_addr_lock_bh(to);
err = __dev_addr_sync(&to->mc_list, &to->mc_count,
&from->mc_list, &from->mc_count);
if (!err)
__dev_set_rx_mode(to);
- netif_tx_unlock_bh(to);
+ netif_addr_unlock_bh(to);
return err;
}
@@ -143,15 +143,15 @@ EXPORT_SYMBOL(dev_mc_sync);
*/
void dev_mc_unsync(struct net_device *to, struct net_device *from)
{
- netif_tx_lock_bh(from);
- netif_tx_lock_bh(to);
+ netif_addr_lock_bh(from);
+ netif_addr_lock(to);
__dev_addr_unsync(&to->mc_list, &to->mc_count,
&from->mc_list, &from->mc_count);
__dev_set_rx_mode(to);
- netif_tx_unlock_bh(to);
- netif_tx_unlock_bh(from);
+ netif_addr_unlock(to);
+ netif_addr_unlock_bh(from);
}
EXPORT_SYMBOL(dev_mc_unsync);
@@ -164,7 +164,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN)
return 0;
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
for (m = dev->mc_list; m; m = m->next) {
int i;
@@ -176,7 +176,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
seq_putc(seq, '\n');
}
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
return 0;
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0133b5ebd545..14ada537f895 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -209,6 +209,36 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
return 0;
}
+static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_rxnfc cmd;
+
+ if (!dev->ethtool_ops->set_rxhash)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+ return -EFAULT;
+
+ return dev->ethtool_ops->set_rxhash(dev, &cmd);
+}
+
+static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_rxnfc info;
+
+ if (!dev->ethtool_ops->get_rxhash)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&info, useraddr, sizeof(info)))
+ return -EFAULT;
+
+ dev->ethtool_ops->get_rxhash(dev, &info);
+
+ if (copy_to_user(useraddr, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
+}
+
static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
{
struct ethtool_regs regs;
@@ -826,6 +856,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GGSO:
case ETHTOOL_GFLAGS:
case ETHTOOL_GPFLAGS:
+ case ETHTOOL_GRXFH:
break;
default:
if (!capable(CAP_NET_ADMIN))
@@ -977,6 +1008,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
rc = ethtool_set_value(dev, useraddr,
dev->ethtool_ops->set_priv_flags);
break;
+ case ETHTOOL_GRXFH:
+ rc = ethtool_get_rxhash(dev, useraddr);
+ break;
+ case ETHTOOL_SRXFH:
+ rc = ethtool_set_rxhash(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index e3e9ab0f74e3..79de3b14a8d1 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -69,7 +69,7 @@ static void rules_ops_put(struct fib_rules_ops *ops)
static void flush_route_cache(struct fib_rules_ops *ops)
{
if (ops->flush_cache)
- ops->flush_cache();
+ ops->flush_cache(ops);
}
int fib_rules_register(struct fib_rules_ops *ops)
@@ -226,7 +226,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
ops = lookup_rules_ops(net, frh->family);
if (ops == NULL) {
- err = EAFNOSUPPORT;
+ err = -EAFNOSUPPORT;
goto errout;
}
@@ -365,7 +365,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
ops = lookup_rules_ops(net, frh->family);
if (ops == NULL) {
- err = EAFNOSUPPORT;
+ err = -EAFNOSUPPORT;
goto errout;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 4f8369729a4e..df3744355839 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -68,7 +68,6 @@ static inline void *load_pointer(struct sk_buff *skb, int k,
* sk_filter - run a packet through a socket filter
* @sk: sock associated with &sk_buff
* @skb: buffer to filter
- * @needlock: set to 1 if the sock is not locked by caller.
*
* Run the filter code and then cut skb->data to correct size returned by
* sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
diff --git a/net/core/flow.c b/net/core/flow.c
index 19991175fdeb..5cf81052d044 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -298,7 +298,7 @@ void flow_cache_flush(void)
init_completion(&info.completion);
local_bh_disable();
- smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0);
+ smp_call_function(flow_cache_flush_per_cpu, &info, 0);
flow_cache_flush_tasklet((unsigned long)&info);
local_bh_enable();
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 755c37fdaee7..4c9c0121c9da 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -36,7 +36,7 @@
* in any case.
*/
-int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
+int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
{
int size, err, ct;
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index a5e372b9ec4d..bf8f7af699d7 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -77,10 +77,10 @@ static void rfc2863_policy(struct net_device *dev)
}
-static int linkwatch_urgent_event(struct net_device *dev)
+static bool linkwatch_urgent_event(struct net_device *dev)
{
return netif_running(dev) && netif_carrier_ok(dev) &&
- dev->qdisc != dev->qdisc_sleeping;
+ qdisc_tx_changing(dev);
}
@@ -180,10 +180,9 @@ static void __linkwatch_run_queue(int urgent_only)
rfc2863_policy(dev);
if (dev->flags & IFF_UP) {
- if (netif_carrier_ok(dev)) {
- WARN_ON(dev->qdisc_sleeping == &noop_qdisc);
+ if (netif_carrier_ok(dev))
dev_activate(dev);
- } else
+ else
dev_deactivate(dev);
netdev_state_change(dev);
@@ -214,7 +213,7 @@ static void linkwatch_event(struct work_struct *dummy)
void linkwatch_fire_event(struct net_device *dev)
{
- int urgent = linkwatch_urgent_event(dev);
+ bool urgent = linkwatch_urgent_event(dev);
if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
dev_hold(dev);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5d9d7130bd6e..f62c8af85d38 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -930,6 +930,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
buff = neigh->arp_queue.next;
__skb_unlink(buff, &neigh->arp_queue);
kfree_skb(buff);
+ NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
__skb_queue_tail(&neigh->arp_queue, skb);
}
@@ -1714,7 +1715,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
return nla_nest_end(skb, nest);
nla_put_failure:
- return nla_nest_cancel(skb, nest);
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
}
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
@@ -2057,9 +2059,9 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
goto nla_put_failure;
}
- ci.ndm_used = now - neigh->used;
- ci.ndm_confirmed = now - neigh->confirmed;
- ci.ndm_updated = now - neigh->updated;
+ ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
+ ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
+ ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
read_unlock_bh(&neigh->lock);
@@ -2461,12 +2463,12 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n");
+ seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n");
return 0;
}
seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
- "%08lx %08lx %08lx %08lx\n",
+ "%08lx %08lx %08lx %08lx %08lx\n",
atomic_read(&tbl->entries),
st->allocs,
@@ -2482,7 +2484,8 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
st->rcv_probes_ucast,
st->periodic_gc_runs,
- st->forced_gc_runs
+ st->forced_gc_runs,
+ st->unres_discards
);
return 0;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 90e2177af081..c1f4e0d428c0 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -242,11 +242,11 @@ static ssize_t netstat_show(const struct device *d,
offset % sizeof(unsigned long) != 0);
read_lock(&dev_base_lock);
- if (dev_isalive(dev) && dev->get_stats &&
- (stats = (*dev->get_stats)(dev)))
+ if (dev_isalive(dev)) {
+ stats = dev->get_stats(dev);
ret = sprintf(buf, fmt_ulong,
*(unsigned long *)(((u8 *) stats) + offset));
-
+ }
read_unlock(&dev_base_lock);
return ret;
}
@@ -318,7 +318,7 @@ static struct attribute_group netstat_group = {
.attrs = netstat_attrs,
};
-#ifdef CONFIG_WIRELESS_EXT
+#ifdef CONFIG_WIRELESS_EXT_SYSFS
/* helper function that does all the locking etc for wireless stats */
static ssize_t wireless_show(struct device *d, char *buf,
ssize_t (*format)(const struct iw_statistics *,
@@ -457,10 +457,9 @@ int netdev_register_kobject(struct net_device *net)
strlcpy(dev->bus_id, net->name, BUS_ID_SIZE);
#ifdef CONFIG_SYSFS
- if (net->get_stats)
- *groups++ = &netstat_group;
+ *groups++ = &netstat_group;
-#ifdef CONFIG_WIRELESS_EXT
+#ifdef CONFIG_WIRELESS_EXT_SYSFS
if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)
*groups++ = &wireless_group;
#endif
@@ -469,6 +468,19 @@ int netdev_register_kobject(struct net_device *net)
return device_add(dev);
}
+int netdev_class_create_file(struct class_attribute *class_attr)
+{
+ return class_create_file(&net_class, class_attr);
+}
+
+void netdev_class_remove_file(struct class_attribute *class_attr)
+{
+ class_remove_file(&net_class, class_attr);
+}
+
+EXPORT_SYMBOL(netdev_class_create_file);
+EXPORT_SYMBOL(netdev_class_remove_file);
+
void netdev_initialize_kobject(struct net_device *net)
{
struct device *device = &(net->dev);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 72b4c184dd84..7c52fe277b62 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -140,6 +140,9 @@ static void cleanup_net(struct work_struct *work)
struct pernet_operations *ops;
struct net *net;
+ /* Be very certain incoming network packets will not find us */
+ rcu_barrier();
+
net = container_of(work, struct net, work);
mutex_lock(&net_mutex);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 8fb134da0346..c12720895ecf 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -58,25 +58,27 @@ static void queue_process(struct work_struct *work)
while ((skb = skb_dequeue(&npinfo->txq))) {
struct net_device *dev = skb->dev;
+ struct netdev_queue *txq;
if (!netif_device_present(dev) || !netif_running(dev)) {
__kfree_skb(skb);
continue;
}
+ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
local_irq_save(flags);
- netif_tx_lock(dev);
- if ((netif_queue_stopped(dev) ||
- netif_subqueue_stopped(dev, skb)) ||
- dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
+ __netif_tx_lock(txq, smp_processor_id());
+ if (netif_tx_queue_stopped(txq) ||
+ dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
skb_queue_head(&npinfo->txq, skb);
- netif_tx_unlock(dev);
+ __netif_tx_unlock(txq);
local_irq_restore(flags);
schedule_delayed_work(&npinfo->tx_work, HZ/10);
return;
}
- netif_tx_unlock(dev);
+ __netif_tx_unlock(txq);
local_irq_restore(flags);
}
}
@@ -278,17 +280,19 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
/* don't get messages out of order, and no recursion */
if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
+ struct netdev_queue *txq;
unsigned long flags;
+ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
local_irq_save(flags);
/* try until next clock tick */
for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
tries > 0; --tries) {
- if (netif_tx_trylock(dev)) {
- if (!netif_queue_stopped(dev) &&
- !netif_subqueue_stopped(dev, skb))
+ if (__netif_tx_trylock(txq)) {
+ if (!netif_tx_queue_stopped(txq))
status = dev->hard_start_xmit(skb, dev);
- netif_tx_unlock(dev);
+ __netif_tx_unlock(txq);
if (status == NETDEV_TX_OK)
break;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fdf537707e51..c7d484f7e1c4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1875,7 +1875,7 @@ static int pktgen_device_event(struct notifier_block *unused,
{
struct net_device *dev = ptr;
- if (dev_net(dev) != &init_net)
+ if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
/* It is OK that we do not hold the group lock right now,
@@ -2123,6 +2123,24 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
}
}
#endif
+static void set_cur_queue_map(struct pktgen_dev *pkt_dev)
+{
+ if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) {
+ __u16 t;
+ if (pkt_dev->flags & F_QUEUE_MAP_RND) {
+ t = random32() %
+ (pkt_dev->queue_map_max -
+ pkt_dev->queue_map_min + 1)
+ + pkt_dev->queue_map_min;
+ } else {
+ t = pkt_dev->cur_queue_map + 1;
+ if (t > pkt_dev->queue_map_max)
+ t = pkt_dev->queue_map_min;
+ }
+ pkt_dev->cur_queue_map = t;
+ }
+}
+
/* Increment/randomize headers according to flags and current values
* for IP src/dest, UDP src/dst port, MAC-Addr src/dst
*/
@@ -2325,19 +2343,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
pkt_dev->cur_pkt_size = t;
}
- if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) {
- __u16 t;
- if (pkt_dev->flags & F_QUEUE_MAP_RND) {
- t = random32() %
- (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1)
- + pkt_dev->queue_map_min;
- } else {
- t = pkt_dev->cur_queue_map + 1;
- if (t > pkt_dev->queue_map_max)
- t = pkt_dev->queue_map_min;
- }
- pkt_dev->cur_queue_map = t;
- }
+ set_cur_queue_map(pkt_dev);
pkt_dev->flows[flow].count++;
}
@@ -2458,7 +2464,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
__be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */
__be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */
__be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
-
+ u16 queue_map;
if (pkt_dev->nr_labels)
protocol = htons(ETH_P_MPLS_UC);
@@ -2469,6 +2475,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
/* Update any of the values, used when we're incrementing various
* fields.
*/
+ queue_map = pkt_dev->cur_queue_map;
mod_cur_headers(pkt_dev);
datalen = (odev->hard_header_len + 16) & ~0xf;
@@ -2507,7 +2514,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
skb->network_header = skb->tail;
skb->transport_header = skb->network_header + sizeof(struct iphdr);
skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
- skb_set_queue_mapping(skb, pkt_dev->cur_queue_map);
+ skb_set_queue_mapping(skb, queue_map);
iph = ip_hdr(skb);
udph = udp_hdr(skb);
@@ -2797,6 +2804,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
__be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */
__be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */
__be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
+ u16 queue_map;
if (pkt_dev->nr_labels)
protocol = htons(ETH_P_MPLS_UC);
@@ -2807,6 +2815,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
/* Update any of the values, used when we're incrementing various
* fields.
*/
+ queue_map = pkt_dev->cur_queue_map;
mod_cur_headers(pkt_dev);
skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 +
@@ -2844,7 +2853,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
skb->network_header = skb->tail;
skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
- skb_set_queue_mapping(skb, pkt_dev->cur_queue_map);
+ skb_set_queue_mapping(skb, queue_map);
iph = ipv6_hdr(skb);
udph = udp_hdr(skb);
@@ -3263,7 +3272,9 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
{
struct net_device *odev = NULL;
+ struct netdev_queue *txq;
__u64 idle_start = 0;
+ u16 queue_map;
int ret;
odev = pkt_dev->odev;
@@ -3285,9 +3296,15 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
}
}
- if ((netif_queue_stopped(odev) ||
- (pkt_dev->skb &&
- netif_subqueue_stopped(odev, pkt_dev->skb))) ||
+ if (!pkt_dev->skb) {
+ set_cur_queue_map(pkt_dev);
+ queue_map = pkt_dev->cur_queue_map;
+ } else {
+ queue_map = skb_get_queue_mapping(pkt_dev->skb);
+ }
+
+ txq = netdev_get_tx_queue(odev, queue_map);
+ if (netif_tx_queue_stopped(txq) ||
need_resched()) {
idle_start = getCurUs();
@@ -3303,8 +3320,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->idle_acc += getCurUs() - idle_start;
- if (netif_queue_stopped(odev) ||
- netif_subqueue_stopped(odev, pkt_dev->skb)) {
+ if (netif_tx_queue_stopped(txq)) {
pkt_dev->next_tx_us = getCurUs(); /* TODO */
pkt_dev->next_tx_ns = 0;
goto out; /* Try the next interface */
@@ -3331,9 +3347,12 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
}
}
- netif_tx_lock_bh(odev);
- if (!netif_queue_stopped(odev) &&
- !netif_subqueue_stopped(odev, pkt_dev->skb)) {
+ /* fill_packet() might have changed the queue */
+ queue_map = skb_get_queue_mapping(pkt_dev->skb);
+ txq = netdev_get_tx_queue(odev, queue_map);
+
+ __netif_tx_lock_bh(txq);
+ if (!netif_tx_queue_stopped(txq)) {
atomic_inc(&(pkt_dev->skb->users));
retry_now:
@@ -3377,7 +3396,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->next_tx_ns = 0;
}
- netif_tx_unlock_bh(odev);
+ __netif_tx_unlock_bh(txq);
/* If pkt_dev->count is zero, then run forever */
if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index cf857c4dc7b1..71edb8b36341 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -498,7 +498,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
return nla_nest_end(skb, mx);
nla_put_failure:
- return nla_nest_cancel(skb, mx);
+ nla_nest_cancel(skb, mx);
+ return -EMSGSIZE;
}
int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
@@ -604,8 +605,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags)
{
+ struct netdev_queue *txq;
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
+ struct net_device_stats *stats;
+ struct nlattr *attr;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
if (nlh == NULL)
@@ -632,8 +636,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (dev->master)
NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
- if (dev->qdisc_sleeping)
- NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id);
+ txq = netdev_get_tx_queue(dev, 0);
+ if (txq->qdisc_sleeping)
+ NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id);
if (1) {
struct rtnl_link_ifmap map = {
@@ -652,19 +657,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
}
- if (dev->get_stats) {
- struct net_device_stats *stats = dev->get_stats(dev);
- if (stats) {
- struct nlattr *attr;
-
- attr = nla_reserve(skb, IFLA_STATS,
- sizeof(struct rtnl_link_stats));
- if (attr == NULL)
- goto nla_put_failure;
+ attr = nla_reserve(skb, IFLA_STATS,
+ sizeof(struct rtnl_link_stats));
+ if (attr == NULL)
+ goto nla_put_failure;
- copy_rtnl_link_stats(nla_data(attr), stats);
- }
- }
+ stats = dev->get_stats(dev);
+ copy_rtnl_link_stats(nla_data(attr), stats);
if (dev->rtnl_link_ops) {
if (rtnl_link_fill(skb, dev) < 0)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5c459f2b7985..e4115672b6cf 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4,8 +4,6 @@
* Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
* Florian La Roche <rzsfl@rz.uni-sb.de>
*
- * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
- *
* Fixes:
* Alan Cox : Fixed the worst of the load
* balancer bugs.
@@ -461,6 +459,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->tc_verd = old->tc_verd;
#endif
#endif
+ new->vlan_tci = old->vlan_tci;
+
skb_copy_secmark(new, old);
}
@@ -1282,107 +1282,83 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
return 0;
}
-/*
- * Map linear and fragment data from the skb to spd. Returns number of
- * pages mapped.
- */
-static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
- unsigned int *total_len,
- struct splice_pipe_desc *spd)
+static inline void __segment_seek(struct page **page, unsigned int *poff,
+ unsigned int *plen, unsigned int off)
{
- unsigned int nr_pages = spd->nr_pages;
- unsigned int poff, plen, len, toff, tlen;
- int headlen, seg;
+ *poff += off;
+ *page += *poff / PAGE_SIZE;
+ *poff = *poff % PAGE_SIZE;
+ *plen -= off;
+}
- toff = *offset;
- tlen = *total_len;
- if (!tlen)
- goto err;
+static inline int __splice_segment(struct page *page, unsigned int poff,
+ unsigned int plen, unsigned int *off,
+ unsigned int *len, struct sk_buff *skb,
+ struct splice_pipe_desc *spd)
+{
+ if (!*len)
+ return 1;
- /*
- * if the offset is greater than the linear part, go directly to
- * the fragments.
- */
- headlen = skb_headlen(skb);
- if (toff >= headlen) {
- toff -= headlen;
- goto map_frag;
+ /* skip this segment if already processed */
+ if (*off >= plen) {
+ *off -= plen;
+ return 0;
}
- /*
- * first map the linear region into the pages/partial map, skipping
- * any potential initial offset.
- */
- len = 0;
- while (len < headlen) {
- void *p = skb->data + len;
-
- poff = (unsigned long) p & (PAGE_SIZE - 1);
- plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff);
- len += plen;
-
- if (toff) {
- if (plen <= toff) {
- toff -= plen;
- continue;
- }
- plen -= toff;
- poff += toff;
- toff = 0;
- }
+ /* ignore any bits we already processed */
+ if (*off) {
+ __segment_seek(&page, &poff, &plen, *off);
+ *off = 0;
+ }
- plen = min(plen, tlen);
- if (!plen)
- break;
+ do {
+ unsigned int flen = min(*len, plen);
- /*
- * just jump directly to update and return, no point
- * in going over fragments when the output is full.
- */
- if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb))
- goto done;
+ /* the linear region may spread across several pages */
+ flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
- tlen -= plen;
- }
+ if (spd_fill_page(spd, page, flen, poff, skb))
+ return 1;
+
+ __segment_seek(&page, &poff, &plen, flen);
+ *len -= flen;
+
+ } while (*len && plen);
+
+ return 0;
+}
+
+/*
+ * Map linear and fragment data from the skb to spd. It reports failure if the
+ * pipe is full or if we already spliced the requested length.
+ */
+static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
+ unsigned int *len,
+ struct splice_pipe_desc *spd)
+{
+ int seg;
+
+ /*
+ * map the linear part
+ */
+ if (__splice_segment(virt_to_page(skb->data),
+ (unsigned long) skb->data & (PAGE_SIZE - 1),
+ skb_headlen(skb),
+ offset, len, skb, spd))
+ return 1;
/*
* then map the fragments
*/
-map_frag:
for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
- plen = f->size;
- poff = f->page_offset;
-
- if (toff) {
- if (plen <= toff) {
- toff -= plen;
- continue;
- }
- plen -= toff;
- poff += toff;
- toff = 0;
- }
-
- plen = min(plen, tlen);
- if (!plen)
- break;
-
- if (spd_fill_page(spd, f->page, plen, poff, skb))
- break;
-
- tlen -= plen;
+ if (__splice_segment(f->page, f->page_offset, f->size,
+ offset, len, skb, spd))
+ return 1;
}
-done:
- if (spd->nr_pages - nr_pages) {
- *offset = 0;
- *total_len = tlen;
- return 0;
- }
-err:
- return 1;
+ return 0;
}
/*
@@ -1445,6 +1421,7 @@ done:
if (spd.nr_pages) {
int ret;
+ struct sock *sk = __skb->sk;
/*
* Drop the socket lock, otherwise we have reverse
@@ -1455,9 +1432,9 @@ done:
* we call into ->sendpage() with the i_mutex lock held
* and networking will grab the socket lock.
*/
- release_sock(__skb->sk);
+ release_sock(sk);
ret = splice_to_pipe(pipe, &spd);
- lock_sock(__skb->sk);
+ lock_sock(sk);
return ret;
}
@@ -2280,6 +2257,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
skb_copy_queue_mapping(nskb, skb);
nskb->priority = skb->priority;
nskb->protocol = skb->protocol;
+ nskb->vlan_tci = skb->vlan_tci;
nskb->dst = dst_clone(skb->dst);
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
nskb->pkt_type = skb->pkt_type;
@@ -2584,6 +2562,13 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
return true;
}
+void __skb_warn_lro_forwarding(const struct sk_buff *skb)
+{
+ if (net_ratelimit())
+ pr_warning("%s: received packets cannot be forwarded"
+ " while LRO is enabled\n", skb->dev->name);
+}
+
EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(kfree_skb);
@@ -2617,6 +2602,7 @@ EXPORT_SYMBOL(skb_seq_read);
EXPORT_SYMBOL(skb_abort_seq_read);
EXPORT_SYMBOL(skb_find_text);
EXPORT_SYMBOL(skb_append_datato_frags);
+EXPORT_SYMBOL(__skb_warn_lro_forwarding);
EXPORT_SYMBOL_GPL(skb_to_sgvec);
EXPORT_SYMBOL_GPL(skb_cow_data);
diff --git a/net/core/sock.c b/net/core/sock.c
index 88094cb09c06..10a64d57078c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -7,8 +7,6 @@
* handler for protocols to use and generic option handler.
*
*
- * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Florian La Roche, <flla@stud.uni-sb.de>
@@ -1068,7 +1066,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
* to be taken into account in all callers. -acme
*/
sk_refcnt_debug_inc(newsk);
- newsk->sk_socket = NULL;
+ sk_set_socket(newsk, NULL);
newsk->sk_sleep = NULL;
if (newsk->sk_prot->sockets_allocated)
@@ -1444,7 +1442,7 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
/* Under pressure. */
if (allocated > prot->sysctl_mem[1])
if (prot->enter_memory_pressure)
- prot->enter_memory_pressure();
+ prot->enter_memory_pressure(sk);
/* Over hard limit. */
if (allocated > prot->sysctl_mem[2])
@@ -1704,7 +1702,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_rcvbuf = sysctl_rmem_default;
sk->sk_sndbuf = sysctl_wmem_default;
sk->sk_state = TCP_CLOSE;
- sk->sk_socket = sock;
+ sk_set_socket(sk, sock);
sock_set_flag(sk, SOCK_ZAPPED);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 5fc801057244..a570e2af22cb 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -125,14 +125,6 @@ static struct ctl_table net_core_table[] = {
#endif /* CONFIG_XFRM */
#endif /* CONFIG_NET */
{
- .ctl_name = NET_CORE_SOMAXCONN,
- .procname = "somaxconn",
- .data = &init_net.core.sysctl_somaxconn,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
.ctl_name = NET_CORE_BUDGET,
.procname = "netdev_budget",
.data = &netdev_budget,
@@ -151,6 +143,18 @@ static struct ctl_table net_core_table[] = {
{ .ctl_name = 0 }
};
+static struct ctl_table netns_core_table[] = {
+ {
+ .ctl_name = NET_CORE_SOMAXCONN,
+ .procname = "somaxconn",
+ .data = &init_net.core.sysctl_somaxconn,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ { .ctl_name = 0 }
+};
+
static __net_initdata struct ctl_path net_core_path[] = {
{ .procname = "net", .ctl_name = CTL_NET, },
{ .procname = "core", .ctl_name = NET_CORE, },
@@ -159,23 +163,17 @@ static __net_initdata struct ctl_path net_core_path[] = {
static __net_init int sysctl_core_net_init(struct net *net)
{
- struct ctl_table *tbl, *tmp;
+ struct ctl_table *tbl;
net->core.sysctl_somaxconn = SOMAXCONN;
- tbl = net_core_table;
+ tbl = netns_core_table;
if (net != &init_net) {
- tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL);
+ tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
if (tbl == NULL)
goto err_dup;
- for (tmp = tbl; tmp->procname; tmp++) {
- if (tmp->data >= (void *)&init_net &&
- tmp->data < (void *)(&init_net + 1))
- tmp->data += (char *)net - (char *)&init_net;
- else
- tmp->mode &= ~0222;
- }
+ tbl[0].data = &net->core.sysctl_somaxconn;
}
net->core.sysctl_hdr = register_net_sysctl_table(net,
@@ -186,7 +184,7 @@ static __net_init int sysctl_core_net_init(struct net *net)
return 0;
err_reg:
- if (tbl != net_core_table)
+ if (tbl != netns_core_table)
kfree(tbl);
err_dup:
return -ENOMEM;
@@ -198,7 +196,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net)
tbl = net->core.sysctl_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->core.sysctl_hdr);
- BUG_ON(tbl == net_core_table);
+ BUG_ON(tbl == netns_core_table);
kfree(tbl);
}
@@ -209,6 +207,7 @@ static __net_initdata struct pernet_operations sysctl_core_ops = {
static __init int sysctl_core_init(void)
{
+ register_net_sysctl_rotable(net_core_path, net_core_table);
return register_pernet_subsys(&sysctl_core_ops);
}
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index de760504f6fe..8c6b706963ff 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -76,7 +76,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
end = start + skb_shinfo(skb)->frags[i].size;
copy = end - offset;
- if ((copy = end - offset) > 0) {
+ if (copy > 0) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
struct page *page = frag->page;