summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/datagram.c1
-rw-r--r--net/core/dev.c1077
-rw-r--r--net/core/dev_addr_lists.c741
-rw-r--r--net/core/dev_mcast.c232
-rw-r--r--net/core/drop_monitor.c1
-rw-r--r--net/core/dst.c46
-rw-r--r--net/core/ethtool.c149
-rw-r--r--net/core/fib_rules.c20
-rw-r--r--net/core/filter.c1
-rw-r--r--net/core/flow.c405
-rw-r--r--net/core/gen_estimator.c1
-rw-r--r--net/core/iovec.c1
-rw-r--r--net/core/link_watch.c1
-rw-r--r--net/core/neighbour.c1
-rw-r--r--net/core/net-sysfs.c104
-rw-r--r--net/core/net-traces.c1
-rw-r--r--net/core/netpoll.c8
-rw-r--r--net/core/pktgen.c58
-rw-r--r--net/core/rtnetlink.c79
-rw-r--r--net/core/scm.c1
-rw-r--r--net/core/sock.c8
-rw-r--r--net/core/sysctl_net_core.c69
23 files changed, 1739 insertions, 1268 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 08791ac3e05a..51c3eec850ef 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
-obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
+obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o
obj-$(CONFIG_XFRM) += flow.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 95c2e0840d0d..2dccd4ee591b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -48,6 +48,7 @@
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
+#include <linux/slab.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
diff --git a/net/core/dev.c b/net/core/dev.c
index 17b168671501..b31d5d69a467 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -80,6 +80,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
+#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
@@ -129,6 +130,7 @@
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
+#include <linux/pci.h>
#include "net-sysfs.h"
@@ -206,6 +208,20 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}
+static inline void rps_lock(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+ spin_lock(&sd->input_pkt_queue.lock);
+#endif
+}
+
+static inline void rps_unlock(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+ spin_unlock(&sd->input_pkt_queue.lock);
+#endif
+}
+
/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
@@ -248,7 +264,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
* queue in the local softnet handler.
*/
-DEFINE_PER_CPU(struct softnet_data, softnet_data);
+DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);
#ifdef CONFIG_LOCKDEP
@@ -772,14 +788,17 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype);
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
- struct net_device *dev;
+ struct net_device *dev, *ret = NULL;
- rtnl_lock();
- dev = __dev_getfirstbyhwtype(net, type);
- if (dev)
- dev_hold(dev);
- rtnl_unlock();
- return dev;
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev)
+ if (dev->type == type) {
+ dev_hold(dev);
+ ret = dev;
+ break;
+ }
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);
@@ -1084,9 +1103,9 @@ void netdev_state_change(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_state_change);
-void netdev_bonding_change(struct net_device *dev, unsigned long event)
+int netdev_bonding_change(struct net_device *dev, unsigned long event)
{
- call_netdevice_notifiers(event, dev);
+ return call_netdevice_notifiers(event, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);
@@ -1416,6 +1435,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
+ ASSERT_RTNL();
return raw_notifier_call_chain(&netdev_chain, val, dev);
}
@@ -1783,18 +1803,27 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
* 2. No high memory really exists on this machine.
*/
-static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
int i;
+ if (!(dev->features & NETIF_F_HIGHDMA)) {
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ if (PageHighMem(skb_shinfo(skb)->frags[i].page))
+ return 1;
+ }
- if (dev->features & NETIF_F_HIGHDMA)
- return 0;
-
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- if (PageHighMem(skb_shinfo(skb)->frags[i].page))
- return 1;
+ if (PCI_DMA_BUS_IS_PHYS) {
+ struct device *pdev = dev->dev.parent;
+ if (!pdev)
+ return 0;
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
+ if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
+ return 1;
+ }
+ }
#endif
return 0;
}
@@ -1852,6 +1881,17 @@ static int dev_gso_segment(struct sk_buff *skb)
return 0;
}
+/*
+ * Try to orphan skb early, right before transmission by the device.
+ * We cannot orphan skb if tx timestamp is requested, since
+ * drivers need to call skb_tstamp_tx() to send the timestamp.
+ */
+static inline void skb_orphan_try(struct sk_buff *skb)
+{
+ if (!skb_tx(skb)->flags)
+ skb_orphan(skb);
+}
+
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
{
@@ -1876,23 +1916,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(skb);
+ skb_orphan_try(skb);
rc = ops->ndo_start_xmit(skb, dev);
if (rc == NETDEV_TX_OK)
txq_trans_update(txq);
- /*
- * TODO: if skb_orphan() was called by
- * dev->hard_start_xmit() (for example, the unmodified
- * igb driver does that; bnx2 doesn't), then
- * skb_tx_software_timestamp() will be unable to send
- * back the time stamp.
- *
- * How can this be prevented? Always create another
- * reference to the socket before calling
- * dev->hard_start_xmit()? Prevent that skb_orphan()
- * does anything in dev->hard_start_xmit() by clearing
- * the skb destructor before the call and restoring it
- * afterwards, then doing the skb_orphan() ourselves?
- */
return rc;
}
@@ -1910,6 +1937,7 @@ gso:
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(nskb);
+ skb_orphan_try(nskb);
rc = ops->ndo_start_xmit(nskb, dev);
if (unlikely(rc != NETDEV_TX_OK)) {
if (rc & ~NETDEV_TX_MASK)
@@ -1947,7 +1975,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
if (skb->sk && skb->sk->sk_hash)
hash = skb->sk->sk_hash;
else
- hash = skb->protocol;
+ hash = (__force u16) skb->protocol;
hash = jhash_1word(hash, hashrnd);
@@ -1959,9 +1987,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
{
if (unlikely(queue_index >= dev->real_num_tx_queues)) {
if (net_ratelimit()) {
- netdev_warn(dev, "selects TX queue %d, but "
- "real number of TX queues is %d\n",
- queue_index, dev->real_num_tx_queues);
+ pr_warning("%s selects TX queue %d, but "
+ "real number of TX queues is %d\n",
+ dev->name, queue_index, dev->real_num_tx_queues);
}
return 0;
}
@@ -1987,7 +2015,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
if (dev->real_num_tx_queues > 1)
queue_index = skb_tx_hash(dev, skb);
- if (sk && sk->sk_dst_cache)
+ if (sk && rcu_dereference_check(sk->sk_dst_cache, 1))
sk_tx_queue_set(sk, queue_index);
}
}
@@ -2174,29 +2202,38 @@ int weight_p __read_mostly = 64; /* old backlog weight */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
+#ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
/*
* get_rps_cpu is called from netif_receive_skb and returns the target
* CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
*/
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+ struct rps_dev_flow **rflowp)
{
struct ipv6hdr *ip6;
struct iphdr *ip;
struct netdev_rx_queue *rxqueue;
struct rps_map *map;
+ struct rps_dev_flow_table *flow_table;
+ struct rps_sock_flow_table *sock_flow_table;
int cpu = -1;
u8 ip_proto;
+ u16 tcpu;
u32 addr1, addr2, ports, ihl;
- rcu_read_lock();
-
if (skb_rx_queue_recorded(skb)) {
u16 index = skb_get_rx_queue(skb);
if (unlikely(index >= dev->num_rx_queues)) {
if (net_ratelimit()) {
- netdev_warn(dev, "received packet on queue "
- "%u, but number of RX queues is %u\n",
- index, dev->num_rx_queues);
+ pr_warning("%s received packet on queue "
+ "%u, but number of RX queues is %u\n",
+ dev->name, index, dev->num_rx_queues);
}
goto done;
}
@@ -2204,7 +2241,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
} else
rxqueue = dev->_rx;
- if (!rxqueue->rps_map)
+ if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
goto done;
if (skb->rxhash)
@@ -2217,8 +2254,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
ip = (struct iphdr *) skb->data;
ip_proto = ip->protocol;
- addr1 = ip->saddr;
- addr2 = ip->daddr;
+ addr1 = (__force u32) ip->saddr;
+ addr2 = (__force u32) ip->daddr;
ihl = ip->ihl;
break;
case __constant_htons(ETH_P_IPV6):
@@ -2227,8 +2264,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
ip6 = (struct ipv6hdr *) skb->data;
ip_proto = ip6->nexthdr;
- addr1 = ip6->saddr.s6_addr32[3];
- addr2 = ip6->daddr.s6_addr32[3];
+ addr1 = (__force u32) ip6->saddr.s6_addr32[3];
+ addr2 = (__force u32) ip6->daddr.s6_addr32[3];
ihl = (40 >> 2);
break;
default:
@@ -2243,22 +2280,72 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
case IPPROTO_AH:
case IPPROTO_SCTP:
case IPPROTO_UDPLITE:
- if (pskb_may_pull(skb, (ihl * 4) + 4))
- ports = *((u32 *) (skb->data + (ihl * 4)));
+ if (pskb_may_pull(skb, (ihl * 4) + 4)) {
+ __be16 *hports = (__be16 *) (skb->data + (ihl * 4));
+ u32 sport, dport;
+
+ sport = (__force u16) hports[0];
+ dport = (__force u16) hports[1];
+ if (dport < sport)
+ swap(sport, dport);
+ ports = (sport << 16) + dport;
+ }
break;
default:
break;
}
+ /* get a consistent hash (same value on both flow directions) */
+ if (addr2 < addr1)
+ swap(addr1, addr2);
skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd);
if (!skb->rxhash)
skb->rxhash = 1;
got_hash:
+ flow_table = rcu_dereference(rxqueue->rps_flow_table);
+ sock_flow_table = rcu_dereference(rps_sock_flow_table);
+ if (flow_table && sock_flow_table) {
+ u16 next_cpu;
+ struct rps_dev_flow *rflow;
+
+ rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+ tcpu = rflow->cpu;
+
+ next_cpu = sock_flow_table->ents[skb->rxhash &
+ sock_flow_table->mask];
+
+ /*
+ * If the desired CPU (where last recvmsg was done) is
+ * different from current CPU (one in the rx-queue flow
+ * table entry), switch if one of the following holds:
+ * - Current CPU is unset (equal to RPS_NO_CPU).
+ * - Current CPU is offline.
+ * - The current CPU's queue tail has advanced beyond the
+ * last packet that was enqueued using this table entry.
+ * This guarantees that all previous packets for the flow
+ * have been dequeued, thus preserving in order delivery.
+ */
+ if (unlikely(tcpu != next_cpu) &&
+ (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+ ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+ rflow->last_qtail)) >= 0)) {
+ tcpu = rflow->cpu = next_cpu;
+ if (tcpu != RPS_NO_CPU)
+ rflow->last_qtail = per_cpu(softnet_data,
+ tcpu).input_queue_head;
+ }
+ if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+ *rflowp = rflow;
+ cpu = tcpu;
+ goto done;
+ }
+ }
+
map = rcu_dereference(rxqueue->rps_map);
if (map) {
- u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+ tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
if (cpu_online(tcpu)) {
cpu = tcpu;
@@ -2267,72 +2354,78 @@ got_hash:
}
done:
- rcu_read_unlock();
return cpu;
}
-/*
- * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled
- * to be sent to kick remote softirq processing. There are two masks since
- * the sending of IPIs must be done with interrupts enabled. The select field
- * indicates the current mask that enqueue_backlog uses to schedule IPIs.
- * select is flipped before net_rps_action is called while still under lock,
- * net_rps_action then uses the non-selected mask to send the IPIs and clears
- * it without conflicting with enqueue_backlog operation.
- */
-struct rps_remote_softirq_cpus {
- cpumask_t mask[2];
- int select;
-};
-static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus);
-
/* Called from hardirq (IPI) context */
-static void trigger_softirq(void *data)
+static void rps_trigger_softirq(void *data)
{
- struct softnet_data *queue = data;
- __napi_schedule(&queue->backlog);
+ struct softnet_data *sd = data;
+
+ __napi_schedule(&sd->backlog);
__get_cpu_var(netdev_rx_stat).received_rps++;
}
+#endif /* CONFIG_RPS */
+
+/*
+ * Check if this softnet_data structure is another cpu one
+ * If yes, queue it to our IPI list and return 1
+ * If no, return 0
+ */
+static int rps_ipi_queued(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+ struct softnet_data *mysd = &__get_cpu_var(softnet_data);
+
+ if (sd != mysd) {
+ sd->rps_ipi_next = mysd->rps_ipi_list;
+ mysd->rps_ipi_list = sd;
+
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ return 1;
+ }
+#endif /* CONFIG_RPS */
+ return 0;
+}
+
/*
* enqueue_to_backlog is called to queue an skb to a per CPU backlog
* queue (may be a remote CPU queue).
*/
-static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+ unsigned int *qtail)
{
- struct softnet_data *queue;
+ struct softnet_data *sd;
unsigned long flags;
- queue = &per_cpu(softnet_data, cpu);
+ sd = &per_cpu(softnet_data, cpu);
local_irq_save(flags);
__get_cpu_var(netdev_rx_stat).total++;
- spin_lock(&queue->input_pkt_queue.lock);
- if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
- if (queue->input_pkt_queue.qlen) {
+ rps_lock(sd);
+ if (sd->input_pkt_queue.qlen <= netdev_max_backlog) {
+ if (sd->input_pkt_queue.qlen) {
enqueue:
- __skb_queue_tail(&queue->input_pkt_queue, skb);
- spin_unlock_irqrestore(&queue->input_pkt_queue.lock,
- flags);
+ __skb_queue_tail(&sd->input_pkt_queue, skb);
+#ifdef CONFIG_RPS
+ *qtail = sd->input_queue_head + sd->input_pkt_queue.qlen;
+#endif
+ rps_unlock(sd);
+ local_irq_restore(flags);
return NET_RX_SUCCESS;
}
/* Schedule NAPI for backlog device */
- if (napi_schedule_prep(&queue->backlog)) {
- if (cpu != smp_processor_id()) {
- struct rps_remote_softirq_cpus *rcpus =
- &__get_cpu_var(rps_remote_softirq_cpus);
-
- cpu_set(cpu, rcpus->mask[rcpus->select]);
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
- } else
- __napi_schedule(&queue->backlog);
+ if (napi_schedule_prep(&sd->backlog)) {
+ if (!rps_ipi_queued(sd))
+ __napi_schedule(&sd->backlog);
}
goto enqueue;
}
- spin_unlock(&queue->input_pkt_queue.lock);
+ rps_unlock(sd);
__get_cpu_var(netdev_rx_stat).dropped++;
local_irq_restore(flags);
@@ -2358,7 +2451,7 @@ enqueue:
int netif_rx(struct sk_buff *skb)
{
- int cpu;
+ int ret;
/* if netpoll wants it, pretend we never saw it */
if (netpoll_rx(skb))
@@ -2367,11 +2460,29 @@ int netif_rx(struct sk_buff *skb)
if (!skb->tstamp.tv64)
net_timestamp(skb);
- cpu = get_rps_cpu(skb->dev, skb);
- if (cpu < 0)
- cpu = smp_processor_id();
+#ifdef CONFIG_RPS
+ {
+ struct rps_dev_flow voidflow, *rflow = &voidflow;
+ int cpu;
- return enqueue_to_backlog(skb, cpu);
+ rcu_read_lock();
+
+ cpu = get_rps_cpu(skb->dev, skb, &rflow);
+ if (cpu < 0)
+ cpu = smp_processor_id();
+
+ ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+
+ rcu_read_unlock();
+ }
+#else
+ {
+ unsigned int qtail;
+ ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+ put_cpu();
+ }
+#endif
+ return ret;
}
EXPORT_SYMBOL(netif_rx);
@@ -2608,10 +2719,60 @@ void netif_nit_deliver(struct sk_buff *skb)
rcu_read_unlock();
}
-int __netif_receive_skb(struct sk_buff *skb)
+static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
+ struct net_device *master)
+{
+ if (skb->pkt_type == PACKET_HOST) {
+ u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
+
+ memcpy(dest, master->dev_addr, ETH_ALEN);
+ }
+}
+
+/* On bonding slaves other than the currently active slave, suppress
+ * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
+ * ARP on active-backup slaves with arp_validate enabled.
+ */
+int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
+{
+ struct net_device *dev = skb->dev;
+
+ if (master->priv_flags & IFF_MASTER_ARPMON)
+ dev->last_rx = jiffies;
+
+ if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
+ /* Do address unmangle. The local destination address
+ * will be always the one master has. Provides the right
+ * functionality in a bridge.
+ */
+ skb_bond_set_mac_by_master(skb, master);
+ }
+
+ if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
+ if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
+ skb->protocol == __cpu_to_be16(ETH_P_ARP))
+ return 0;
+
+ if (master->priv_flags & IFF_MASTER_ALB) {
+ if (skb->pkt_type != PACKET_BROADCAST &&
+ skb->pkt_type != PACKET_MULTICAST)
+ return 0;
+ }
+ if (master->priv_flags & IFF_MASTER_8023AD &&
+ skb->protocol == __cpu_to_be16(ETH_P_SLOW))
+ return 0;
+
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(__skb_bond_should_drop);
+
+static int __netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
+ struct net_device *master;
struct net_device *null_or_orig;
struct net_device *null_or_bond;
int ret = NET_RX_DROP;
@@ -2632,11 +2793,12 @@ int __netif_receive_skb(struct sk_buff *skb)
null_or_orig = NULL;
orig_dev = skb->dev;
- if (orig_dev->master) {
- if (skb_bond_should_drop(skb))
+ master = ACCESS_ONCE(orig_dev->master);
+ if (master) {
+ if (skb_bond_should_drop(skb, master))
null_or_orig = orig_dev; /* deliver only exact match */
else
- skb->dev = orig_dev->master;
+ skb->dev = master;
}
__get_cpu_var(netdev_rx_stat).total++;
@@ -2735,29 +2897,46 @@ out:
*/
int netif_receive_skb(struct sk_buff *skb)
{
- int cpu;
+#ifdef CONFIG_RPS
+ struct rps_dev_flow voidflow, *rflow = &voidflow;
+ int cpu, ret;
+
+ rcu_read_lock();
- cpu = get_rps_cpu(skb->dev, skb);
+ cpu = get_rps_cpu(skb->dev, skb, &rflow);
- if (cpu < 0)
- return __netif_receive_skb(skb);
- else
- return enqueue_to_backlog(skb, cpu);
+ if (cpu >= 0) {
+ ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+ rcu_read_unlock();
+ } else {
+ rcu_read_unlock();
+ ret = __netif_receive_skb(skb);
+ }
+
+ return ret;
+#else
+ return __netif_receive_skb(skb);
+#endif
}
EXPORT_SYMBOL(netif_receive_skb);
-/* Network device is going away, flush any packets still pending */
+/* Network device is going away, flush any packets still pending
+ * Called with irqs disabled.
+ */
static void flush_backlog(void *arg)
{
struct net_device *dev = arg;
- struct softnet_data *queue = &__get_cpu_var(softnet_data);
+ struct softnet_data *sd = &__get_cpu_var(softnet_data);
struct sk_buff *skb, *tmp;
- skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
+ rps_lock(sd);
+ skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp)
if (skb->dev == dev) {
- __skb_unlink(skb, &queue->input_pkt_queue);
+ __skb_unlink(skb, &sd->input_pkt_queue);
kfree_skb(skb);
+ input_queue_head_incr(sd);
}
+ rps_unlock(sd);
}
static int napi_gro_complete(struct sk_buff *skb)
@@ -3063,24 +3242,27 @@ EXPORT_SYMBOL(napi_gro_frags);
static int process_backlog(struct napi_struct *napi, int quota)
{
int work = 0;
- struct softnet_data *queue = &__get_cpu_var(softnet_data);
- unsigned long start_time = jiffies;
+ struct softnet_data *sd = &__get_cpu_var(softnet_data);
napi->weight = weight_p;
do {
struct sk_buff *skb;
- spin_lock_irq(&queue->input_pkt_queue.lock);
- skb = __skb_dequeue(&queue->input_pkt_queue);
+ local_irq_disable();
+ rps_lock(sd);
+ skb = __skb_dequeue(&sd->input_pkt_queue);
if (!skb) {
__napi_complete(napi);
- spin_unlock_irq(&queue->input_pkt_queue.lock);
+ rps_unlock(sd);
+ local_irq_enable();
break;
}
- spin_unlock_irq(&queue->input_pkt_queue.lock);
+ input_queue_head_incr(sd);
+ rps_unlock(sd);
+ local_irq_enable();
__netif_receive_skb(skb);
- } while (++work < quota && jiffies == start_time);
+ } while (++work < quota);
return work;
}
@@ -3169,20 +3351,32 @@ void netif_napi_del(struct napi_struct *napi)
EXPORT_SYMBOL(netif_napi_del);
/*
- * net_rps_action sends any pending IPI's for rps. This is only called from
- * softirq and interrupts must be enabled.
+ * net_rps_action sends any pending IPI's for rps.
+ * Note: called with local irq disabled, but exits with local irq enabled.
*/
-static void net_rps_action(cpumask_t *mask)
+static void net_rps_action_and_irq_disable(void)
{
- int cpu;
+#ifdef CONFIG_RPS
+ struct softnet_data *sd = &__get_cpu_var(softnet_data);
+ struct softnet_data *remsd = sd->rps_ipi_list;
- /* Send pending IPI's to kick RPS processing on remote cpus. */
- for_each_cpu_mask_nr(cpu, *mask) {
- struct softnet_data *queue = &per_cpu(softnet_data, cpu);
- if (cpu_online(cpu))
- __smp_call_function_single(cpu, &queue->csd, 0);
- }
- cpus_clear(*mask);
+ if (remsd) {
+ sd->rps_ipi_list = NULL;
+
+ local_irq_enable();
+
+ /* Send pending IPI's to kick RPS processing on remote cpus. */
+ while (remsd) {
+ struct softnet_data *next = remsd->rps_ipi_next;
+
+ if (cpu_online(remsd->cpu))
+ __smp_call_function_single(remsd->cpu,
+ &remsd->csd, 0);
+ remsd = next;
+ }
+ } else
+#endif
+ local_irq_enable();
}
static void net_rx_action(struct softirq_action *h)
@@ -3191,8 +3385,6 @@ static void net_rx_action(struct softirq_action *h)
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
void *have;
- int select;
- struct rps_remote_softirq_cpus *rcpus;
local_irq_disable();
@@ -3255,13 +3447,7 @@ static void net_rx_action(struct softirq_action *h)
netpoll_poll_unlock(have);
}
out:
- rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
- select = rcpus->select;
- rcpus->select ^= 1;
-
- local_irq_enable();
-
- net_rps_action(&rcpus->mask[select]);
+ net_rps_action_and_irq_disable();
#ifdef CONFIG_NET_DMA
/*
@@ -3733,11 +3919,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
slave->master = master;
- synchronize_net();
-
- if (old)
+ if (old) {
+ synchronize_net();
dev_put(old);
-
+ }
if (master)
slave->flags |= IFF_SLAVE;
else
@@ -3914,562 +4099,6 @@ void dev_set_rx_mode(struct net_device *dev)
netif_addr_unlock_bh(dev);
}
-/* hw addresses list handling functions */
-
-static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
- int addr_len, unsigned char addr_type)
-{
- struct netdev_hw_addr *ha;
- int alloc_size;
-
- if (addr_len > MAX_ADDR_LEN)
- return -EINVAL;
-
- list_for_each_entry(ha, &list->list, list) {
- if (!memcmp(ha->addr, addr, addr_len) &&
- ha->type == addr_type) {
- ha->refcount++;
- return 0;
- }
- }
-
-
- alloc_size = sizeof(*ha);
- if (alloc_size < L1_CACHE_BYTES)
- alloc_size = L1_CACHE_BYTES;
- ha = kmalloc(alloc_size, GFP_ATOMIC);
- if (!ha)
- return -ENOMEM;
- memcpy(ha->addr, addr, addr_len);
- ha->type = addr_type;
- ha->refcount = 1;
- ha->synced = false;
- list_add_tail_rcu(&ha->list, &list->list);
- list->count++;
- return 0;
-}
-
-static void ha_rcu_free(struct rcu_head *head)
-{
- struct netdev_hw_addr *ha;
-
- ha = container_of(head, struct netdev_hw_addr, rcu_head);
- kfree(ha);
-}
-
-static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
- int addr_len, unsigned char addr_type)
-{
- struct netdev_hw_addr *ha;
-
- list_for_each_entry(ha, &list->list, list) {
- if (!memcmp(ha->addr, addr, addr_len) &&
- (ha->type == addr_type || !addr_type)) {
- if (--ha->refcount)
- return 0;
- list_del_rcu(&ha->list);
- call_rcu(&ha->rcu_head, ha_rcu_free);
- list->count--;
- return 0;
- }
- }
- return -ENOENT;
-}
-
-static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
- struct netdev_hw_addr_list *from_list,
- int addr_len,
- unsigned char addr_type)
-{
- int err;
- struct netdev_hw_addr *ha, *ha2;
- unsigned char type;
-
- list_for_each_entry(ha, &from_list->list, list) {
- type = addr_type ? addr_type : ha->type;
- err = __hw_addr_add(to_list, ha->addr, addr_len, type);
- if (err)
- goto unroll;
- }
- return 0;
-
-unroll:
- list_for_each_entry(ha2, &from_list->list, list) {
- if (ha2 == ha)
- break;
- type = addr_type ? addr_type : ha2->type;
- __hw_addr_del(to_list, ha2->addr, addr_len, type);
- }
- return err;
-}
-
-static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
- struct netdev_hw_addr_list *from_list,
- int addr_len,
- unsigned char addr_type)
-{
- struct netdev_hw_addr *ha;
- unsigned char type;
-
- list_for_each_entry(ha, &from_list->list, list) {
- type = addr_type ? addr_type : ha->type;
- __hw_addr_del(to_list, ha->addr, addr_len, addr_type);
- }
-}
-
-static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
- struct netdev_hw_addr_list *from_list,
- int addr_len)
-{
- int err = 0;
- struct netdev_hw_addr *ha, *tmp;
-
- list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
- if (!ha->synced) {
- err = __hw_addr_add(to_list, ha->addr,
- addr_len, ha->type);
- if (err)
- break;
- ha->synced = true;
- ha->refcount++;
- } else if (ha->refcount == 1) {
- __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
- __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
- }
- }
- return err;
-}
-
-static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
- struct netdev_hw_addr_list *from_list,
- int addr_len)
-{
- struct netdev_hw_addr *ha, *tmp;
-
- list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
- if (ha->synced) {
- __hw_addr_del(to_list, ha->addr,
- addr_len, ha->type);
- ha->synced = false;
- __hw_addr_del(from_list, ha->addr,
- addr_len, ha->type);
- }
- }
-}
-
-static void __hw_addr_flush(struct netdev_hw_addr_list *list)
-{
- struct netdev_hw_addr *ha, *tmp;
-
- list_for_each_entry_safe(ha, tmp, &list->list, list) {
- list_del_rcu(&ha->list);
- call_rcu(&ha->rcu_head, ha_rcu_free);
- }
- list->count = 0;
-}
-
-static void __hw_addr_init(struct netdev_hw_addr_list *list)
-{
- INIT_LIST_HEAD(&list->list);
- list->count = 0;
-}
-
-/* Device addresses handling functions */
-
-static void dev_addr_flush(struct net_device *dev)
-{
- /* rtnl_mutex must be held here */
-
- __hw_addr_flush(&dev->dev_addrs);
- dev->dev_addr = NULL;
-}
-
-static int dev_addr_init(struct net_device *dev)
-{
- unsigned char addr[MAX_ADDR_LEN];
- struct netdev_hw_addr *ha;
- int err;
-
- /* rtnl_mutex must be held here */
-
- __hw_addr_init(&dev->dev_addrs);
- memset(addr, 0, sizeof(addr));
- err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
- NETDEV_HW_ADDR_T_LAN);
- if (!err) {
- /*
- * Get the first (previously created) address from the list
- * and set dev_addr pointer to this location.
- */
- ha = list_first_entry(&dev->dev_addrs.list,
- struct netdev_hw_addr, list);
- dev->dev_addr = ha->addr;
- }
- return err;
-}
-
-/**
- * dev_addr_add - Add a device address
- * @dev: device
- * @addr: address to add
- * @addr_type: address type
- *
- * Add a device address to the device or increase the reference count if
- * it already exists.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_addr_add(struct net_device *dev, unsigned char *addr,
- unsigned char addr_type)
-{
- int err;
-
- ASSERT_RTNL();
-
- err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
- if (!err)
- call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
- return err;
-}
-EXPORT_SYMBOL(dev_addr_add);
-
-/**
- * dev_addr_del - Release a device address.
- * @dev: device
- * @addr: address to delete
- * @addr_type: address type
- *
- * Release reference to a device address and remove it from the device
- * if the reference count drops to zero.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_addr_del(struct net_device *dev, unsigned char *addr,
- unsigned char addr_type)
-{
- int err;
- struct netdev_hw_addr *ha;
-
- ASSERT_RTNL();
-
- /*
- * We can not remove the first address from the list because
- * dev->dev_addr points to that.
- */
- ha = list_first_entry(&dev->dev_addrs.list,
- struct netdev_hw_addr, list);
- if (ha->addr == dev->dev_addr && ha->refcount == 1)
- return -ENOENT;
-
- err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
- addr_type);
- if (!err)
- call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
- return err;
-}
-EXPORT_SYMBOL(dev_addr_del);
-
-/**
- * dev_addr_add_multiple - Add device addresses from another device
- * @to_dev: device to which addresses will be added
- * @from_dev: device from which addresses will be added
- * @addr_type: address type - 0 means type will be used from from_dev
- *
- * Add device addresses of the one device to another.
- **
- * The caller must hold the rtnl_mutex.
- */
-int dev_addr_add_multiple(struct net_device *to_dev,
- struct net_device *from_dev,
- unsigned char addr_type)
-{
- int err;
-
- ASSERT_RTNL();
-
- if (from_dev->addr_len != to_dev->addr_len)
- return -EINVAL;
- err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
- to_dev->addr_len, addr_type);
- if (!err)
- call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
- return err;
-}
-EXPORT_SYMBOL(dev_addr_add_multiple);
-
-/**
- * dev_addr_del_multiple - Delete device addresses by another device
- * @to_dev: device where the addresses will be deleted
- * @from_dev: device by which addresses the addresses will be deleted
- * @addr_type: address type - 0 means type will used from from_dev
- *
- * Deletes addresses in to device by the list of addresses in from device.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_addr_del_multiple(struct net_device *to_dev,
- struct net_device *from_dev,
- unsigned char addr_type)
-{
- ASSERT_RTNL();
-
- if (from_dev->addr_len != to_dev->addr_len)
- return -EINVAL;
- __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
- to_dev->addr_len, addr_type);
- call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
- return 0;
-}
-EXPORT_SYMBOL(dev_addr_del_multiple);
-
-/* multicast addresses handling functions */
-
-int __dev_addr_delete(struct dev_addr_list **list, int *count,
- void *addr, int alen, int glbl)
-{
- struct dev_addr_list *da;
-
- for (; (da = *list) != NULL; list = &da->next) {
- if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
- alen == da->da_addrlen) {
- if (glbl) {
- int old_glbl = da->da_gusers;
- da->da_gusers = 0;
- if (old_glbl == 0)
- break;
- }
- if (--da->da_users)
- return 0;
-
- *list = da->next;
- kfree(da);
- (*count)--;
- return 0;
- }
- }
- return -ENOENT;
-}
-
-int __dev_addr_add(struct dev_addr_list **list, int *count,
- void *addr, int alen, int glbl)
-{
- struct dev_addr_list *da;
-
- for (da = *list; da != NULL; da = da->next) {
- if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
- da->da_addrlen == alen) {
- if (glbl) {
- int old_glbl = da->da_gusers;
- da->da_gusers = 1;
- if (old_glbl)
- return 0;
- }
- da->da_users++;
- return 0;
- }
- }
-
- da = kzalloc(sizeof(*da), GFP_ATOMIC);
- if (da == NULL)
- return -ENOMEM;
- memcpy(da->da_addr, addr, alen);
- da->da_addrlen = alen;
- da->da_users = 1;
- da->da_gusers = glbl ? 1 : 0;
- da->next = *list;
- *list = da;
- (*count)++;
- return 0;
-}
-
-/**
- * dev_unicast_delete - Release secondary unicast address.
- * @dev: device
- * @addr: address to delete
- *
- * Release reference to a secondary unicast address and remove it
- * from the device if the reference count drops to zero.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_unicast_delete(struct net_device *dev, void *addr)
-{
- int err;
-
- ASSERT_RTNL();
-
- netif_addr_lock_bh(dev);
- err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_UNICAST);
- if (!err)
- __dev_set_rx_mode(dev);
- netif_addr_unlock_bh(dev);
- return err;
-}
-EXPORT_SYMBOL(dev_unicast_delete);
-
-/**
- * dev_unicast_add - add a secondary unicast address
- * @dev: device
- * @addr: address to add
- *
- * Add a secondary unicast address to the device or increase
- * the reference count if it already exists.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_unicast_add(struct net_device *dev, void *addr)
-{
- int err;
-
- ASSERT_RTNL();
-
- netif_addr_lock_bh(dev);
- err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_UNICAST);
- if (!err)
- __dev_set_rx_mode(dev);
- netif_addr_unlock_bh(dev);
- return err;
-}
-EXPORT_SYMBOL(dev_unicast_add);
-
-int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
- struct dev_addr_list **from, int *from_count)
-{
- struct dev_addr_list *da, *next;
- int err = 0;
-
- da = *from;
- while (da != NULL) {
- next = da->next;
- if (!da->da_synced) {
- err = __dev_addr_add(to, to_count,
- da->da_addr, da->da_addrlen, 0);
- if (err < 0)
- break;
- da->da_synced = 1;
- da->da_users++;
- } else if (da->da_users == 1) {
- __dev_addr_delete(to, to_count,
- da->da_addr, da->da_addrlen, 0);
- __dev_addr_delete(from, from_count,
- da->da_addr, da->da_addrlen, 0);
- }
- da = next;
- }
- return err;
-}
-EXPORT_SYMBOL_GPL(__dev_addr_sync);
-
-void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
- struct dev_addr_list **from, int *from_count)
-{
- struct dev_addr_list *da, *next;
-
- da = *from;
- while (da != NULL) {
- next = da->next;
- if (da->da_synced) {
- __dev_addr_delete(to, to_count,
- da->da_addr, da->da_addrlen, 0);
- da->da_synced = 0;
- __dev_addr_delete(from, from_count,
- da->da_addr, da->da_addrlen, 0);
- }
- da = next;
- }
-}
-EXPORT_SYMBOL_GPL(__dev_addr_unsync);
-
-/**
- * dev_unicast_sync - Synchronize device's unicast list to another device
- * @to: destination device
- * @from: source device
- *
- * Add newly added addresses to the destination device and release
- * addresses that have no users left. The source device must be
- * locked by netif_tx_lock_bh.
- *
- * This function is intended to be called from the dev->set_rx_mode
- * function of layered software devices.
- */
-int dev_unicast_sync(struct net_device *to, struct net_device *from)
-{
- int err = 0;
-
- if (to->addr_len != from->addr_len)
- return -EINVAL;
-
- netif_addr_lock_bh(to);
- err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
- if (!err)
- __dev_set_rx_mode(to);
- netif_addr_unlock_bh(to);
- return err;
-}
-EXPORT_SYMBOL(dev_unicast_sync);
-
-/**
- * dev_unicast_unsync - Remove synchronized addresses from the destination device
- * @to: destination device
- * @from: source device
- *
- * Remove all addresses that were added to the destination device by
- * dev_unicast_sync(). This function is intended to be called from the
- * dev->stop function of layered software devices.
- */
-void dev_unicast_unsync(struct net_device *to, struct net_device *from)
-{
- if (to->addr_len != from->addr_len)
- return;
-
- netif_addr_lock_bh(from);
- netif_addr_lock(to);
- __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
- __dev_set_rx_mode(to);
- netif_addr_unlock(to);
- netif_addr_unlock_bh(from);
-}
-EXPORT_SYMBOL(dev_unicast_unsync);
-
-static void dev_unicast_flush(struct net_device *dev)
-{
- netif_addr_lock_bh(dev);
- __hw_addr_flush(&dev->uc);
- netif_addr_unlock_bh(dev);
-}
-
-static void dev_unicast_init(struct net_device *dev)
-{
- __hw_addr_init(&dev->uc);
-}
-
-
-static void __dev_addr_discard(struct dev_addr_list **list)
-{
- struct dev_addr_list *tmp;
-
- while (*list != NULL) {
- tmp = *list;
- *list = tmp->next;
- if (tmp->da_users > tmp->da_gusers)
- printk("__dev_addr_discard: address leakage! "
- "da_users=%d\n", tmp->da_users);
- kfree(tmp);
- }
-}
-
-static void dev_addr_discard(struct net_device *dev)
-{
- netif_addr_lock_bh(dev);
-
- __dev_addr_discard(&dev->mc_list);
- netdev_mc_count(dev) = 0;
-
- netif_addr_unlock_bh(dev);
-}
-
/**
* dev_get_flags - get flags reported to userspace
* @dev: device
@@ -4780,8 +4409,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
- return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
- dev->addr_len, 1);
+ return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
case SIOCDELMULTI:
if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
@@ -4789,8 +4417,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
- return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
- dev->addr_len, 1);
+ return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
case SIOCSIFTXQLEN:
if (ifr->ifr_qlen < 0)
@@ -5097,8 +4724,8 @@ static void rollback_registered_many(struct list_head *head)
/*
* Flush the unicast and multicast chains
*/
- dev_unicast_flush(dev);
- dev_addr_discard(dev);
+ dev_uc_flush(dev);
+ dev_mc_flush(dev);
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
@@ -5247,6 +4874,7 @@ int register_netdevice(struct net_device *dev)
dev->iflink = -1;
+#ifdef CONFIG_RPS
if (!dev->num_rx_queues) {
/*
* Allocate a single RX queue if driver never called
@@ -5263,7 +4891,7 @@ int register_netdevice(struct net_device *dev)
atomic_set(&dev->_rx->count, 1);
dev->num_rx_queues = 1;
}
-
+#endif
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
@@ -5621,11 +5249,13 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *), unsigned int queue_count)
{
struct netdev_queue *tx;
- struct netdev_rx_queue *rx;
struct net_device *dev;
size_t alloc_size;
struct net_device *p;
+#ifdef CONFIG_RPS
+ struct netdev_rx_queue *rx;
int i;
+#endif
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -5651,6 +5281,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
goto free_p;
}
+#ifdef CONFIG_RPS
rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
if (!rx) {
printk(KERN_ERR "alloc_netdev: Unable to allocate "
@@ -5666,6 +5297,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
*/
for (i = 0; i < queue_count; i++)
rx[i].first = rx;
+#endif
dev = PTR_ALIGN(p, NETDEV_ALIGN);
dev->padded = (char *)dev - (char *)p;
@@ -5673,7 +5305,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
if (dev_addr_init(dev))
goto free_rx;
- dev_unicast_init(dev);
+ dev_mc_init(dev);
+ dev_uc_init(dev);
dev_net_set(dev, &init_net);
@@ -5681,8 +5314,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev->num_tx_queues = queue_count;
dev->real_num_tx_queues = queue_count;
+#ifdef CONFIG_RPS
dev->_rx = rx;
dev->num_rx_queues = queue_count;
+#endif
dev->gso_max_size = GSO_MAX_SIZE;
@@ -5699,8 +5334,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
return dev;
free_rx:
+#ifdef CONFIG_RPS
kfree(rx);
free_tx:
+#endif
kfree(tx);
free_p:
kfree(p);
@@ -5903,8 +5540,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/*
* Flush the unicast and multicast chains
*/
- dev_unicast_flush(dev);
- dev_addr_discard(dev);
+ dev_uc_flush(dev);
+ dev_mc_flush(dev);
netdev_unregister_kobject(dev);
@@ -5980,8 +5617,10 @@ static int dev_cpu_callback(struct notifier_block *nfb,
local_irq_enable();
/* Process offline CPU's input_pkt_queue */
- while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
+ while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
netif_rx(skb);
+ input_queue_head_incr(oldsd);
+ }
return NOTIFY_OK;
}
@@ -6197,21 +5836,23 @@ static int __init net_dev_init(void)
*/
for_each_possible_cpu(i) {
- struct softnet_data *queue;
+ struct softnet_data *sd = &per_cpu(softnet_data, i);
- queue = &per_cpu(softnet_data, i);
- skb_queue_head_init(&queue->input_pkt_queue);
- queue->completion_queue = NULL;
- INIT_LIST_HEAD(&queue->poll_list);
+ skb_queue_head_init(&sd->input_pkt_queue);
+ sd->completion_queue = NULL;
+ INIT_LIST_HEAD(&sd->poll_list);
- queue->csd.func = trigger_softirq;
- queue->csd.info = queue;
- queue->csd.flags = 0;
+#ifdef CONFIG_RPS
+ sd->csd.func = rps_trigger_softirq;
+ sd->csd.info = sd;
+ sd->csd.flags = 0;
+ sd->cpu = i;
+#endif
- queue->backlog.poll = process_backlog;
- queue->backlog.weight = weight_p;
- queue->backlog.gro_list = NULL;
- queue->backlog.gro_count = 0;
+ sd->backlog.poll = process_backlog;
+ sd->backlog.weight = weight_p;
+ sd->backlog.gro_list = NULL;
+ sd->backlog.gro_count = 0;
}
dev_boot_phase = 0;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
new file mode 100644
index 000000000000..508f9c18992f
--- /dev/null
+++ b/net/core/dev_addr_lists.c
@@ -0,0 +1,741 @@
+/*
+ * net/core/dev_addr_lists.c - Functions for handling net device lists
+ * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This file contains functions for working with unicast, multicast and device
+ * addresses lists.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+
+/*
+ * General list handling functions
+ */
+
+static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
+ unsigned char *addr, int addr_len,
+ unsigned char addr_type, bool global)
+{
+ struct netdev_hw_addr *ha;
+ int alloc_size;
+
+ if (addr_len > MAX_ADDR_LEN)
+ return -EINVAL;
+
+ list_for_each_entry(ha, &list->list, list) {
+ if (!memcmp(ha->addr, addr, addr_len) &&
+ ha->type == addr_type) {
+ if (global) {
+ /* check if addr is already used as global */
+ if (ha->global_use)
+ return 0;
+ else
+ ha->global_use = true;
+ }
+ ha->refcount++;
+ return 0;
+ }
+ }
+
+
+ alloc_size = sizeof(*ha);
+ if (alloc_size < L1_CACHE_BYTES)
+ alloc_size = L1_CACHE_BYTES;
+ ha = kmalloc(alloc_size, GFP_ATOMIC);
+ if (!ha)
+ return -ENOMEM;
+ memcpy(ha->addr, addr, addr_len);
+ ha->type = addr_type;
+ ha->refcount = 1;
+ ha->global_use = global;
+ ha->synced = false;
+ list_add_tail_rcu(&ha->list, &list->list);
+ list->count++;
+ return 0;
+}
+
+static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
+ int addr_len, unsigned char addr_type)
+{
+ return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
+}
+
+static void ha_rcu_free(struct rcu_head *head)
+{
+ struct netdev_hw_addr *ha;
+
+ ha = container_of(head, struct netdev_hw_addr, rcu_head);
+ kfree(ha);
+}
+
+static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
+ unsigned char *addr, int addr_len,
+ unsigned char addr_type, bool global)
+{
+ struct netdev_hw_addr *ha;
+
+ list_for_each_entry(ha, &list->list, list) {
+ if (!memcmp(ha->addr, addr, addr_len) &&
+ (ha->type == addr_type || !addr_type)) {
+ if (global) {
+ if (!ha->global_use)
+ break;
+ else
+ ha->global_use = false;
+ }
+ if (--ha->refcount)
+ return 0;
+ list_del_rcu(&ha->list);
+ call_rcu(&ha->rcu_head, ha_rcu_free);
+ list->count--;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
+ int addr_len, unsigned char addr_type)
+{
+ return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
+}
+
+int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+ struct netdev_hw_addr_list *from_list,
+ int addr_len, unsigned char addr_type)
+{
+ int err;
+ struct netdev_hw_addr *ha, *ha2;
+ unsigned char type;
+
+ list_for_each_entry(ha, &from_list->list, list) {
+ type = addr_type ? addr_type : ha->type;
+ err = __hw_addr_add(to_list, ha->addr, addr_len, type);
+ if (err)
+ goto unroll;
+ }
+ return 0;
+
+unroll:
+ list_for_each_entry(ha2, &from_list->list, list) {
+ if (ha2 == ha)
+ break;
+ type = addr_type ? addr_type : ha2->type;
+ __hw_addr_del(to_list, ha2->addr, addr_len, type);
+ }
+ return err;
+}
+EXPORT_SYMBOL(__hw_addr_add_multiple);
+
+void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+ struct netdev_hw_addr_list *from_list,
+ int addr_len, unsigned char addr_type)
+{
+ struct netdev_hw_addr *ha;
+ unsigned char type;
+
+ list_for_each_entry(ha, &from_list->list, list) {
+ type = addr_type ? addr_type : ha->type;
+ __hw_addr_del(to_list, ha->addr, addr_len, addr_type);
+ }
+}
+EXPORT_SYMBOL(__hw_addr_del_multiple);
+
+int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+ struct netdev_hw_addr_list *from_list,
+ int addr_len)
+{
+ int err = 0;
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
+ if (!ha->synced) {
+ err = __hw_addr_add(to_list, ha->addr,
+ addr_len, ha->type);
+ if (err)
+ break;
+ ha->synced = true;
+ ha->refcount++;
+ } else if (ha->refcount == 1) {
+ __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
+ __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
+ }
+ }
+ return err;
+}
+EXPORT_SYMBOL(__hw_addr_sync);
+
+void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+ struct netdev_hw_addr_list *from_list,
+ int addr_len)
+{
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
+ if (ha->synced) {
+ __hw_addr_del(to_list, ha->addr,
+ addr_len, ha->type);
+ ha->synced = false;
+ __hw_addr_del(from_list, ha->addr,
+ addr_len, ha->type);
+ }
+ }
+}
+EXPORT_SYMBOL(__hw_addr_unsync);
+
+void __hw_addr_flush(struct netdev_hw_addr_list *list)
+{
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ list_del_rcu(&ha->list);
+ call_rcu(&ha->rcu_head, ha_rcu_free);
+ }
+ list->count = 0;
+}
+EXPORT_SYMBOL(__hw_addr_flush);
+
+void __hw_addr_init(struct netdev_hw_addr_list *list)
+{
+ INIT_LIST_HEAD(&list->list);
+ list->count = 0;
+}
+EXPORT_SYMBOL(__hw_addr_init);
+
+/*
+ * Device addresses handling functions
+ */
+
+/**
+ * dev_addr_flush - Flush device address list
+ * @dev: device
+ *
+ * Flush device address list and reset ->dev_addr.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+void dev_addr_flush(struct net_device *dev)
+{
+ /* rtnl_mutex must be held here */
+
+ __hw_addr_flush(&dev->dev_addrs);
+ dev->dev_addr = NULL;
+}
+EXPORT_SYMBOL(dev_addr_flush);
+
+/**
+ * dev_addr_init - Init device address list
+ * @dev: device
+ *
+ * Init device address list and create the first element,
+ * used by ->dev_addr.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_init(struct net_device *dev)
+{
+ unsigned char addr[MAX_ADDR_LEN];
+ struct netdev_hw_addr *ha;
+ int err;
+
+ /* rtnl_mutex must be held here */
+
+ __hw_addr_init(&dev->dev_addrs);
+ memset(addr, 0, sizeof(addr));
+ err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
+ NETDEV_HW_ADDR_T_LAN);
+ if (!err) {
+ /*
+ * Get the first (previously created) address from the list
+ * and set dev_addr pointer to this location.
+ */
+ ha = list_first_entry(&dev->dev_addrs.list,
+ struct netdev_hw_addr, list);
+ dev->dev_addr = ha->addr;
+ }
+ return err;
+}
+EXPORT_SYMBOL(dev_addr_init);
+
+/**
+ * dev_addr_add - Add a device address
+ * @dev: device
+ * @addr: address to add
+ * @addr_type: address type
+ *
+ * Add a device address to the device or increase the reference count if
+ * it already exists.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add(struct net_device *dev, unsigned char *addr,
+ unsigned char addr_type)
+{
+ int err;
+
+ ASSERT_RTNL();
+
+ err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
+ if (!err)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ return err;
+}
+EXPORT_SYMBOL(dev_addr_add);
+
+/**
+ * dev_addr_del - Release a device address.
+ * @dev: device
+ * @addr: address to delete
+ * @addr_type: address type
+ *
+ * Release reference to a device address and remove it from the device
+ * if the reference count drops to zero.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del(struct net_device *dev, unsigned char *addr,
+ unsigned char addr_type)
+{
+ int err;
+ struct netdev_hw_addr *ha;
+
+ ASSERT_RTNL();
+
+ /*
+ * We can not remove the first address from the list because
+ * dev->dev_addr points to that.
+ */
+ ha = list_first_entry(&dev->dev_addrs.list,
+ struct netdev_hw_addr, list);
+ if (ha->addr == dev->dev_addr && ha->refcount == 1)
+ return -ENOENT;
+
+ err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
+ addr_type);
+ if (!err)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ return err;
+}
+EXPORT_SYMBOL(dev_addr_del);
+
+/**
+ * dev_addr_add_multiple - Add device addresses from another device
+ * @to_dev: device to which addresses will be added
+ * @from_dev: device from which addresses will be added
+ * @addr_type: address type - 0 means type will be used from from_dev
+ *
+ * Add device addresses of the one device to another.
+ **
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add_multiple(struct net_device *to_dev,
+ struct net_device *from_dev,
+ unsigned char addr_type)
+{
+ int err;
+
+ ASSERT_RTNL();
+
+ if (from_dev->addr_len != to_dev->addr_len)
+ return -EINVAL;
+ err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
+ to_dev->addr_len, addr_type);
+ if (!err)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+ return err;
+}
+EXPORT_SYMBOL(dev_addr_add_multiple);
+
+/**
+ * dev_addr_del_multiple - Delete device addresses by another device
+ * @to_dev: device where the addresses will be deleted
+ * @from_dev: device by which addresses the addresses will be deleted
+ * @addr_type: address type - 0 means type will used from from_dev
+ *
+ * Deletes addresses in to device by the list of addresses in from device.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del_multiple(struct net_device *to_dev,
+ struct net_device *from_dev,
+ unsigned char addr_type)
+{
+ ASSERT_RTNL();
+
+ if (from_dev->addr_len != to_dev->addr_len)
+ return -EINVAL;
+ __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
+ to_dev->addr_len, addr_type);
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+ return 0;
+}
+EXPORT_SYMBOL(dev_addr_del_multiple);
+
+/*
+ * Unicast list handling functions
+ */
+
+/**
+ * dev_uc_add - Add a secondary unicast address
+ * @dev: device
+ * @addr: address to add
+ *
+ * Add a secondary unicast address to the device or increase
+ * the reference count if it already exists.
+ */
+int dev_uc_add(struct net_device *dev, unsigned char *addr)
+{
+ int err;
+
+ netif_addr_lock_bh(dev);
+ err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
+ NETDEV_HW_ADDR_T_UNICAST);
+ if (!err)
+ __dev_set_rx_mode(dev);
+ netif_addr_unlock_bh(dev);
+ return err;
+}
+EXPORT_SYMBOL(dev_uc_add);
+
+/**
+ * dev_uc_del - Release secondary unicast address.
+ * @dev: device
+ * @addr: address to delete
+ *
+ * Release reference to a secondary unicast address and remove it
+ * from the device if the reference count drops to zero.
+ */
+int dev_uc_del(struct net_device *dev, unsigned char *addr)
+{
+ int err;
+
+ netif_addr_lock_bh(dev);
+ err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
+ NETDEV_HW_ADDR_T_UNICAST);
+ if (!err)
+ __dev_set_rx_mode(dev);
+ netif_addr_unlock_bh(dev);
+ return err;
+}
+EXPORT_SYMBOL(dev_uc_del);
+
+/**
+ * dev_uc_sync - Synchronize device's unicast list to another device
+ * @to: destination device
+ * @from: source device
+ *
+ * Add newly added addresses to the destination device and release
+ * addresses that have no users left. The source device must be
+ * locked by netif_tx_lock_bh.
+ *
+ * This function is intended to be called from the dev->set_rx_mode
+ * function of layered software devices.
+ */
+int dev_uc_sync(struct net_device *to, struct net_device *from)
+{
+ int err = 0;
+
+ if (to->addr_len != from->addr_len)
+ return -EINVAL;
+
+ netif_addr_lock_bh(to);
+ err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
+ if (!err)
+ __dev_set_rx_mode(to);
+ netif_addr_unlock_bh(to);
+ return err;
+}
+EXPORT_SYMBOL(dev_uc_sync);
+
+/**
+ * dev_uc_unsync - Remove synchronized addresses from the destination device
+ * @to: destination device
+ * @from: source device
+ *
+ * Remove all addresses that were added to the destination device by
+ * dev_uc_sync(). This function is intended to be called from the
+ * dev->stop function of layered software devices.
+ */
+void dev_uc_unsync(struct net_device *to, struct net_device *from)
+{
+ if (to->addr_len != from->addr_len)
+ return;
+
+ netif_addr_lock_bh(from);
+ netif_addr_lock(to);
+ __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
+ __dev_set_rx_mode(to);
+ netif_addr_unlock(to);
+ netif_addr_unlock_bh(from);
+}
+EXPORT_SYMBOL(dev_uc_unsync);
+
+/**
+ * dev_uc_flush - Flush unicast addresses
+ * @dev: device
+ *
+ * Flush unicast addresses.
+ */
+void dev_uc_flush(struct net_device *dev)
+{
+ netif_addr_lock_bh(dev);
+ __hw_addr_flush(&dev->uc);
+ netif_addr_unlock_bh(dev);
+}
+EXPORT_SYMBOL(dev_uc_flush);
+
+/**
+ * dev_uc_flush - Init unicast address list
+ * @dev: device
+ *
+ * Init unicast address list.
+ */
+void dev_uc_init(struct net_device *dev)
+{
+ __hw_addr_init(&dev->uc);
+}
+EXPORT_SYMBOL(dev_uc_init);
+
+/*
+ * Multicast list handling functions
+ */
+
+static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
+ bool global)
+{
+ int err;
+
+ netif_addr_lock_bh(dev);
+ err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
+ NETDEV_HW_ADDR_T_MULTICAST, global);
+ if (!err)
+ __dev_set_rx_mode(dev);
+ netif_addr_unlock_bh(dev);
+ return err;
+}
+/**
+ * dev_mc_add - Add a multicast address
+ * @dev: device
+ * @addr: address to add
+ *
+ * Add a multicast address to the device or increase
+ * the reference count if it already exists.
+ */
+int dev_mc_add(struct net_device *dev, unsigned char *addr)
+{
+ return __dev_mc_add(dev, addr, false);
+}
+EXPORT_SYMBOL(dev_mc_add);
+
+/**
+ * dev_mc_add_global - Add a global multicast address
+ * @dev: device
+ * @addr: address to add
+ *
+ * Add a global multicast address to the device.
+ */
+int dev_mc_add_global(struct net_device *dev, unsigned char *addr)
+{
+ return __dev_mc_add(dev, addr, true);
+}
+EXPORT_SYMBOL(dev_mc_add_global);
+
+static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
+ bool global)
+{
+ int err;
+
+ netif_addr_lock_bh(dev);
+ err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len,
+ NETDEV_HW_ADDR_T_MULTICAST, global);
+ if (!err)
+ __dev_set_rx_mode(dev);
+ netif_addr_unlock_bh(dev);
+ return err;
+}
+
+/**
+ * dev_mc_del - Delete a multicast address.
+ * @dev: device
+ * @addr: address to delete
+ *
+ * Release reference to a multicast address and remove it
+ * from the device if the reference count drops to zero.
+ */
+int dev_mc_del(struct net_device *dev, unsigned char *addr)
+{
+ return __dev_mc_del(dev, addr, false);
+}
+EXPORT_SYMBOL(dev_mc_del);
+
+/**
+ * dev_mc_del_global - Delete a global multicast address.
+ * @dev: device
+ * @addr: address to delete
+ *
+ * Release reference to a multicast address and remove it
+ * from the device if the reference count drops to zero.
+ */
+int dev_mc_del_global(struct net_device *dev, unsigned char *addr)
+{
+ return __dev_mc_del(dev, addr, true);
+}
+EXPORT_SYMBOL(dev_mc_del_global);
+
+/**
+ * dev_mc_sync - Synchronize device's unicast list to another device
+ * @to: destination device
+ * @from: source device
+ *
+ * Add newly added addresses to the destination device and release
+ * addresses that have no users left. The source device must be
+ * locked by netif_tx_lock_bh.
+ *
+ * This function is intended to be called from the dev->set_multicast_list
+ * or dev->set_rx_mode function of layered software devices.
+ */
+int dev_mc_sync(struct net_device *to, struct net_device *from)
+{
+ int err = 0;
+
+ if (to->addr_len != from->addr_len)
+ return -EINVAL;
+
+ netif_addr_lock_bh(to);
+ err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
+ if (!err)
+ __dev_set_rx_mode(to);
+ netif_addr_unlock_bh(to);
+ return err;
+}
+EXPORT_SYMBOL(dev_mc_sync);
+
+/**
+ * dev_mc_unsync - Remove synchronized addresses from the destination device
+ * @to: destination device
+ * @from: source device
+ *
+ * Remove all addresses that were added to the destination device by
+ * dev_mc_sync(). This function is intended to be called from the
+ * dev->stop function of layered software devices.
+ */
+void dev_mc_unsync(struct net_device *to, struct net_device *from)
+{
+ if (to->addr_len != from->addr_len)
+ return;
+
+ netif_addr_lock_bh(from);
+ netif_addr_lock(to);
+ __hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
+ __dev_set_rx_mode(to);
+ netif_addr_unlock(to);
+ netif_addr_unlock_bh(from);
+}
+EXPORT_SYMBOL(dev_mc_unsync);
+
+/**
+ * dev_mc_flush - Flush multicast addresses
+ * @dev: device
+ *
+ * Flush multicast addresses.
+ */
+void dev_mc_flush(struct net_device *dev)
+{
+ netif_addr_lock_bh(dev);
+ __hw_addr_flush(&dev->mc);
+ netif_addr_unlock_bh(dev);
+}
+EXPORT_SYMBOL(dev_mc_flush);
+
+/**
+ * dev_mc_flush - Init multicast address list
+ * @dev: device
+ *
+ * Init multicast address list.
+ */
+void dev_mc_init(struct net_device *dev)
+{
+ __hw_addr_init(&dev->mc);
+}
+EXPORT_SYMBOL(dev_mc_init);
+
+#ifdef CONFIG_PROC_FS
+#include <linux/seq_file.h>
+
+static int dev_mc_seq_show(struct seq_file *seq, void *v)
+{
+ struct netdev_hw_addr *ha;
+ struct net_device *dev = v;
+
+ if (v == SEQ_START_TOKEN)
+ return 0;
+
+ netif_addr_lock_bh(dev);
+ netdev_for_each_mc_addr(ha, dev) {
+ int i;
+
+ seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
+ dev->name, ha->refcount, ha->global_use);
+
+ for (i = 0; i < dev->addr_len; i++)
+ seq_printf(seq, "%02x", ha->addr[i]);
+
+ seq_putc(seq, '\n');
+ }
+ netif_addr_unlock_bh(dev);
+ return 0;
+}
+
+static const struct seq_operations dev_mc_seq_ops = {
+ .start = dev_seq_start,
+ .next = dev_seq_next,
+ .stop = dev_seq_stop,
+ .show = dev_mc_seq_show,
+};
+
+static int dev_mc_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &dev_mc_seq_ops,
+ sizeof(struct seq_net_private));
+}
+
+static const struct file_operations dev_mc_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = dev_mc_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+#endif
+
+static int __net_init dev_mc_net_init(struct net *net)
+{
+ if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
+ return -ENOMEM;
+ return 0;
+}
+
+static void __net_exit dev_mc_net_exit(struct net *net)
+{
+ proc_net_remove(net, "dev_mcast");
+}
+
+static struct pernet_operations __net_initdata dev_mc_net_ops = {
+ .init = dev_mc_net_init,
+ .exit = dev_mc_net_exit,
+};
+
+void __init dev_mcast_init(void)
+{
+ register_pernet_subsys(&dev_mc_net_ops);
+}
+
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
deleted file mode 100644
index 3dc295beb483..000000000000
--- a/net/core/dev_mcast.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Linux NET3: Multicast List maintenance.
- *
- * Authors:
- * Tim Kordas <tjk@nostromo.eeap.cwru.edu>
- * Richard Underwood <richard@wuzz.demon.co.uk>
- *
- * Stir fried together from the IP multicast and CAP patches above
- * Alan Cox <alan@lxorguk.ukuu.org.uk>
- *
- * Fixes:
- * Alan Cox : Update the device on a real delete
- * rather than any time but...
- * Alan Cox : IFF_ALLMULTI support.
- * Alan Cox : New format set_multicast_list() calls.
- * Gleb Natapov : Remove dev_mc_lock.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <net/net_namespace.h>
-#include <net/ip.h>
-#include <net/route.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/arp.h>
-
-
-/*
- * Device multicast list maintenance.
- *
- * This is used both by IP and by the user level maintenance functions.
- * Unlike BSD we maintain a usage count on a given multicast address so
- * that a casual user application can add/delete multicasts used by
- * protocols without doing damage to the protocols when it deletes the
- * entries. It also helps IP as it tracks overlapping maps.
- *
- * Device mc lists are changed by bh at least if IPv6 is enabled,
- * so that it must be bh protected.
- *
- * We block accesses to device mc filters with netif_tx_lock.
- */
-
-/*
- * Delete a device level multicast
- */
-
-int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
-{
- int err;
-
- netif_addr_lock_bh(dev);
- err = __dev_addr_delete(&dev->mc_list, &dev->mc_count,
- addr, alen, glbl);
- if (!err) {
- /*
- * We have altered the list, so the card
- * loaded filter is now wrong. Fix it
- */
-
- __dev_set_rx_mode(dev);
- }
- netif_addr_unlock_bh(dev);
- return err;
-}
-
-/*
- * Add a device level multicast
- */
-
-int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
-{
- int err;
-
- netif_addr_lock_bh(dev);
- if (alen != dev->addr_len)
- err = -EINVAL;
- else
- err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
- if (!err)
- __dev_set_rx_mode(dev);
- netif_addr_unlock_bh(dev);
- return err;
-}
-
-/**
- * dev_mc_sync - Synchronize device's multicast list to another device
- * @to: destination device
- * @from: source device
- *
- * Add newly added addresses to the destination device and release
- * addresses that have no users left. The source device must be
- * locked by netif_tx_lock_bh.
- *
- * This function is intended to be called from the dev->set_multicast_list
- * or dev->set_rx_mode function of layered software devices.
- */
-int dev_mc_sync(struct net_device *to, struct net_device *from)
-{
- int err = 0;
-
- netif_addr_lock_bh(to);
- err = __dev_addr_sync(&to->mc_list, &to->mc_count,
- &from->mc_list, &from->mc_count);
- if (!err)
- __dev_set_rx_mode(to);
- netif_addr_unlock_bh(to);
-
- return err;
-}
-EXPORT_SYMBOL(dev_mc_sync);
-
-
-/**
- * dev_mc_unsync - Remove synchronized addresses from the destination
- * device
- * @to: destination device
- * @from: source device
- *
- * Remove all addresses that were added to the destination device by
- * dev_mc_sync(). This function is intended to be called from the
- * dev->stop function of layered software devices.
- */
-void dev_mc_unsync(struct net_device *to, struct net_device *from)
-{
- netif_addr_lock_bh(from);
- netif_addr_lock(to);
-
- __dev_addr_unsync(&to->mc_list, &to->mc_count,
- &from->mc_list, &from->mc_count);
- __dev_set_rx_mode(to);
-
- netif_addr_unlock(to);
- netif_addr_unlock_bh(from);
-}
-EXPORT_SYMBOL(dev_mc_unsync);
-
-#ifdef CONFIG_PROC_FS
-static int dev_mc_seq_show(struct seq_file *seq, void *v)
-{
- struct dev_addr_list *m;
- struct net_device *dev = v;
-
- if (v == SEQ_START_TOKEN)
- return 0;
-
- netif_addr_lock_bh(dev);
- for (m = dev->mc_list; m; m = m->next) {
- int i;
-
- seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
- dev->name, m->dmi_users, m->dmi_gusers);
-
- for (i = 0; i < m->dmi_addrlen; i++)
- seq_printf(seq, "%02x", m->dmi_addr[i]);
-
- seq_putc(seq, '\n');
- }
- netif_addr_unlock_bh(dev);
- return 0;
-}
-
-static const struct seq_operations dev_mc_seq_ops = {
- .start = dev_seq_start,
- .next = dev_seq_next,
- .stop = dev_seq_stop,
- .show = dev_mc_seq_show,
-};
-
-static int dev_mc_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &dev_mc_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations dev_mc_seq_fops = {
- .owner = THIS_MODULE,
- .open = dev_mc_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
-#endif
-
-static int __net_init dev_mc_net_init(struct net *net)
-{
- if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
- return -ENOMEM;
- return 0;
-}
-
-static void __net_exit dev_mc_net_exit(struct net *net)
-{
- proc_net_remove(net, "dev_mcast");
-}
-
-static struct pernet_operations __net_initdata dev_mc_net_ops = {
- .init = dev_mc_net_init,
- .exit = dev_mc_net_exit,
-};
-
-void __init dev_mcast_init(void)
-{
- register_pernet_subsys(&dev_mc_net_ops);
-}
-
-EXPORT_SYMBOL(dev_mc_add);
-EXPORT_SYMBOL(dev_mc_delete);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index f8c874975350..cf208d8042b1 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -21,6 +21,7 @@
#include <linux/percpu.h>
#include <linux/timer.h>
#include <linux/bitops.h>
+#include <linux/slab.h>
#include <net/genetlink.h>
#include <net/netevent.h>
diff --git a/net/core/dst.c b/net/core/dst.c
index cb1b3488b739..9920722cc82b 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -12,6 +12,7 @@
#include <linux/workqueue.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
@@ -43,7 +44,7 @@ static atomic_t dst_total = ATOMIC_INIT(0);
*/
static struct {
spinlock_t lock;
- struct dst_entry *list;
+ struct dst_entry *list;
unsigned long timer_inc;
unsigned long timer_expires;
} dst_garbage = {
@@ -51,7 +52,7 @@ static struct {
.timer_inc = DST_GC_MAX,
};
static void dst_gc_task(struct work_struct *work);
-static void ___dst_free(struct dst_entry * dst);
+static void ___dst_free(struct dst_entry *dst);
static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task);
@@ -135,8 +136,8 @@ loop:
}
expires = dst_garbage.timer_expires;
/*
- * if the next desired timer is more than 4 seconds in the future
- * then round the timer to whole seconds
+ * if the next desired timer is more than 4 seconds in the
+ * future then round the timer to whole seconds
*/
if (expires > 4*HZ)
expires = round_jiffies_relative(expires);
@@ -151,7 +152,8 @@ loop:
" expires: %lu elapsed: %lu us\n",
atomic_read(&dst_total), delayed, work_performed,
expires,
- elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC);
+ elapsed.tv_sec * USEC_PER_SEC +
+ elapsed.tv_nsec / NSEC_PER_USEC);
#endif
}
@@ -162,9 +164,9 @@ int dst_discard(struct sk_buff *skb)
}
EXPORT_SYMBOL(dst_discard);
-void * dst_alloc(struct dst_ops * ops)
+void *dst_alloc(struct dst_ops *ops)
{
- struct dst_entry * dst;
+ struct dst_entry *dst;
if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
if (ops->gc(ops))
@@ -184,19 +186,20 @@ void * dst_alloc(struct dst_ops * ops)
atomic_inc(&ops->entries);
return dst;
}
+EXPORT_SYMBOL(dst_alloc);
-static void ___dst_free(struct dst_entry * dst)
+static void ___dst_free(struct dst_entry *dst)
{
/* The first case (dev==NULL) is required, when
protocol module is unloaded.
*/
- if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
+ if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
dst->input = dst->output = dst_discard;
- }
dst->obsolete = 2;
}
+EXPORT_SYMBOL(__dst_free);
-void __dst_free(struct dst_entry * dst)
+void __dst_free(struct dst_entry *dst)
{
spin_lock_bh(&dst_garbage.lock);
___dst_free(dst);
@@ -261,15 +264,16 @@ again:
}
return NULL;
}
+EXPORT_SYMBOL(dst_destroy);
void dst_release(struct dst_entry *dst)
{
if (dst) {
- int newrefcnt;
+ int newrefcnt;
smp_mb__before_atomic_dec();
- newrefcnt = atomic_dec_return(&dst->__refcnt);
- WARN_ON(newrefcnt < 0);
+ newrefcnt = atomic_dec_return(&dst->__refcnt);
+ WARN_ON(newrefcnt < 0);
}
}
EXPORT_SYMBOL(dst_release);
@@ -282,8 +286,8 @@ EXPORT_SYMBOL(dst_release);
*
* Commented and originally written by Alexey.
*/
-static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
- int unregister)
+static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+ int unregister)
{
if (dst->ops->ifdown)
dst->ops->ifdown(dst, dev, unregister);
@@ -305,7 +309,8 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
}
}
-static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+static int dst_dev_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
{
struct net_device *dev = ptr;
struct dst_entry *dst, *last = NULL;
@@ -328,9 +333,8 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void
last->next = dst;
else
dst_busy_list = dst;
- for (; dst; dst = dst->next) {
+ for (; dst; dst = dst->next)
dst_ifdown(dst, dev, event != NETDEV_DOWN);
- }
mutex_unlock(&dst_gc_mutex);
break;
}
@@ -345,7 +349,3 @@ void __init dst_init(void)
{
register_netdevice_notifier(&dst_dev_notifier);
}
-
-EXPORT_SYMBOL(__dst_free);
-EXPORT_SYMBOL(dst_alloc);
-EXPORT_SYMBOL(dst_destroy);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f4cb6b6299d9..1a7db92037fa 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -18,7 +18,8 @@
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <linux/bitops.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
/*
* Some useful ethtool_ops methods that're device independent.
@@ -30,6 +31,7 @@ u32 ethtool_op_get_link(struct net_device *dev)
{
return netif_carrier_ok(dev) ? 1 : 0;
}
+EXPORT_SYMBOL(ethtool_op_get_link);
u32 ethtool_op_get_rx_csum(struct net_device *dev)
{
@@ -62,6 +64,7 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
return 0;
}
+EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
{
@@ -72,11 +75,13 @@ int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
return 0;
}
+EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
u32 ethtool_op_get_sg(struct net_device *dev)
{
return (dev->features & NETIF_F_SG) != 0;
}
+EXPORT_SYMBOL(ethtool_op_get_sg);
int ethtool_op_set_sg(struct net_device *dev, u32 data)
{
@@ -87,11 +92,13 @@ int ethtool_op_set_sg(struct net_device *dev, u32 data)
return 0;
}
+EXPORT_SYMBOL(ethtool_op_set_sg);
u32 ethtool_op_get_tso(struct net_device *dev)
{
return (dev->features & NETIF_F_TSO) != 0;
}
+EXPORT_SYMBOL(ethtool_op_get_tso);
int ethtool_op_set_tso(struct net_device *dev, u32 data)
{
@@ -102,11 +109,13 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
return 0;
}
+EXPORT_SYMBOL(ethtool_op_set_tso);
u32 ethtool_op_get_ufo(struct net_device *dev)
{
return (dev->features & NETIF_F_UFO) != 0;
}
+EXPORT_SYMBOL(ethtool_op_get_ufo);
int ethtool_op_set_ufo(struct net_device *dev, u32 data)
{
@@ -116,12 +125,13 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
dev->features &= ~NETIF_F_UFO;
return 0;
}
+EXPORT_SYMBOL(ethtool_op_set_ufo);
/* the following list of flags are the same as their associated
* NETIF_F_xxx values in include/linux/netdevice.h
*/
static const u32 flags_dup_features =
- (ETH_FLAG_LRO | ETH_FLAG_NTUPLE);
+ (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
u32 ethtool_op_get_flags(struct net_device *dev)
{
@@ -132,6 +142,7 @@ u32 ethtool_op_get_flags(struct net_device *dev)
return dev->features & flags_dup_features;
}
+EXPORT_SYMBOL(ethtool_op_get_flags);
int ethtool_op_set_flags(struct net_device *dev, u32 data)
{
@@ -152,9 +163,15 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data)
features &= ~NETIF_F_NTUPLE;
}
+ if (data & ETH_FLAG_RXHASH)
+ features |= NETIF_F_RXHASH;
+ else
+ features &= ~NETIF_F_RXHASH;
+
dev->features = features;
return 0;
}
+EXPORT_SYMBOL(ethtool_op_set_flags);
void ethtool_ntuple_flush(struct net_device *dev)
{
@@ -200,7 +217,8 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
return dev->ethtool_ops->set_settings(dev, &cmd);
}
-static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
+ void __user *useraddr)
{
struct ethtool_drvinfo info;
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -240,7 +258,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void _
}
static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
- void __user *useraddr)
+ void __user *useraddr)
{
struct ethtool_sset_info info;
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -299,7 +317,8 @@ out:
return ret;
}
-static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
+ void __user *useraddr)
{
struct ethtool_rxnfc cmd;
@@ -312,7 +331,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __u
return dev->ethtool_ops->set_rxnfc(dev, &cmd);
}
-static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
+ void __user *useraddr)
{
struct ethtool_rxnfc info;
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -357,8 +377,8 @@ err_out:
}
static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
- struct ethtool_rx_ntuple_flow_spec *spec,
- struct ethtool_rx_ntuple_flow_spec_container *fsc)
+ struct ethtool_rx_ntuple_flow_spec *spec,
+ struct ethtool_rx_ntuple_flow_spec_container *fsc)
{
/* don't add filters forever */
@@ -384,7 +404,8 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
list->count++;
}
-static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
+ void __user *useraddr)
{
struct ethtool_rx_ntuple cmd;
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -509,125 +530,125 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
case UDP_V4_FLOW:
case SCTP_V4_FLOW:
sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.ip4src);
+ fsc->fs.h_u.tcp_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.tcp_ip4_spec.ip4src);
+ fsc->fs.m_u.tcp_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.ip4dst);
+ fsc->fs.h_u.tcp_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.tcp_ip4_spec.ip4dst);
+ fsc->fs.m_u.tcp_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.psrc,
- fsc->fs.m_u.tcp_ip4_spec.psrc);
+ fsc->fs.h_u.tcp_ip4_spec.psrc,
+ fsc->fs.m_u.tcp_ip4_spec.psrc);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.pdst,
- fsc->fs.m_u.tcp_ip4_spec.pdst);
+ fsc->fs.h_u.tcp_ip4_spec.pdst,
+ fsc->fs.m_u.tcp_ip4_spec.pdst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.tos,
- fsc->fs.m_u.tcp_ip4_spec.tos);
+ fsc->fs.h_u.tcp_ip4_spec.tos,
+ fsc->fs.m_u.tcp_ip4_spec.tos);
p += ETH_GSTRING_LEN;
num_strings++;
break;
case AH_ESP_V4_FLOW:
case ESP_V4_FLOW:
sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.ip4src);
+ fsc->fs.h_u.ah_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.ah_ip4_spec.ip4src);
+ fsc->fs.m_u.ah_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.ip4dst);
+ fsc->fs.h_u.ah_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.ah_ip4_spec.ip4dst);
+ fsc->fs.m_u.ah_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tSPI: %d, mask: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.spi,
- fsc->fs.m_u.ah_ip4_spec.spi);
+ fsc->fs.h_u.ah_ip4_spec.spi,
+ fsc->fs.m_u.ah_ip4_spec.spi);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.tos,
- fsc->fs.m_u.ah_ip4_spec.tos);
+ fsc->fs.h_u.ah_ip4_spec.tos,
+ fsc->fs.m_u.ah_ip4_spec.tos);
p += ETH_GSTRING_LEN;
num_strings++;
break;
case IP_USER_FLOW:
sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.raw_ip4_spec.ip4src);
+ fsc->fs.h_u.raw_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.raw_ip4_spec.ip4src);
+ fsc->fs.m_u.raw_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.raw_ip4_spec.ip4dst);
+ fsc->fs.h_u.raw_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.raw_ip4_spec.ip4dst);
+ fsc->fs.m_u.raw_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
break;
case IPV4_FLOW:
sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4src);
+ fsc->fs.h_u.usr_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4src);
+ fsc->fs.m_u.usr_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4dst);
+ fsc->fs.h_u.usr_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4dst);
+ fsc->fs.m_u.usr_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
- fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
+ fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
+ fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.tos,
- fsc->fs.m_u.usr_ip4_spec.tos);
+ fsc->fs.h_u.usr_ip4_spec.tos,
+ fsc->fs.m_u.usr_ip4_spec.tos);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip_ver,
- fsc->fs.m_u.usr_ip4_spec.ip_ver);
+ fsc->fs.h_u.usr_ip4_spec.ip_ver,
+ fsc->fs.m_u.usr_ip4_spec.ip_ver);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.proto,
- fsc->fs.m_u.usr_ip4_spec.proto);
+ fsc->fs.h_u.usr_ip4_spec.proto,
+ fsc->fs.m_u.usr_ip4_spec.proto);
p += ETH_GSTRING_LEN;
num_strings++;
break;
};
sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
- fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
+ fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
@@ -640,7 +661,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
sprintf(p, "\tAction: Drop\n");
else
sprintf(p, "\tAction: Direct to queue %d\n",
- fsc->fs.action);
+ fsc->fs.action);
p += ETH_GSTRING_LEN;
num_strings++;
unknown_filter:
@@ -852,7 +873,8 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
return ret;
}
-static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
+ void __user *useraddr)
{
struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
@@ -866,7 +888,8 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void
return 0;
}
-static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
+ void __user *useraddr)
{
struct ethtool_coalesce coalesce;
@@ -970,6 +993,7 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
return dev->ethtool_ops->set_tx_csum(dev, edata.data);
}
+EXPORT_SYMBOL(ethtool_op_set_tx_csum);
static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
{
@@ -1041,7 +1065,7 @@ static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
edata.data = dev->features & NETIF_F_GSO;
if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
+ return -EFAULT;
return 0;
}
@@ -1064,7 +1088,7 @@ static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
edata.data = dev->features & NETIF_F_GRO;
if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
+ return -EFAULT;
return 0;
}
@@ -1276,7 +1300,8 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
return actor(dev, edata.data);
}
-static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr)
+static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
+ char __user *useraddr)
{
struct ethtool_flash efl;
@@ -1305,11 +1330,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
if (!dev->ethtool_ops)
return -EOPNOTSUPP;
- if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
+ if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
return -EFAULT;
/* Allow some commands to be done by anyone */
- switch(ethcmd) {
+ switch (ethcmd) {
case ETHTOOL_GDRVINFO:
case ETHTOOL_GMSGLVL:
case ETHTOOL_GCOALESCE:
@@ -1337,10 +1362,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
return -EPERM;
}
- if (dev->ethtool_ops->begin)
- if ((rc = dev->ethtool_ops->begin(dev)) < 0)
+ if (dev->ethtool_ops->begin) {
+ rc = dev->ethtool_ops->begin(dev);
+ if (rc < 0)
return rc;
-
+ }
old_features = dev->features;
switch (ethcmd) {
@@ -1530,16 +1556,3 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
return rc;
}
-
-EXPORT_SYMBOL(ethtool_op_get_link);
-EXPORT_SYMBOL(ethtool_op_get_sg);
-EXPORT_SYMBOL(ethtool_op_get_tso);
-EXPORT_SYMBOL(ethtool_op_set_sg);
-EXPORT_SYMBOL(ethtool_op_set_tso);
-EXPORT_SYMBOL(ethtool_op_set_tx_csum);
-EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
-EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
-EXPORT_SYMBOL(ethtool_op_set_ufo);
-EXPORT_SYMBOL(ethtool_op_get_ufo);
-EXPORT_SYMBOL(ethtool_op_set_flags);
-EXPORT_SYMBOL(ethtool_op_get_flags);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 2ff34894357a..1bc66592453c 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -10,6 +10,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/slab.h>
#include <linux/list.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -38,6 +39,24 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
}
EXPORT_SYMBOL(fib_default_rule_add);
+u32 fib_default_rule_pref(struct fib_rules_ops *ops)
+{
+ struct list_head *pos;
+ struct fib_rule *rule;
+
+ if (!list_empty(&ops->rules_list)) {
+ pos = ops->rules_list.next;
+ if (pos->next != &ops->rules_list) {
+ rule = list_entry(pos->next, struct fib_rule, list);
+ if (rule->pref)
+ return rule->pref - 1;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(fib_default_rule_pref);
+
static void notify_rule_change(int event, struct fib_rule *rule,
struct fib_rules_ops *ops, struct nlmsghdr *nlh,
u32 pid);
@@ -516,6 +535,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
return -EMSGSIZE;
frh = nlmsg_data(nlh);
+ frh->family = ops->family;
frh->table = rule->table;
NLA_PUT_U32(skb, FRA_TABLE, rule->table);
frh->res1 = 0;
diff --git a/net/core/filter.c b/net/core/filter.c
index d38ef7fd50f0..ff943bed21af 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -25,6 +25,7 @@
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
+#include <linux/gfp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
diff --git a/net/core/flow.c b/net/core/flow.c
index 96015871ecea..161900674009 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -26,113 +26,158 @@
#include <linux/security.h>
struct flow_cache_entry {
- struct flow_cache_entry *next;
- u16 family;
- u8 dir;
- u32 genid;
- struct flowi key;
- void *object;
- atomic_t *object_ref;
+ union {
+ struct hlist_node hlist;
+ struct list_head gc_list;
+ } u;
+ u16 family;
+ u8 dir;
+ u32 genid;
+ struct flowi key;
+ struct flow_cache_object *object;
};
-atomic_t flow_cache_genid = ATOMIC_INIT(0);
-
-static u32 flow_hash_shift;
-#define flow_hash_size (1 << flow_hash_shift)
-static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
-
-#define flow_table(cpu) (per_cpu(flow_tables, cpu))
-
-static struct kmem_cache *flow_cachep __read_mostly;
+struct flow_cache_percpu {
+ struct hlist_head *hash_table;
+ int hash_count;
+ u32 hash_rnd;
+ int hash_rnd_recalc;
+ struct tasklet_struct flush_tasklet;
+};
-static int flow_lwm, flow_hwm;
+struct flow_flush_info {
+ struct flow_cache *cache;
+ atomic_t cpuleft;
+ struct completion completion;
+};
-struct flow_percpu_info {
- int hash_rnd_recalc;
- u32 hash_rnd;
- int count;
+struct flow_cache {
+ u32 hash_shift;
+ unsigned long order;
+ struct flow_cache_percpu *percpu;
+ struct notifier_block hotcpu_notifier;
+ int low_watermark;
+ int high_watermark;
+ struct timer_list rnd_timer;
};
-static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
-#define flow_hash_rnd_recalc(cpu) \
- (per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
-#define flow_hash_rnd(cpu) \
- (per_cpu(flow_hash_info, cpu).hash_rnd)
-#define flow_count(cpu) \
- (per_cpu(flow_hash_info, cpu).count)
+atomic_t flow_cache_genid = ATOMIC_INIT(0);
+static struct flow_cache flow_cache_global;
+static struct kmem_cache *flow_cachep;
-static struct timer_list flow_hash_rnd_timer;
+static DEFINE_SPINLOCK(flow_cache_gc_lock);
+static LIST_HEAD(flow_cache_gc_list);
-#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
-
-struct flow_flush_info {
- atomic_t cpuleft;
- struct completion completion;
-};
-static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
-
-#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
+#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
+#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
static void flow_cache_new_hashrnd(unsigned long arg)
{
+ struct flow_cache *fc = (void *) arg;
int i;
for_each_possible_cpu(i)
- flow_hash_rnd_recalc(i) = 1;
+ per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
- flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
- add_timer(&flow_hash_rnd_timer);
+ fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+ add_timer(&fc->rnd_timer);
+}
+
+static int flow_entry_valid(struct flow_cache_entry *fle)
+{
+ if (atomic_read(&flow_cache_genid) != fle->genid)
+ return 0;
+ if (fle->object && !fle->object->ops->check(fle->object))
+ return 0;
+ return 1;
}
-static void flow_entry_kill(int cpu, struct flow_cache_entry *fle)
+static void flow_entry_kill(struct flow_cache_entry *fle)
{
if (fle->object)
- atomic_dec(fle->object_ref);
+ fle->object->ops->delete(fle->object);
kmem_cache_free(flow_cachep, fle);
- flow_count(cpu)--;
}
-static void __flow_cache_shrink(int cpu, int shrink_to)
+static void flow_cache_gc_task(struct work_struct *work)
{
- struct flow_cache_entry *fle, **flp;
- int i;
+ struct list_head gc_list;
+ struct flow_cache_entry *fce, *n;
- for (i = 0; i < flow_hash_size; i++) {
- int k = 0;
+ INIT_LIST_HEAD(&gc_list);
+ spin_lock_bh(&flow_cache_gc_lock);
+ list_splice_tail_init(&flow_cache_gc_list, &gc_list);
+ spin_unlock_bh(&flow_cache_gc_lock);
- flp = &flow_table(cpu)[i];
- while ((fle = *flp) != NULL && k < shrink_to) {
- k++;
- flp = &fle->next;
- }
- while ((fle = *flp) != NULL) {
- *flp = fle->next;
- flow_entry_kill(cpu, fle);
- }
+ list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
+ flow_entry_kill(fce);
+}
+static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
+
+static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
+ int deleted, struct list_head *gc_list)
+{
+ if (deleted) {
+ fcp->hash_count -= deleted;
+ spin_lock_bh(&flow_cache_gc_lock);
+ list_splice_tail(gc_list, &flow_cache_gc_list);
+ spin_unlock_bh(&flow_cache_gc_lock);
+ schedule_work(&flow_cache_gc_work);
}
}
-static void flow_cache_shrink(int cpu)
+static void __flow_cache_shrink(struct flow_cache *fc,
+ struct flow_cache_percpu *fcp,
+ int shrink_to)
{
- int shrink_to = flow_lwm / flow_hash_size;
+ struct flow_cache_entry *fle;
+ struct hlist_node *entry, *tmp;
+ LIST_HEAD(gc_list);
+ int i, deleted = 0;
+
+ for (i = 0; i < flow_cache_hash_size(fc); i++) {
+ int saved = 0;
+
+ hlist_for_each_entry_safe(fle, entry, tmp,
+ &fcp->hash_table[i], u.hlist) {
+ if (saved < shrink_to &&
+ flow_entry_valid(fle)) {
+ saved++;
+ } else {
+ deleted++;
+ hlist_del(&fle->u.hlist);
+ list_add_tail(&fle->u.gc_list, &gc_list);
+ }
+ }
+ }
- __flow_cache_shrink(cpu, shrink_to);
+ flow_cache_queue_garbage(fcp, deleted, &gc_list);
}
-static void flow_new_hash_rnd(int cpu)
+static void flow_cache_shrink(struct flow_cache *fc,
+ struct flow_cache_percpu *fcp)
{
- get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
- flow_hash_rnd_recalc(cpu) = 0;
+ int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
- __flow_cache_shrink(cpu, 0);
+ __flow_cache_shrink(fc, fcp, shrink_to);
}
-static u32 flow_hash_code(struct flowi *key, int cpu)
+static void flow_new_hash_rnd(struct flow_cache *fc,
+ struct flow_cache_percpu *fcp)
+{
+ get_random_bytes(&fcp->hash_rnd, sizeof(u32));
+ fcp->hash_rnd_recalc = 0;
+ __flow_cache_shrink(fc, fcp, 0);
+}
+
+static u32 flow_hash_code(struct flow_cache *fc,
+ struct flow_cache_percpu *fcp,
+ struct flowi *key)
{
u32 *k = (u32 *) key;
- return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
- (flow_hash_size - 1));
+ return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
+ & (flow_cache_hash_size(fc) - 1));
}
#if (BITS_PER_LONG == 64)
@@ -165,114 +210,117 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
return 0;
}
-void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
- flow_resolve_t resolver)
+struct flow_cache_object *
+flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+ flow_resolve_t resolver, void *ctx)
{
- struct flow_cache_entry *fle, **head;
+ struct flow_cache *fc = &flow_cache_global;
+ struct flow_cache_percpu *fcp;
+ struct flow_cache_entry *fle, *tfle;
+ struct hlist_node *entry;
+ struct flow_cache_object *flo;
unsigned int hash;
- int cpu;
local_bh_disable();
- cpu = smp_processor_id();
+ fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
fle = NULL;
+ flo = NULL;
/* Packet really early in init? Making flow_cache_init a
* pre-smp initcall would solve this. --RR */
- if (!flow_table(cpu))
+ if (!fcp->hash_table)
goto nocache;
- if (flow_hash_rnd_recalc(cpu))
- flow_new_hash_rnd(cpu);
- hash = flow_hash_code(key, cpu);
+ if (fcp->hash_rnd_recalc)
+ flow_new_hash_rnd(fc, fcp);
- head = &flow_table(cpu)[hash];
- for (fle = *head; fle; fle = fle->next) {
- if (fle->family == family &&
- fle->dir == dir &&
- flow_key_compare(key, &fle->key) == 0) {
- if (fle->genid == atomic_read(&flow_cache_genid)) {
- void *ret = fle->object;
-
- if (ret)
- atomic_inc(fle->object_ref);
- local_bh_enable();
-
- return ret;
- }
+ hash = flow_hash_code(fc, fcp, key);
+ hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
+ if (tfle->family == family &&
+ tfle->dir == dir &&
+ flow_key_compare(key, &tfle->key) == 0) {
+ fle = tfle;
break;
}
}
- if (!fle) {
- if (flow_count(cpu) > flow_hwm)
- flow_cache_shrink(cpu);
+ if (unlikely(!fle)) {
+ if (fcp->hash_count > fc->high_watermark)
+ flow_cache_shrink(fc, fcp);
fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
if (fle) {
- fle->next = *head;
- *head = fle;
fle->family = family;
fle->dir = dir;
memcpy(&fle->key, key, sizeof(*key));
fle->object = NULL;
- flow_count(cpu)++;
+ hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
+ fcp->hash_count++;
}
+ } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+ flo = fle->object;
+ if (!flo)
+ goto ret_object;
+ flo = flo->ops->get(flo);
+ if (flo)
+ goto ret_object;
+ } else if (fle->object) {
+ flo = fle->object;
+ flo->ops->delete(flo);
+ fle->object = NULL;
}
nocache:
- {
- int err;
- void *obj;
- atomic_t *obj_ref;
-
- err = resolver(net, key, family, dir, &obj, &obj_ref);
-
- if (fle && !err) {
- fle->genid = atomic_read(&flow_cache_genid);
-
- if (fle->object)
- atomic_dec(fle->object_ref);
-
- fle->object = obj;
- fle->object_ref = obj_ref;
- if (obj)
- atomic_inc(fle->object_ref);
- }
- local_bh_enable();
-
- if (err)
- obj = ERR_PTR(err);
- return obj;
+ flo = NULL;
+ if (fle) {
+ flo = fle->object;
+ fle->object = NULL;
}
+ flo = resolver(net, key, family, dir, flo, ctx);
+ if (fle) {
+ fle->genid = atomic_read(&flow_cache_genid);
+ if (!IS_ERR(flo))
+ fle->object = flo;
+ else
+ fle->genid--;
+ } else {
+ if (flo && !IS_ERR(flo))
+ flo->ops->delete(flo);
+ }
+ret_object:
+ local_bh_enable();
+ return flo;
}
static void flow_cache_flush_tasklet(unsigned long data)
{
struct flow_flush_info *info = (void *)data;
- int i;
- int cpu;
-
- cpu = smp_processor_id();
- for (i = 0; i < flow_hash_size; i++) {
- struct flow_cache_entry *fle;
-
- fle = flow_table(cpu)[i];
- for (; fle; fle = fle->next) {
- unsigned genid = atomic_read(&flow_cache_genid);
-
- if (!fle->object || fle->genid == genid)
+ struct flow_cache *fc = info->cache;
+ struct flow_cache_percpu *fcp;
+ struct flow_cache_entry *fle;
+ struct hlist_node *entry, *tmp;
+ LIST_HEAD(gc_list);
+ int i, deleted = 0;
+
+ fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
+ for (i = 0; i < flow_cache_hash_size(fc); i++) {
+ hlist_for_each_entry_safe(fle, entry, tmp,
+ &fcp->hash_table[i], u.hlist) {
+ if (flow_entry_valid(fle))
continue;
- fle->object = NULL;
- atomic_dec(fle->object_ref);
+ deleted++;
+ hlist_del(&fle->u.hlist);
+ list_add_tail(&fle->u.gc_list, &gc_list);
}
}
+ flow_cache_queue_garbage(fcp, deleted, &gc_list);
+
if (atomic_dec_and_test(&info->cpuleft))
complete(&info->completion);
}
-static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
static void flow_cache_flush_per_cpu(void *data)
{
struct flow_flush_info *info = data;
@@ -280,8 +328,7 @@ static void flow_cache_flush_per_cpu(void *data)
struct tasklet_struct *tasklet;
cpu = smp_processor_id();
-
- tasklet = flow_flush_tasklet(cpu);
+ tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
tasklet->data = (unsigned long)info;
tasklet_schedule(tasklet);
}
@@ -294,6 +341,7 @@ void flow_cache_flush(void)
/* Don't want cpus going down or up during this. */
get_online_cpus();
mutex_lock(&flow_flush_sem);
+ info.cache = &flow_cache_global;
atomic_set(&info.cpuleft, num_online_cpus());
init_completion(&info.completion);
@@ -307,62 +355,75 @@ void flow_cache_flush(void)
put_online_cpus();
}
-static void __init flow_cache_cpu_prepare(int cpu)
+static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
+ struct flow_cache_percpu *fcp)
{
- struct tasklet_struct *tasklet;
- unsigned long order;
-
- for (order = 0;
- (PAGE_SIZE << order) <
- (sizeof(struct flow_cache_entry *)*flow_hash_size);
- order++)
- /* NOTHING */;
-
- flow_table(cpu) = (struct flow_cache_entry **)
- __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
- if (!flow_table(cpu))
- panic("NET: failed to allocate flow cache order %lu\n", order);
-
- flow_hash_rnd_recalc(cpu) = 1;
- flow_count(cpu) = 0;
-
- tasklet = flow_flush_tasklet(cpu);
- tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
+ fcp->hash_table = (struct hlist_head *)
+ __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
+ if (!fcp->hash_table)
+ panic("NET: failed to allocate flow cache order %lu\n", fc->order);
+
+ fcp->hash_rnd_recalc = 1;
+ fcp->hash_count = 0;
+ tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
}
static int flow_cache_cpu(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
+ struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
+ int cpu = (unsigned long) hcpu;
+ struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
+
if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
- __flow_cache_shrink((unsigned long)hcpu, 0);
+ __flow_cache_shrink(fc, fcp, 0);
return NOTIFY_OK;
}
-static int __init flow_cache_init(void)
+static int flow_cache_init(struct flow_cache *fc)
{
+ unsigned long order;
int i;
- flow_cachep = kmem_cache_create("flow_cache",
- sizeof(struct flow_cache_entry),
- 0, SLAB_PANIC,
- NULL);
- flow_hash_shift = 10;
- flow_lwm = 2 * flow_hash_size;
- flow_hwm = 4 * flow_hash_size;
+ fc->hash_shift = 10;
+ fc->low_watermark = 2 * flow_cache_hash_size(fc);
+ fc->high_watermark = 4 * flow_cache_hash_size(fc);
+
+ for (order = 0;
+ (PAGE_SIZE << order) <
+ (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
+ order++)
+ /* NOTHING */;
+ fc->order = order;
+ fc->percpu = alloc_percpu(struct flow_cache_percpu);
- setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0);
- flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
- add_timer(&flow_hash_rnd_timer);
+ setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
+ (unsigned long) fc);
+ fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+ add_timer(&fc->rnd_timer);
for_each_possible_cpu(i)
- flow_cache_cpu_prepare(i);
+ flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i));
+
+ fc->hotcpu_notifier = (struct notifier_block){
+ .notifier_call = flow_cache_cpu,
+ };
+ register_hotcpu_notifier(&fc->hotcpu_notifier);
- hotcpu_notifier(flow_cache_cpu, 0);
return 0;
}
-module_init(flow_cache_init);
+static int __init flow_cache_init_global(void)
+{
+ flow_cachep = kmem_cache_create("flow_cache",
+ sizeof(struct flow_cache_entry),
+ 0, SLAB_PANIC, NULL);
+
+ return flow_cache_init(&flow_cache_global);
+}
+
+module_init(flow_cache_init_global);
EXPORT_SYMBOL(flow_cache_genid);
EXPORT_SYMBOL(flow_cache_lookup);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 493775f4f2f1..cf8e70392fe0 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -32,6 +32,7 @@
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rbtree.h>
+#include <linux/slab.h>
#include <net/sock.h>
#include <net/gen_stats.h>
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 16ad45d4882b..1e7f4e91a935 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -20,7 +20,6 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/slab.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <asm/uaccess.h>
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 5910b555a54a..bdbce2f5875b 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -19,7 +19,6 @@
#include <linux/rtnetlink.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
-#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <asm/types.h>
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6cee6434da67..bff37908bd55 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -15,6 +15,7 @@
* Harald Welte Add neighbour cache statistics like rtstat
*/
+#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7a46343d5ae3..c57c4b228bb5 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -13,9 +13,11 @@
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
+#include <linux/slab.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/wireless.h>
+#include <linux/vmalloc.h>
#include <net/wext.h>
#include "net-sysfs.h"
@@ -466,6 +468,7 @@ static struct attribute_group wireless_group = {
};
#endif
+#ifdef CONFIG_RPS
/*
* RX queue sysfs structures and functions.
*/
@@ -547,7 +550,7 @@ static void rps_map_release(struct rcu_head *rcu)
kfree(map);
}
-ssize_t store_rps_map(struct netdev_rx_queue *queue,
+static ssize_t store_rps_map(struct netdev_rx_queue *queue,
struct rx_queue_attribute *attribute,
const char *buf, size_t len)
{
@@ -599,22 +602,109 @@ ssize_t store_rps_map(struct netdev_rx_queue *queue,
return len;
}
+static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
+ struct rx_queue_attribute *attr,
+ char *buf)
+{
+ struct rps_dev_flow_table *flow_table;
+ unsigned int val = 0;
+
+ rcu_read_lock();
+ flow_table = rcu_dereference(queue->rps_flow_table);
+ if (flow_table)
+ val = flow_table->mask + 1;
+ rcu_read_unlock();
+
+ return sprintf(buf, "%u\n", val);
+}
+
+static void rps_dev_flow_table_release_work(struct work_struct *work)
+{
+ struct rps_dev_flow_table *table = container_of(work,
+ struct rps_dev_flow_table, free_work);
+
+ vfree(table);
+}
+
+static void rps_dev_flow_table_release(struct rcu_head *rcu)
+{
+ struct rps_dev_flow_table *table = container_of(rcu,
+ struct rps_dev_flow_table, rcu);
+
+ INIT_WORK(&table->free_work, rps_dev_flow_table_release_work);
+ schedule_work(&table->free_work);
+}
+
+static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
+ struct rx_queue_attribute *attr,
+ const char *buf, size_t len)
+{
+ unsigned int count;
+ char *endp;
+ struct rps_dev_flow_table *table, *old_table;
+ static DEFINE_SPINLOCK(rps_dev_flow_lock);
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ count = simple_strtoul(buf, &endp, 0);
+ if (endp == buf)
+ return -EINVAL;
+
+ if (count) {
+ int i;
+
+ if (count > 1<<30) {
+ /* Enforce a limit to prevent overflow */
+ return -EINVAL;
+ }
+ count = roundup_pow_of_two(count);
+ table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));
+ if (!table)
+ return -ENOMEM;
+
+ table->mask = count - 1;
+ for (i = 0; i < count; i++)
+ table->flows[i].cpu = RPS_NO_CPU;
+ } else
+ table = NULL;
+
+ spin_lock(&rps_dev_flow_lock);
+ old_table = queue->rps_flow_table;
+ rcu_assign_pointer(queue->rps_flow_table, table);
+ spin_unlock(&rps_dev_flow_lock);
+
+ if (old_table)
+ call_rcu(&old_table->rcu, rps_dev_flow_table_release);
+
+ return len;
+}
+
static struct rx_queue_attribute rps_cpus_attribute =
__ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
+
+static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
+ __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
+ show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
+
static struct attribute *rx_queue_default_attrs[] = {
&rps_cpus_attribute.attr,
+ &rps_dev_flow_table_cnt_attribute.attr,
NULL
};
static void rx_queue_release(struct kobject *kobj)
{
struct netdev_rx_queue *queue = to_rx_queue(kobj);
- struct rps_map *map = queue->rps_map;
struct netdev_rx_queue *first = queue->first;
- if (map)
- call_rcu(&map->rcu, rps_map_release);
+ if (queue->rps_map)
+ call_rcu(&queue->rps_map->rcu, rps_map_release);
+
+ if (queue->rps_flow_table)
+ call_rcu(&queue->rps_flow_table->rcu,
+ rps_dev_flow_table_release);
if (atomic_dec_and_test(&first->count))
kfree(first);
@@ -675,7 +765,7 @@ static void rx_queue_remove_kobjects(struct net_device *net)
kobject_put(&net->_rx[i].kobj);
kset_unregister(net->queues_kset);
}
-
+#endif /* CONFIG_RPS */
#endif /* CONFIG_SYSFS */
#ifdef CONFIG_HOTPLUG
@@ -739,7 +829,9 @@ void netdev_unregister_kobject(struct net_device * net)
if (!net_eq(dev_net(net), &init_net))
return;
+#ifdef CONFIG_RPS
rx_queue_remove_kobjects(net);
+#endif
device_del(dev);
}
@@ -780,11 +872,13 @@ int netdev_register_kobject(struct net_device *net)
if (error)
return error;
+#ifdef CONFIG_RPS
error = rx_queue_register_kobjects(net);
if (error) {
device_del(dev);
return error;
}
+#endif
return error;
}
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index f1e982c508bb..afa6380ed88a 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -19,6 +19,7 @@
#include <linux/workqueue.h>
#include <linux/netlink.h>
#include <linux/net_dropmon.h>
+#include <linux/slab.h>
#include <asm/unaligned.h>
#include <asm/bitops.h>
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index d4ec38fa64e6..a58f59b97597 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -22,6 +22,7 @@
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
+#include <linux/slab.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <asm/unaligned.h>
@@ -614,7 +615,7 @@ void netpoll_print_options(struct netpoll *np)
np->name, np->local_port);
printk(KERN_INFO "%s: local IP %pI4\n",
np->name, &np->local_ip);
- printk(KERN_INFO "%s: interface %s\n",
+ printk(KERN_INFO "%s: interface '%s'\n",
np->name, np->dev_name);
printk(KERN_INFO "%s: remote port %d\n",
np->name, np->remote_port);
@@ -661,6 +662,9 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
if ((delim = strchr(cur, '@')) == NULL)
goto parse_failed;
*delim = 0;
+ if (*cur == ' ' || *cur == '\t')
+ printk(KERN_INFO "%s: warning: whitespace"
+ "is not allowed\n", np->name);
np->remote_port = simple_strtol(cur, NULL, 10);
cur = delim;
}
@@ -708,7 +712,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
return 0;
parse_failed:
- printk(KERN_INFO "%s: couldn't parse config at %s!\n",
+ printk(KERN_INFO "%s: couldn't parse config at '%s'!\n",
np->name, cur);
return -1;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 43923811bd6a..2ad68da418df 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -169,7 +169,7 @@
#include <asm/dma.h>
#include <asm/div64.h> /* do_div */
-#define VERSION "2.72"
+#define VERSION "2.73"
#define IP_NAME_SZ 32
#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
#define MPLS_STACK_BOTTOM htonl(0x00000100)
@@ -190,6 +190,7 @@
#define F_IPSEC_ON (1<<12) /* ipsec on for flows */
#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
+#define F_NODE (1<<15) /* Node memory alloc*/
/* Thread control flag bits */
#define T_STOP (1<<0) /* Stop run */
@@ -372,6 +373,7 @@ struct pktgen_dev {
u16 queue_map_min;
u16 queue_map_max;
+ int node; /* Memory node */
#ifdef CONFIG_XFRM
__u8 ipsmode; /* IPSEC mode (config) */
@@ -607,6 +609,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->traffic_class)
seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class);
+ if (pkt_dev->node >= 0)
+ seq_printf(seq, " node: %d\n", pkt_dev->node);
+
seq_printf(seq, " Flags: ");
if (pkt_dev->flags & F_IPV6)
@@ -660,6 +665,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->flags & F_SVID_RND)
seq_printf(seq, "SVID_RND ");
+ if (pkt_dev->flags & F_NODE)
+ seq_printf(seq, "NODE_ALLOC ");
+
seq_puts(seq, "\n");
/* not really stopped, more like last-running-at */
@@ -1074,6 +1082,21 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->dst_mac_count);
return count;
}
+ if (!strcmp(name, "node")) {
+ len = num_arg(&user_buffer[i], 10, &value);
+ if (len < 0)
+ return len;
+
+ i += len;
+
+ if (node_possible(value)) {
+ pkt_dev->node = value;
+ sprintf(pg_result, "OK: node=%d", pkt_dev->node);
+ }
+ else
+ sprintf(pg_result, "ERROR: node not possible");
+ return count;
+ }
if (!strcmp(name, "flag")) {
char f[32];
memset(f, 0, 32);
@@ -1166,12 +1189,18 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, "!IPV6") == 0)
pkt_dev->flags &= ~F_IPV6;
+ else if (strcmp(f, "NODE_ALLOC") == 0)
+ pkt_dev->flags |= F_NODE;
+
+ else if (strcmp(f, "!NODE_ALLOC") == 0)
+ pkt_dev->flags &= ~F_NODE;
+
else {
sprintf(pg_result,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
f,
"IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
- "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n");
+ "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n");
return count;
}
sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
@@ -2572,9 +2601,27 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
mod_cur_headers(pkt_dev);
datalen = (odev->hard_header_len + 16) & ~0xf;
- skb = __netdev_alloc_skb(odev,
- pkt_dev->cur_pkt_size + 64
- + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
+
+ if (pkt_dev->flags & F_NODE) {
+ int node;
+
+ if (pkt_dev->node >= 0)
+ node = pkt_dev->node;
+ else
+ node = numa_node_id();
+
+ skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64
+ + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node);
+ if (likely(skb)) {
+ skb_reserve(skb, NET_SKB_PAD);
+ skb->dev = odev;
+ }
+ }
+ else
+ skb = __netdev_alloc_skb(odev,
+ pkt_dev->cur_pkt_size + 64
+ + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
+
if (!skb) {
sprintf(pkt_dev->result, "No memory");
return NULL;
@@ -3674,6 +3721,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->svlan_p = 0;
pkt_dev->svlan_cfi = 0;
pkt_dev->svlan_id = 0xffff;
+ pkt_dev->node = -1;
err = pktgen_setup_dev(pkt_dev, ifname);
if (err)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e1121f0bca6a..78c85985cb30 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -118,7 +118,11 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
{
struct rtnl_link *tab;
- tab = rtnl_msg_handlers[protocol];
+ if (protocol < NPROTO)
+ tab = rtnl_msg_handlers[protocol];
+ else
+ tab = NULL;
+
if (tab == NULL || tab[msgindex].doit == NULL)
tab = rtnl_msg_handlers[PF_UNSPEC];
@@ -129,7 +133,11 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
{
struct rtnl_link *tab;
- tab = rtnl_msg_handlers[protocol];
+ if (protocol < NPROTO)
+ tab = rtnl_msg_handlers[protocol];
+ else
+ tab = NULL;
+
if (tab == NULL || tab[msgindex].dumpit == NULL)
tab = rtnl_msg_handlers[PF_UNSPEC];
@@ -602,36 +610,38 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
a->tx_compressed = b->tx_compressed;
}
-static void copy_rtnl_link_stats64(struct rtnl_link_stats64 *a,
- const struct net_device_stats *b)
+static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b)
{
- a->rx_packets = b->rx_packets;
- a->tx_packets = b->tx_packets;
- a->rx_bytes = b->rx_bytes;
- a->tx_bytes = b->tx_bytes;
- a->rx_errors = b->rx_errors;
- a->tx_errors = b->tx_errors;
- a->rx_dropped = b->rx_dropped;
- a->tx_dropped = b->tx_dropped;
-
- a->multicast = b->multicast;
- a->collisions = b->collisions;
-
- a->rx_length_errors = b->rx_length_errors;
- a->rx_over_errors = b->rx_over_errors;
- a->rx_crc_errors = b->rx_crc_errors;
- a->rx_frame_errors = b->rx_frame_errors;
- a->rx_fifo_errors = b->rx_fifo_errors;
- a->rx_missed_errors = b->rx_missed_errors;
-
- a->tx_aborted_errors = b->tx_aborted_errors;
- a->tx_carrier_errors = b->tx_carrier_errors;
- a->tx_fifo_errors = b->tx_fifo_errors;
- a->tx_heartbeat_errors = b->tx_heartbeat_errors;
- a->tx_window_errors = b->tx_window_errors;
-
- a->rx_compressed = b->rx_compressed;
- a->tx_compressed = b->tx_compressed;
+ struct rtnl_link_stats64 a;
+
+ a.rx_packets = b->rx_packets;
+ a.tx_packets = b->tx_packets;
+ a.rx_bytes = b->rx_bytes;
+ a.tx_bytes = b->tx_bytes;
+ a.rx_errors = b->rx_errors;
+ a.tx_errors = b->tx_errors;
+ a.rx_dropped = b->rx_dropped;
+ a.tx_dropped = b->tx_dropped;
+
+ a.multicast = b->multicast;
+ a.collisions = b->collisions;
+
+ a.rx_length_errors = b->rx_length_errors;
+ a.rx_over_errors = b->rx_over_errors;
+ a.rx_crc_errors = b->rx_crc_errors;
+ a.rx_frame_errors = b->rx_frame_errors;
+ a.rx_fifo_errors = b->rx_fifo_errors;
+ a.rx_missed_errors = b->rx_missed_errors;
+
+ a.tx_aborted_errors = b->tx_aborted_errors;
+ a.tx_carrier_errors = b->tx_carrier_errors;
+ a.tx_fifo_errors = b->tx_fifo_errors;
+ a.tx_heartbeat_errors = b->tx_heartbeat_errors;
+ a.tx_window_errors = b->tx_window_errors;
+
+ a.rx_compressed = b->rx_compressed;
+ a.tx_compressed = b->tx_compressed;
+ memcpy(v, &a, sizeof(a));
}
static inline int rtnl_vfinfo_size(const struct net_device *dev)
@@ -651,6 +661,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
+ nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
+ nla_total_size(sizeof(struct rtnl_link_ifmap))
+ nla_total_size(sizeof(struct rtnl_link_stats))
+ + nla_total_size(sizeof(struct rtnl_link_stats64))
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
+ nla_total_size(4) /* IFLA_TXQLEN */
@@ -734,8 +745,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
sizeof(struct rtnl_link_stats64));
if (attr == NULL)
goto nla_put_failure;
-
- stats = dev_get_stats(dev);
copy_rtnl_link_stats64(nla_data(attr), stats);
if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
@@ -1443,9 +1452,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return 0;
family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family;
- if (family >= NPROTO)
- return -EAFNOSUPPORT;
-
sz_idx = type>>2;
kind = type&3;
@@ -1513,6 +1519,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
case NETDEV_POST_INIT:
case NETDEV_REGISTER:
case NETDEV_CHANGE:
+ case NETDEV_PRE_TYPE_CHANGE:
case NETDEV_GOING_DOWN:
case NETDEV_UNREGISTER:
case NETDEV_UNREGISTER_BATCH:
diff --git a/net/core/scm.c b/net/core/scm.c
index 9b264634acfd..b88f6f9d0b97 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -26,6 +26,7 @@
#include <linux/security.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>
+#include <linux/slab.h>
#include <asm/system.h>
#include <asm/uaccess.h>
diff --git a/net/core/sock.c b/net/core/sock.c
index c5812bbc2cc9..7effa1e689df 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -364,11 +364,11 @@ EXPORT_SYMBOL(sk_reset_txq);
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
- struct dst_entry *dst = sk->sk_dst_cache;
+ struct dst_entry *dst = __sk_dst_get(sk);
if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
sk_tx_queue_clear(sk);
- sk->sk_dst_cache = NULL;
+ rcu_assign_pointer(sk->sk_dst_cache, NULL);
dst_release(dst);
return NULL;
}
@@ -1157,7 +1157,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif
- rwlock_init(&newsk->sk_dst_lock);
+ spin_lock_init(&newsk->sk_dst_lock);
rwlock_init(&newsk->sk_callback_lock);
lockdep_set_class_and_name(&newsk->sk_callback_lock,
af_callback_keys + newsk->sk_family,
@@ -1898,7 +1898,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
} else
sk->sk_sleep = NULL;
- rwlock_init(&sk->sk_dst_lock);
+ spin_lock_init(&sk->sk_dst_lock);
rwlock_init(&sk->sk_callback_lock);
lockdep_set_class_and_name(&sk->sk_callback_lock,
af_callback_keys + sk->sk_family,
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 06124872af5b..dcc7d25996ab 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -11,11 +11,72 @@
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/ratelimit.h>
+#include <linux/vmalloc.h>
#include <linux/init.h>
+#include <linux/slab.h>
#include <net/ip.h>
#include <net/sock.h>
+#ifdef CONFIG_RPS
+static int rps_sock_flow_sysctl(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ unsigned int orig_size, size;
+ int ret, i;
+ ctl_table tmp = {
+ .data = &size,
+ .maxlen = sizeof(size),
+ .mode = table->mode
+ };
+ struct rps_sock_flow_table *orig_sock_table, *sock_table;
+ static DEFINE_MUTEX(sock_flow_mutex);
+
+ mutex_lock(&sock_flow_mutex);
+
+ orig_sock_table = rps_sock_flow_table;
+ size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
+
+ ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+
+ if (write) {
+ if (size) {
+ if (size > 1<<30) {
+ /* Enforce limit to prevent overflow */
+ mutex_unlock(&sock_flow_mutex);
+ return -EINVAL;
+ }
+ size = roundup_pow_of_two(size);
+ if (size != orig_size) {
+ sock_table =
+ vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
+ if (!sock_table) {
+ mutex_unlock(&sock_flow_mutex);
+ return -ENOMEM;
+ }
+
+ sock_table->mask = size - 1;
+ } else
+ sock_table = orig_sock_table;
+
+ for (i = 0; i < size; i++)
+ sock_table->ents[i] = RPS_NO_CPU;
+ } else
+ sock_table = NULL;
+
+ if (sock_table != orig_sock_table) {
+ rcu_assign_pointer(rps_sock_flow_table, sock_table);
+ synchronize_rcu();
+ vfree(orig_sock_table);
+ }
+ }
+
+ mutex_unlock(&sock_flow_mutex);
+
+ return ret;
+}
+#endif /* CONFIG_RPS */
+
static struct ctl_table net_core_table[] = {
#ifdef CONFIG_NET
{
@@ -81,6 +142,14 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+#ifdef CONFIG_RPS
+ {
+ .procname = "rps_sock_flow_entries",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = rps_sock_flow_sysctl
+ },
+#endif
#endif /* CONFIG_NET */
{
.procname = "netdev_budget",