summary refs log tree commit diff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/dev.c 44
-rw-r--r-- net/core/dev_mcast.c 37
-rw-r--r-- net/core/dst.c 15
-rw-r--r-- net/core/ethtool.c 65
-rw-r--r-- net/core/fib_rules.c 13
-rw-r--r-- net/core/filter.c 57
-rw-r--r-- net/core/flow.c 1
-rw-r--r-- net/core/neighbour.c 171
-rw-r--r-- net/core/net-sysfs.c 2
-rw-r--r-- net/core/net_namespace.c 104
-rw-r--r-- net/core/netpoll.c 6
-rw-r--r-- net/core/pktgen.c 2
-rw-r--r-- net/core/rtnetlink.c 51
-rw-r--r-- net/core/skbuff.c 96
-rw-r--r-- net/core/sock.c 161
-rw-r--r-- net/core/sysctl_net_core.c 12
16 files changed, 606 insertions, 231 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 460e7f99ce3e..e1df1ab3e04a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -216,7 +216,7 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
- struct net *net = dev->nd_net;
+ struct net *net = dev_net(dev);
ASSERT_RTNL();
@@ -852,8 +852,8 @@ int dev_alloc_name(struct net_device *dev, const char *name)
struct net *net;
int ret;
- BUG_ON(!dev->nd_net);
- net = dev->nd_net;
+ BUG_ON(!dev_net(dev));
+ net = dev_net(dev);
ret = __dev_alloc_name(net, name, buf);
if (ret >= 0)
strlcpy(dev->name, buf, IFNAMSIZ);
@@ -877,9 +877,9 @@ int dev_change_name(struct net_device *dev, char *newname)
struct net *net;
ASSERT_RTNL();
- BUG_ON(!dev->nd_net);
+ BUG_ON(!dev_net(dev));
- net = dev->nd_net;
+ net = dev_net(dev);
if (dev->flags & IFF_UP)
return -EBUSY;
@@ -2615,7 +2615,7 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN)
seq_puts(seq, "Type Device Function\n");
- else {
+ else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
@@ -2639,7 +2639,8 @@ static const struct seq_operations ptype_seq_ops = {
static int ptype_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ptype_seq_ops);
+ return seq_open_net(inode, file, &ptype_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations ptype_seq_fops = {
@@ -2647,7 +2648,7 @@ static const struct file_operations ptype_seq_fops = {
.open = ptype_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
};
@@ -3688,8 +3689,8 @@ int register_netdevice(struct net_device *dev)
/* When net_device's are persistent, this will be fatal. */
BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
- BUG_ON(!dev->nd_net);
- net = dev->nd_net;
+ BUG_ON(!dev_net(dev));
+ net = dev_net(dev);
spin_lock_init(&dev->queue_lock);
spin_lock_init(&dev->_xmit_lock);
@@ -3995,11 +3996,15 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
BUG_ON(strlen(name) >= sizeof(dev->name));
- /* ensure 32-byte alignment of both the device and private area */
- alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
- (sizeof(struct net_device_subqueue) * (queue_count - 1))) &
- ~NETDEV_ALIGN_CONST;
- alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
+ alloc_size = sizeof(struct net_device) +
+ sizeof(struct net_device_subqueue) * (queue_count - 1);
+ if (sizeof_priv) {
+ /* ensure 32-byte alignment of private area */
+ alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+ alloc_size += sizeof_priv;
+ }
+ /* ensure 32-byte alignment of whole construct */
+ alloc_size += NETDEV_ALIGN_CONST;
p = kzalloc(alloc_size, GFP_KERNEL);
if (!p) {
@@ -4010,7 +4015,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev = (struct net_device *)
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
dev->padded = (char *)dev - (char *)p;
- dev->nd_net = &init_net;
+ dev_net_set(dev, &init_net);
if (sizeof_priv) {
dev->priv = ((char *)dev +
@@ -4021,6 +4026,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
}
dev->egress_subqueue_count = queue_count;
+ dev->gso_max_size = GSO_MAX_SIZE;
dev->get_stats = internal_stats;
netpoll_netdev_init(dev);
@@ -4040,6 +4046,8 @@ EXPORT_SYMBOL(alloc_netdev_mq);
*/
void free_netdev(struct net_device *dev)
{
+ release_net(dev_net(dev));
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
@@ -4134,7 +4142,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/* Get out if there is nothing todo */
err = 0;
- if (dev->nd_net == net)
+ if (net_eq(dev_net(dev), net))
goto out;
/* Pick the destination device name, and ensure
@@ -4185,7 +4193,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
dev_addr_discard(dev);
/* Actually switch the network namespace */
- dev->nd_net = net;
+ dev_net_set(dev, net);
/* Assign the new device name */
if (destname != dev->name)
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index cec582563e0d..f8a3455f4493 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -156,39 +156,14 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
EXPORT_SYMBOL(dev_mc_unsync);
#ifdef CONFIG_PROC_FS
-static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(dev_base_lock)
-{
- struct net *net = seq_file_net(seq);
- struct net_device *dev;
- loff_t off = 0;
-
- read_lock(&dev_base_lock);
- for_each_netdev(net, dev) {
- if (off++ == *pos)
- return dev;
- }
- return NULL;
-}
-
-static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- ++*pos;
- return next_net_device((struct net_device *)v);
-}
-
-static void dev_mc_seq_stop(struct seq_file *seq, void *v)
- __releases(dev_base_lock)
-{
- read_unlock(&dev_base_lock);
-}
-
-
static int dev_mc_seq_show(struct seq_file *seq, void *v)
{
struct dev_addr_list *m;
struct net_device *dev = v;
+ if (v == SEQ_START_TOKEN)
+ return 0;
+
netif_tx_lock_bh(dev);
for (m = dev->mc_list; m; m = m->next) {
int i;
@@ -206,9 +181,9 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
}
static const struct seq_operations dev_mc_seq_ops = {
- .start = dev_mc_seq_start,
- .next = dev_mc_seq_next,
- .stop = dev_mc_seq_stop,
+ .start = dev_seq_start,
+ .next = dev_seq_next,
+ .stop = dev_seq_stop,
.show = dev_mc_seq_show,
};
diff --git a/net/core/dst.c b/net/core/dst.c
index 7deef483c79f..fe03266130b6 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -259,6 +259,16 @@ again:
return NULL;
}
+void dst_release(struct dst_entry *dst)
+{
+ if (dst) { /* a NULL dst is ignored */
+ WARN_ON(atomic_read(&dst->__refcnt) < 1); /* warn when no reference is left to drop */
+ smp_mb__before_atomic_dec();
+ atomic_dec(&dst->__refcnt); /* drop one reference; nothing is freed here */
+ }
+}
+EXPORT_SYMBOL(dst_release);
+
/* Dirty hack. We did it in 2.2 (in __dst_free),
* we have _very_ good reasons not to repeat
* this mistake in 2.3, but we have no choice
@@ -279,7 +289,7 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
if (!unregister) {
dst->input = dst->output = dst_discard;
} else {
- dst->dev = dst->dev->nd_net->loopback_dev;
+ dst->dev = dev_net(dst->dev)->loopback_dev;
dev_hold(dst->dev);
dev_put(dev);
if (dst->neighbour && dst->neighbour->dev == dev) {
@@ -295,9 +305,6 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void
struct net_device *dev = ptr;
struct dst_entry *dst, *last = NULL;
- if (dev->nd_net != &init_net)
- return NOTIFY_DONE;
-
switch (event) {
case NETDEV_UNREGISTER:
case NETDEV_DOWN:
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 1163eb2256d0..0133b5ebd545 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -284,8 +284,10 @@ static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
{
struct ethtool_eeprom eeprom;
const struct ethtool_ops *ops = dev->ethtool_ops;
+ void __user *userbuf = useraddr + sizeof(eeprom);
+ u32 bytes_remaining;
u8 *data;
- int ret;
+ int ret = 0;
if (!ops->get_eeprom || !ops->get_eeprom_len)
return -EOPNOTSUPP;
@@ -301,26 +303,31 @@ static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
return -EINVAL;
- data = kmalloc(eeprom.len, GFP_USER);
+ data = kmalloc(PAGE_SIZE, GFP_USER);
if (!data)
return -ENOMEM;
- ret = -EFAULT;
- if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
- goto out;
+ bytes_remaining = eeprom.len;
+ while (bytes_remaining > 0) {
+ eeprom.len = min(bytes_remaining, (u32)PAGE_SIZE);
- ret = ops->get_eeprom(dev, &eeprom, data);
- if (ret)
- goto out;
+ ret = ops->get_eeprom(dev, &eeprom, data);
+ if (ret)
+ break;
+ if (copy_to_user(userbuf, data, eeprom.len)) {
+ ret = -EFAULT;
+ break;
+ }
+ userbuf += eeprom.len;
+ eeprom.offset += eeprom.len;
+ bytes_remaining -= eeprom.len;
+ }
- ret = -EFAULT;
+ eeprom.len = userbuf - (useraddr + sizeof(eeprom));
+ eeprom.offset -= eeprom.len;
if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
- goto out;
- if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
- goto out;
- ret = 0;
+ ret = -EFAULT;
- out:
kfree(data);
return ret;
}
@@ -329,8 +336,10 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
{
struct ethtool_eeprom eeprom;
const struct ethtool_ops *ops = dev->ethtool_ops;
+ void __user *userbuf = useraddr + sizeof(eeprom);
+ u32 bytes_remaining;
u8 *data;
- int ret;
+ int ret = 0;
if (!ops->set_eeprom || !ops->get_eeprom_len)
return -EOPNOTSUPP;
@@ -346,22 +355,26 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
return -EINVAL;
- data = kmalloc(eeprom.len, GFP_USER);
+ data = kmalloc(PAGE_SIZE, GFP_USER);
if (!data)
return -ENOMEM;
- ret = -EFAULT;
- if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
- goto out;
-
- ret = ops->set_eeprom(dev, &eeprom, data);
- if (ret)
- goto out;
+ bytes_remaining = eeprom.len;
+ while (bytes_remaining > 0) {
+ eeprom.len = min(bytes_remaining, (u32)PAGE_SIZE);
- if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
- ret = -EFAULT;
+ if (copy_from_user(data, userbuf, eeprom.len)) {
+ ret = -EFAULT;
+ break;
+ }
+ ret = ops->set_eeprom(dev, &eeprom, data);
+ if (ret)
+ break;
+ userbuf += eeprom.len;
+ eeprom.offset += eeprom.len;
+ bytes_remaining -= eeprom.len;
+ }
- out:
kfree(data);
return ret;
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 42ccaf5b8509..e3e9ab0f74e3 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -29,7 +29,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->pref = pref;
r->table = table;
r->flags = flags;
- r->fr_net = ops->fro_net;
+ r->fr_net = hold_net(ops->fro_net);
/* The lock is not required here, the list in unreacheable
* at the moment this function is called */
@@ -214,7 +214,7 @@ errout:
static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *r, *last = NULL;
@@ -243,7 +243,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
err = -ENOMEM;
goto errout;
}
- rule->fr_net = net;
+ rule->fr_net = hold_net(net);
if (tb[FRA_PRIORITY])
rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
@@ -344,6 +344,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
return 0;
errout_free:
+ release_net(rule->fr_net);
kfree(rule);
errout:
rules_ops_put(ops);
@@ -352,7 +353,7 @@ errout:
static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *tmp;
@@ -534,7 +535,7 @@ skip:
static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct fib_rules_ops *ops;
int idx = 0, family;
@@ -618,7 +619,7 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = ptr;
- struct net *net = dev->nd_net;
+ struct net *net = dev_net(dev);
struct fib_rules_ops *ops;
ASSERT_RTNL();
diff --git a/net/core/filter.c b/net/core/filter.c
index e0a06942c025..f5f3cf603064 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -27,6 +27,7 @@
#include <linux/if_packet.h>
#include <net/ip.h>
#include <net/protocol.h>
+#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
@@ -64,6 +65,41 @@ static inline void *load_pointer(struct sk_buff *skb, int k,
}
/**
+ * sk_filter - run a packet through a socket filter
+ * @sk: sock associated with &sk_buff
+ * @skb: buffer to filter
+ *
+ * This is the socket level wrapper to sk_run_filter. The result of
+ * security_sock_rcv_skb() is checked first and returned if non-zero.
+ * Then, if a filter is attached to @sk, the filter code is run inside
+ * rcu_read_lock_bh()/rcu_read_unlock_bh() and skb->data is cut to the
+ * size returned by sk_run_filter. If pkt_len is 0 we toss packet. If
+ * skb->len is smaller than pkt_len we keep whole skb->data. With no
+ * filter attached the packet is left untouched.
+ *
+ * Returns 0 if the packet should be accepted, -EPERM if the filter
+ * says the packet should be tossed, or the error returned by
+ * security_sock_rcv_skb() or pskb_trim().
+ */
+int sk_filter(struct sock *sk, struct sk_buff *skb)
+{
+ int err;
+ struct sk_filter *filter;
+
+ err = security_sock_rcv_skb(sk, skb);
+ if (err)
+ return err;
+
+ rcu_read_lock_bh();
+ filter = rcu_dereference(sk->sk_filter);
+ if (filter) {
+ unsigned int pkt_len = sk_run_filter(skb, filter->insns,
+ filter->len);
+ err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
+ }
+ rcu_read_unlock_bh();
+
+ return err;
+}
+EXPORT_SYMBOL(sk_filter);
+
+/**
* sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on
* @filter: filter to apply
@@ -268,6 +304,22 @@ load_b:
case SKF_AD_IFINDEX:
A = skb->dev->ifindex;
continue;
+		case SKF_AD_NLATTR: {
+			struct nlattr *nla;
+
+			/* Attributes are only searched in linear packet data. */
+			if (skb_is_nonlinear(skb))
+				return 0;
+			/*
+			 * skb->len - sizeof(struct nlattr) is unsigned arithmetic.
+			 * Reject packets shorter than one attribute header first,
+			 * otherwise the subtraction wraps around and the bound on
+			 * A below can never fail.
+			 */
+			if (skb->len < sizeof(struct nlattr))
+				return 0;
+			if (A > skb->len - sizeof(struct nlattr))
+				return 0;
+
+			/* Search for an attribute of type X starting at offset A. */
+			nla = nla_find((struct nlattr *)&skb->data[A],
+				       skb->len - A, X);
+			/* A becomes the attribute's offset from skb->data, 0 if absent. */
+			if (nla)
+				A = (void *)nla - (void *)skb->data;
+			else
+				A = 0;
+			continue;
+		}
default:
return 0;
}
@@ -275,6 +327,7 @@ load_b:
return 0;
}
+EXPORT_SYMBOL(sk_run_filter);
/**
* sk_chk_filter - verify socket filter code
@@ -385,6 +438,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
}
+EXPORT_SYMBOL(sk_chk_filter);
/**
* sk_filter_rcu_release: Release a socket filter by rcu_head
@@ -467,6 +521,3 @@ int sk_detach_filter(struct sock *sk)
rcu_read_unlock_bh();
return ret;
}
-
-EXPORT_SYMBOL(sk_chk_filter);
-EXPORT_SYMBOL(sk_run_filter);
diff --git a/net/core/flow.c b/net/core/flow.c
index a77531c139b7..19991175fdeb 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -23,7 +23,6 @@
#include <linux/mutex.h>
#include <net/flow.h>
#include <asm/atomic.h>
-#include <asm/semaphore.h>
#include <linux/security.h>
struct flow_cache_entry {
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 19b8e003f150..75075c303c44 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -123,6 +123,7 @@ unsigned long neigh_rand_reach_time(unsigned long base)
{
return (base ? (net_random() % base) + (base >> 1) : 0);
}
+EXPORT_SYMBOL(neigh_rand_reach_time);
static int neigh_forced_gc(struct neigh_table *tbl)
@@ -241,6 +242,7 @@ void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
neigh_flush_dev(tbl, dev);
write_unlock_bh(&tbl->lock);
}
+EXPORT_SYMBOL(neigh_changeaddr);
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
@@ -253,6 +255,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
pneigh_queue_purge(&tbl->proxy_queue);
return 0;
}
+EXPORT_SYMBOL(neigh_ifdown);
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
@@ -374,6 +377,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
read_unlock_bh(&tbl->lock);
return n;
}
+EXPORT_SYMBOL(neigh_lookup);
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
const void *pkey)
@@ -388,7 +392,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
hash_val = tbl->hash(pkey, NULL);
for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
if (!memcmp(n->primary_key, pkey, key_len) &&
- (net == n->dev->nd_net)) {
+ net_eq(dev_net(n->dev), net)) {
neigh_hold(n);
NEIGH_CACHE_STAT_INC(tbl, hits);
break;
@@ -397,6 +401,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
read_unlock_bh(&tbl->lock);
return n;
}
+EXPORT_SYMBOL(neigh_lookup_nodev);
struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev)
@@ -465,28 +470,44 @@ out_neigh_release:
neigh_release(n);
goto out;
}
+EXPORT_SYMBOL(neigh_create);
-struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
- struct net *net, const void *pkey, struct net_device *dev)
+static u32 pneigh_hash(const void *pkey, int key_len)
{
- struct pneigh_entry *n;
- int key_len = tbl->key_len;
u32 hash_val = *(u32 *)(pkey + key_len - 4);
-
hash_val ^= (hash_val >> 16);
hash_val ^= hash_val >> 8;
hash_val ^= hash_val >> 4;
hash_val &= PNEIGH_HASHMASK;
+ return hash_val;
+}
- for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
+static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
+ struct net *net,
+ const void *pkey,
+ int key_len,
+ struct net_device *dev)
+{
+ while (n) {
if (!memcmp(n->key, pkey, key_len) &&
- (n->net == net) &&
+ net_eq(pneigh_net(n), net) &&
(n->dev == dev || !n->dev))
- break;
+ return n;
+ n = n->next;
}
+ return NULL;
+}
- return n;
+struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
+ struct net *net, const void *pkey, struct net_device *dev)
+{
+ int key_len = tbl->key_len;
+ u32 hash_val = pneigh_hash(pkey, key_len);
+
+ return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
+ net, pkey, key_len, dev);
}
+EXPORT_SYMBOL_GPL(__pneigh_lookup);
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
struct net *net, const void *pkey,
@@ -494,26 +515,14 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
{
struct pneigh_entry *n;
int key_len = tbl->key_len;
- u32 hash_val = *(u32 *)(pkey + key_len - 4);
-
- hash_val ^= (hash_val >> 16);
- hash_val ^= hash_val >> 8;
- hash_val ^= hash_val >> 4;
- hash_val &= PNEIGH_HASHMASK;
+ u32 hash_val = pneigh_hash(pkey, key_len);
read_lock_bh(&tbl->lock);
-
- for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
- if (!memcmp(n->key, pkey, key_len) &&
- (n->net == net) &&
- (n->dev == dev || !n->dev)) {
- read_unlock_bh(&tbl->lock);
- goto out;
- }
- }
+ n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
+ net, pkey, key_len, dev);
read_unlock_bh(&tbl->lock);
- n = NULL;
- if (!creat)
+
+ if (n || !creat)
goto out;
ASSERT_RTNL();
@@ -522,7 +531,9 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
if (!n)
goto out;
+#ifdef CONFIG_NET_NS
n->net = hold_net(net);
+#endif
memcpy(n->key, pkey, key_len);
n->dev = dev;
if (dev)
@@ -544,6 +555,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
out:
return n;
}
+EXPORT_SYMBOL(pneigh_lookup);
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
@@ -551,25 +563,20 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
{
struct pneigh_entry *n, **np;
int key_len = tbl->key_len;
- u32 hash_val = *(u32 *)(pkey + key_len - 4);
-
- hash_val ^= (hash_val >> 16);
- hash_val ^= hash_val >> 8;
- hash_val ^= hash_val >> 4;
- hash_val &= PNEIGH_HASHMASK;
+ u32 hash_val = pneigh_hash(pkey, key_len);
write_lock_bh(&tbl->lock);
for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
np = &n->next) {
if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
- (n->net == net)) {
+ net_eq(pneigh_net(n), net)) {
*np = n->next;
write_unlock_bh(&tbl->lock);
if (tbl->pdestructor)
tbl->pdestructor(n);
if (n->dev)
dev_put(n->dev);
- release_net(n->net);
+ release_net(pneigh_net(n));
kfree(n);
return 0;
}
@@ -592,7 +599,7 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
tbl->pdestructor(n);
if (n->dev)
dev_put(n->dev);
- release_net(n->net);
+ release_net(pneigh_net(n));
kfree(n);
continue;
}
@@ -651,6 +658,7 @@ void neigh_destroy(struct neighbour *neigh)
atomic_dec(&neigh->tbl->entries);
kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}
+EXPORT_SYMBOL(neigh_destroy);
/* Neighbour state is suspicious;
disable fast path.
@@ -931,6 +939,7 @@ out_unlock_bh:
write_unlock_bh(&neigh->lock);
return rc;
}
+EXPORT_SYMBOL(__neigh_event_send);
static void neigh_update_hhs(struct neighbour *neigh)
{
@@ -1103,6 +1112,7 @@ out:
return err;
}
+EXPORT_SYMBOL(neigh_update);
struct neighbour *neigh_event_ns(struct neigh_table *tbl,
u8 *lladdr, void *saddr,
@@ -1115,6 +1125,7 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
NEIGH_UPDATE_F_OVERRIDE);
return neigh;
}
+EXPORT_SYMBOL(neigh_event_ns);
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
__be16 protocol)
@@ -1169,6 +1180,7 @@ int neigh_compat_output(struct sk_buff *skb)
return dev_queue_xmit(skb);
}
+EXPORT_SYMBOL(neigh_compat_output);
/* Slow and careful. */
@@ -1214,6 +1226,7 @@ out_kfree_skb:
kfree_skb(skb);
goto out;
}
+EXPORT_SYMBOL(neigh_resolve_output);
/* As fast as possible without hh cache */
@@ -1238,6 +1251,7 @@ int neigh_connected_output(struct sk_buff *skb)
}
return err;
}
+EXPORT_SYMBOL(neigh_connected_output);
static void neigh_proxy_process(unsigned long arg)
{
@@ -1299,6 +1313,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
mod_timer(&tbl->proxy_timer, sched_next);
spin_unlock(&tbl->proxy_queue.lock);
}
+EXPORT_SYMBOL(pneigh_enqueue);
static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
struct net *net, int ifindex)
@@ -1306,9 +1321,7 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
struct neigh_parms *p;
for (p = &tbl->parms; p; p = p->next) {
- if (p->net != net)
- continue;
- if ((p->dev && p->dev->ifindex == ifindex) ||
+ if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
(!p->dev && !ifindex))
return p;
}
@@ -1322,7 +1335,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
struct neigh_parms *p, *ref;
struct net *net;
- net = dev->nd_net;
+ net = dev_net(dev);
ref = lookup_neigh_params(tbl, net, 0);
if (!ref)
return NULL;
@@ -1342,7 +1355,9 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
dev_hold(dev);
p->dev = dev;
+#ifdef CONFIG_NET_NS
p->net = hold_net(net);
+#endif
p->sysctl_table = NULL;
write_lock_bh(&tbl->lock);
p->next = tbl->parms.next;
@@ -1351,6 +1366,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
}
return p;
}
+EXPORT_SYMBOL(neigh_parms_alloc);
static void neigh_rcu_free_parms(struct rcu_head *head)
{
@@ -1381,10 +1397,11 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
write_unlock_bh(&tbl->lock);
NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
+EXPORT_SYMBOL(neigh_parms_release);
static void neigh_parms_destroy(struct neigh_parms *parms)
{
- release_net(parms->net);
+ release_net(neigh_parms_net(parms));
kfree(parms);
}
@@ -1395,7 +1412,9 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
unsigned long now = jiffies;
unsigned long phsize;
+#ifdef CONFIG_NET_NS
tbl->parms.net = &init_net;
+#endif
atomic_set(&tbl->parms.refcnt, 1);
INIT_RCU_HEAD(&tbl->parms.rcu_head);
tbl->parms.reachable_time =
@@ -1441,6 +1460,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
tbl->last_flush = now;
tbl->last_rand = now + tbl->parms.reachable_time * 20;
}
+EXPORT_SYMBOL(neigh_table_init_no_netlink);
void neigh_table_init(struct neigh_table *tbl)
{
@@ -1462,6 +1482,7 @@ void neigh_table_init(struct neigh_table *tbl)
dump_stack();
}
}
+EXPORT_SYMBOL(neigh_table_init);
int neigh_table_clear(struct neigh_table *tbl)
{
@@ -1499,10 +1520,11 @@ int neigh_table_clear(struct neigh_table *tbl)
return 0;
}
+EXPORT_SYMBOL(neigh_table_clear);
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
struct nlattr *dst_attr;
struct neigh_table *tbl;
@@ -1568,7 +1590,7 @@ out:
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
struct neigh_table *tbl;
@@ -1836,7 +1858,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct neigh_table *tbl;
struct ndtmsg *ndtmsg;
struct nlattr *tb[NDTA_MAX+1];
@@ -1961,7 +1983,7 @@ errout:
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
int family, tidx, nidx = 0;
int tbl_skip = cb->args[0];
int neigh_skip = cb->args[1];
@@ -1982,7 +2004,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
break;
for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
- if (net != p->net)
+ if (!net_eq(neigh_parms_net(p), net))
continue;
if (nidx++ < neigh_skip)
@@ -2061,7 +2083,7 @@ static void neigh_update_notify(struct neighbour *neigh)
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct net * net = skb->sk->sk_net;
+ struct net * net = sock_net(skb->sk);
struct neighbour *n;
int rc, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
@@ -2074,7 +2096,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
s_idx = 0;
for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
int lidx;
- if (n->dev->nd_net != net)
+ if (dev_net(n->dev) != net)
continue;
lidx = idx++;
if (lidx < s_idx)
@@ -2169,7 +2191,7 @@ EXPORT_SYMBOL(__neigh_for_each_release);
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
struct neigh_seq_state *state = seq->private;
- struct net *net = state->p.net;
+ struct net *net = seq_file_net(seq);
struct neigh_table *tbl = state->tbl;
struct neighbour *n = NULL;
int bucket = state->bucket;
@@ -2179,7 +2201,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
n = tbl->hash_buckets[bucket];
while (n) {
- if (n->dev->nd_net != net)
+ if (!net_eq(dev_net(n->dev), net))
goto next;
if (state->neigh_sub_iter) {
loff_t fakep = 0;
@@ -2210,7 +2232,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
loff_t *pos)
{
struct neigh_seq_state *state = seq->private;
- struct net *net = state->p.net;
+ struct net *net = seq_file_net(seq);
struct neigh_table *tbl = state->tbl;
if (state->neigh_sub_iter) {
@@ -2222,7 +2244,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
while (1) {
while (n) {
- if (n->dev->nd_net != net)
+ if (!net_eq(dev_net(n->dev), net))
goto next;
if (state->neigh_sub_iter) {
void *v = state->neigh_sub_iter(state, n, pos);
@@ -2270,7 +2292,7 @@ static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
struct neigh_seq_state *state = seq->private;
- struct net * net = state->p.net;
+ struct net *net = seq_file_net(seq);
struct neigh_table *tbl = state->tbl;
struct pneigh_entry *pn = NULL;
int bucket = state->bucket;
@@ -2278,7 +2300,7 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
state->flags |= NEIGH_SEQ_IS_PNEIGH;
for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
pn = tbl->phash_buckets[bucket];
- while (pn && (pn->net != net))
+ while (pn && !net_eq(pneigh_net(pn), net))
pn = pn->next;
if (pn)
break;
@@ -2293,7 +2315,7 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
loff_t *pos)
{
struct neigh_seq_state *state = seq->private;
- struct net * net = state->p.net;
+ struct net *net = seq_file_net(seq);
struct neigh_table *tbl = state->tbl;
pn = pn->next;
@@ -2301,7 +2323,7 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
if (++state->bucket > PNEIGH_HASHMASK)
break;
pn = tbl->phash_buckets[state->bucket];
- while (pn && (pn->net != net))
+ while (pn && !net_eq(pneigh_net(pn), net))
pn = pn->next;
if (pn)
break;
@@ -2506,7 +2528,7 @@ static inline size_t neigh_nlmsg_size(void)
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
- struct net *net = n->dev->nd_net;
+ struct net *net = dev_net(n->dev);
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -2532,6 +2554,7 @@ void neigh_app_ns(struct neighbour *n)
{
__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
+EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */
#ifdef CONFIG_SYSCTL
@@ -2763,7 +2786,8 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id;
- t->sysctl_header = register_sysctl_paths(neigh_path, t->neigh_vars);
+ t->sysctl_header =
+ register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
if (!t->sysctl_header)
goto free_procname;
@@ -2777,6 +2801,7 @@ free:
err:
return -ENOBUFS;
}
+EXPORT_SYMBOL(neigh_sysctl_register);
void neigh_sysctl_unregister(struct neigh_parms *p)
{
@@ -2788,6 +2813,7 @@ void neigh_sysctl_unregister(struct neigh_parms *p)
kfree(t);
}
}
+EXPORT_SYMBOL(neigh_sysctl_unregister);
#endif /* CONFIG_SYSCTL */
@@ -2805,32 +2831,3 @@ static int __init neigh_init(void)
subsys_initcall(neigh_init);
-EXPORT_SYMBOL(__neigh_event_send);
-EXPORT_SYMBOL(neigh_changeaddr);
-EXPORT_SYMBOL(neigh_compat_output);
-EXPORT_SYMBOL(neigh_connected_output);
-EXPORT_SYMBOL(neigh_create);
-EXPORT_SYMBOL(neigh_destroy);
-EXPORT_SYMBOL(neigh_event_ns);
-EXPORT_SYMBOL(neigh_ifdown);
-EXPORT_SYMBOL(neigh_lookup);
-EXPORT_SYMBOL(neigh_lookup_nodev);
-EXPORT_SYMBOL(neigh_parms_alloc);
-EXPORT_SYMBOL(neigh_parms_release);
-EXPORT_SYMBOL(neigh_rand_reach_time);
-EXPORT_SYMBOL(neigh_resolve_output);
-EXPORT_SYMBOL(neigh_table_clear);
-EXPORT_SYMBOL(neigh_table_init);
-EXPORT_SYMBOL(neigh_table_init_no_netlink);
-EXPORT_SYMBOL(neigh_update);
-EXPORT_SYMBOL(pneigh_enqueue);
-EXPORT_SYMBOL(pneigh_lookup);
-EXPORT_SYMBOL_GPL(__pneigh_lookup);
-
-#ifdef CONFIG_ARPD
-EXPORT_SYMBOL(neigh_app_ns);
-#endif
-#ifdef CONFIG_SYSCTL
-EXPORT_SYMBOL(neigh_sysctl_register);
-EXPORT_SYMBOL(neigh_sysctl_unregister);
-#endif
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7635d3f72723..4e7b847347f7 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -87,6 +87,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
return ret;
}
+NETDEVICE_SHOW(dev_id, fmt_hex);
NETDEVICE_SHOW(addr_len, fmt_dec);
NETDEVICE_SHOW(iflink, fmt_dec);
NETDEVICE_SHOW(ifindex, fmt_dec);
@@ -210,6 +211,7 @@ static ssize_t store_tx_queue_len(struct device *dev,
static struct device_attribute net_class_attributes[] = {
__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
+ __ATTR(dev_id, S_IRUGO, show_dev_id, NULL),
__ATTR(iflink, S_IRUGO, show_iflink, NULL),
__ATTR(ifindex, S_IRUGO, show_ifindex, NULL),
__ATTR(features, S_IRUGO, show_features, NULL),
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7b660834a4c2..72b4c184dd84 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -5,7 +5,9 @@
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/sched.h>
+#include <linux/idr.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
/*
* Our network namespace constructor/destructor lists
@@ -20,6 +22,8 @@ LIST_HEAD(net_namespace_list);
struct net init_net;
EXPORT_SYMBOL(init_net);
+#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
+
/*
* setup_net runs the initializers for the network namespace object.
*/
@@ -28,9 +32,22 @@ static __net_init int setup_net(struct net *net)
/* Must be called with net_mutex held */
struct pernet_operations *ops;
int error;
+ struct net_generic *ng;
atomic_set(&net->count, 1);
+#ifdef NETNS_REFCNT_DEBUG
atomic_set(&net->use_count, 0);
+#endif
+
+ error = -ENOMEM;
+ ng = kzalloc(sizeof(struct net_generic) +
+ INITIAL_NET_GEN_PTRS * sizeof(void *), GFP_KERNEL);
+ if (ng == NULL)
+ goto out;
+
+ ng->len = INITIAL_NET_GEN_PTRS;
+ INIT_RCU_HEAD(&ng->rcu);
+ rcu_assign_pointer(net->gen, ng);
error = 0;
list_for_each_entry(ops, &pernet_list, list) {
@@ -53,6 +70,7 @@ out_undo:
}
rcu_barrier();
+ kfree(ng);
goto out;
}
@@ -70,11 +88,13 @@ static void net_free(struct net *net)
if (!net)
return;
+#ifdef NETNS_REFCNT_DEBUG
if (unlikely(atomic_read(&net->use_count) != 0)) {
printk(KERN_EMERG "network namespace not free! Usage: %d\n",
atomic_read(&net->use_count));
return;
}
+#endif
kmem_cache_free(net_cachep, net);
}
@@ -253,6 +273,8 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
}
#endif
+static DEFINE_IDA(net_generic_ids);
+
/**
* register_pernet_subsys - register a network namespace subsystem
* @ops: pernet operations structure for the subsystem
@@ -330,6 +352,30 @@ int register_pernet_device(struct pernet_operations *ops)
}
EXPORT_SYMBOL_GPL(register_pernet_device);
+/**
+ *	register_pernet_gen_device - register a pernet device and allocate its generic id
+ *	@id: where the allocated id is stored
+ *	@ops: pernet operations structure to register
+ *
+ *	Allocates an id (1 or above) from net_generic_ids and registers @ops
+ *	on pernet_list, all under net_mutex. The id is released again when
+ *	registration fails. Returns 0 on success or a negative errno.
+ */
+int register_pernet_gen_device(int *id, struct pernet_operations *ops)
+{
+	int error;
+
+	mutex_lock(&net_mutex);
+again:
+	error = ida_get_new_above(&net_generic_ids, 1, id);
+	if (error) {
+		if (error != -EAGAIN)
+			goto out;
+		/*
+		 * ida_pre_get() returns 0 when it cannot allocate memory;
+		 * give up instead of retrying forever with net_mutex held.
+		 */
+		if (!ida_pre_get(&net_generic_ids, GFP_KERNEL)) {
+			error = -ENOMEM;
+			goto out;
+		}
+		goto again;
+	}
+	error = register_pernet_operations(&pernet_list, ops);
+	if (error)
+		ida_remove(&net_generic_ids, *id);
+	else if (first_device == &pernet_list)
+		first_device = &ops->list;
+out:
+	mutex_unlock(&net_mutex);
+	return error;
+}
+EXPORT_SYMBOL_GPL(register_pernet_gen_device);
+
/**
* unregister_pernet_device - unregister a network namespace netdevice
* @ops: pernet operations structure to manipulate
@@ -348,3 +394,61 @@ void unregister_pernet_device(struct pernet_operations *ops)
mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
+
+/*
+ * Unregister @ops and hand @id back to net_generic_ids, all under
+ * net_mutex.
+ */
+void unregister_pernet_gen_device(int id, struct pernet_operations *ops)
+{
+ mutex_lock(&net_mutex);
+ /* If first_device points at @ops, advance it before @ops goes away. */
+ if (&ops->list == first_device)
+ first_device = first_device->next;
+ unregister_pernet_operations(ops);
+ ida_remove(&net_generic_ids, id);
+ mutex_unlock(&net_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_pernet_gen_device);
+
+/* RCU callback: free the net_generic array that embeds @rcu. */
+static void net_generic_release(struct rcu_head *rcu)
+{
+	kfree(container_of(rcu, struct net_generic, rcu));
+}
+
+/*
+ * net_assign_generic - store @data in slot @id of @net's generic pointer array
+ *
+ * Slots are numbered from 1 (@id == 0 is a bug) and net_mutex must be held.
+ * If the current array has fewer than @id slots, a larger zeroed array is
+ * allocated, the existing pointers are copied over, and the old array is
+ * freed after an RCU grace period.
+ *
+ * Returns 0 on success or -ENOMEM if the larger array cannot be allocated.
+ */
+int net_assign_generic(struct net *net, int id, void *data)
+{
+	struct net_generic *ng, *old_ng;
+
+	BUG_ON(!mutex_is_locked(&net_mutex));
+	BUG_ON(id == 0);
+
+	ng = old_ng = net->gen;
+	if (old_ng->len >= id)
+		goto assign;
+
+	ng = kzalloc(sizeof(struct net_generic) +
+			id * sizeof(void *), GFP_KERNEL);
+	if (ng == NULL)
+		return -ENOMEM;
+
+	/*
+	 * Some synchronisation notes:
+	 *
+	 * The net_generic explores the net->gen array inside rcu
+	 * read section. Besides once set the net->gen->ptr[x]
+	 * pointer never changes (see rules in netns/generic.h).
+	 *
+	 * That said, we simply duplicate this array and schedule
+	 * the old copy for kfree after a grace period.
+	 */
+
+	ng->len = id;
+	INIT_RCU_HEAD(&ng->rcu);
+	/* ->len counts pointer slots, so scale it to bytes for the copy. */
+	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void *));
+
+	rcu_assign_pointer(net->gen, ng);
+	call_rcu(&old_ng->rcu, net_generic_release);
+assign:
+	ng->ptr[id - 1] = data;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(net_assign_generic);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c635de52526c..b04d643fc3c7 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -390,9 +390,7 @@ static void arp_reply(struct sk_buff *skb)
if (skb->dev->flags & IFF_NOARP)
return;
- if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
- (2 * skb->dev->addr_len) +
- (2 * sizeof(u32)))))
+ if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
return;
skb_reset_network_header(skb);
@@ -420,7 +418,7 @@ static void arp_reply(struct sk_buff *skb)
ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
return;
- size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
+ size = arp_hdr_len(skb->dev);
send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
LL_RESERVED_SPACE(np->dev));
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 20e63b302ba6..a803b442234c 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1874,7 +1874,7 @@ static int pktgen_device_event(struct notifier_block *unused,
{
struct net_device *dev = ptr;
- if (dev->nd_net != &init_net)
+ if (dev_net(dev) != &init_net)
return NOTIFY_DONE;
/* It is OK that we do not hold the group lock right now,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2bd9c5f7627d..cf857c4dc7b1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -82,6 +82,11 @@ int rtnl_trylock(void)
return mutex_trylock(&rtnl_mutex);
}
+/* Report whether rtnl_mutex is currently locked, per mutex_is_locked(). */
+int rtnl_is_locked(void)
+{
+ return mutex_is_locked(&rtnl_mutex);
+}
+
static struct rtnl_link *rtnl_msg_handlers[NPROTO];
static inline int rtm_msgindex(int msgtype)
@@ -269,6 +274,26 @@ int rtnl_link_register(struct rtnl_link_ops *ops)
EXPORT_SYMBOL_GPL(rtnl_link_register);
+/*
+ * Delete every device in @net whose rtnl_link_ops is @ops. The device
+ * list is rescanned from the start after each ->dellink() call.
+ */
+static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
+{
+	struct net_device *dev, *victim;
+
+	do {
+		victim = NULL;
+		for_each_netdev(net, dev) {
+			if (dev->rtnl_link_ops == ops) {
+				victim = dev;
+				break;
+			}
+		}
+		if (victim)
+			ops->dellink(victim);
+	} while (victim);
+}
+
+/*
+ * Locked wrapper: takes the rtnl lock around __rtnl_kill_links() for
+ * @net and @ops.
+ */
+void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
+{
+ rtnl_lock();
+ __rtnl_kill_links(net, ops);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(rtnl_kill_links);
+
/**
* __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
* @ops: struct rtnl_link_ops * to unregister
@@ -277,17 +302,10 @@ EXPORT_SYMBOL_GPL(rtnl_link_register);
*/
void __rtnl_link_unregister(struct rtnl_link_ops *ops)
{
- struct net_device *dev, *n;
struct net *net;
for_each_net(net) {
-restart:
- for_each_netdev_safe(net, dev, n) {
- if (dev->rtnl_link_ops == ops) {
- ops->dellink(dev);
- goto restart;
- }
- }
+ __rtnl_kill_links(net, ops);
}
list_del(&ops->list);
}
@@ -662,7 +680,7 @@ nla_put_failure:
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
int idx;
int s_idx = cb->args[0];
struct net_device *dev;
@@ -879,7 +897,7 @@ errout:
static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
struct net_device *dev;
int err;
@@ -921,7 +939,7 @@ errout:
static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
struct net_device *dev;
struct ifinfomsg *ifm;
@@ -972,7 +990,7 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname,
goto err_free;
}
- dev->nd_net = net;
+ dev_net_set(dev, net);
dev->rtnl_link_ops = ops;
if (tb[IFLA_MTU])
@@ -1000,7 +1018,7 @@ err:
static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
struct net_device *dev;
struct ifinfomsg *ifm;
@@ -1132,7 +1150,7 @@ replay:
static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
struct nlattr *tb[IFLA_MAX+1];
struct net_device *dev = NULL;
@@ -1198,7 +1216,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
{
- struct net *net = dev->nd_net;
+ struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -1227,7 +1245,7 @@ static int rtattr_max;
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
rtnl_doit_func doit;
int sz_idx, kind;
int min_len;
@@ -1389,6 +1407,7 @@ EXPORT_SYMBOL(rtnetlink_put_metrics);
EXPORT_SYMBOL(rtnl_lock);
EXPORT_SYMBOL(rtnl_trylock);
EXPORT_SYMBOL(rtnl_unlock);
+EXPORT_SYMBOL(rtnl_is_locked);
EXPORT_SYMBOL(rtnl_unicast);
EXPORT_SYMBOL(rtnl_notify);
EXPORT_SYMBOL(rtnl_set_sk_err);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 608701339620..4fe605fa6f8a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -263,6 +263,28 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
return skb;
}
+/**
+ * dev_alloc_skb - allocate an skbuff for receiving
+ * @length: length to allocate
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory. Although this function
+ * allocates memory it can be called from an interrupt: the allocation
+ * is made with GFP_ATOMIC.
+ */
+struct sk_buff *dev_alloc_skb(unsigned int length)
+{
+ /*
+ * There is more code here than it seems:
+ * __dev_alloc_skb is an inline
+ */
+ return __dev_alloc_skb(length, GFP_ATOMIC);
+}
+EXPORT_SYMBOL(dev_alloc_skb);
+
static void skb_drop_list(struct sk_buff **listp)
{
struct sk_buff *list = *listp;
@@ -857,6 +879,78 @@ free_skb:
return err;
}
+/**
+ *	skb_put - append data to the tail of a buffer
+ *	@skb: buffer to extend
+ *	@len: number of bytes to append
+ *
+ *	Grows the used data area at the tail of @skb by @len bytes and
+ *	returns a pointer to the first of the newly added bytes. If the
+ *	tail would move past the end of the buffer, skb_over_panic() is
+ *	called.
+ */
+unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
+{
+	unsigned char *old_tail = skb_tail_pointer(skb);
+
+	SKB_LINEAR_ASSERT(skb);
+	skb->len += len;
+	skb->tail += len;
+	if (unlikely(skb->end < skb->tail))
+		skb_over_panic(skb, len, __builtin_return_address(0));
+	return old_tail;
+}
+EXPORT_SYMBOL(skb_put);
+
+/**
+ *	skb_push - prepend data to the start of a buffer
+ *	@skb: buffer to extend
+ *	@len: number of bytes to prepend
+ *
+ *	Grows the used data area at the front of @skb by @len bytes and
+ *	returns a pointer to the new first byte. If the data pointer would
+ *	move before the head of the buffer, skb_under_panic() is called.
+ */
+unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
+{
+	skb->len += len;
+	skb->data -= len;
+	if (unlikely(skb->head > skb->data))
+		skb_under_panic(skb, len, __builtin_return_address(0));
+	return skb->data;
+}
+EXPORT_SYMBOL(skb_push);
+
+/**
+ *	skb_pull - strip data from the start of a buffer
+ *	@skb: buffer to shrink
+ *	@len: number of bytes to remove
+ *
+ *	Removes @len bytes from the front of @skb, handing the space back
+ *	to the headroom, and returns a pointer to the next data in the
+ *	buffer. Later pushes will overwrite the pulled bytes. %NULL is
+ *	returned, and the buffer left untouched, when @len exceeds the
+ *	buffer length.
+ */
+unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
+{
+	if (unlikely(len > skb->len))
+		return NULL;
+	return __skb_pull(skb, len);
+}
+EXPORT_SYMBOL(skb_pull);
+
+/**
+ *	skb_trim - cut a buffer down to a given length
+ *	@skb: buffer to alter
+ *	@len: new length
+ *
+ *	Shortens @skb to @len bytes by dropping data from the tail. A
+ *	buffer that is already no longer than @len is left unmodified.
+ *	The skb must be linear.
+ */
+void skb_trim(struct sk_buff *skb, unsigned int len)
+{
+	if (skb->len <= len)
+		return;
+	__skb_trim(skb, len);
+}
+EXPORT_SYMBOL(skb_trim);
+
/* Trims skb to length len. It can change skb pointers.
*/
@@ -1766,7 +1860,7 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head
unsigned long flags;
spin_lock_irqsave(&list->lock, flags);
- __skb_append(old, newsk, list);
+ __skb_queue_after(list, old, newsk);
spin_unlock_irqrestore(&list->lock, flags);
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 7a0567b4b2c9..5dbb81bc9673 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -372,7 +372,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
- struct net *net = sk->sk_net;
+ struct net *net = sock_net(sk);
char devname[IFNAMSIZ];
int index;
@@ -450,15 +450,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
* Options without arguments
*/
-#ifdef SO_DONTLINGER /* Compatibility item... */
- if (optname == SO_DONTLINGER) {
- lock_sock(sk);
- sock_reset_flag(sk, SOCK_LINGER);
- release_sock(sk);
- return 0;
- }
-#endif
-
if (optname == SO_BINDTODEVICE)
return sock_bindtodevice(sk, optval, optlen);
@@ -942,7 +933,6 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
* @family: protocol family
* @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
* @prot: struct proto associated with this new sock instance
- * @zero_it: if we should zero the newly allocated sock
*/
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot)
@@ -958,7 +948,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
*/
sk->sk_prot = sk->sk_prot_creator = prot;
sock_lock_init(sk);
- sk->sk_net = get_net(net);
+ sock_net_set(sk, get_net(net));
}
return sk;
@@ -981,12 +971,32 @@ void sk_free(struct sock *sk)
if (atomic_read(&sk->sk_omem_alloc))
printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
- __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
+ __func__, atomic_read(&sk->sk_omem_alloc));
- put_net(sk->sk_net);
+ put_net(sock_net(sk));
sk_prot_free(sk->sk_prot_creator, sk);
}
+/*
+ * Last sock_put should drop the reference to sk->sk_net. It has already
+ * been dropped in sk_change_net. Taking a reference to a stopping namespace
+ * is not an option.
+ * Take a reference to the socket to remove it from hash _alive_ and after that
+ * destroy it in the context of init_net.
+ */
+void sk_release_kernel(struct sock *sk)
+{
+ if (sk == NULL || sk->sk_socket == NULL)
+ return;
+
+ sock_hold(sk); /* keep sk alive across sock_release() */
+ sock_release(sk->sk_socket);
+ release_net(sock_net(sk));
+ sock_net_set(sk, get_net(&init_net)); /* switch sk to a counted init_net reference */
+ sock_put(sk); /* drop the reference taken by sock_hold() above */
+}
+EXPORT_SYMBOL(sk_release_kernel);
+
struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
struct sock *newsk;
@@ -998,7 +1008,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
sock_copy(newsk, sk);
/* SANITY */
- get_net(newsk->sk_net);
+ get_net(sock_net(newsk));
sk_node_init(&newsk->sk_node);
sock_lock_init(newsk);
bh_lock_sock(newsk);
@@ -1076,10 +1086,12 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
if (sk_can_gso(sk)) {
- if (dst->header_len)
+ if (dst->header_len) {
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
- else
+ } else {
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
+ sk->sk_gso_max_size = dst->dev->gso_max_size;
+ }
}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
@@ -1919,16 +1931,113 @@ EXPORT_SYMBOL(sk_common_release);
static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);
+#ifdef CONFIG_PROC_FS
+#define PROTO_INUSE_NR 64 /* should be enough for the first time */
+/* Table of in-use counters, one slot per protocol, indexed by proto->inuse_idx. */
+struct prot_inuse {
+ int val[PROTO_INUSE_NR];
+};
+
+static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
+
+#ifdef CONFIG_NET_NS
+/*
+ * CONFIG_NET_NS variant: add @val to the current CPU's counter for @prot
+ * in @net's per-cpu inuse table.
+ */
+void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
+{
+ int cpu = smp_processor_id();
+ per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
+}
+EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
+
+/*
+ * CONFIG_NET_NS variant: sum @prot's counter in @net over all possible
+ * CPUs. A negative sum is reported as 0.
+ */
+int sock_prot_inuse_get(struct net *net, struct proto *prot)
+{
+	int slot = prot->inuse_idx;
+	int total = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		total += per_cpu_ptr(net->core.inuse, cpu)->val[slot];
+
+	if (total < 0)
+		return 0;
+	return total;
+}
+EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
+
+/* Pernet init: allocate @net's per-cpu inuse table; -ENOMEM on failure. */
+static int sock_inuse_init_net(struct net *net)
+{
+	net->core.inuse = alloc_percpu(struct prot_inuse);
+	if (!net->core.inuse)
+		return -ENOMEM;
+	return 0;
+}
+
+/* Pernet exit: free the per-cpu inuse table allocated by sock_inuse_init_net(). */
+static void sock_inuse_exit_net(struct net *net)
+{
+ free_percpu(net->core.inuse);
+}
+
+/* Pernet hooks that allocate and free each namespace's inuse counters. */
+static struct pernet_operations net_inuse_ops = {
+ .init = sock_inuse_init_net,
+ .exit = sock_inuse_exit_net,
+};
+
+/*
+ * Register net_inuse_ops as a pernet subsystem; panics if registration
+ * fails. Hooked up via core_initcall() below.
+ */
+static __init int net_inuse_init(void)
+{
+ if (register_pernet_subsys(&net_inuse_ops))
+ panic("Cannot initialize net inuse counters");
+
+ return 0;
+}
+
+core_initcall(net_inuse_init);
+#else
+static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
+
+/*
+ * !CONFIG_NET_NS variant: add @val to the current CPU's counter for @prot
+ * in the single global prot_inuse table. @net is not used here.
+ */
+void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
+{
+ __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val;
+}
+EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
+
+/*
+ * !CONFIG_NET_NS variant: sum @prot's counter in the global prot_inuse
+ * table over all possible CPUs. A negative sum is reported as 0; @net is
+ * not used here.
+ */
+int sock_prot_inuse_get(struct net *net, struct proto *prot)
+{
+	int slot = prot->inuse_idx;
+	int total = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		total += per_cpu(prot_inuse, cpu).val[slot];
+
+	if (total < 0)
+		return 0;
+	return total;
+}
+EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
+#endif
+
+/*
+ * Pick the first free slot in the proto_inuse_idx bitmap for @prot.
+ * Landing on the last slot (PROTO_INUSE_NR - 1) is reported as
+ * exhaustion and that slot is never marked as taken in the bitmap.
+ */
+static void assign_proto_idx(struct proto *prot)
+{
+	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
+
+	if (likely(prot->inuse_idx != PROTO_INUSE_NR - 1)) {
+		set_bit(prot->inuse_idx, proto_inuse_idx);
+		return;
+	}
+
+	printk(KERN_ERR "PROTO_INUSE_NR exhausted\n");
+}
+
+/*
+ * Give @prot's slot back to the proto_inuse_idx bitmap. The last slot
+ * (PROTO_INUSE_NR - 1) is never marked by assign_proto_idx(), so it is
+ * left alone here too.
+ */
+static void release_proto_idx(struct proto *prot)
+{
+	if (prot->inuse_idx == PROTO_INUSE_NR - 1)
+		return;
+	clear_bit(prot->inuse_idx, proto_inuse_idx);
+}
+#else
+/* !CONFIG_PROC_FS: no inuse counters are kept, so slot assignment is a no-op. */
+static inline void assign_proto_idx(struct proto *prot)
+{
+}
+
+/* !CONFIG_PROC_FS: nothing was assigned, so there is nothing to release. */
+static inline void release_proto_idx(struct proto *prot)
+{
+}
+#endif
+
int proto_register(struct proto *prot, int alloc_slab)
{
char *request_sock_slab_name = NULL;
char *timewait_sock_slab_name;
- if (sock_prot_inuse_init(prot) != 0) {
- printk(KERN_CRIT "%s: Can't alloc inuse counters!\n", prot->name);
- goto out;
- }
-
if (alloc_slab) {
prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
SLAB_HWCACHE_ALIGN, NULL);
@@ -1936,7 +2045,7 @@ int proto_register(struct proto *prot, int alloc_slab)
if (prot->slab == NULL) {
printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
prot->name);
- goto out_free_inuse;
+ goto out;
}
if (prot->rsk_prot != NULL) {
@@ -1979,6 +2088,7 @@ int proto_register(struct proto *prot, int alloc_slab)
write_lock(&proto_list_lock);
list_add(&prot->node, &proto_list);
+ assign_proto_idx(prot);
write_unlock(&proto_list_lock);
return 0;
@@ -1994,8 +2104,6 @@ out_free_request_sock_slab_name:
out_free_sock_slab:
kmem_cache_destroy(prot->slab);
prot->slab = NULL;
-out_free_inuse:
- sock_prot_inuse_free(prot);
out:
return -ENOBUFS;
}
@@ -2005,11 +2113,10 @@ EXPORT_SYMBOL(proto_register);
void proto_unregister(struct proto *prot)
{
write_lock(&proto_list_lock);
+ release_proto_idx(prot);
list_del(&prot->node);
write_unlock(&proto_list_lock);
- sock_prot_inuse_free(prot);
-
if (prot->slab != NULL) {
kmem_cache_destroy(prot->slab);
prot->slab = NULL;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 130338f83ae5..5fc801057244 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -127,7 +127,7 @@ static struct ctl_table net_core_table[] = {
{
.ctl_name = NET_CORE_SOMAXCONN,
.procname = "somaxconn",
- .data = &init_net.sysctl_somaxconn,
+ .data = &init_net.core.sysctl_somaxconn,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
@@ -161,7 +161,7 @@ static __net_init int sysctl_core_net_init(struct net *net)
{
struct ctl_table *tbl, *tmp;
- net->sysctl_somaxconn = SOMAXCONN;
+ net->core.sysctl_somaxconn = SOMAXCONN;
tbl = net_core_table;
if (net != &init_net) {
@@ -178,9 +178,9 @@ static __net_init int sysctl_core_net_init(struct net *net)
}
}
- net->sysctl_core_hdr = register_net_sysctl_table(net,
+ net->core.sysctl_hdr = register_net_sysctl_table(net,
net_core_path, tbl);
- if (net->sysctl_core_hdr == NULL)
+ if (net->core.sysctl_hdr == NULL)
goto err_reg;
return 0;
@@ -196,8 +196,8 @@ static __net_exit void sysctl_core_net_exit(struct net *net)
{
struct ctl_table *tbl;
- tbl = net->sysctl_core_hdr->ctl_table_arg;
- unregister_net_sysctl_table(net->sysctl_core_hdr);
+ tbl = net->core.sysctl_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->core.sysctl_hdr);
BUG_ON(tbl == net_core_table);
kfree(tbl);
}