summaryrefslogtreecommitdiff
path: root/net/core/neighbour.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/neighbour.c')
-rw-r--r--net/core/neighbour.c486
1 files changed, 303 insertions, 183 deletions
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a4e0a7482c2b..8cc8f9a79db9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -122,7 +122,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
unsigned long neigh_rand_reach_time(unsigned long base)
{
- return (base ? (net_random() % base) + (base >> 1) : 0);
+ return base ? (net_random() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
@@ -131,15 +131,20 @@ static int neigh_forced_gc(struct neigh_table *tbl)
{
int shrunk = 0;
int i;
+ struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
write_lock_bh(&tbl->lock);
- for (i = 0; i <= tbl->hash_mask; i++) {
- struct neighbour *n, **np;
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
+ for (i = 0; i <= nht->hash_mask; i++) {
+ struct neighbour *n;
+ struct neighbour __rcu **np;
- np = &tbl->hash_buckets[i];
- while ((n = *np) != NULL) {
+ np = &nht->hash_buckets[i];
+ while ((n = rcu_dereference_protected(*np,
+ lockdep_is_held(&tbl->lock))) != NULL) {
/* Neighbour record may be discarded if:
* - nobody refers to it.
* - it is not permanent
@@ -147,7 +152,9 @@ static int neigh_forced_gc(struct neigh_table *tbl)
write_lock(&n->lock);
if (atomic_read(&n->refcnt) == 1 &&
!(n->nud_state & NUD_PERMANENT)) {
- *np = n->next;
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock)));
n->dead = 1;
shrunk = 1;
write_unlock(&n->lock);
@@ -199,16 +206,24 @@ static void pneigh_queue_purge(struct sk_buff_head *list)
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
int i;
+ struct neigh_hash_table *nht;
- for (i = 0; i <= tbl->hash_mask; i++) {
- struct neighbour *n, **np = &tbl->hash_buckets[i];
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
- while ((n = *np) != NULL) {
+ for (i = 0; i <= nht->hash_mask; i++) {
+ struct neighbour *n;
+ struct neighbour __rcu **np = &nht->hash_buckets[i];
+
+ while ((n = rcu_dereference_protected(*np,
+ lockdep_is_held(&tbl->lock))) != NULL) {
if (dev && n->dev != dev) {
np = &n->next;
continue;
}
- *np = n->next;
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock)));
write_lock(&n->lock);
neigh_del_timer(n);
n->dead = 1;
@@ -279,6 +294,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
skb_queue_head_init(&n->arp_queue);
rwlock_init(&n->lock);
+ seqlock_init(&n->ha_lock);
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
@@ -297,64 +313,86 @@ out_entries:
goto out;
}
-static struct neighbour **neigh_hash_alloc(unsigned int entries)
+static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
{
- unsigned long size = entries * sizeof(struct neighbour *);
- struct neighbour **ret;
+ size_t size = entries * sizeof(struct neighbour *);
+ struct neigh_hash_table *ret;
+ struct neighbour **buckets;
- if (size <= PAGE_SIZE) {
- ret = kzalloc(size, GFP_ATOMIC);
- } else {
- ret = (struct neighbour **)
- __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size));
+ ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
+ if (!ret)
+ return NULL;
+ if (size <= PAGE_SIZE)
+ buckets = kzalloc(size, GFP_ATOMIC);
+ else
+ buckets = (struct neighbour **)
+ __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
+ get_order(size));
+ if (!buckets) {
+ kfree(ret);
+ return NULL;
}
+ rcu_assign_pointer(ret->hash_buckets, buckets);
+ ret->hash_mask = entries - 1;
+ get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
return ret;
}
-static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
+static void neigh_hash_free_rcu(struct rcu_head *head)
{
- unsigned long size = entries * sizeof(struct neighbour *);
+ struct neigh_hash_table *nht = container_of(head,
+ struct neigh_hash_table,
+ rcu);
+ size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
+ struct neighbour **buckets = nht->hash_buckets;
if (size <= PAGE_SIZE)
- kfree(hash);
+ kfree(buckets);
else
- free_pages((unsigned long)hash, get_order(size));
+ free_pages((unsigned long)buckets, get_order(size));
+ kfree(nht);
}
-static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
+static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
+ unsigned long new_entries)
{
- struct neighbour **new_hash, **old_hash;
- unsigned int i, new_hash_mask, old_entries;
+ unsigned int i, hash;
+ struct neigh_hash_table *new_nht, *old_nht;
NEIGH_CACHE_STAT_INC(tbl, hash_grows);
BUG_ON(!is_power_of_2(new_entries));
- new_hash = neigh_hash_alloc(new_entries);
- if (!new_hash)
- return;
-
- old_entries = tbl->hash_mask + 1;
- new_hash_mask = new_entries - 1;
- old_hash = tbl->hash_buckets;
+ old_nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
+ new_nht = neigh_hash_alloc(new_entries);
+ if (!new_nht)
+ return old_nht;
- get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
- for (i = 0; i < old_entries; i++) {
+ for (i = 0; i <= old_nht->hash_mask; i++) {
struct neighbour *n, *next;
- for (n = old_hash[i]; n; n = next) {
- unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
-
- hash_val &= new_hash_mask;
- next = n->next;
-
- n->next = new_hash[hash_val];
- new_hash[hash_val] = n;
+ for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
+ lockdep_is_held(&tbl->lock));
+ n != NULL;
+ n = next) {
+ hash = tbl->hash(n->primary_key, n->dev,
+ new_nht->hash_rnd);
+
+ hash &= new_nht->hash_mask;
+ next = rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock));
+
+ rcu_assign_pointer(n->next,
+ rcu_dereference_protected(
+ new_nht->hash_buckets[hash],
+ lockdep_is_held(&tbl->lock)));
+ rcu_assign_pointer(new_nht->hash_buckets[hash], n);
}
}
- tbl->hash_buckets = new_hash;
- tbl->hash_mask = new_hash_mask;
- neigh_hash_free(old_hash, old_entries);
+ rcu_assign_pointer(tbl->nht, new_nht);
+ call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
+ return new_nht;
}
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
@@ -363,19 +401,26 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
struct neighbour *n;
int key_len = tbl->key_len;
u32 hash_val;
+ struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, lookups);
- read_lock_bh(&tbl->lock);
- hash_val = tbl->hash(pkey, dev);
- for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+ hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
+
+ for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
+ n != NULL;
+ n = rcu_dereference_bh(n->next)) {
if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
- neigh_hold(n);
+ if (!atomic_inc_not_zero(&n->refcnt))
+ n = NULL;
NEIGH_CACHE_STAT_INC(tbl, hits);
break;
}
}
- read_unlock_bh(&tbl->lock);
+
+ rcu_read_unlock_bh();
return n;
}
EXPORT_SYMBOL(neigh_lookup);
@@ -386,20 +431,27 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
struct neighbour *n;
int key_len = tbl->key_len;
u32 hash_val;
+ struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, lookups);
- read_lock_bh(&tbl->lock);
- hash_val = tbl->hash(pkey, NULL);
- for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+ hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;
+
+ for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
+ n != NULL;
+ n = rcu_dereference_bh(n->next)) {
if (!memcmp(n->primary_key, pkey, key_len) &&
net_eq(dev_net(n->dev), net)) {
- neigh_hold(n);
+ if (!atomic_inc_not_zero(&n->refcnt))
+ n = NULL;
NEIGH_CACHE_STAT_INC(tbl, hits);
break;
}
}
- read_unlock_bh(&tbl->lock);
+
+ rcu_read_unlock_bh();
return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
@@ -411,6 +463,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
int key_len = tbl->key_len;
int error;
struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
+ struct neigh_hash_table *nht;
if (!n) {
rc = ERR_PTR(-ENOBUFS);
@@ -437,18 +490,24 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
write_lock_bh(&tbl->lock);
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
- if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
- neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
+ if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
+ nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);
- hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
+ hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
if (n->parms->dead) {
rc = ERR_PTR(-EINVAL);
goto out_tbl_unlock;
}
- for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
+ for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
+ lockdep_is_held(&tbl->lock));
+ n1 != NULL;
+ n1 = rcu_dereference_protected(n1->next,
+ lockdep_is_held(&tbl->lock))) {
if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
neigh_hold(n1);
rc = n1;
@@ -456,10 +515,12 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
}
}
- n->next = tbl->hash_buckets[hash_val];
- tbl->hash_buckets[hash_val] = n;
n->dead = 0;
neigh_hold(n);
+ rcu_assign_pointer(n->next,
+ rcu_dereference_protected(nht->hash_buckets[hash_val],
+ lockdep_is_held(&tbl->lock)));
+ rcu_assign_pointer(nht->hash_buckets[hash_val], n);
write_unlock_bh(&tbl->lock);
NEIGH_PRINTK2("neigh %p is created.\n", n);
rc = n;
@@ -616,6 +677,12 @@ static inline void neigh_parms_put(struct neigh_parms *parms)
neigh_parms_destroy(parms);
}
+static void neigh_destroy_rcu(struct rcu_head *head)
+{
+ struct neighbour *neigh = container_of(head, struct neighbour, rcu);
+
+ kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
+}
/*
* neighbour must already be out of the table;
*
@@ -643,8 +710,7 @@ void neigh_destroy(struct neighbour *neigh)
write_seqlock_bh(&hh->hh_lock);
hh->hh_output = neigh_blackhole;
write_sequnlock_bh(&hh->hh_lock);
- if (atomic_dec_and_test(&hh->hh_refcnt))
- kfree(hh);
+ hh_cache_put(hh);
}
skb_queue_purge(&neigh->arp_queue);
@@ -655,7 +721,7 @@ void neigh_destroy(struct neighbour *neigh)
NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
atomic_dec(&neigh->tbl->entries);
- kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
+ call_rcu(&neigh->rcu, neigh_destroy_rcu);
}
EXPORT_SYMBOL(neigh_destroy);
@@ -696,12 +762,16 @@ static void neigh_connect(struct neighbour *neigh)
static void neigh_periodic_work(struct work_struct *work)
{
struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
- struct neighbour *n, **np;
+ struct neighbour *n;
+ struct neighbour __rcu **np;
unsigned int i;
+ struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
write_lock_bh(&tbl->lock);
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
/*
* periodically recompute ReachableTime from random function
@@ -715,10 +785,11 @@ static void neigh_periodic_work(struct work_struct *work)
neigh_rand_reach_time(p->base_reachable_time);
}
- for (i = 0 ; i <= tbl->hash_mask; i++) {
- np = &tbl->hash_buckets[i];
+ for (i = 0 ; i <= nht->hash_mask; i++) {
+ np = &nht->hash_buckets[i];
- while ((n = *np) != NULL) {
+ while ((n = rcu_dereference_protected(*np,
+ lockdep_is_held(&tbl->lock))) != NULL) {
unsigned int state;
write_lock(&n->lock);
@@ -766,9 +837,9 @@ next_elt:
static __inline__ int neigh_max_probes(struct neighbour *n)
{
struct neigh_parms *p = n->parms;
- return (n->nud_state & NUD_PROBE ?
+ return (n->nud_state & NUD_PROBE) ?
p->ucast_probes :
- p->ucast_probes + p->app_probes + p->mcast_probes);
+ p->ucast_probes + p->app_probes + p->mcast_probes;
}
static void neigh_invalidate(struct neighbour *neigh)
@@ -945,7 +1016,7 @@ out_unlock_bh:
}
EXPORT_SYMBOL(__neigh_event_send);
-static void neigh_update_hhs(struct neighbour *neigh)
+static void neigh_update_hhs(const struct neighbour *neigh)
{
struct hh_cache *hh;
void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1081,7 +1152,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
}
if (lladdr != neigh->ha) {
+ write_seqlock(&neigh->ha_lock);
memcpy(&neigh->ha, lladdr, dev->addr_len);
+ write_sequnlock(&neigh->ha_lock);
neigh_update_hhs(neigh);
if (!(new & NUD_CONNECTED))
neigh->confirmed = jiffies -
@@ -1139,44 +1212,73 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
}
EXPORT_SYMBOL(neigh_event_ns);
+static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
+ __be16 protocol)
+{
+ struct hh_cache *hh;
+
+ smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
+ for (hh = n->hh; hh; hh = hh->hh_next) {
+ if (hh->hh_type == protocol) {
+ atomic_inc(&hh->hh_refcnt);
+ if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
+ hh_cache_put(hh);
+ return true;
+ }
+ }
+ return false;
+}
+
+/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
__be16 protocol)
{
struct hh_cache *hh;
struct net_device *dev = dst->dev;
- for (hh = n->hh; hh; hh = hh->hh_next)
- if (hh->hh_type == protocol)
- break;
+ if (likely(neigh_hh_lookup(n, dst, protocol)))
+ return;
- if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
- seqlock_init(&hh->hh_lock);
- hh->hh_type = protocol;
- atomic_set(&hh->hh_refcnt, 0);
- hh->hh_next = NULL;
+ /* slow path */
+ hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
+ if (!hh)
+ return;
- if (dev->header_ops->cache(n, hh)) {
- kfree(hh);
- hh = NULL;
- } else {
- atomic_inc(&hh->hh_refcnt);
- hh->hh_next = n->hh;
- n->hh = hh;
- if (n->nud_state & NUD_CONNECTED)
- hh->hh_output = n->ops->hh_output;
- else
- hh->hh_output = n->ops->output;
- }
+ seqlock_init(&hh->hh_lock);
+ hh->hh_type = protocol;
+ atomic_set(&hh->hh_refcnt, 2);
+
+ if (dev->header_ops->cache(n, hh)) {
+ kfree(hh);
+ return;
}
- if (hh) {
- atomic_inc(&hh->hh_refcnt);
- dst->hh = hh;
+
+ write_lock_bh(&n->lock);
+
+ /* must check if another thread already did the insert */
+ if (neigh_hh_lookup(n, dst, protocol)) {
+ kfree(hh);
+ goto end;
}
+
+ if (n->nud_state & NUD_CONNECTED)
+ hh->hh_output = n->ops->hh_output;
+ else
+ hh->hh_output = n->ops->output;
+
+ hh->hh_next = n->hh;
+ smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
+ n->hh = hh;
+
+ if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
+ hh_cache_put(hh);
+end:
+ write_unlock_bh(&n->lock);
}
/* This function can be used in contexts, where only old dev_queue_xmit
- worked, f.e. if you want to override normal output path (eql, shaper),
- but resolution is not made yet.
+ * worked, f.e. if you want to override normal output path (eql, shaper),
+ * but resolution is not made yet.
*/
int neigh_compat_output(struct sk_buff *skb)
@@ -1210,19 +1312,19 @@ int neigh_resolve_output(struct sk_buff *skb)
if (!neigh_event_send(neigh, skb)) {
int err;
struct net_device *dev = neigh->dev;
- if (dev->header_ops->cache && !dst->hh) {
- write_lock_bh(&neigh->lock);
- if (!dst->hh)
- neigh_hh_init(neigh, dst, dst->ops->protocol);
- err = dev_hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
- write_unlock_bh(&neigh->lock);
- } else {
- read_lock_bh(&neigh->lock);
+ unsigned int seq;
+
+ if (dev->header_ops->cache &&
+ !dst->hh &&
+ !(dst->flags & DST_NOCACHE))
+ neigh_hh_init(neigh, dst, dst->ops->protocol);
+
+ do {
+ seq = read_seqbegin(&neigh->ha_lock);
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
neigh->ha, NULL, skb->len);
- read_unlock_bh(&neigh->lock);
- }
+ } while (read_seqretry(&neigh->ha_lock, seq));
+
if (err >= 0)
rc = neigh->ops->queue_xmit(skb);
else
@@ -1248,13 +1350,16 @@ int neigh_connected_output(struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
struct neighbour *neigh = dst->neighbour;
struct net_device *dev = neigh->dev;
+ unsigned int seq;
__skb_pull(skb, skb_network_offset(skb));
- read_lock_bh(&neigh->lock);
- err = dev_hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
- read_unlock_bh(&neigh->lock);
+ do {
+ seq = read_seqbegin(&neigh->ha_lock);
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+ neigh->ha, NULL, skb->len);
+ } while (read_seqretry(&neigh->ha_lock, seq));
+
if (err >= 0)
err = neigh->ops->queue_xmit(skb);
else {
@@ -1436,17 +1541,14 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
panic("cannot create neighbour proc dir entry");
#endif
- tbl->hash_mask = 1;
- tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
+ tbl->nht = neigh_hash_alloc(8);
phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
- if (!tbl->hash_buckets || !tbl->phash_buckets)
+ if (!tbl->nht || !tbl->phash_buckets)
panic("cannot allocate neighbour cache hashes");
- get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
-
rwlock_init(&tbl->lock);
INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
@@ -1486,8 +1588,7 @@ int neigh_table_clear(struct neigh_table *tbl)
struct neigh_table **tp;
/* It is not clean... Fix it to unload IPv6 module safely */
- cancel_delayed_work(&tbl->gc_work);
- flush_scheduled_work();
+ cancel_delayed_work_sync(&tbl->gc_work);
del_timer_sync(&tbl->proxy_timer);
pneigh_queue_purge(&tbl->proxy_queue);
neigh_ifdown(tbl, NULL);
@@ -1502,8 +1603,8 @@ int neigh_table_clear(struct neigh_table *tbl)
}
write_unlock(&neigh_tbl_lock);
- neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
- tbl->hash_buckets = NULL;
+ call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
+ tbl->nht = NULL;
kfree(tbl->phash_buckets);
tbl->phash_buckets = NULL;
@@ -1529,6 +1630,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct net_device *dev = NULL;
int err = -EINVAL;
+ ASSERT_RTNL();
if (nlmsg_len(nlh) < sizeof(*ndm))
goto out;
@@ -1538,7 +1640,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
- dev = dev_get_by_index(net, ndm->ndm_ifindex);
+ dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
err = -ENODEV;
goto out;
@@ -1554,34 +1656,31 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
read_unlock(&neigh_tbl_lock);
if (nla_len(dst_attr) < tbl->key_len)
- goto out_dev_put;
+ goto out;
if (ndm->ndm_flags & NTF_PROXY) {
err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
- goto out_dev_put;
+ goto out;
}
if (dev == NULL)
- goto out_dev_put;
+ goto out;
neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
if (neigh == NULL) {
err = -ENOENT;
- goto out_dev_put;
+ goto out;
}
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE |
NEIGH_UPDATE_F_ADMIN);
neigh_release(neigh);
- goto out_dev_put;
+ goto out;
}
read_unlock(&neigh_tbl_lock);
err = -EAFNOSUPPORT;
-out_dev_put:
- if (dev)
- dev_put(dev);
out:
return err;
}
@@ -1595,6 +1694,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct net_device *dev = NULL;
int err;
+ ASSERT_RTNL();
err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
if (err < 0)
goto out;
@@ -1605,14 +1705,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
- dev = dev_get_by_index(net, ndm->ndm_ifindex);
+ dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
err = -ENODEV;
goto out;
}
if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
- goto out_dev_put;
+ goto out;
}
read_lock(&neigh_tbl_lock);
@@ -1626,7 +1726,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
read_unlock(&neigh_tbl_lock);
if (nla_len(tb[NDA_DST]) < tbl->key_len)
- goto out_dev_put;
+ goto out;
dst = nla_data(tb[NDA_DST]);
lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
@@ -1639,29 +1739,29 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
pn->flags = ndm->ndm_flags;
err = 0;
}
- goto out_dev_put;
+ goto out;
}
if (dev == NULL)
- goto out_dev_put;
+ goto out;
neigh = neigh_lookup(tbl, dst, dev);
if (neigh == NULL) {
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
err = -ENOENT;
- goto out_dev_put;
+ goto out;
}
neigh = __neigh_lookup_errno(tbl, dst, dev);
if (IS_ERR(neigh)) {
err = PTR_ERR(neigh);
- goto out_dev_put;
+ goto out;
}
} else {
if (nlh->nlmsg_flags & NLM_F_EXCL) {
err = -EEXIST;
neigh_release(neigh);
- goto out_dev_put;
+ goto out;
}
if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
@@ -1674,15 +1774,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
} else
err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
neigh_release(neigh);
- goto out_dev_put;
+ goto out;
}
read_unlock(&neigh_tbl_lock);
err = -EAFNOSUPPORT;
-
-out_dev_put:
- if (dev)
- dev_put(dev);
out:
return err;
}
@@ -1748,18 +1844,22 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
unsigned long now = jiffies;
unsigned int flush_delta = now - tbl->last_flush;
unsigned int rand_delta = now - tbl->last_rand;
-
+ struct neigh_hash_table *nht;
struct ndt_config ndc = {
.ndtc_key_len = tbl->key_len,
.ndtc_entry_size = tbl->entry_size,
.ndtc_entries = atomic_read(&tbl->entries),
.ndtc_last_flush = jiffies_to_msecs(flush_delta),
.ndtc_last_rand = jiffies_to_msecs(rand_delta),
- .ndtc_hash_rnd = tbl->hash_rnd,
- .ndtc_hash_mask = tbl->hash_mask,
.ndtc_proxy_qlen = tbl->proxy_queue.qlen,
};
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+ ndc.ndtc_hash_rnd = nht->hash_rnd;
+ ndc.ndtc_hash_mask = nht->hash_mask;
+ rcu_read_unlock_bh();
+
NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
}
@@ -2056,10 +2156,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
read_lock_bh(&neigh->lock);
ndm->ndm_state = neigh->nud_state;
- if ((neigh->nud_state & NUD_VALID) &&
- nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
- read_unlock_bh(&neigh->lock);
- goto nla_put_failure;
+ if (neigh->nud_state & NUD_VALID) {
+ char haddr[MAX_ADDR_LEN];
+
+ neigh_ha_snapshot(haddr, neigh, neigh->dev);
+ if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
+ read_unlock_bh(&neigh->lock);
+ goto nla_put_failure;
+ }
}
ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
@@ -2087,18 +2191,23 @@ static void neigh_update_notify(struct neighbour *neigh)
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct net * net = sock_net(skb->sk);
+ struct net *net = sock_net(skb->sk);
struct neighbour *n;
int rc, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
+ struct neigh_hash_table *nht;
- read_lock_bh(&tbl->lock);
- for (h = 0; h <= tbl->hash_mask; h++) {
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+
+ for (h = 0; h <= nht->hash_mask; h++) {
if (h < s_h)
continue;
if (h > s_h)
s_idx = 0;
- for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
+ for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
+ n != NULL;
+ n = rcu_dereference_bh(n->next)) {
if (!net_eq(dev_net(n->dev), net))
continue;
if (idx < s_idx)
@@ -2107,17 +2216,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
NLM_F_MULTI) <= 0) {
- read_unlock_bh(&tbl->lock);
rc = -1;
goto out;
}
- next:
+next:
idx++;
}
}
- read_unlock_bh(&tbl->lock);
rc = skb->len;
out:
+ rcu_read_unlock_bh();
cb->args[1] = h;
cb->args[2] = idx;
return rc;
@@ -2150,15 +2258,22 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
int chain;
+ struct neigh_hash_table *nht;
- read_lock_bh(&tbl->lock);
- for (chain = 0; chain <= tbl->hash_mask; chain++) {
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+
+ read_lock(&tbl->lock); /* avoid resizes */
+ for (chain = 0; chain <= nht->hash_mask; chain++) {
struct neighbour *n;
- for (n = tbl->hash_buckets[chain]; n; n = n->next)
+ for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
+ n != NULL;
+ n = rcu_dereference_bh(n->next))
cb(n, cookie);
}
- read_unlock_bh(&tbl->lock);
+ read_unlock(&tbl->lock);
+ rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
@@ -2167,18 +2282,25 @@ void __neigh_for_each_release(struct neigh_table *tbl,
int (*cb)(struct neighbour *))
{
int chain;
+ struct neigh_hash_table *nht;
- for (chain = 0; chain <= tbl->hash_mask; chain++) {
- struct neighbour *n, **np;
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
+ for (chain = 0; chain <= nht->hash_mask; chain++) {
+ struct neighbour *n;
+ struct neighbour __rcu **np;
- np = &tbl->hash_buckets[chain];
- while ((n = *np) != NULL) {
+ np = &nht->hash_buckets[chain];
+ while ((n = rcu_dereference_protected(*np,
+ lockdep_is_held(&tbl->lock))) != NULL) {
int release;
write_lock(&n->lock);
release = cb(n);
if (release) {
- *np = n->next;
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock)));
n->dead = 1;
} else
np = &n->next;
@@ -2196,13 +2318,13 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
{
struct neigh_seq_state *state = seq->private;
struct net *net = seq_file_net(seq);
- struct neigh_table *tbl = state->tbl;
+ struct neigh_hash_table *nht = state->nht;
struct neighbour *n = NULL;
int bucket = state->bucket;
state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
- for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
- n = tbl->hash_buckets[bucket];
+ for (bucket = 0; bucket <= nht->hash_mask; bucket++) {
+ n = rcu_dereference_bh(nht->hash_buckets[bucket]);
while (n) {
if (!net_eq(dev_net(n->dev), net))
@@ -2219,8 +2341,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
break;
if (n->nud_state & ~NUD_NOARP)
break;
- next:
- n = n->next;
+next:
+ n = rcu_dereference_bh(n->next);
}
if (n)
@@ -2237,14 +2359,14 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
{
struct neigh_seq_state *state = seq->private;
struct net *net = seq_file_net(seq);
- struct neigh_table *tbl = state->tbl;
+ struct neigh_hash_table *nht = state->nht;
if (state->neigh_sub_iter) {
void *v = state->neigh_sub_iter(state, n, pos);
if (v)
return n;
}
- n = n->next;
+ n = rcu_dereference_bh(n->next);
while (1) {
while (n) {
@@ -2261,17 +2383,17 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
if (n->nud_state & ~NUD_NOARP)
break;
- next:
- n = n->next;
+next:
+ n = rcu_dereference_bh(n->next);
}
if (n)
break;
- if (++state->bucket > tbl->hash_mask)
+ if (++state->bucket > nht->hash_mask)
break;
- n = tbl->hash_buckets[state->bucket];
+ n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
}
if (n && pos)
@@ -2369,7 +2491,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
}
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
- __acquires(tbl->lock)
+ __acquires(rcu_bh)
{
struct neigh_seq_state *state = seq->private;
@@ -2377,7 +2499,8 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
state->bucket = 0;
state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
- read_lock_bh(&tbl->lock);
+ rcu_read_lock_bh();
+ state->nht = rcu_dereference_bh(tbl->nht);
return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
@@ -2411,12 +2534,9 @@ out:
EXPORT_SYMBOL(neigh_seq_next);
void neigh_seq_stop(struct seq_file *seq, void *v)
- __releases(tbl->lock)
+ __releases(rcu_bh)
{
- struct neigh_seq_state *state = seq->private;
- struct neigh_table *tbl = state->tbl;
-
- read_unlock_bh(&tbl->lock);
+ rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);