summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/linux/rculist_nulls.h 59
-rw-r--r-- include/net/sock.h 13
-rw-r--r-- net/ipv4/inet_hashtables.c 8
-rw-r--r-- net/ipv4/inet_timewait_sock.c 35
4 files changed, 90 insertions, 25 deletions
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index 89186c499dd4..c26cb83ca071 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -53,6 +53,13 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
(*((struct hlist_nulls_node __rcu __force **)&(node)->next))
/**
+ * hlist_nulls_pprev_rcu - returns the dereferenced pprev of @node.
+ * @node: element of the list. @node->pprev is dereferenced, so it must point
+ *        to a valid pointer slot.
+ *
+ * Expands to an lvalue: the pointer slot that @node->pprev refers to, typed
+ * as an __rcu pointer so that it can be the target of rcu_assign_pointer().
+ */
+#define hlist_nulls_pprev_rcu(node) \
+ (*((struct hlist_nulls_node __rcu __force **)(node)->pprev))
+
+/**
* hlist_nulls_del_rcu - deletes entry from hash list without re-initialization
* @n: the element to delete from the hash list.
*
@@ -153,6 +160,58 @@ static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n)
}
/**
+ * hlist_nulls_replace_rcu - replace an old entry by a new one
+ * @old: the element to be replaced
+ * @new: the new element to insert
+ *
+ * Description:
+ * Replace the old entry with the new one in an RCU-protected hlist_nulls, while
+ * permitting racing traversals.
+ *
+ * @old itself is only read here: its next and pprev fields are left unchanged.
+ * @old->pprev is dereferenced (through @new->pprev), so it must not be NULL.
+ *
+ * The caller must take whatever precautions are necessary (such as holding
+ * appropriate locks) to avoid racing with another list-mutation primitive, such
+ * as hlist_nulls_add_head_rcu() or hlist_nulls_del_rcu(), running on this same
+ * list. However, it is perfectly legal to run concurrently with the _rcu
+ * list-traversal primitives, such as hlist_nulls_for_each_entry_rcu().
+ */
+static inline void hlist_nulls_replace_rcu(struct hlist_nulls_node *old,
+ struct hlist_nulls_node *new)
+{
+ struct hlist_nulls_node *next = old->next;
+
+ /* Fill in @new's own links before anything points at it. */
+ WRITE_ONCE(new->next, next);
+ WRITE_ONCE(new->pprev, old->pprev);
+ /* Publish: the slot referenced by @old->pprev now points to @new. */
+ rcu_assign_pointer(hlist_nulls_pprev_rcu(new), new);
+ /* If @next is a real node (not a nulls marker), redirect its back-link. */
+ if (!is_a_nulls(next))
+ WRITE_ONCE(next->pprev, &new->next);
+}
+
+/**
+ * hlist_nulls_replace_init_rcu - replace an old entry by a new one and
+ * initialize the old
+ * @old: the element to be replaced
+ * @new: the new element to insert
+ *
+ * Description:
+ * Replace the old entry with the new one in an RCU-protected hlist_nulls, while
+ * permitting racing traversals, and reinitialize the old entry.
+ *
+ * Reinitializing means only that @old->pprev is set to NULL; @old->next is
+ * left as it was.
+ *
+ * Note: @old must be hashed.
+ *
+ * The caller must take whatever precautions are necessary (such as holding
+ * appropriate locks) to avoid racing with another list-mutation primitive, such
+ * as hlist_nulls_add_head_rcu() or hlist_nulls_del_rcu(), running on this same
+ * list. However, it is perfectly legal to run concurrently with the _rcu
+ * list-traversal primitives, such as hlist_nulls_for_each_entry_rcu().
+ */
+static inline void hlist_nulls_replace_init_rcu(struct hlist_nulls_node *old,
+ struct hlist_nulls_node *new)
+{
+ hlist_nulls_replace_rcu(old, new);
+ /*
+  * Clear the back-link only after the replacement above, which reads
+  * @old->pprev. NOTE(review): a NULL pprev presumably marks @old as
+  * unhashed - confirm against the hlist_nulls unhashed helpers.
+  */
+ WRITE_ONCE(old->pprev, NULL);
+}
+
+/**
* hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_nulls_node to use as a loop cursor.
diff --git a/include/net/sock.h b/include/net/sock.h
index 30ac2eb4ef9b..335d0da82d79 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -856,6 +856,19 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk)
return rc;
}
+/*
+ * Swap @new in for @old on the nulls list that @old is hashed on.
+ *
+ * Returns false and changes nothing when @old is not hashed. Otherwise
+ * replaces @old's sk_nulls_node with @new's, calls __sock_put() on @old and
+ * returns true. No reference is taken on @new here.
+ */
+static inline bool sk_nulls_replace_node_init_rcu(struct sock *old,
+						  struct sock *new)
+{
+	if (!sk_hashed(old))
+		return false;
+
+	hlist_nulls_replace_init_rcu(&old->sk_nulls_node, &new->sk_nulls_node);
+	__sock_put(old);
+	return true;
+}
+
static inline void __sk_add_node(struct sock *sk, struct hlist_head *list)
{
hlist_add_head(&sk->sk_node, list);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index b7024e3d9ac3..f5826ec4bcaa 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -720,8 +720,11 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
spin_lock(lock);
if (osk) {
WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
- ret = sk_nulls_del_node_init_rcu(osk);
- } else if (found_dup_sk) {
+ ret = sk_nulls_replace_node_init_rcu(osk, sk);
+ goto unlock;
+ }
+
+ if (found_dup_sk) {
*found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
if (*found_dup_sk)
ret = false;
@@ -730,6 +733,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
if (ret)
__sk_nulls_add_node_rcu(sk, list);
+unlock:
spin_unlock(lock);
return ret;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c96d61d08854..d4c781a0667f 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -88,12 +88,6 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
}
EXPORT_SYMBOL_GPL(inet_twsk_put);
-static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
- struct hlist_nulls_head *list)
-{
- hlist_nulls_add_head_rcu(&tw->tw_node, list);
-}
-
static void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo)
{
__inet_twsk_schedule(tw, timeo, false);
@@ -113,13 +107,12 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw,
{
const struct inet_sock *inet = inet_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
- struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
struct inet_bind_hashbucket *bhead, *bhead2;
- /* Step 1: Put TW into bind hash. Original socket stays there too.
- Note, that any socket with inet->num != 0 MUST be bound in
- binding cache, even if it is closed.
+ /* Put TW into bind hash. Original socket stays there too.
+ * Note, that any socket with inet->num != 0 MUST be bound in
+ * binding cache, even if it is closed.
*/
bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
hashinfo->bhash_size)];
@@ -141,19 +134,6 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw,
spin_lock(lock);
- /* Step 2: Hash TW into tcp ehash chain */
- inet_twsk_add_node_rcu(tw, &ehead->chain);
-
- /* Step 3: Remove SK from hash chain */
- if (__sk_nulls_del_node_init_rcu(sk))
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-
-
- /* Ensure above writes are committed into memory before updating the
- * refcount.
- * Provides ordering vs later refcount_inc().
- */
- smp_wmb();
/* tw_refcnt is set to 3 because we have :
* - one reference for bhash chain.
* - one reference for ehash chain.
@@ -163,6 +143,15 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw,
*/
refcount_set(&tw->tw_refcnt, 3);
+ /* Ensure tw_refcnt has been set before tw is published.
+ * smp_wmb() provides the necessary memory barrier to enforce this
+ * ordering.
+ */
+ smp_wmb();
+
+ hlist_nulls_replace_init_rcu(&sk->sk_nulls_node, &tw->tw_node);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+
inet_twsk_schedule(tw, timeo);
spin_unlock(lock);