summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
author Thomas Graf <tgraf@suug.ch> 2015-01-03 01:00:20 +0300
committer David S. Miller <davem@davemloft.net> 2015-01-03 22:32:57 +0300
commit 97defe1ecf868b8127f8e62395499d6a06e4c4b1 (patch)
tree d3ed6d3db4943e01b1ae58e73580537ba1642d9e /net
parent 113948d841e8d78039e5dbbb5248f5b73e99eafa (diff)
download linux-97defe1ecf868b8127f8e62395499d6a06e4c4b1.tar.xz
rhashtable: Per bucket locks & deferred expansion/shrinking
Introduces an array of spinlocks to protect bucket mutations. The number of spinlocks per CPU is configurable, and the lock to use is selected based on the hash of the bucket. This allows for parallel insertions and removals of entries which do not share a lock. The patch also defers expansion and shrinking to a worker queue, which allows insertion and removal from atomic context. Insertions and deletions may occur in parallel with a resize and are only held up briefly while the particular bucket is linked or unzipped. Mutations of the bucket table pointer are protected by a new mutex; read access is RCU protected. In the event of an expansion or shrinking, the newly allocated bucket table is exposed as a so-called future table as soon as the resize process starts. Lookups, deletions, and insertions will briefly use both tables. The future table becomes the main table after an RCU grace period has elapsed and the initial linking of the old table to the new table has been performed. Optimization of the chains to make use of the new number of buckets follows only once the new table is in use. The side effect of this is that during that RCU grace period, a bucket traversal using any rht_for_each() variant on the main table will not see any insertions performed during the RCU grace period, which would at that point land in the future table. The lookup will see them as it searches both tables if needed. Having multiple insertions and removals occur in parallel requires nelems to become an atomic counter. Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/netfilter/nft_hash.c 27
-rw-r--r-- net/netlink/af_netlink.c 15
2 files changed, 14 insertions, 28 deletions
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 7f903cf9a1b9..75887d7d2c6a 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -33,7 +33,7 @@ static bool nft_hash_lookup(const struct nft_set *set,
const struct nft_data *key,
struct nft_data *data)
{
- const struct rhashtable *priv = nft_set_priv(set);
+ struct rhashtable *priv = nft_set_priv(set);
const struct nft_hash_elem *he;
he = rhashtable_lookup(priv, key);
@@ -113,7 +113,7 @@ static bool nft_hash_compare(void *ptr, void *arg)
static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
{
- const struct rhashtable *priv = nft_set_priv(set);
+ struct rhashtable *priv = nft_set_priv(set);
struct nft_compare_arg arg = {
.set = set,
.elem = elem,
@@ -129,7 +129,7 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
struct nft_set_iter *iter)
{
- const struct rhashtable *priv = nft_set_priv(set);
+ struct rhashtable *priv = nft_set_priv(set);
const struct bucket_table *tbl;
const struct nft_hash_elem *he;
struct nft_set_elem elem;
@@ -162,13 +162,6 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
return sizeof(struct rhashtable);
}
-#ifdef CONFIG_PROVE_LOCKING
-static int lockdep_nfnl_lock_is_held(void *parent)
-{
- return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES);
-}
-#endif
-
static int nft_hash_init(const struct nft_set *set,
const struct nft_set_desc *desc,
const struct nlattr * const tb[])
@@ -182,9 +175,6 @@ static int nft_hash_init(const struct nft_set *set,
.hashfn = jhash,
.grow_decision = rht_grow_above_75,
.shrink_decision = rht_shrink_below_30,
-#ifdef CONFIG_PROVE_LOCKING
- .mutex_is_held = lockdep_nfnl_lock_is_held,
-#endif
};
return rhashtable_init(priv, &params);
@@ -192,16 +182,23 @@ static int nft_hash_init(const struct nft_set *set,
static void nft_hash_destroy(const struct nft_set *set)
{
- const struct rhashtable *priv = nft_set_priv(set);
- const struct bucket_table *tbl = priv->tbl;
+ struct rhashtable *priv = nft_set_priv(set);
+ const struct bucket_table *tbl;
struct nft_hash_elem *he;
struct rhash_head *pos, *next;
unsigned int i;
+ /* Stop an eventual async resizing */
+ priv->being_destroyed = true;
+ mutex_lock(&priv->mutex);
+
+ tbl = rht_dereference(priv->tbl, priv);
for (i = 0; i < tbl->size; i++) {
rht_for_each_entry_safe(he, pos, next, tbl, i, node)
nft_hash_elem_destroy(set, he);
}
+ mutex_unlock(&priv->mutex);
+
rhashtable_destroy(priv);
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 57449b6089c2..738c3bfaa564 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -114,15 +114,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
DEFINE_MUTEX(nl_sk_hash_lock);
EXPORT_SYMBOL_GPL(nl_sk_hash_lock);
-#ifdef CONFIG_PROVE_LOCKING
-static int lockdep_nl_sk_hash_is_held(void *parent)
-{
- if (debug_locks)
- return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock);
- return 1;
-}
-#endif
-
static ATOMIC_NOTIFIER_HEAD(netlink_chain);
static DEFINE_SPINLOCK(netlink_tap_lock);
@@ -1063,7 +1054,8 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
goto err;
err = -ENOMEM;
- if (BITS_PER_LONG > 32 && unlikely(table->hash.nelems >= UINT_MAX))
+ if (BITS_PER_LONG > 32 &&
+ unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX))
goto err;
nlk_sk(sk)->portid = portid;
@@ -3122,9 +3114,6 @@ static int __init netlink_proto_init(void)
.max_shift = 16, /* 64K */
.grow_decision = rht_grow_above_75,
.shrink_decision = rht_shrink_below_30,
-#ifdef CONFIG_PROVE_LOCKING
- .mutex_is_held = lockdep_nl_sk_hash_is_held,
-#endif
};
if (err != 0)