Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig              17
-rw-r--r--  lib/Kconfig.debug         8
-rw-r--r--  lib/Makefile              1
-rw-r--r--  lib/nlattr.c            200
-rw-r--r--  lib/packing.c           213
-rw-r--r--  lib/rhashtable.c        210
-rw-r--r--  lib/test_rhashtable.c     2
7 files changed, 458 insertions, 193 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index e86975bfca6a..f323b85ad11c 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -18,6 +18,23 @@ config RAID6_PQ_BENCHMARK
Benchmark all available RAID6 PQ functions on init and choose the
fastest one.
+config PACKING
+ bool "Generic bitfield packing and unpacking"
+ default n
+ help
+ This option provides the packing() helper function, which permits
+ converting bitfields between a CPU-usable representation and a
+ memory representation that can have any combination of these quirks:
+ - Is little endian (bytes are reversed within a 32-bit group)
+ - The least-significant 32-bit word comes first (within a 64-bit
+ group)
+ - The most significant bit of a byte is at its right (bit 0 of a
+ register description is numerically 2^7).
+ Drivers may use these helpers to match the bit indices as described
+ in the data sheets of the peripherals they are in control of.
+
+ When in doubt, say N.
+
config BITREVERSE
tristate
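For context, a minimal sketch of how a driver selecting PACKING might call the helper described in the help text above. The buffer size, the 57:52 field position and the example function are invented for illustration; only packing(), its PACK op and the quirk flags come from this patch.

#include <linux/packing.h>
#include <linux/types.h>

/* Hypothetical example: write a 6-bit field living at bits 57:52 of a
 * quirky device word. The layout is made up; the call shape is what
 * CONFIG_PACKING provides.
 */
static void example_pack_field(void *cmd_buf, size_t buf_len, u64 field)
{
        /* PACK copies the CPU-order value into the packed buffer,
         * honouring the device's little-endian, least-significant-
         * word-first layout.
         */
        packing(cmd_buf, &field, 57, 52, buf_len,
                PACK, QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST);
}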
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 971c6c70891e..d695ec1477f3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -219,6 +219,14 @@ config DEBUG_INFO_DWARF4
But it significantly improves the success of resolving
variables in gdb on optimized code.
+config DEBUG_INFO_BTF
+ bool "Generate BTF typeinfo"
+ depends on DEBUG_INFO
+ help
+ Generate deduplicated BTF type information from DWARF debug info.
+ Turning this on requires the pahole tool, which converts the
+ DWARF type info into equivalent deduplicated BTF type info.
+
config GDB_SCRIPTS
bool "Provide GDB scripts for kernel debugging"
depends on DEBUG_INFO
diff --git a/lib/Makefile b/lib/Makefile
index 07506e3891a0..83d7df2661ff 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -120,6 +120,7 @@ obj-$(CONFIG_DEBUG_LIST) += list_debug.o
obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
obj-$(CONFIG_BITREVERSE) += bitrev.o
+obj-$(CONFIG_PACKING) += packing.o
obj-$(CONFIG_RATIONAL) += rational.o
obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o
obj-$(CONFIG_CRC16) += crc16.o
diff --git a/lib/nlattr.c b/lib/nlattr.c
index d26de6156b97..cace9b307781 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -69,7 +69,8 @@ static int validate_nla_bitfield32(const struct nlattr *nla,
static int nla_validate_array(const struct nlattr *head, int len, int maxtype,
const struct nla_policy *policy,
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ unsigned int validate)
{
const struct nlattr *entry;
int rem;
@@ -86,8 +87,8 @@ static int nla_validate_array(const struct nlattr *head, int len, int maxtype,
return -ERANGE;
}
- ret = nla_validate(nla_data(entry), nla_len(entry),
- maxtype, policy, extack);
+ ret = __nla_validate(nla_data(entry), nla_len(entry),
+ maxtype, policy, validate, extack);
if (ret < 0)
return ret;
}
@@ -154,13 +155,17 @@ static int nla_validate_int_range(const struct nla_policy *pt,
}
static int validate_nla(const struct nlattr *nla, int maxtype,
- const struct nla_policy *policy,
+ const struct nla_policy *policy, unsigned int validate,
struct netlink_ext_ack *extack)
{
+ u16 strict_start_type = policy[0].strict_start_type;
const struct nla_policy *pt;
int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla);
int err = -ERANGE;
+ if (strict_start_type && type >= strict_start_type)
+ validate |= NL_VALIDATE_STRICT;
+
if (type <= 0 || type > maxtype)
return 0;
@@ -172,6 +177,26 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
(pt->type == NLA_EXACT_LEN_WARN && attrlen != pt->len)) {
pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
current->comm, type);
+ if (validate & NL_VALIDATE_STRICT_ATTRS) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "invalid attribute length");
+ return -EINVAL;
+ }
+ }
+
+ if (validate & NL_VALIDATE_NESTED) {
+ if ((pt->type == NLA_NESTED || pt->type == NLA_NESTED_ARRAY) &&
+ !(nla->nla_type & NLA_F_NESTED)) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "NLA_F_NESTED is missing");
+ return -EINVAL;
+ }
+ if (pt->type != NLA_NESTED && pt->type != NLA_NESTED_ARRAY &&
+ pt->type != NLA_UNSPEC && (nla->nla_type & NLA_F_NESTED)) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "NLA_F_NESTED not expected");
+ return -EINVAL;
+ }
}
switch (pt->type) {
@@ -244,8 +269,9 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
if (attrlen < NLA_HDRLEN)
goto out_err;
if (pt->validation_data) {
- err = nla_validate(nla_data(nla), nla_len(nla), pt->len,
- pt->validation_data, extack);
+ err = __nla_validate(nla_data(nla), nla_len(nla), pt->len,
+ pt->validation_data, validate,
+ extack);
if (err < 0) {
/*
* return directly to preserve the inner
@@ -268,7 +294,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
err = nla_validate_array(nla_data(nla), nla_len(nla),
pt->len, pt->validation_data,
- extack);
+ extack, validate);
if (err < 0) {
/*
* return directly to preserve the inner
@@ -278,10 +304,23 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
}
}
break;
+
+ case NLA_UNSPEC:
+ if (validate & NL_VALIDATE_UNSPEC) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "Unsupported attribute");
+ return -EINVAL;
+ }
+ /* fall through */
+ case NLA_MIN_LEN:
+ if (attrlen < pt->len)
+ goto out_err;
+ break;
+
default:
if (pt->len)
minlen = pt->len;
- else if (pt->type != NLA_UNSPEC)
+ else
minlen = nla_attr_minlen[pt->type];
if (attrlen < minlen)
@@ -315,37 +354,76 @@ out_err:
return err;
}
+static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype,
+ const struct nla_policy *policy,
+ unsigned int validate,
+ struct netlink_ext_ack *extack,
+ struct nlattr **tb)
+{
+ const struct nlattr *nla;
+ int rem;
+
+ if (tb)
+ memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+
+ nla_for_each_attr(nla, head, len, rem) {
+ u16 type = nla_type(nla);
+
+ if (type == 0 || type > maxtype) {
+ if (validate & NL_VALIDATE_MAXTYPE) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "Unknown attribute type");
+ return -EINVAL;
+ }
+ continue;
+ }
+ if (policy) {
+ int err = validate_nla(nla, maxtype, policy,
+ validate, extack);
+
+ if (err < 0)
+ return err;
+ }
+
+ if (tb)
+ tb[type] = (struct nlattr *)nla;
+ }
+
+ if (unlikely(rem > 0)) {
+ pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n",
+ rem, current->comm);
+ NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes");
+ if (validate & NL_VALIDATE_TRAILING)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/**
- * nla_validate - Validate a stream of attributes
+ * __nla_validate - Validate a stream of attributes
* @head: head of attribute stream
* @len: length of attribute stream
* @maxtype: maximum attribute type to be expected
* @policy: validation policy
+ * @validate: validation strictness
* @extack: extended ACK report struct
*
* Validates all attributes in the specified attribute stream against the
- * specified policy. Attributes with a type exceeding maxtype will be
- * ignored. See documenation of struct nla_policy for more details.
+ * specified policy. Validation depends on the validate flags passed, see
+ * &enum netlink_validation for more details on that.
+ * See documentation of struct nla_policy for more details.
*
* Returns 0 on success or a negative error code.
*/
-int nla_validate(const struct nlattr *head, int len, int maxtype,
- const struct nla_policy *policy,
- struct netlink_ext_ack *extack)
+int __nla_validate(const struct nlattr *head, int len, int maxtype,
+ const struct nla_policy *policy, unsigned int validate,
+ struct netlink_ext_ack *extack)
{
- const struct nlattr *nla;
- int rem;
-
- nla_for_each_attr(nla, head, len, rem) {
- int err = validate_nla(nla, maxtype, policy, extack);
-
- if (err < 0)
- return err;
- }
-
- return 0;
+ return __nla_validate_parse(head, len, maxtype, policy, validate,
+ extack, NULL);
}
-EXPORT_SYMBOL(nla_validate);
+EXPORT_SYMBOL(__nla_validate);
/**
* nla_policy_len - Determine the max. length of a policy
@@ -377,76 +455,30 @@ nla_policy_len(const struct nla_policy *p, int n)
EXPORT_SYMBOL(nla_policy_len);
/**
- * nla_parse - Parse a stream of attributes into a tb buffer
+ * __nla_parse - Parse a stream of attributes into a tb buffer
* @tb: destination array with maxtype+1 elements
* @maxtype: maximum attribute type to be expected
* @head: head of attribute stream
* @len: length of attribute stream
* @policy: validation policy
+ * @validate: validation strictness
+ * @extack: extended ACK pointer
*
* Parses a stream of attributes and stores a pointer to each attribute in
- * the tb array accessible via the attribute type. Attributes with a type
- * exceeding maxtype will be silently ignored for backwards compatibility
- * reasons. policy may be set to NULL if no validation is required.
+ * the tb array accessible via the attribute type.
+ * Validation is controlled by the @validate parameter.
*
* Returns 0 on success or a negative error code.
*/
-static int __nla_parse(struct nlattr **tb, int maxtype,
- const struct nlattr *head, int len,
- bool strict, const struct nla_policy *policy,
- struct netlink_ext_ack *extack)
-{
- const struct nlattr *nla;
- int rem;
-
- memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
-
- nla_for_each_attr(nla, head, len, rem) {
- u16 type = nla_type(nla);
-
- if (type == 0 || type > maxtype) {
- if (strict) {
- NL_SET_ERR_MSG(extack, "Unknown attribute type");
- return -EINVAL;
- }
- continue;
- }
- if (policy) {
- int err = validate_nla(nla, maxtype, policy, extack);
-
- if (err < 0)
- return err;
- }
-
- tb[type] = (struct nlattr *)nla;
- }
-
- if (unlikely(rem > 0)) {
- pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n",
- rem, current->comm);
- NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes");
- if (strict)
- return -EINVAL;
- }
-
- return 0;
-}
-
-int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
- int len, const struct nla_policy *policy,
- struct netlink_ext_ack *extack)
-{
- return __nla_parse(tb, maxtype, head, len, false, policy, extack);
-}
-EXPORT_SYMBOL(nla_parse);
-
-int nla_parse_strict(struct nlattr **tb, int maxtype, const struct nlattr *head,
- int len, const struct nla_policy *policy,
- struct netlink_ext_ack *extack)
+int __nla_parse(struct nlattr **tb, int maxtype,
+ const struct nlattr *head, int len,
+ const struct nla_policy *policy, unsigned int validate,
+ struct netlink_ext_ack *extack)
{
- return __nla_parse(tb, maxtype, head, len, true, policy, extack);
+ return __nla_validate_parse(head, len, maxtype, policy, validate,
+ extack, tb);
}
-EXPORT_SYMBOL(nla_parse_strict);
+EXPORT_SYMBOL(__nla_parse);
/**
* nla_find - Find a specific attribute in a stream of attributes
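To illustrate the new interface, a hedged sketch of a policy that uses the strict_start_type field together with __nla_parse(). The attribute enum, the policy contents and the choice of NL_VALIDATE_STRICT are hypothetical; __nla_parse(), struct nla_policy and the NL_VALIDATE_* flags come from this series, and the matching declarations are assumed to live in <net/netlink.h> as updated alongside this file.

#include <net/netlink.h>

enum {
        HYP_ATTR_UNSPEC,
        HYP_ATTR_LEGACY,        /* old attribute, parsed as before */
        HYP_ATTR_NEW,           /* new attribute, validated strictly */
        __HYP_ATTR_MAX,
};
#define HYP_ATTR_MAX (__HYP_ATTR_MAX - 1)

static const struct nla_policy hyp_policy[HYP_ATTR_MAX + 1] = {
        /* Entry 0 carries strict_start_type: every attribute type from
         * HYP_ATTR_NEW onwards is validated with NL_VALIDATE_STRICT even
         * when the caller asked for lenient parsing.
         */
        [HYP_ATTR_UNSPEC]       = { .strict_start_type = HYP_ATTR_NEW },
        [HYP_ATTR_LEGACY]       = { .type = NLA_U32 },
        [HYP_ATTR_NEW]          = { .type = NLA_FLAG },
};

static int hyp_parse(struct nlattr **tb, const struct nlattr *head, int len,
                     struct netlink_ext_ack *extack)
{
        return __nla_parse(tb, HYP_ATTR_MAX, head, len, hyp_policy,
                           NL_VALIDATE_STRICT, extack);
}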
diff --git a/lib/packing.c b/lib/packing.c
new file mode 100644
index 000000000000..50d1e9f2f5a7
--- /dev/null
+++ b/lib/packing.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2016-2018, NXP Semiconductors
+ * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
+ */
+#include <linux/packing.h>
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+
+static int get_le_offset(int offset)
+{
+ int closest_multiple_of_4;
+
+ closest_multiple_of_4 = (offset / 4) * 4;
+ offset -= closest_multiple_of_4;
+ return closest_multiple_of_4 + (3 - offset);
+}
+
+static int get_reverse_lsw32_offset(int offset, size_t len)
+{
+ int closest_multiple_of_4;
+ int word_index;
+
+ word_index = offset / 4;
+ closest_multiple_of_4 = word_index * 4;
+ offset -= closest_multiple_of_4;
+ word_index = (len / 4) - word_index - 1;
+ return word_index * 4 + offset;
+}
+
+static u64 bit_reverse(u64 val, unsigned int width)
+{
+ u64 new_val = 0;
+ unsigned int bit;
+ unsigned int i;
+
+ for (i = 0; i < width; i++) {
+ bit = (val & (1 << i)) != 0;
+ new_val |= (bit << (width - i - 1));
+ }
+ return new_val;
+}
+
+static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit,
+ int *box_end_bit, u8 *box_mask)
+{
+ int box_bit_width = *box_start_bit - *box_end_bit + 1;
+ int new_box_start_bit, new_box_end_bit;
+
+ *to_write >>= *box_end_bit;
+ *to_write = bit_reverse(*to_write, box_bit_width);
+ *to_write <<= *box_end_bit;
+
+ new_box_end_bit = box_bit_width - *box_start_bit - 1;
+ new_box_start_bit = box_bit_width - *box_end_bit - 1;
+ *box_mask = GENMASK_ULL(new_box_start_bit, new_box_end_bit);
+ *box_start_bit = new_box_start_bit;
+ *box_end_bit = new_box_end_bit;
+}
+
+/**
+ * packing - Convert numbers (currently u64) between a packed and an unpacked
+ * format. Unpacked means laid out in memory in the CPU's native
+ * understanding of integers, while packed means anything else that
+ * requires translation.
+ *
+ * @pbuf: Pointer to a buffer holding the packed value.
+ * @uval: Pointer to an u64 holding the unpacked value.
+ * @startbit: The index (in logical notation, compensated for quirks) where
+ * the packed value starts within pbuf. Must be larger than, or
+ * equal to, endbit.
+ * @endbit: The index (in logical notation, compensated for quirks) where
+ * the packed value ends within pbuf. Must be smaller than, or equal
+ * to, startbit.
+ * @op: If PACK, then uval will be treated as const pointer and copied (packed)
+ * into pbuf, between startbit and endbit.
+ * If UNPACK, then pbuf will be treated as const pointer and the logical
+ * value between startbit and endbit will be copied (unpacked) to uval.
+ * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and
+ * QUIRK_MSB_ON_THE_RIGHT.
+ *
+ * Return: 0 on success, -EINVAL or -ERANGE if called incorrectly. Assuming
+ * correct usage, return code may be discarded.
+ * If op is PACK, pbuf is modified.
+ * If op is UNPACK, uval is modified.
+ */
+int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen,
+ enum packing_op op, u8 quirks)
+{
+ /* Number of bits for storing "uval"
+ * also width of the field to access in the pbuf
+ */
+ u64 value_width;
+ /* Logical byte indices corresponding to the
+ * start and end of the field.
+ */
+ int plogical_first_u8, plogical_last_u8, box;
+
+ /* startbit is expected to be larger than endbit */
+ if (startbit < endbit)
+ /* Invalid function call */
+ return -EINVAL;
+
+ value_width = startbit - endbit + 1;
+ if (value_width > 64)
+ return -ERANGE;
+
+ /* Check if "uval" fits in "value_width" bits.
+ * If value_width is 64, the check will fail, but any
+ * 64-bit uval will surely fit.
+ */
+ if (op == PACK && value_width < 64 && (*uval >= (1ull << value_width)))
+ /* Cannot store "uval" inside "value_width" bits.
+ * Truncating "uval" is most certainly not desirable,
+ * so simply erroring out is appropriate.
+ */
+ return -ERANGE;
+
+ /* Initialize parameter */
+ if (op == UNPACK)
+ *uval = 0;
+
+ /* Iterate through an idealistic view of the pbuf as an u64 with
+ * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low
+ * logical bit significance. "box" denotes the current logical u8.
+ */
+ plogical_first_u8 = startbit / 8;
+ plogical_last_u8 = endbit / 8;
+
+ for (box = plogical_first_u8; box >= plogical_last_u8; box--) {
+ /* Bit indices into the currently accessed 8-bit box */
+ int box_start_bit, box_end_bit, box_addr;
+ u8 box_mask;
+ /* Corresponding bits from the unpacked u64 parameter */
+ int proj_start_bit, proj_end_bit;
+ u64 proj_mask;
+
+ /* This u8 may need to be accessed in its entirety
+ * (from bit 7 to bit 0), or not, depending on the
+ * input arguments startbit and endbit.
+ */
+ if (box == plogical_first_u8)
+ box_start_bit = startbit % 8;
+ else
+ box_start_bit = 7;
+ if (box == plogical_last_u8)
+ box_end_bit = endbit % 8;
+ else
+ box_end_bit = 0;
+
+ /* We have determined the box bit start and end.
+ * Now we calculate where this (masked) u8 box would fit
+ * in the unpacked (CPU-readable) u64 - the u8 box's
+ * projection onto the unpacked u64. Though the
+ * box is u8, the projection is u64 because it may fall
+ * anywhere within the unpacked u64.
+ */
+ proj_start_bit = ((box * 8) + box_start_bit) - endbit;
+ proj_end_bit = ((box * 8) + box_end_bit) - endbit;
+ proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit);
+ box_mask = GENMASK_ULL(box_start_bit, box_end_bit);
+
+ /* Determine the offset of the u8 box inside the pbuf,
+ * adjusted for quirks. The adjusted box_addr will be used for
+ * effective addressing inside the pbuf (so it's not
+ * logical any longer).
+ */
+ box_addr = pbuflen - box - 1;
+ if (quirks & QUIRK_LITTLE_ENDIAN)
+ box_addr = get_le_offset(box_addr);
+ if (quirks & QUIRK_LSW32_IS_FIRST)
+ box_addr = get_reverse_lsw32_offset(box_addr,
+ pbuflen);
+
+ if (op == UNPACK) {
+ u64 pval;
+
+ /* Read from pbuf, write to uval */
+ pval = ((u8 *)pbuf)[box_addr] & box_mask;
+ if (quirks & QUIRK_MSB_ON_THE_RIGHT)
+ adjust_for_msb_right_quirk(&pval,
+ &box_start_bit,
+ &box_end_bit,
+ &box_mask);
+
+ pval >>= box_end_bit;
+ pval <<= proj_end_bit;
+ *uval &= ~proj_mask;
+ *uval |= pval;
+ } else {
+ u64 pval;
+
+ /* Write to pbuf, read from uval */
+ pval = (*uval) & proj_mask;
+ pval >>= proj_end_bit;
+ if (quirks & QUIRK_MSB_ON_THE_RIGHT)
+ adjust_for_msb_right_quirk(&pval,
+ &box_start_bit,
+ &box_end_bit,
+ &box_mask);
+
+ pval <<= box_end_bit;
+ ((u8 *)pbuf)[box_addr] &= ~box_mask;
+ ((u8 *)pbuf)[box_addr] |= pval;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL(packing);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Generic bitfield packing and unpacking");
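A hedged, self-contained sketch of the pack/unpack round trip that the kernel-doc above describes, with no quirks set so the buffer is treated as a plain big-endian u64. The 47:36 field position and the helper name are arbitrary choices for illustration.

#include <linux/errno.h>
#include <linux/packing.h>
#include <linux/types.h>

/* Pack a 12-bit value into bits 47:36 of an 8-byte buffer, then unpack
 * it again and check that the round trip is lossless.
 */
static int example_packing_roundtrip(void)
{
        u8 buf[8] = {};
        u64 val = 0xabc;
        u64 readback = 0;
        int err;

        err = packing(buf, &val, 47, 36, sizeof(buf), PACK, 0);
        if (err)
                return err;

        err = packing(buf, &readback, 47, 36, sizeof(buf), UNPACK, 0);
        if (err)
                return err;

        return readback == val ? 0 : -EINVAL;
}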
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 97f59abc3e92..6529fe1b45c1 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -31,11 +31,10 @@
#define HASH_DEFAULT_SIZE 64UL
#define HASH_MIN_SIZE 4U
-#define BUCKET_LOCKS_PER_CPU 32UL
union nested_table {
union nested_table __rcu *table;
- struct rhash_head __rcu *bucket;
+ struct rhash_lock_head __rcu *bucket;
};
static u32 head_hashfn(struct rhashtable *ht,
@@ -56,9 +55,11 @@ EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
{
- spinlock_t *lock = rht_bucket_lock(tbl, hash);
-
- return (debug_locks) ? lockdep_is_held(lock) : 1;
+ if (!debug_locks)
+ return 1;
+ if (unlikely(tbl->nest))
+ return 1;
+ return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
@@ -104,7 +105,6 @@ static void bucket_table_free(const struct bucket_table *tbl)
if (tbl->nest)
nested_bucket_table_free(tbl);
- free_bucket_spinlocks(tbl->locks);
kvfree(tbl);
}
@@ -131,9 +131,11 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht,
INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
}
- rcu_assign_pointer(*prev, ntbl);
-
- return ntbl;
+ if (cmpxchg(prev, NULL, ntbl) == NULL)
+ return ntbl;
+ /* Raced with another thread. */
+ kfree(ntbl);
+ return rcu_dereference(*prev);
}
static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
@@ -169,11 +171,11 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
gfp_t gfp)
{
struct bucket_table *tbl = NULL;
- size_t size, max_locks;
+ size_t size;
int i;
+ static struct lock_class_key __key;
- size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
- tbl = kvzalloc(size, gfp);
+ tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);
size = nbuckets;
@@ -185,18 +187,11 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
if (tbl == NULL)
return NULL;
- tbl->size = size;
-
- max_locks = size >> 1;
- if (tbl->nest)
- max_locks = min_t(size_t, max_locks, 1U << tbl->nest);
+ lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0);
- if (alloc_bucket_spinlocks(&tbl->locks, &tbl->locks_mask, max_locks,
- ht->p.locks_mul, gfp) < 0) {
- bucket_table_free(tbl);
- return NULL;
- }
+ tbl->size = size;
+ rcu_head_init(&tbl->rcu);
INIT_LIST_HEAD(&tbl->walkers);
tbl->hash_rnd = get_random_u32();
@@ -220,14 +215,15 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
return new_tbl;
}
-static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
+static int rhashtable_rehash_one(struct rhashtable *ht,
+ struct rhash_lock_head __rcu **bkt,
+ unsigned int old_hash)
{
struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
- struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
int err = -EAGAIN;
struct rhash_head *head, *next, *entry;
- spinlock_t *new_bucket_lock;
+ struct rhash_head __rcu **pprev = NULL;
unsigned int new_hash;
if (new_tbl->nest)
@@ -235,7 +231,8 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
err = -ENOENT;
- rht_for_each(entry, old_tbl, old_hash) {
+ rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
+ old_tbl, old_hash) {
err = 0;
next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
@@ -250,18 +247,19 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
new_hash = head_hashfn(ht, new_tbl, entry);
- new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);
+ rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING);
- spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
- head = rht_dereference_bucket(new_tbl->buckets[new_hash],
- new_tbl, new_hash);
+ head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);
RCU_INIT_POINTER(entry->next, head);
- rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
- spin_unlock(new_bucket_lock);
+ rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry);
- rcu_assign_pointer(*pprev, next);
+ if (pprev)
+ rcu_assign_pointer(*pprev, next);
+ else
+ /* Need to preserve the bit lock. */
+ rht_assign_locked(bkt, next);
out:
return err;
@@ -271,20 +269,19 @@ static int rhashtable_rehash_chain(struct rhashtable *ht,
unsigned int old_hash)
{
struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
- spinlock_t *old_bucket_lock;
+ struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash);
int err;
- old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
+ if (!bkt)
+ return 0;
+ rht_lock(old_tbl, bkt);
- spin_lock_bh(old_bucket_lock);
- while (!(err = rhashtable_rehash_one(ht, old_hash)))
+ while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
;
- if (err == -ENOENT) {
- old_tbl->rehash++;
+ if (err == -ENOENT)
err = 0;
- }
- spin_unlock_bh(old_bucket_lock);
+ rht_unlock(old_tbl, bkt);
return err;
}
@@ -330,13 +327,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
spin_lock(&ht->lock);
list_for_each_entry(walker, &old_tbl->walkers, list)
walker->tbl = NULL;
- spin_unlock(&ht->lock);
/* Wait for readers. All new readers will see the new
* table, and thus no references to the old table will
* remain.
+ * We do this inside the locked region so that
+ * rhashtable_walk_stop() can use rcu_head_after_call_rcu()
+ * to check if it should not re-link the table.
*/
call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
+ spin_unlock(&ht->lock);
return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
}
@@ -478,6 +478,7 @@ fail:
}
static void *rhashtable_lookup_one(struct rhashtable *ht,
+ struct rhash_lock_head __rcu **bkt,
struct bucket_table *tbl, unsigned int hash,
const void *key, struct rhash_head *obj)
{
@@ -485,13 +486,12 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
.ht = ht,
.key = key,
};
- struct rhash_head __rcu **pprev;
+ struct rhash_head __rcu **pprev = NULL;
struct rhash_head *head;
int elasticity;
elasticity = RHT_ELASTICITY;
- pprev = rht_bucket_var(tbl, hash);
- rht_for_each_continue(head, *pprev, tbl, hash) {
+ rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *list;
struct rhlist_head *plist;
@@ -513,7 +513,11 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
RCU_INIT_POINTER(list->next, plist);
head = rht_dereference_bucket(head->next, tbl, hash);
RCU_INIT_POINTER(list->rhead.next, head);
- rcu_assign_pointer(*pprev, obj);
+ if (pprev)
+ rcu_assign_pointer(*pprev, obj);
+ else
+ /* Need to preserve the bit lock */
+ rht_assign_locked(bkt, obj);
return NULL;
}
@@ -525,12 +529,12 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
}
static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
+ struct rhash_lock_head __rcu **bkt,
struct bucket_table *tbl,
unsigned int hash,
struct rhash_head *obj,
void *data)
{
- struct rhash_head __rcu **pprev;
struct bucket_table *new_tbl;
struct rhash_head *head;
@@ -553,11 +557,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
if (unlikely(rht_grow_above_100(ht, tbl)))
return ERR_PTR(-EAGAIN);
- pprev = rht_bucket_insert(ht, tbl, hash);
- if (!pprev)
- return ERR_PTR(-ENOMEM);
-
- head = rht_dereference_bucket(*pprev, tbl, hash);
+ head = rht_ptr(bkt, tbl, hash);
RCU_INIT_POINTER(obj->next, head);
if (ht->rhlist) {
@@ -567,7 +567,10 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
RCU_INIT_POINTER(list->next, NULL);
}
- rcu_assign_pointer(*pprev, obj);
+ /* bkt is always the head of the list, so it holds
+ * the lock, which we need to preserve
+ */
+ rht_assign_locked(bkt, obj);
atomic_inc(&ht->nelems);
if (rht_grow_above_75(ht, tbl))
@@ -581,47 +584,35 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
{
struct bucket_table *new_tbl;
struct bucket_table *tbl;
+ struct rhash_lock_head __rcu **bkt;
unsigned int hash;
- spinlock_t *lock;
void *data;
- tbl = rcu_dereference(ht->tbl);
-
- /* All insertions must grab the oldest table containing
- * the hashed bucket that is yet to be rehashed.
- */
- for (;;) {
- hash = rht_head_hashfn(ht, tbl, obj, ht->p);
- lock = rht_bucket_lock(tbl, hash);
- spin_lock_bh(lock);
-
- if (tbl->rehash <= hash)
- break;
-
- spin_unlock_bh(lock);
- tbl = rht_dereference_rcu(tbl->future_tbl, ht);
- }
-
- data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
- new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
- if (PTR_ERR(new_tbl) != -EEXIST)
- data = ERR_CAST(new_tbl);
+ new_tbl = rcu_dereference(ht->tbl);
- while (!IS_ERR_OR_NULL(new_tbl)) {
+ do {
tbl = new_tbl;
hash = rht_head_hashfn(ht, tbl, obj, ht->p);
- spin_lock_nested(rht_bucket_lock(tbl, hash),
- SINGLE_DEPTH_NESTING);
-
- data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
- new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
- if (PTR_ERR(new_tbl) != -EEXIST)
- data = ERR_CAST(new_tbl);
-
- spin_unlock(rht_bucket_lock(tbl, hash));
- }
-
- spin_unlock_bh(lock);
+ if (rcu_access_pointer(tbl->future_tbl))
+ /* Failure is OK */
+ bkt = rht_bucket_var(tbl, hash);
+ else
+ bkt = rht_bucket_insert(ht, tbl, hash);
+ if (bkt == NULL) {
+ new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ data = ERR_PTR(-EAGAIN);
+ } else {
+ rht_lock(tbl, bkt);
+ data = rhashtable_lookup_one(ht, bkt, tbl,
+ hash, key, obj);
+ new_tbl = rhashtable_insert_one(ht, bkt, tbl,
+ hash, obj, data);
+ if (PTR_ERR(new_tbl) != -EEXIST)
+ data = ERR_CAST(new_tbl);
+
+ rht_unlock(tbl, bkt);
+ }
+ } while (!IS_ERR_OR_NULL(new_tbl));
if (PTR_ERR(data) == -EAGAIN)
data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
@@ -943,10 +934,11 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
ht = iter->ht;
spin_lock(&ht->lock);
- if (tbl->rehash < tbl->size)
- list_add(&iter->walker.list, &tbl->walkers);
- else
+ if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu))
+ /* This bucket table is being freed, don't re-link it. */
iter->walker.tbl = NULL;
+ else
+ list_add(&iter->walker.list, &tbl->walkers);
spin_unlock(&ht->lock);
out:
@@ -1046,11 +1038,6 @@ int rhashtable_init(struct rhashtable *ht,
size = rounded_hashtable_size(&ht->p);
- if (params->locks_mul)
- ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
- else
- ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
-
ht->key_len = ht->p.key_len;
if (!params->hashfn) {
ht->p.hashfn = jhash;
@@ -1152,7 +1139,7 @@ restart:
struct rhash_head *pos, *next;
cond_resched();
- for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
+ for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
next = !rht_is_a_nulls(pos) ?
rht_dereference(pos->next, ht) : NULL;
!rht_is_a_nulls(pos);
@@ -1179,11 +1166,10 @@ void rhashtable_destroy(struct rhashtable *ht)
}
EXPORT_SYMBOL_GPL(rhashtable_destroy);
-struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
- unsigned int hash)
+struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl,
+ unsigned int hash)
{
const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
- static struct rhash_head __rcu *rhnull;
unsigned int index = hash & ((1 << tbl->nest) - 1);
unsigned int size = tbl->size >> tbl->nest;
unsigned int subhash = hash;
@@ -1201,20 +1187,28 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
subhash >>= shift;
}
- if (!ntbl) {
- if (!rhnull)
- INIT_RHT_NULLS_HEAD(rhnull);
- return &rhnull;
- }
+ if (!ntbl)
+ return NULL;
return &ntbl[subhash].bucket;
}
+EXPORT_SYMBOL_GPL(__rht_bucket_nested);
+
+struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
+ unsigned int hash)
+{
+ static struct rhash_lock_head __rcu *rhnull;
+
+ if (!rhnull)
+ INIT_RHT_NULLS_HEAD(rhnull);
+ return __rht_bucket_nested(tbl, hash) ?: &rhnull;
+}
EXPORT_SYMBOL_GPL(rht_bucket_nested);
-struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
- struct bucket_table *tbl,
- unsigned int hash)
+struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
+ struct bucket_table *tbl,
+ unsigned int hash)
{
const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
unsigned int index = hash & ((1 << tbl->nest) - 1);
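The rhashtable changes above drop the per-bucket spinlock array and instead embed the lock in bit 0 of each bucket pointer (the rht_lock()/rht_ptr()/rht_assign_locked() helpers live in the header, not shown here). A rough, self-contained sketch of that idea using bit_spin_lock(); the structure and function names are invented, and RCU publication is omitted for brevity.

#include <linux/bit_spinlock.h>

/* Illustration only: the head pointer of a chain doubles as its lock by
 * spinning on bit 0 of the pointer word, the same idea the new
 * rhash_lock_head buckets build on.
 */
struct demo_head {
        struct demo_head *next;
};

/* Return the first entry, masking off the lock bit (what rht_ptr() does). */
static struct demo_head *demo_chain_first(struct demo_head **bucket)
{
        return (struct demo_head *)((unsigned long)*bucket & ~1UL);
}

static void demo_chain_push(struct demo_head **bucket, struct demo_head *obj)
{
        /* Take the per-bucket lock: bit 0 of the bucket pointer word. */
        bit_spin_lock(0, (unsigned long *)bucket);

        /* Link the new entry in front of the current (masked) head. */
        obj->next = demo_chain_first(bucket);

        /* Publish the new head while keeping bit 0 set, i.e. the lock
         * stays held across the pointer update.
         */
        *bucket = (struct demo_head *)((unsigned long)obj | 1UL);

        /* Release: clears bit 0, leaving a clean head pointer behind. */
        bit_spin_unlock(0, (unsigned long *)bucket);
}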
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 3bd2e91bfc29..084fe5a6ac57 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -500,7 +500,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt)
struct rhash_head *pos, *next;
struct test_obj_rhl *p;
- pos = rht_dereference(tbl->buckets[i], ht);
+ pos = rht_ptr_exclusive(tbl->buckets + i);
next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL;
if (!rht_is_a_nulls(pos)) {