summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/Kconfig.debug 150
-rw-r--r-- lib/btree.c 10
-rw-r--r-- lib/bug.c 4
-rw-r--r-- lib/debugobjects.c 141
-rw-r--r-- lib/dma-direct.c 35
-rw-r--r-- lib/ioremap.c 6
-rw-r--r-- lib/percpu-refcount.c 2
-rw-r--r-- lib/rhashtable.c 4
-rw-r--r-- lib/swiotlb.c 77
-rw-r--r-- lib/test_bpf.c 6
-rw-r--r-- lib/test_kmod.c 2
-rw-r--r-- lib/test_rhashtable.c 134
12 files changed, 377 insertions, 194 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 6927c6d8d185..00eeff94b357 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1031,69 +1031,20 @@ config DEBUG_PREEMPT
menu "Lock Debugging (spinlocks, mutexes, etc...)"
-config DEBUG_RT_MUTEXES
- bool "RT Mutex debugging, deadlock detection"
- depends on DEBUG_KERNEL && RT_MUTEXES
- help
- This allows rt mutex semantics violations and rt mutex related
- deadlocks (lockups) to be detected and reported automatically.
-
-config DEBUG_SPINLOCK
- bool "Spinlock and rw-lock debugging: basic checks"
- depends on DEBUG_KERNEL
- select UNINLINE_SPIN_UNLOCK
- help
- Say Y here and build SMP to catch missing spinlock initialization
- and certain other kinds of spinlock errors commonly made. This is
- best used in conjunction with the NMI watchdog so that spinlock
- deadlocks are also debuggable.
-
-config DEBUG_MUTEXES
- bool "Mutex debugging: basic checks"
- depends on DEBUG_KERNEL
- help
- This feature allows mutex semantics violations to be detected and
- reported.
-
-config DEBUG_WW_MUTEX_SLOWPATH
- bool "Wait/wound mutex debugging: Slowpath testing"
- depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
- select DEBUG_LOCK_ALLOC
- select DEBUG_SPINLOCK
- select DEBUG_MUTEXES
- help
- This feature enables slowpath testing for w/w mutex users by
- injecting additional -EDEADLK wound/backoff cases. Together with
- the full mutex checks enabled with (CONFIG_PROVE_LOCKING) this
- will test all possible w/w mutex interface abuse with the
- exception of simply not acquiring all the required locks.
- Note that this feature can introduce significant overhead, so
- it really should not be enabled in a production or distro kernel,
- even a debug kernel. If you are a driver writer, enable it. If
- you are a distro, do not.
-
-config DEBUG_LOCK_ALLOC
- bool "Lock debugging: detect incorrect freeing of live locks"
- depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
- select DEBUG_SPINLOCK
- select DEBUG_MUTEXES
- select DEBUG_RT_MUTEXES if RT_MUTEXES
- select LOCKDEP
- help
- This feature will check whether any held lock (spinlock, rwlock,
- mutex or rwsem) is incorrectly freed by the kernel, via any of the
- memory-freeing routines (kfree(), kmem_cache_free(), free_pages(),
- vfree(), etc.), whether a live lock is incorrectly reinitialized via
- spin_lock_init()/mutex_init()/etc., or whether there is any lock
- held during task exit.
+config LOCK_DEBUGGING_SUPPORT
+ bool
+ depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
+ default y
config PROVE_LOCKING
bool "Lock debugging: prove locking correctness"
- depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
+ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
select LOCKDEP
select DEBUG_SPINLOCK
select DEBUG_MUTEXES
select DEBUG_RT_MUTEXES if RT_MUTEXES
+ select DEBUG_RWSEMS if RWSEM_SPIN_ON_OWNER
+ select DEBUG_WW_MUTEX_SLOWPATH
select DEBUG_LOCK_ALLOC
select TRACE_IRQFLAGS
default n
@@ -1131,20 +1082,9 @@ config PROVE_LOCKING
For more details, see Documentation/locking/lockdep-design.txt.
-config LOCKDEP
- bool
- depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
- select STACKTRACE
- select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !X86
- select KALLSYMS
- select KALLSYMS_ALL
-
-config LOCKDEP_SMALL
- bool
-
config LOCK_STAT
bool "Lock usage statistics"
- depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
+ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
select LOCKDEP
select DEBUG_SPINLOCK
select DEBUG_MUTEXES
@@ -1164,6 +1104,80 @@ config LOCK_STAT
CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
(CONFIG_LOCKDEP defines "acquire" and "release" events.)
+config DEBUG_RT_MUTEXES
+ bool "RT Mutex debugging, deadlock detection"
+ depends on DEBUG_KERNEL && RT_MUTEXES
+ help
+ This allows rt mutex semantics violations and rt mutex related
+ deadlocks (lockups) to be detected and reported automatically.
+
+config DEBUG_SPINLOCK
+ bool "Spinlock and rw-lock debugging: basic checks"
+ depends on DEBUG_KERNEL
+ select UNINLINE_SPIN_UNLOCK
+ help
+ Say Y here and build SMP to catch missing spinlock initialization
+ and certain other kinds of spinlock errors commonly made. This is
+ best used in conjunction with the NMI watchdog so that spinlock
+ deadlocks are also debuggable.
+
+config DEBUG_MUTEXES
+ bool "Mutex debugging: basic checks"
+ depends on DEBUG_KERNEL
+ help
+ This feature allows mutex semantics violations to be detected and
+ reported.
+
+config DEBUG_WW_MUTEX_SLOWPATH
+ bool "Wait/wound mutex debugging: Slowpath testing"
+ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
+ select DEBUG_LOCK_ALLOC
+ select DEBUG_SPINLOCK
+ select DEBUG_MUTEXES
+ help
+ This feature enables slowpath testing for w/w mutex users by
+ injecting additional -EDEADLK wound/backoff cases. Together with
+ the full mutex checks enabled with (CONFIG_PROVE_LOCKING) this
+ will test all possible w/w mutex interface abuse with the
+ exception of simply not acquiring all the required locks.
+ Note that this feature can introduce significant overhead, so
+ it really should not be enabled in a production or distro kernel,
+ even a debug kernel. If you are a driver writer, enable it. If
+ you are a distro, do not.
+
+config DEBUG_RWSEMS
+ bool "RW Semaphore debugging: basic checks"
+ depends on DEBUG_KERNEL && RWSEM_SPIN_ON_OWNER
+ help
+ This debugging feature allows mismatched rw semaphore locks and unlocks
+ to be detected and reported.
+
+config DEBUG_LOCK_ALLOC
+ bool "Lock debugging: detect incorrect freeing of live locks"
+ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
+ select DEBUG_SPINLOCK
+ select DEBUG_MUTEXES
+ select DEBUG_RT_MUTEXES if RT_MUTEXES
+ select LOCKDEP
+ help
+ This feature will check whether any held lock (spinlock, rwlock,
+ mutex or rwsem) is incorrectly freed by the kernel, via any of the
+ memory-freeing routines (kfree(), kmem_cache_free(), free_pages(),
+ vfree(), etc.), whether a live lock is incorrectly reinitialized via
+ spin_lock_init()/mutex_init()/etc., or whether there is any lock
+ held during task exit.
+
+config LOCKDEP
+ bool
+ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
+ select STACKTRACE
+ select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !X86
+ select KALLSYMS
+ select KALLSYMS_ALL
+
+config LOCKDEP_SMALL
+ bool
+
config DEBUG_LOCKDEP
bool "Lock dependency engine debugging"
depends on DEBUG_KERNEL && LOCKDEP
diff --git a/lib/btree.c b/lib/btree.c
index f93a945274af..590facba2c50 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -3,7 +3,7 @@
*
* As should be obvious for Linux kernel code, license is GPLv2
*
- * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org>
+ * Copyright (c) 2007-2008 Joern Engel <joern@purestorage.com>
* Bits and pieces stolen from Peter Zijlstra's code, which is
* Copyright 2007, Red Hat Inc. Peter Zijlstra
* GPLv2
@@ -76,6 +76,8 @@ struct btree_geo btree_geo128 = {
};
EXPORT_SYMBOL_GPL(btree_geo128);
+#define MAX_KEYLEN (2 * LONG_PER_U64)
+
static struct kmem_cache *btree_cachep;
void *btree_alloc(gfp_t gfp_mask, void *pool_data)
@@ -313,7 +315,7 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
{
int i, height;
unsigned long *node, *oldnode;
- unsigned long *retry_key = NULL, key[geo->keylen];
+ unsigned long *retry_key = NULL, key[MAX_KEYLEN];
if (keyzero(geo, __key))
return NULL;
@@ -639,8 +641,8 @@ EXPORT_SYMBOL_GPL(btree_remove);
int btree_merge(struct btree_head *target, struct btree_head *victim,
struct btree_geo *geo, gfp_t gfp)
{
- unsigned long key[geo->keylen];
- unsigned long dup[geo->keylen];
+ unsigned long key[MAX_KEYLEN];
+ unsigned long dup[MAX_KEYLEN];
void *val;
int err;
diff --git a/lib/bug.c b/lib/bug.c
index c1b0fad31b10..1077366f496b 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -150,6 +150,8 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
return BUG_TRAP_TYPE_NONE;
bug = find_bug(bugaddr);
+ if (!bug)
+ return BUG_TRAP_TYPE_NONE;
file = NULL;
line = 0;
@@ -191,7 +193,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
if (file)
pr_crit("kernel BUG at %s:%u!\n", file, line);
else
- pr_crit("Kernel BUG at %p [verbose debug info unavailable]\n",
+ pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n",
(void *)bugaddr);
return BUG_TRAP_TYPE_BUG;
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 2f5349c6e81a..994be4805cec 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -42,14 +42,18 @@ static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata;
static DEFINE_RAW_SPINLOCK(pool_lock);
static HLIST_HEAD(obj_pool);
+static HLIST_HEAD(obj_to_free);
static int obj_pool_min_free = ODEBUG_POOL_SIZE;
static int obj_pool_free = ODEBUG_POOL_SIZE;
static int obj_pool_used;
static int obj_pool_max_used;
+/* The number of objs on the global free list */
+static int obj_nr_tofree;
static struct kmem_cache *obj_cache;
static int debug_objects_maxchain __read_mostly;
+static int __maybe_unused debug_objects_maxchecked __read_mostly;
static int debug_objects_fixups __read_mostly;
static int debug_objects_warnings __read_mostly;
static int debug_objects_enabled __read_mostly
@@ -96,12 +100,32 @@ static const char *obj_states[ODEBUG_STATE_MAX] = {
static void fill_pool(void)
{
gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
- struct debug_obj *new;
+ struct debug_obj *new, *obj;
unsigned long flags;
if (likely(obj_pool_free >= debug_objects_pool_min_level))
return;
+ /*
+ * Reuse objs from the global free list; they will be reinitialized
+ * when allocating.
+ */
+ while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) {
+ raw_spin_lock_irqsave(&pool_lock, flags);
+ /*
+ * Recheck with the lock held as the worker thread might have
+ * won the race and freed the global free list already.
+ */
+ if (obj_nr_tofree) {
+ obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
+ hlist_del(&obj->node);
+ obj_nr_tofree--;
+ hlist_add_head(&obj->node, &obj_pool);
+ obj_pool_free++;
+ }
+ raw_spin_unlock_irqrestore(&pool_lock, flags);
+ }
+
if (unlikely(!obj_cache))
return;
@@ -177,62 +201,76 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
* workqueue function to free objects.
*
* To reduce contention on the global pool_lock, the actual freeing of
- * debug objects will be delayed if the pool_lock is busy. We also free
- * the objects in a batch of 4 for each lock/unlock cycle.
+ * debug objects will be delayed if the pool_lock is busy.
*/
-#define ODEBUG_FREE_BATCH 4
-
static void free_obj_work(struct work_struct *work)
{
- struct debug_obj *objs[ODEBUG_FREE_BATCH];
+ struct hlist_node *tmp;
+ struct debug_obj *obj;
unsigned long flags;
- int i;
+ HLIST_HEAD(tofree);
if (!raw_spin_trylock_irqsave(&pool_lock, flags))
return;
- while (obj_pool_free >= debug_objects_pool_size + ODEBUG_FREE_BATCH) {
- for (i = 0; i < ODEBUG_FREE_BATCH; i++) {
- objs[i] = hlist_entry(obj_pool.first,
- typeof(*objs[0]), node);
- hlist_del(&objs[i]->node);
- }
- obj_pool_free -= ODEBUG_FREE_BATCH;
- debug_objects_freed += ODEBUG_FREE_BATCH;
- /*
- * We release pool_lock across kmem_cache_free() to
- * avoid contention on pool_lock.
- */
- raw_spin_unlock_irqrestore(&pool_lock, flags);
- for (i = 0; i < ODEBUG_FREE_BATCH; i++)
- kmem_cache_free(obj_cache, objs[i]);
- if (!raw_spin_trylock_irqsave(&pool_lock, flags))
- return;
+ /*
+ * The objs on the pool list might be allocated before the work is
+ * run, so recheck whether the pool list is full or not; if not, fill
+ * the pool list from the global free list.
+ */
+ while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) {
+ obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
+ hlist_del(&obj->node);
+ hlist_add_head(&obj->node, &obj_pool);
+ obj_pool_free++;
+ obj_nr_tofree--;
+ }
+
+ /*
+ * Pool list is already full and there are still objs on the free
+ * list. Move remaining free objs to a temporary list to free the
+ * memory outside the pool_lock held region.
+ */
+ if (obj_nr_tofree) {
+ hlist_move_list(&obj_to_free, &tofree);
+ debug_objects_freed += obj_nr_tofree;
+ obj_nr_tofree = 0;
}
raw_spin_unlock_irqrestore(&pool_lock, flags);
+
+ hlist_for_each_entry_safe(obj, tmp, &tofree, node) {
+ hlist_del(&obj->node);
+ kmem_cache_free(obj_cache, obj);
+ }
}
-/*
- * Put the object back into the pool and schedule work to free objects
- * if necessary.
- */
-static void free_object(struct debug_obj *obj)
+static bool __free_object(struct debug_obj *obj)
{
unsigned long flags;
- int sched = 0;
+ bool work;
raw_spin_lock_irqsave(&pool_lock, flags);
- /*
- * schedule work when the pool is filled and the cache is
- * initialized:
- */
- if (obj_pool_free > debug_objects_pool_size && obj_cache)
- sched = 1;
- hlist_add_head(&obj->node, &obj_pool);
- obj_pool_free++;
+ work = (obj_pool_free > debug_objects_pool_size) && obj_cache;
obj_pool_used--;
+
+ if (work) {
+ obj_nr_tofree++;
+ hlist_add_head(&obj->node, &obj_to_free);
+ } else {
+ obj_pool_free++;
+ hlist_add_head(&obj->node, &obj_pool);
+ }
raw_spin_unlock_irqrestore(&pool_lock, flags);
- if (sched)
+ return work;
+}
+
+/*
+ * Put the object back into the pool and schedule work to free objects
+ * if necessary.
+ */
+static void free_object(struct debug_obj *obj)
+{
+ if (__free_object(obj))
schedule_work(&debug_obj_work);
}
@@ -714,13 +752,13 @@ EXPORT_SYMBOL_GPL(debug_object_active_state);
static void __debug_check_no_obj_freed(const void *address, unsigned long size)
{
unsigned long flags, oaddr, saddr, eaddr, paddr, chunks;
- struct hlist_node *tmp;
- HLIST_HEAD(freelist);
struct debug_obj_descr *descr;
enum debug_obj_state state;
struct debug_bucket *db;
+ struct hlist_node *tmp;
struct debug_obj *obj;
- int cnt;
+ int cnt, objs_checked = 0;
+ bool work = false;
saddr = (unsigned long) address;
eaddr = saddr + size;
@@ -751,21 +789,24 @@ repeat:
goto repeat;
default:
hlist_del(&obj->node);
- hlist_add_head(&obj->node, &freelist);
+ work |= __free_object(obj);
break;
}
}
raw_spin_unlock_irqrestore(&db->lock, flags);
- /* Now free them */
- hlist_for_each_entry_safe(obj, tmp, &freelist, node) {
- hlist_del(&obj->node);
- free_object(obj);
- }
-
if (cnt > debug_objects_maxchain)
debug_objects_maxchain = cnt;
+
+ objs_checked += cnt;
}
+
+ if (objs_checked > debug_objects_maxchecked)
+ debug_objects_maxchecked = objs_checked;
+
+ /* Schedule work to actually kmem_cache_free() objects */
+ if (work)
+ schedule_work(&debug_obj_work);
}
void debug_check_no_obj_freed(const void *address, unsigned long size)
@@ -780,12 +821,14 @@ void debug_check_no_obj_freed(const void *address, unsigned long size)
static int debug_stats_show(struct seq_file *m, void *v)
{
seq_printf(m, "max_chain :%d\n", debug_objects_maxchain);
+ seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked);
seq_printf(m, "warnings :%d\n", debug_objects_warnings);
seq_printf(m, "fixups :%d\n", debug_objects_fixups);
seq_printf(m, "pool_free :%d\n", obj_pool_free);
seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free);
seq_printf(m, "pool_used :%d\n", obj_pool_used);
seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used);
+ seq_printf(m, "on_free_list :%d\n", obj_nr_tofree);
seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated);
seq_printf(m, "objs_freed :%d\n", debug_objects_freed);
return 0;
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
index c9e8e21cb334..c0bba30fef0a 100644
--- a/lib/dma-direct.c
+++ b/lib/dma-direct.c
@@ -9,6 +9,7 @@
#include <linux/scatterlist.h>
#include <linux/dma-contiguous.h>
#include <linux/pfn.h>
+#include <linux/set_memory.h>
#define DIRECT_MAPPING_ERROR 0
@@ -20,6 +21,14 @@
#define ARCH_ZONE_DMA_BITS 24
#endif
+/*
+ * For AMD SEV all DMA must be to unencrypted addresses.
+ */
+static inline bool force_dma_unencrypted(void)
+{
+ return sev_active();
+}
+
static bool
check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
const char *caller)
@@ -37,7 +46,9 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
- return phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask;
+ dma_addr_t addr = force_dma_unencrypted() ?
+ __phys_to_dma(dev, phys) : phys_to_dma(dev, phys);
+ return addr + size - 1 <= dev->coherent_dma_mask;
}
void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
@@ -46,6 +57,10 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
int page_order = get_order(size);
struct page *page = NULL;
+ void *ret;
+
+ /* we always manually zero the memory once we are done: */
+ gfp &= ~__GFP_ZERO;
/* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
@@ -78,10 +93,15 @@ again:
if (!page)
return NULL;
-
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
- memset(page_address(page), 0, size);
- return page_address(page);
+ ret = page_address(page);
+ if (force_dma_unencrypted()) {
+ set_memory_decrypted((unsigned long)ret, 1 << page_order);
+ *dma_handle = __phys_to_dma(dev, page_to_phys(page));
+ } else {
+ *dma_handle = phys_to_dma(dev, page_to_phys(page));
+ }
+ memset(ret, 0, size);
+ return ret;
}
/*
@@ -92,9 +112,12 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ unsigned int page_order = get_order(size);
+ if (force_dma_unencrypted())
+ set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
- free_pages((unsigned long)cpu_addr, get_order(size));
+ free_pages((unsigned long)cpu_addr, page_order);
}
static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
diff --git a/lib/ioremap.c b/lib/ioremap.c
index b808a390e4c3..54e5bbaa3200 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -91,7 +91,8 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
if (ioremap_pmd_enabled() &&
((next - addr) == PMD_SIZE) &&
- IS_ALIGNED(phys_addr + addr, PMD_SIZE)) {
+ IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
+ pmd_free_pte_page(pmd)) {
if (pmd_set_huge(pmd, phys_addr + addr, prot))
continue;
}
@@ -117,7 +118,8 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
if (ioremap_pud_enabled() &&
((next - addr) == PUD_SIZE) &&
- IS_ALIGNED(phys_addr + addr, PUD_SIZE)) {
+ IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
+ pud_free_pmd_page(pud)) {
if (pud_set_huge(pud, phys_addr + addr, prot))
continue;
}
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 30e7dd88148b..9f96fa7bc000 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -322,6 +322,8 @@ EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu);
* This function normally doesn't block and can be called from any context
* but it may block if @confirm_kill is specified and @ref is in the
* process of switching to atomic mode by percpu_ref_switch_to_atomic().
+ *
+ * There are no implied RCU grace periods between kill and release.
*/
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
percpu_ref_func_t *confirm_kill)
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 3825c30aaa36..47de025b6245 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -506,8 +506,10 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
if (!key ||
(ht->p.obj_cmpfn ?
ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
- rhashtable_compare(&arg, rht_obj(ht, head))))
+ rhashtable_compare(&arg, rht_obj(ht, head)))) {
+ pprev = &head->next;
continue;
+ }
if (!ht->rhlist)
return rht_obj(ht, head);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index c43ec2271469..47aeb04c1997 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -31,6 +31,7 @@
#include <linux/gfp.h>
#include <linux/scatterlist.h>
#include <linux/mem_encrypt.h>
+#include <linux/set_memory.h>
#include <asm/io.h>
#include <asm/dma.h>
@@ -156,22 +157,6 @@ unsigned long swiotlb_size_or_default(void)
return size ? size : (IO_TLB_DEFAULT_SIZE);
}
-void __weak swiotlb_set_mem_attributes(void *vaddr, unsigned long size) { }
-
-/* For swiotlb, clear memory encryption mask from dma addresses */
-static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev,
- phys_addr_t address)
-{
- return __sme_clr(phys_to_dma(hwdev, address));
-}
-
-/* Note that this doesn't work with highmem page */
-static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
- volatile void *address)
-{
- return phys_to_dma(hwdev, virt_to_phys(address));
-}
-
static bool no_iotlb_memory;
void swiotlb_print_info(void)
@@ -209,12 +194,12 @@ void __init swiotlb_update_mem_attributes(void)
vaddr = phys_to_virt(io_tlb_start);
bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
- swiotlb_set_mem_attributes(vaddr, bytes);
+ set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
memset(vaddr, 0, bytes);
vaddr = phys_to_virt(io_tlb_overflow_buffer);
bytes = PAGE_ALIGN(io_tlb_overflow);
- swiotlb_set_mem_attributes(vaddr, bytes);
+ set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
memset(vaddr, 0, bytes);
}
@@ -355,7 +340,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
io_tlb_start = virt_to_phys(tlb);
io_tlb_end = io_tlb_start + bytes;
- swiotlb_set_mem_attributes(tlb, bytes);
+ set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
memset(tlb, 0, bytes);
/*
@@ -366,7 +351,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
if (!v_overflow_buffer)
goto cleanup2;
- swiotlb_set_mem_attributes(v_overflow_buffer, io_tlb_overflow);
+ set_memory_decrypted((unsigned long)v_overflow_buffer,
+ io_tlb_overflow >> PAGE_SHIFT);
memset(v_overflow_buffer, 0, io_tlb_overflow);
io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
@@ -622,7 +608,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size,
return SWIOTLB_MAP_ERROR;
}
- start_dma_addr = swiotlb_phys_to_dma(hwdev, io_tlb_start);
+ start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
dir, attrs);
}
@@ -706,6 +692,7 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
}
}
+#ifdef CONFIG_DMA_DIRECT_OPS
static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
size_t size)
{
@@ -726,12 +713,12 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
goto out_warn;
phys_addr = swiotlb_tbl_map_single(dev,
- swiotlb_phys_to_dma(dev, io_tlb_start),
+ __phys_to_dma(dev, io_tlb_start),
0, size, DMA_FROM_DEVICE, 0);
if (phys_addr == SWIOTLB_MAP_ERROR)
goto out_warn;
- *dma_handle = swiotlb_phys_to_dma(dev, phys_addr);
+ *dma_handle = __phys_to_dma(dev, phys_addr);
if (dma_coherent_ok(dev, *dma_handle, size))
goto out_unmap;
@@ -759,28 +746,6 @@ out_warn:
return NULL;
}
-void *
-swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags)
-{
- int order = get_order(size);
- unsigned long attrs = (flags & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0;
- void *ret;
-
- ret = (void *)__get_free_pages(flags, order);
- if (ret) {
- *dma_handle = swiotlb_virt_to_bus(hwdev, ret);
- if (dma_coherent_ok(hwdev, *dma_handle, size)) {
- memset(ret, 0, size);
- return ret;
- }
- free_pages((unsigned long)ret, order);
- }
-
- return swiotlb_alloc_buffer(hwdev, size, dma_handle, attrs);
-}
-EXPORT_SYMBOL(swiotlb_alloc_coherent);
-
static bool swiotlb_free_buffer(struct device *dev, size_t size,
dma_addr_t dma_addr)
{
@@ -799,15 +764,7 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size,
DMA_ATTR_SKIP_CPU_SYNC);
return true;
}
-
-void
-swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
- dma_addr_t dev_addr)
-{
- if (!swiotlb_free_buffer(hwdev, size, dev_addr))
- free_pages((unsigned long)vaddr, get_order(size));
-}
-EXPORT_SYMBOL(swiotlb_free_coherent);
+#endif
static void
swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
@@ -867,10 +824,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
map = map_single(dev, phys, size, dir, attrs);
if (map == SWIOTLB_MAP_ERROR) {
swiotlb_full(dev, size, dir, 1);
- return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
+ return __phys_to_dma(dev, io_tlb_overflow_buffer);
}
- dev_addr = swiotlb_phys_to_dma(dev, map);
+ dev_addr = __phys_to_dma(dev, map);
/* Ensure that the address returned is DMA'ble */
if (dma_capable(dev, dev_addr, size))
@@ -879,7 +836,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
attrs |= DMA_ATTR_SKIP_CPU_SYNC;
swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
- return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
+ return __phys_to_dma(dev, io_tlb_overflow_buffer);
}
/*
@@ -1009,7 +966,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
sg_dma_len(sgl) = 0;
return 0;
}
- sg->dma_address = swiotlb_phys_to_dma(hwdev, map);
+ sg->dma_address = __phys_to_dma(hwdev, map);
} else
sg->dma_address = dev_addr;
sg_dma_len(sg) = sg->length;
@@ -1073,7 +1030,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
int
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
- return (dma_addr == swiotlb_phys_to_dma(hwdev, io_tlb_overflow_buffer));
+ return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
}
/*
@@ -1085,7 +1042,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
int
swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
- return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
+ return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
}
#ifdef CONFIG_DMA_DIRECT_OPS
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index b4e22345963f..3e9335493fe4 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -24,10 +24,11 @@
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/highmem.h>
+#include <linux/sched.h>
/* General test specific settings */
#define MAX_SUBTESTS 3
-#define MAX_TESTRUNS 10000
+#define MAX_TESTRUNS 1000
#define MAX_DATA 128
#define MAX_INSNS 512
#define MAX_K 0xffffFFFF
@@ -5466,7 +5467,7 @@ static struct bpf_test tests[] = {
{
"BPF_MAXINSNS: Jump, gap, jump, ...",
{ },
-#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_X86)
CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
#else
CLASSIC | FLAG_NO_DATA,
@@ -6582,6 +6583,7 @@ static __init int test_bpf(void)
struct bpf_prog *fp;
int err;
+ cond_resched();
if (exclude_test(i))
continue;
diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index e372b97eee13..0e5b7a61460b 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -1141,7 +1141,7 @@ static struct kmod_test_device *register_test_dev_kmod(void)
mutex_lock(&reg_dev_mutex);
/* int should suffice for number of devices, test for wrap */
- if (unlikely(num_test_devs + 1) < 0) {
+ if (num_test_devs + 1 == INT_MAX) {
pr_err("reached limit of number of test devices\n");
goto out;
}
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 76d3667fdea2..f4000c137dbe 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -79,6 +79,21 @@ struct thread_data {
struct test_obj *objs;
};
+static u32 my_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct test_obj_rhl *obj = data;
+
+ return (obj->value.id % 10) << RHT_HASH_RESERVED_SPACE;
+}
+
+static int my_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct test_obj_rhl *test_obj = obj;
+ const struct test_obj_val *val = arg->key;
+
+ return test_obj->value.id - val->id;
+}
+
static struct rhashtable_params test_rht_params = {
.head_offset = offsetof(struct test_obj, node),
.key_offset = offsetof(struct test_obj, value),
@@ -87,6 +102,17 @@ static struct rhashtable_params test_rht_params = {
.nulls_base = (3U << RHT_BASE_SHIFT),
};
+static struct rhashtable_params test_rht_params_dup = {
+ .head_offset = offsetof(struct test_obj_rhl, list_node),
+ .key_offset = offsetof(struct test_obj_rhl, value),
+ .key_len = sizeof(struct test_obj_val),
+ .hashfn = jhash,
+ .obj_hashfn = my_hashfn,
+ .obj_cmpfn = my_cmpfn,
+ .nelem_hint = 128,
+ .automatic_shrinking = false,
+};
+
static struct semaphore prestart_sem;
static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0);
@@ -465,6 +491,112 @@ static int __init test_rhashtable_max(struct test_obj *array,
return err;
}
+static unsigned int __init print_ht(struct rhltable *rhlt)
+{
+ struct rhashtable *ht;
+ const struct bucket_table *tbl;
+ char buff[512] = "";
+ unsigned int i, cnt = 0;
+
+ ht = &rhlt->ht;
+ tbl = rht_dereference(ht->tbl, ht);
+ for (i = 0; i < tbl->size; i++) {
+ struct rhash_head *pos, *next;
+ struct test_obj_rhl *p;
+
+ pos = rht_dereference(tbl->buckets[i], ht);
+ next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL;
+
+ if (!rht_is_a_nulls(pos)) {
+ sprintf(buff, "%s\nbucket[%d] -> ", buff, i);
+ }
+
+ while (!rht_is_a_nulls(pos)) {
+ struct rhlist_head *list = container_of(pos, struct rhlist_head, rhead);
+ sprintf(buff, "%s[[", buff);
+ do {
+ pos = &list->rhead;
+ list = rht_dereference(list->next, ht);
+ p = rht_obj(ht, pos);
+
+ sprintf(buff, "%s val %d (tid=%d)%s", buff, p->value.id, p->value.tid,
+ list? ", " : " ");
+ cnt++;
+ } while (list);
+
+ pos = next,
+ next = !rht_is_a_nulls(pos) ?
+ rht_dereference(pos->next, ht) : NULL;
+
+ sprintf(buff, "%s]]%s", buff, !rht_is_a_nulls(pos) ? " -> " : "");
+ }
+ }
+ printk(KERN_ERR "\n---- ht: ----%s\n-------------\n", buff);
+
+ return cnt;
+}
+
+static int __init test_insert_dup(struct test_obj_rhl *rhl_test_objects,
+ int cnt, bool slow)
+{
+ struct rhltable rhlt;
+ unsigned int i, ret;
+ const char *key;
+ int err = 0;
+
+ err = rhltable_init(&rhlt, &test_rht_params_dup);
+ if (WARN_ON(err))
+ return err;
+
+ for (i = 0; i < cnt; i++) {
+ rhl_test_objects[i].value.tid = i;
+ key = rht_obj(&rhlt.ht, &rhl_test_objects[i].list_node.rhead);
+ key += test_rht_params_dup.key_offset;
+
+ if (slow) {
+ err = PTR_ERR(rhashtable_insert_slow(&rhlt.ht, key,
+ &rhl_test_objects[i].list_node.rhead));
+ if (err == -EAGAIN)
+ err = 0;
+ } else
+ err = rhltable_insert(&rhlt,
+ &rhl_test_objects[i].list_node,
+ test_rht_params_dup);
+ if (WARN(err, "error %d on element %d/%d (%s)\n", err, i, cnt, slow? "slow" : "fast"))
+ goto skip_print;
+ }
+
+ ret = print_ht(&rhlt);
+ WARN(ret != cnt, "missing rhltable elements (%d != %d, %s)\n", ret, cnt, slow? "slow" : "fast");
+
+skip_print:
+ rhltable_destroy(&rhlt);
+
+ return 0;
+}
+
+static int __init test_insert_duplicates_run(void)
+{
+ struct test_obj_rhl rhl_test_objects[3] = {};
+
+ pr_info("test inserting duplicates\n");
+
+ /* two different values that map to same bucket */
+ rhl_test_objects[0].value.id = 1;
+ rhl_test_objects[1].value.id = 21;
+
+ /* and another duplicate with the same value as [0],
+ * which will be second on the bucket list */
+ rhl_test_objects[2].value.id = rhl_test_objects[0].value.id;
+
+ test_insert_dup(rhl_test_objects, 2, false);
+ test_insert_dup(rhl_test_objects, 3, false);
+ test_insert_dup(rhl_test_objects, 2, true);
+ test_insert_dup(rhl_test_objects, 3, true);
+
+ return 0;
+}
+
static int thread_lookup_test(struct thread_data *tdata)
{
unsigned int entries = tdata->entries;
@@ -613,6 +745,8 @@ static int __init test_rht_init(void)
do_div(total_time, runs);
pr_info("Average test time: %llu\n", total_time);
+ test_insert_duplicates_run();
+
if (!tcount)
return 0;