From 69c7a5fb2482636f525f016c8333fdb9111ecb9d Mon Sep 17 00:00:00 2001
From: Boqun Feng
Date: Sat, 19 Jun 2021 01:01:07 +0800
Subject: locking/lockdep: Fix the dep path printing for backwards BFS

We use the same code to print the backwards lock dependency path as the
forwards one, and this can result in incorrect printing, because for a
backwards lock_list, ->trace is not the call trace where the lock of
->class was acquired.

Fix this by introducing a separate function for printing the backwards
dependency path. Also add a few comments about the printing while we
are at it.

Reported-by: Johannes Berg
Signed-off-by: Boqun Feng
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20210618170110.3699115-2-boqun.feng@gmail.com
---
 kernel/locking/lockdep.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 106 insertions(+), 2 deletions(-)

(limited to 'kernel/locking/lockdep.c')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 48d736aa03b2..3b32cd9cdfd0 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2304,7 +2304,56 @@ static void print_lock_class_header(struct lock_class *class, int depth)
 }
 
 /*
- * printk the shortest lock dependencies from @start to @end in reverse order:
+ * Dependency path printing:
+ *
+ * After BFS we get a lock dependency path (linked via ->parent of lock_list);
+ * printing out each lock in the dependency path helps in understanding how
+ * the deadlock could happen. Here are some details about dependency path
+ * printing:
+ *
+ * 1) A lock_list can be either forwards or backwards for a lock dependency:
+ *    for a lock dependency A -> B, there are two lock_lists:
+ *
+ *    a) lock_list in the ->locks_after list of A, whose ->class is B and
+ *       ->links_to is A. In this case, we can say the lock_list is
+ *       "A -> B" (forwards case).
+ *
+ *    b) lock_list in the ->locks_before list of B, whose ->class is A
+ *       and ->links_to is B. In this case, we can say the lock_list is
+ *       "B <- A" (backwards case).
+ *
+ *    The ->trace of both a) and b) points to the call trace where B was
+ *    acquired with A held.
+ *
+ * 2) A "helper" lock_list is introduced during BFS; this lock_list doesn't
+ *    represent an actual lock dependency, it only provides an initial entry
+ *    for BFS. For example, BFS may introduce a "helper" lock_list whose
+ *    ->class is A; as a result, BFS will search all dependencies starting
+ *    with A, e.g. A -> B or A -> C.
+ *
+ *    The notation of a forwards helper lock_list is like "-> A", which means
+ *    we should search the forwards dependencies starting with "A", e.g.
+ *    A -> B or A -> C.
+ *
+ *    The notation of a backwards helper lock_list is like "<- B", which means
+ *    we should search the backwards dependencies ending with "B", e.g.
+ *    B <- A or B <- C.
+ */
+
+/*
+ * printk the shortest lock dependencies from @root to @leaf in reverse order.
+ *
+ * We have a lock dependency path as follows:
+ *
+ * @root                                                              @leaf
+ * |                                                                     |
+ * V                                                                     V
+ *             ->parent                                    ->parent
+ * | lock_list | <--------- | lock_list | ... | lock_list  | <--------- | lock_list |
+ * |    -> L1  |            | L1 -> L2  | ... |Ln-2 -> Ln-1|            | Ln-1 -> Ln|
+ *
+ * so it's natural that we start from @leaf and print every ->class and
+ * ->trace until we reach the @root.
  */
 static void __used
 print_shortest_lock_dependencies(struct lock_list *leaf,
@@ -2332,6 +2381,61 @@ print_shortest_lock_dependencies(struct lock_list *leaf,
 	} while (entry && (depth >= 0));
 }
 
+/*
+ * printk the shortest lock dependencies from @leaf to @root.
+ *
+ * We have a lock dependency path (from a backwards search) as follows:
+ *
+ * @leaf                                                              @root
+ * |                                                                     |
+ * V                                                                     V
+ *             ->parent                                    ->parent
+ * | lock_list | ---------> | lock_list | ... | lock_list  | ---------> | lock_list |
+ * | L2 <- L1  |            | L3 <- L2  | ... | Ln <- Ln-1 |            |    <- Ln  |
+ *
+ * so when we iterate from @leaf to @root, we actually print the lock
+ * dependency path L1 -> L2 -> ... -> Ln in non-reverse order.
+ *
+ * Another thing to notice here is that the ->class of L2 <- L1 is L1, while
+ * its ->trace is the call trace of L2. In fact we don't have the call trace
+ * of L1 in the dependency path; that is alright, because most of the time
+ * we can figure out where L1 is held from the call trace of L2.
+ */
+static void __used
+print_shortest_lock_dependencies_backwards(struct lock_list *leaf,
+					   struct lock_list *root)
+{
+	struct lock_list *entry = leaf;
+	const struct lock_trace *trace = NULL;
+	int depth;
+
+	/* compute depth from the tree generated by BFS */
+	depth = get_lock_depth(leaf);
+
+	do {
+		print_lock_class_header(entry->class, depth);
+		if (trace) {
+			printk("%*s ... acquired at:\n", depth, "");
+			print_lock_trace(trace, 2);
+			printk("\n");
+		}
+
+		/*
+		 * Record the pointer to the trace for the next lock_list
+		 * entry, see the comments for the function.
+		 */
+		trace = entry->trace;
+
+		if (depth == 0 && (entry != root)) {
+			printk("lockdep:%s bad path found in chain graph\n", __func__);
+			break;
+		}
+
+		entry = get_lock_parent(entry);
+		depth--;
+	} while (entry && (depth >= 0));
+}
+
 static void
 print_irq_lock_scenario(struct lock_list *safe_entry,
 			struct lock_list *unsafe_entry,
@@ -2449,7 +2553,7 @@ print_bad_irq_dependency(struct task_struct *curr,
 	prev_root->trace = save_trace();
 	if (!prev_root->trace)
 		return;
-	print_shortest_lock_dependencies(backwards_entry, prev_root);
+	print_shortest_lock_dependencies_backwards(backwards_entry, prev_root);
 
 	pr_warn("\nthe dependencies between the lock to be acquired");
 	pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
-- cgit v1.2.3
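To see why the backwards printer must delay each ->trace by one entry, here is a minimal stand-alone C sketch of the loop above. struct node, print_backwards() and the trace strings are illustrative stand-ins, not lockdep's real types or API:

#include <stdio.h>

struct node {
	const char *class;	/* lock class this entry points to */
	const char *trace;	/* where this class' *child* was acquired (backwards case) */
	struct node *parent;	/* BFS parent, towards @root */
};

/*
 * For a backwards entry "L2 <- L1", ->class is L1 but ->trace is the
 * acquisition site of L2, so the trace printed under a class header
 * must come from the entry visited one step earlier.
 */
static void print_backwards(struct node *leaf)
{
	const char *trace = NULL;

	for (struct node *e = leaf; e; e = e->parent) {
		printf("%s\n", e->class);
		if (trace)
			printf("  ... acquired at: %s\n", trace);
		trace = e->trace;	/* delayed by one entry */
	}
}

int main(void)
{
	/* path L1 -> L2 -> L3, found by a backwards search from L3 */
	struct node root = { "L3", NULL, NULL };		/* the "<- L3" helper */
	struct node mid  = { "L2", "L3 acquisition site", &root };
	struct node leaf = { "L1", "L2 acquisition site", &mid };

	print_backwards(&leaf);
	return 0;
}

Running it prints L1 first with no trace, then each later class header followed by the acquisition site recorded on the previous entry, matching the behaviour of print_shortest_lock_dependencies_backwards().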
From d4c157c7b1a67a0844a904baaca9a840c196c103 Mon Sep 17 00:00:00 2001
From: Boqun Feng
Date: Sat, 19 Jun 2021 01:01:08 +0800
Subject: locking/lockdep: Remove the unnecessary trace saving

In print_bad_irq_dependency(), save_trace() is called to set the ->trace
of @prev_root to the current call trace. However, @prev_root corresponds
to the held lock, which may not have been acquired in the current call
trace; therefore it's wrong to use save_trace() to set the ->trace of
@prev_root.

Moreover, with the adjusted printing of the backwards dependency path,
the ->trace of @prev_root is unnecessary, so remove it.

Reported-by: Johannes Berg
Signed-off-by: Boqun Feng
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20210618170110.3699115-3-boqun.feng@gmail.com
---
 kernel/locking/lockdep.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'kernel/locking/lockdep.c')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 3b32cd9cdfd0..74d084a398be 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2550,9 +2550,6 @@ print_bad_irq_dependency(struct task_struct *curr,
 	lockdep_print_held_locks(curr);
 
 	pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
-	prev_root->trace = save_trace();
-	if (!prev_root->trace)
-		return;
 	print_shortest_lock_dependencies_backwards(backwards_entry, prev_root);
 
 	pr_warn("\nthe dependencies between the lock to be acquired");
-- cgit v1.2.3


From 7b1f8c6179769af6ffa055e1169610b51d71edd5 Mon Sep 17 00:00:00 2001
From: Boqun Feng
Date: Sat, 19 Jun 2021 01:01:09 +0800
Subject: locking/lockdep: Avoid finding a wrong lock dep path in check_irq_usage()

In step #3 of check_irq_usage(), we search backwards to find a lock whose
usage conflicts with the usage of @target_entry1 on the safe/unsafe axis.
However, we should only take the irq-unsafe usage of @target_entry1 into
consideration, because a lock could be hardirq-unsafe but softirq-safe:
check_irq_usage() finds it because its hardirq-unsafe usage could result
in a hardirq safe -> unsafe deadlock, but since we currently don't filter
out the other usage bits, we may also find a lock dependency path
softirq-unsafe -> softirq-safe, which in fact doesn't cause a deadlock.
This can produce misleading lockdep splats.

Fix this by only keeping the LOCKF_ENABLED_IRQ_ALL bits when we do the
backwards search.

Reported-by: Johannes Berg
Signed-off-by: Boqun Feng
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20210618170110.3699115-4-boqun.feng@gmail.com
---
 kernel/locking/lockdep.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'kernel/locking/lockdep.c')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 74d084a398be..6ff1e8405a83 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2768,8 +2768,18 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
 	 * Step 3: we found a bad match! Now retrieve a lock from the backward
 	 * list whose usage mask matches the exclusive usage mask from the
 	 * lock found on the forward list.
+	 *
+	 * Note that we should only keep the LOCKF_ENABLED_IRQ_ALL bits,
+	 * considering the following case:
+	 *
+	 * When trying to add A -> B to the graph, we find that there is a
+	 * hardirq-safe L, with L -> ... -> A, and another hardirq-unsafe M,
+	 * with B -> ... -> M. However, M is **softirq-safe**: if we use the
+	 * exact inverted bits of M's usage_mask, we will find another lock N
+	 * that is **softirq-unsafe** with N -> ... -> A, even though
+	 * N -> ... -> M does not cause an inversion deadlock.
 	 */
-	backward_mask = original_mask(target_entry1->class->usage_mask);
+	backward_mask = original_mask(target_entry1->class->usage_mask & LOCKF_ENABLED_IRQ_ALL);
 
 	ret = find_usage_backwards(&this, backward_mask, &target_entry);
 	if (bfs_error(ret)) {
-- cgit v1.2.3
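The effect of the new "& LOCKF_ENABLED_IRQ_ALL" filter can be sketched in a few lines of stand-alone C. The bit values and the exclusive_mask() helper below are simplified stand-ins for lockdep's real encoding (the kernel's original_mask() also handles read bits), so treat this only as an illustration of the filtering idea:

#include <stdio.h>

#define LOCKF_USED_IN_HARDIRQ	0x01UL	/* hardirq-safe */
#define LOCKF_USED_IN_SOFTIRQ	0x02UL	/* softirq-safe */
#define LOCKF_ENABLED_HARDIRQ	0x04UL	/* hardirq-unsafe */
#define LOCKF_ENABLED_SOFTIRQ	0x08UL	/* softirq-unsafe */
#define LOCKF_ENABLED_IRQ_ALL	(LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ)

/* map every usage bit to the usage it conflicts with (safe <-> unsafe) */
static unsigned long exclusive_mask(unsigned long mask)
{
	unsigned long ret = 0;

	if (mask & LOCKF_ENABLED_HARDIRQ)
		ret |= LOCKF_USED_IN_HARDIRQ;
	if (mask & LOCKF_ENABLED_SOFTIRQ)
		ret |= LOCKF_USED_IN_SOFTIRQ;
	if (mask & LOCKF_USED_IN_HARDIRQ)
		ret |= LOCKF_ENABLED_HARDIRQ;
	if (mask & LOCKF_USED_IN_SOFTIRQ)
		ret |= LOCKF_ENABLED_SOFTIRQ;
	return ret;
}

int main(void)
{
	/* M from the comment above: hardirq-unsafe but softirq-safe */
	unsigned long m = LOCKF_ENABLED_HARDIRQ | LOCKF_USED_IN_SOFTIRQ;

	/* unfiltered: prints 0x9, M's softirq-safe bit also selects softirq-unsafe locks (the spurious N) */
	printf("unfiltered backward mask: %#lx\n", exclusive_mask(m));

	/* filtered as in the patch: prints 0x1, only M's unsafe bits drive the backward search */
	printf("filtered backward mask:   %#lx\n",
	       exclusive_mask(m & LOCKF_ENABLED_IRQ_ALL));
	return 0;
}

With the unfiltered mask, M's softirq-safe bit makes the backward search also accept softirq-unsafe locks, which is exactly the spurious match the patch removes.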
From f8b298cc39f0619544c607eaef09fd0b2afd10f3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 17 Jun 2021 20:57:18 +0200
Subject: lockdep: Fix wait-type for empty stack

Even the very first lock can violate the wait-context check; consider
the various IRQ contexts.

Fixes: de8f5e4f2dc1 ("lockdep: Introduce wait-type checks")
Signed-off-by: Peter Zijlstra (Intel)
Tested-by: Joerg Roedel
Link: https://lore.kernel.org/r/20210617190313.256987481@infradead.org
---
 kernel/locking/lockdep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/locking/lockdep.c')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 6ff1e8405a83..0584b2090084 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4688,7 +4688,7 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
 	u8 curr_inner;
 	int depth;
 
-	if (!curr->lockdep_depth || !next_inner || next->trylock)
+	if (!next_inner || next->trylock)
 		return 0;
 
 	if (!next_outer)
-- cgit v1.2.3


From 0e8a89d49d45197770f2e57fb15f1bc9ded96eb0 Mon Sep 17 00:00:00 2001
From: Xiongwei Song
Date: Fri, 18 Jun 2021 21:02:30 +0800
Subject: locking/lockdep: Correct the description error for check_redundant()

If no matching result is found, check_redundant() returns BFS_RNOMATCH,
not BFS_RMATCH as the comment claimed.

Signed-off-by: Xiongwei Song
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Boqun Feng
Link: https://lkml.kernel.org/r/20210618130230.123249-1-sxwjean@me.com
---
 kernel/locking/lockdep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/locking/lockdep.c')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 0584b2090084..095c87f97a31 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2829,7 +2829,7 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
  * <target> or not. If it can, <src> -> <target> dependency is already
  * in the graph.
  *
- * Return BFS_RMATCH if it does, or BFS_RMATCH if it does not, return BFS_E* if
+ * Return BFS_RMATCH if it does, or BFS_RNOMATCH if it does not, return BFS_E* if
  * any error appears in the bfs search.
  */
 static noinline enum bfs_result
-- cgit v1.2.3
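To make the corrected comment concrete, here is a small stand-alone sketch of the BFS result convention it describes. The enum follows the pattern lockdep uses (errors negative, match and no-match non-negative), but the exact values and the caller logic here are illustrative, not copied from the kernel:

#include <stdbool.h>
#include <stdio.h>

enum bfs_result {
	BFS_EINVALIDNODE = -2,	/* error: bad graph node */
	BFS_EQUEUEFULL   = -1,	/* error: search queue overflowed */
	BFS_RMATCH       = 0,	/* a matching dependency was found */
	BFS_RNOMATCH     = 1,	/* search completed without a match */
};

static bool bfs_error(enum bfs_result res)
{
	return res < 0;		/* only the BFS_E* values signal an error */
}

int main(void)
{
	enum bfs_result ret = BFS_RNOMATCH;	/* pretend check_redundant() result */

	if (bfs_error(ret))
		printf("bfs error\n");
	else if (ret == BFS_RMATCH)
		printf("dependency already in the graph, skip adding it\n");
	else	/* BFS_RNOMATCH */
		printf("no existing path, add the new dependency\n");
	return 0;
}

The corrected comment matters because callers distinguish all three outcomes: BFS_E* is propagated as a failure, BFS_RMATCH means the dependency is redundant, and only BFS_RNOMATCH leads to adding a new edge to the graph.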