diff options
Diffstat (limited to 'kernel/rcu')
-rw-r--r-- | kernel/rcu/srcutiny.c | 77 | ||||
-rw-r--r-- | kernel/rcu/srcutree.c | 127 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 4 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 9 | ||||
-rw-r--r-- | kernel/rcu/tree_stall.h | 34 |
5 files changed, 204 insertions, 47 deletions
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 6208c1dae5c9..26344dc6483b 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -34,6 +34,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp) ssp->srcu_gp_running = false; ssp->srcu_gp_waiting = false; ssp->srcu_idx = 0; + ssp->srcu_idx_max = 0; INIT_WORK(&ssp->srcu_work, srcu_drive_gp); INIT_LIST_HEAD(&ssp->srcu_work.entry); return 0; @@ -84,6 +85,8 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) WARN_ON(ssp->srcu_gp_waiting); WARN_ON(ssp->srcu_cb_head); WARN_ON(&ssp->srcu_cb_head != ssp->srcu_cb_tail); + WARN_ON(ssp->srcu_idx != ssp->srcu_idx_max); + WARN_ON(ssp->srcu_idx & 0x1); } EXPORT_SYMBOL_GPL(cleanup_srcu_struct); @@ -114,7 +117,7 @@ void srcu_drive_gp(struct work_struct *wp) struct srcu_struct *ssp; ssp = container_of(wp, struct srcu_struct, srcu_work); - if (ssp->srcu_gp_running || !READ_ONCE(ssp->srcu_cb_head)) + if (ssp->srcu_gp_running || USHORT_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) return; /* Already running or nothing to do. */ /* Remove recently arrived callbacks and wait for readers. */ @@ -124,11 +127,12 @@ void srcu_drive_gp(struct work_struct *wp) ssp->srcu_cb_head = NULL; ssp->srcu_cb_tail = &ssp->srcu_cb_head; local_irq_enable(); - idx = ssp->srcu_idx; - WRITE_ONCE(ssp->srcu_idx, !ssp->srcu_idx); + idx = (ssp->srcu_idx & 0x2) / 2; + WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); WRITE_ONCE(ssp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ swait_event_exclusive(ssp->srcu_wq, !READ_ONCE(ssp->srcu_lock_nesting[idx])); WRITE_ONCE(ssp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ + WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); /* Invoke the callbacks we removed above. */ while (lh) { @@ -146,11 +150,27 @@ void srcu_drive_gp(struct work_struct *wp) * straighten that out. */ WRITE_ONCE(ssp->srcu_gp_running, false); - if (READ_ONCE(ssp->srcu_cb_head)) + if (USHORT_CMP_LT(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) schedule_work(&ssp->srcu_work); } EXPORT_SYMBOL_GPL(srcu_drive_gp); +static void srcu_gp_start_if_needed(struct srcu_struct *ssp) +{ + unsigned short cookie; + + cookie = get_state_synchronize_srcu(ssp); + if (USHORT_CMP_GE(READ_ONCE(ssp->srcu_idx_max), cookie)) + return; + WRITE_ONCE(ssp->srcu_idx_max, cookie); + if (!READ_ONCE(ssp->srcu_gp_running)) { + if (likely(srcu_init_done)) + schedule_work(&ssp->srcu_work); + else if (list_empty(&ssp->srcu_work.entry)) + list_add(&ssp->srcu_work.entry, &srcu_boot_list); + } +} + /* * Enqueue an SRCU callback on the specified srcu_struct structure, * initiating grace-period processing if it is not already running. @@ -166,12 +186,7 @@ void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, *ssp->srcu_cb_tail = rhp; ssp->srcu_cb_tail = &rhp->next; local_irq_restore(flags); - if (!READ_ONCE(ssp->srcu_gp_running)) { - if (likely(srcu_init_done)) - schedule_work(&ssp->srcu_work); - else if (list_empty(&ssp->srcu_work.entry)) - list_add(&ssp->srcu_work.entry, &srcu_boot_list); - } + srcu_gp_start_if_needed(ssp); } EXPORT_SYMBOL_GPL(call_srcu); @@ -190,6 +205,48 @@ void synchronize_srcu(struct srcu_struct *ssp) } EXPORT_SYMBOL_GPL(synchronize_srcu); +/* + * get_state_synchronize_srcu - Provide an end-of-grace-period cookie + */ +unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp) +{ + unsigned long ret; + + barrier(); + ret = (READ_ONCE(ssp->srcu_idx) + 3) & ~0x1; + barrier(); + return ret & USHRT_MAX; +} +EXPORT_SYMBOL_GPL(get_state_synchronize_srcu); + +/* + * start_poll_synchronize_srcu - Provide cookie and start grace period + * + * The difference between this and get_state_synchronize_srcu() is that + * this function ensures that the poll_state_synchronize_srcu() will + * eventually return the value true. + */ +unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp) +{ + unsigned long ret = get_state_synchronize_srcu(ssp); + + srcu_gp_start_if_needed(ssp); + return ret; +} +EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu); + +/* + * poll_state_synchronize_srcu - Has cookie's grace period ended? + */ +bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) +{ + bool ret = USHORT_CMP_GE(READ_ONCE(ssp->srcu_idx), cookie); + + barrier(); + return ret; +} +EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu); + /* Lockdep diagnostics. */ void __init rcu_scheduler_starting(void) { diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 68ceac387844..b8821665c435 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -809,6 +809,46 @@ static void srcu_leak_callback(struct rcu_head *rhp) } /* + * Start an SRCU grace period, and also queue the callback if non-NULL. + */ +static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, + struct rcu_head *rhp, bool do_norm) +{ + unsigned long flags; + int idx; + bool needexp = false; + bool needgp = false; + unsigned long s; + struct srcu_data *sdp; + + check_init_srcu_struct(ssp); + idx = srcu_read_lock(ssp); + sdp = raw_cpu_ptr(ssp->sda); + spin_lock_irqsave_rcu_node(sdp, flags); + if (rhp) + rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); + rcu_segcblist_advance(&sdp->srcu_cblist, + rcu_seq_current(&ssp->srcu_gp_seq)); + s = rcu_seq_snap(&ssp->srcu_gp_seq); + (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s); + if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { + sdp->srcu_gp_seq_needed = s; + needgp = true; + } + if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) { + sdp->srcu_gp_seq_needed_exp = s; + needexp = true; + } + spin_unlock_irqrestore_rcu_node(sdp, flags); + if (needgp) + srcu_funnel_gp_start(ssp, sdp, s, do_norm); + else if (needexp) + srcu_funnel_exp_start(ssp, sdp->mynode, s); + srcu_read_unlock(ssp, idx); + return s; +} + +/* * Enqueue an SRCU callback on the srcu_data structure associated with * the current CPU and the specified srcu_struct structure, initiating * grace-period processing if it is not already running. @@ -839,14 +879,6 @@ static void srcu_leak_callback(struct rcu_head *rhp) static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func, bool do_norm) { - unsigned long flags; - int idx; - bool needexp = false; - bool needgp = false; - unsigned long s; - struct srcu_data *sdp; - - check_init_srcu_struct(ssp); if (debug_rcu_head_queue(rhp)) { /* Probable double call_srcu(), so leak the callback. */ WRITE_ONCE(rhp->func, srcu_leak_callback); @@ -854,28 +886,7 @@ static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, return; } rhp->func = func; - idx = srcu_read_lock(ssp); - sdp = raw_cpu_ptr(ssp->sda); - spin_lock_irqsave_rcu_node(sdp, flags); - rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); - rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&ssp->srcu_gp_seq)); - s = rcu_seq_snap(&ssp->srcu_gp_seq); - (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s); - if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { - sdp->srcu_gp_seq_needed = s; - needgp = true; - } - if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) { - sdp->srcu_gp_seq_needed_exp = s; - needexp = true; - } - spin_unlock_irqrestore_rcu_node(sdp, flags); - if (needgp) - srcu_funnel_gp_start(ssp, sdp, s, do_norm); - else if (needexp) - srcu_funnel_exp_start(ssp, sdp->mynode, s); - srcu_read_unlock(ssp, idx); + (void)srcu_gp_start_if_needed(ssp, rhp, do_norm); } /** @@ -1004,6 +1015,62 @@ void synchronize_srcu(struct srcu_struct *ssp) } EXPORT_SYMBOL_GPL(synchronize_srcu); +/** + * get_state_synchronize_srcu - Provide an end-of-grace-period cookie + * @ssp: srcu_struct to provide cookie for. + * + * This function returns a cookie that can be passed to + * poll_state_synchronize_srcu(), which will return true if a full grace + * period has elapsed in the meantime. It is the caller's responsibility + * to make sure that grace period happens, for example, by invoking + * call_srcu() after return from get_state_synchronize_srcu(). + */ +unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp) +{ + // Any prior manipulation of SRCU-protected data must happen + // before the load from ->srcu_gp_seq. + smp_mb(); + return rcu_seq_snap(&ssp->srcu_gp_seq); +} +EXPORT_SYMBOL_GPL(get_state_synchronize_srcu); + +/** + * start_poll_synchronize_srcu - Provide cookie and start grace period + * @ssp: srcu_struct to provide cookie for. + * + * This function returns a cookie that can be passed to + * poll_state_synchronize_srcu(), which will return true if a full grace + * period has elapsed in the meantime. Unlike get_state_synchronize_srcu(), + * this function also ensures that any needed SRCU grace period will be + * started. This convenience does come at a cost in terms of CPU overhead. + */ +unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp) +{ + return srcu_gp_start_if_needed(ssp, NULL, true); +} +EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu); + +/** + * poll_state_synchronize_srcu - Has cookie's grace period ended? + * @ssp: srcu_struct to provide cookie for. + * @cookie: Return value from get_state_synchronize_srcu() or start_poll_synchronize_srcu(). + * + * This function takes the cookie that was returned from either + * get_state_synchronize_srcu() or start_poll_synchronize_srcu(), and + * returns @true if an SRCU grace period elapsed since the time that the + * cookie was created. + */ +bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) +{ + if (!rcu_seq_done(&ssp->srcu_gp_seq, cookie)) + return false; + // Ensure that the end of the SRCU grace period happens before + // any subsequent code that the caller might execute. + smp_mb(); // ^^^ + return true; +} +EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu); + /* * Callback function for srcu_barrier() use. */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8c3ba0185082..8c81c05c4236 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2561,6 +2561,7 @@ static void rcu_do_batch(struct rcu_data *rdp) void rcu_sched_clock_irq(int user) { trace_rcu_utilization(TPS("Start scheduler-tick")); + lockdep_assert_irqs_disabled(); raw_cpu_inc(rcu_data.ticks_this_gp); /* The load-acquire pairs with the store-release setting to true. */ if (smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) { @@ -2574,6 +2575,7 @@ void rcu_sched_clock_irq(int user) rcu_flavor_sched_clock_irq(user); if (rcu_pending(user)) invoke_rcu_core(); + lockdep_assert_irqs_disabled(); trace_rcu_utilization(TPS("End scheduler-tick")); } @@ -3730,6 +3732,8 @@ static int rcu_pending(int user) struct rcu_data *rdp = this_cpu_ptr(&rcu_data); struct rcu_node *rnp = rdp->mynode; + lockdep_assert_irqs_disabled(); + /* Check for CPU stalls, if enabled. */ check_cpu_stall(rdp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7d4f78bf4057..c5091aeaa37b 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -682,6 +682,7 @@ static void rcu_flavor_sched_clock_irq(int user) { struct task_struct *t = current; + lockdep_assert_irqs_disabled(); if (user || rcu_is_cpu_rrupt_from_idle()) { rcu_note_voluntary_context_switch(current); } @@ -2590,17 +2591,17 @@ static void noinstr rcu_dynticks_task_exit(void) /* Turn on heavyweight RCU tasks trace readers on idle/user entry. */ static void rcu_dynticks_task_trace_enter(void) { -#ifdef CONFIG_TASKS_RCU_TRACE +#ifdef CONFIG_TASKS_TRACE_RCU if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) current->trc_reader_special.b.need_mb = true; -#endif /* #ifdef CONFIG_TASKS_RCU_TRACE */ +#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ } /* Turn off heavyweight RCU tasks trace readers on idle/user exit. */ static void rcu_dynticks_task_trace_exit(void) { -#ifdef CONFIG_TASKS_RCU_TRACE +#ifdef CONFIG_TASKS_TRACE_RCU if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) current->trc_reader_special.b.need_mb = false; -#endif /* #ifdef CONFIG_TASKS_RCU_TRACE */ +#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ } diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index ca21d28a0f98..251a9af3709a 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -7,6 +7,8 @@ * Author: Paul E. McKenney <paulmck@linux.ibm.com> */ +#include <linux/kvm_para.h> + ////////////////////////////////////////////////////////////////////////////// // // Controlling CPU stall warnings, including delay calculation. @@ -260,8 +262,11 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) struct task_struct *t; struct task_struct *ts[8]; - if (!rcu_preempt_blocked_readers_cgp(rnp)) + lockdep_assert_irqs_disabled(); + if (!rcu_preempt_blocked_readers_cgp(rnp)) { + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return 0; + } pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", rnp->level, rnp->grplo, rnp->grphi); t = list_entry(rnp->gp_tasks->prev, @@ -273,8 +278,8 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) break; } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - for (i--; i; i--) { - t = ts[i]; + while (i) { + t = ts[--i]; if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr)) pr_cont(" P%d", t->pid); else @@ -284,6 +289,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) ".q"[rscr.rs.b.need_qs], ".e"[rscr.rs.b.exp_hint], ".l"[rscr.on_blkd_list]); + lockdep_assert_irqs_disabled(); put_task_struct(t); ndetected++; } @@ -472,6 +478,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) struct rcu_node *rnp; long totqlen = 0; + lockdep_assert_irqs_disabled(); + /* Kick and suppress, if so configured. */ rcu_stall_kick_kthreads(); if (rcu_stall_is_suppressed()) @@ -493,6 +501,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) } } ndetected += rcu_print_task_stall(rnp, flags); // Releases rnp->lock. + lockdep_assert_irqs_disabled(); } for_each_possible_cpu(cpu) @@ -538,6 +547,8 @@ static void print_cpu_stall(unsigned long gps) struct rcu_node *rnp = rcu_get_root(); long totqlen = 0; + lockdep_assert_irqs_disabled(); + /* Kick and suppress, if so configured. */ rcu_stall_kick_kthreads(); if (rcu_stall_is_suppressed()) @@ -592,6 +603,7 @@ static void check_cpu_stall(struct rcu_data *rdp) unsigned long js; struct rcu_node *rnp; + lockdep_assert_irqs_disabled(); if ((rcu_stall_is_suppressed() && !READ_ONCE(rcu_kick_kthreads)) || !rcu_gp_in_progress()) return; @@ -633,6 +645,14 @@ static void check_cpu_stall(struct rcu_data *rdp) (READ_ONCE(rnp->qsmask) & rdp->grpmask) && cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { + /* + * If a virtual machine is stopped by the host it can look to + * the watchdog like an RCU stall. Check to see if the host + * stopped the vm. + */ + if (kvm_check_and_clear_guest_paused()) + return; + /* We haven't checked in, so go dump stack. */ print_cpu_stall(gps); if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) @@ -642,6 +662,14 @@ static void check_cpu_stall(struct rcu_data *rdp) ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) && cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { + /* + * If a virtual machine is stopped by the host it can look to + * the watchdog like an RCU stall. Check to see if the host + * stopped the vm. + */ + if (kvm_check_and_clear_guest_paused()) + return; + /* They had a few time units to dump stack, so complain. */ print_other_cpu_stall(gs2, gps); if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) |