diff options
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r-- | kernel/rcutree.c | 123 |
1 files changed, 119 insertions, 4 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0d532950baa3..f4b23f16677a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -224,6 +224,7 @@ static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS; module_param(jiffies_till_first_fqs, ulong, 0644); module_param(jiffies_till_next_fqs, ulong, 0644); +static void rcu_start_gp(struct rcu_state *rsp); static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)); static void force_quiescent_state(struct rcu_state *rsp); static int rcu_pending(int cpu); @@ -1075,6 +1076,120 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp, } /* + * Trace-event helper function for rcu_start_future_gp() and + * rcu_nocb_wait_gp(). + */ +static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, + unsigned long c, char *s) +{ + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, + rnp->completed, c, rnp->level, + rnp->grplo, rnp->grphi, s); +} + +/* + * Start some future grace period, as needed to handle newly arrived + * callbacks. The required future grace periods are recorded in each + * rcu_node structure's ->need_future_gp field. + * + * The caller must hold the specified rcu_node structure's ->lock. + */ +static unsigned long __maybe_unused +rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) +{ + unsigned long c; + int i; + struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); + + /* + * Pick up grace-period number for new callbacks. If this + * grace period is already marked as needed, return to the caller. + */ + c = rcu_cbs_completed(rdp->rsp, rnp); + trace_rcu_future_gp(rnp, rdp, c, "Startleaf"); + if (rnp->need_future_gp[c & 0x1]) { + trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf"); + return c; + } + + /* + * If either this rcu_node structure or the root rcu_node structure + * believe that a grace period is in progress, then we must wait + * for the one following, which is in "c". Because our request + * will be noticed at the end of the current grace period, we don't + * need to explicitly start one. + */ + if (rnp->gpnum != rnp->completed || + ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) { + rnp->need_future_gp[c & 0x1]++; + trace_rcu_future_gp(rnp, rdp, c, "Startedleaf"); + return c; + } + + /* + * There might be no grace period in progress. If we don't already + * hold it, acquire the root rcu_node structure's lock in order to + * start one (if needed). + */ + if (rnp != rnp_root) + raw_spin_lock(&rnp_root->lock); + + /* + * Get a new grace-period number. If there really is no grace + * period in progress, it will be smaller than the one we obtained + * earlier. Adjust callbacks as needed. Note that even no-CBs + * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed. + */ + c = rcu_cbs_completed(rdp->rsp, rnp_root); + for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++) + if (ULONG_CMP_LT(c, rdp->nxtcompleted[i])) + rdp->nxtcompleted[i] = c; + + /* + * If the needed for the required grace period is already + * recorded, trace and leave. + */ + if (rnp_root->need_future_gp[c & 0x1]) { + trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot"); + goto unlock_out; + } + + /* Record the need for the future grace period. */ + rnp_root->need_future_gp[c & 0x1]++; + + /* If a grace period is not already in progress, start one. */ + if (rnp_root->gpnum != rnp_root->completed) { + trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot"); + } else { + trace_rcu_future_gp(rnp, rdp, c, "Startedroot"); + rcu_start_gp(rdp->rsp); + } +unlock_out: + if (rnp != rnp_root) + raw_spin_unlock(&rnp_root->lock); + return c; +} + +/* + * Clean up any old requests for the just-ended grace period. Also return + * whether any additional grace periods have been requested. Also invoke + * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads + * waiting for this grace period to complete. + */ +static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) +{ + int c = rnp->completed; + int needmore; + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); + + rcu_nocb_gp_cleanup(rsp, rnp); + rnp->need_future_gp[c & 0x1] = 0; + needmore = rnp->need_future_gp[(c + 1) & 0x1]; + trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup"); + return needmore; +} + +/* * If there is room, assign a ->completed number to any callbacks on * this CPU that have not already been assigned. Also accelerate any * callbacks that were previously assigned a ->completed number that has @@ -1312,9 +1427,9 @@ static int rcu_gp_init(struct rcu_state *rsp) rdp = this_cpu_ptr(rsp->rda); rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; - rnp->gpnum = rsp->gpnum; + ACCESS_ONCE(rnp->gpnum) = rsp->gpnum; WARN_ON_ONCE(rnp->completed != rsp->completed); - rnp->completed = rsp->completed; + ACCESS_ONCE(rnp->completed) = rsp->completed; if (rnp == rdp->mynode) rcu_start_gp_per_cpu(rsp, rnp, rdp); rcu_preempt_boost_start_gp(rnp); @@ -1395,11 +1510,11 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) */ rcu_for_each_node_breadth_first(rsp, rnp) { raw_spin_lock_irq(&rnp->lock); - rnp->completed = rsp->gpnum; + ACCESS_ONCE(rnp->completed) = rsp->gpnum; rdp = this_cpu_ptr(rsp->rda); if (rnp == rdp->mynode) __rcu_process_gp_end(rsp, rnp, rdp); - nocb += rcu_nocb_gp_cleanup(rsp, rnp); + nocb += rcu_future_gp_cleanup(rsp, rnp); raw_spin_unlock_irq(&rnp->lock); cond_resched(); } |