diff options
Diffstat (limited to 'kernel/rcu')
-rw-r--r-- | kernel/rcu/rcutorture.c | 217 | ||||
-rw-r--r-- | kernel/rcu/tiny_plugin.h | 8 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 331 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 11 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 144 | ||||
-rw-r--r-- | kernel/rcu/update.c | 30 |
6 files changed, 481 insertions, 260 deletions
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index bd30bc61bc05..7fa34f86e5ba 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -58,9 +58,11 @@ torture_param(int, fqs_duration, 0, "Duration of fqs bursts (us), 0 to disable"); torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)"); torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)"); +torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives"); torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); torture_param(bool, gp_normal, false, "Use normal (non-expedited) GP wait primitives"); +torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives"); torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers"); torture_param(int, n_barrier_cbs, 0, "# of callbacks/kthreads for barrier testing"); @@ -138,6 +140,18 @@ static long n_barrier_attempts; static long n_barrier_successes; static struct list_head rcu_torture_removed; +static int rcu_torture_writer_state; +#define RTWS_FIXED_DELAY 0 +#define RTWS_DELAY 1 +#define RTWS_REPLACE 2 +#define RTWS_DEF_FREE 3 +#define RTWS_EXP_SYNC 4 +#define RTWS_COND_GET 5 +#define RTWS_COND_SYNC 6 +#define RTWS_SYNC 7 +#define RTWS_STUTTER 8 +#define RTWS_STOPPING 9 + #if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE) #define RCUTORTURE_RUNNABLE_INIT 1 #else @@ -214,6 +228,7 @@ rcu_torture_free(struct rcu_torture *p) */ struct rcu_torture_ops { + int ttype; void (*init)(void); int (*readlock)(void); void (*read_delay)(struct torture_random_state *rrsp); @@ -222,6 +237,8 @@ struct rcu_torture_ops { void (*deferred_free)(struct rcu_torture *p); void (*sync)(void); void (*exp_sync)(void); + unsigned long (*get_state)(void); + void (*cond_sync)(unsigned long oldstate); void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); void (*cb_barrier)(void); void (*fqs)(void); @@ -273,10 +290,48 @@ static int rcu_torture_completed(void) return rcu_batches_completed(); } +/* + * Update callback in the pipe. This should be invoked after a grace period. + */ +static bool +rcu_torture_pipe_update_one(struct rcu_torture *rp) +{ + int i; + + i = rp->rtort_pipe_count; + if (i > RCU_TORTURE_PIPE_LEN) + i = RCU_TORTURE_PIPE_LEN; + atomic_inc(&rcu_torture_wcount[i]); + if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { + rp->rtort_mbtest = 0; + return true; + } + return false; +} + +/* + * Update all callbacks in the pipe. Suitable for synchronous grace-period + * primitives. + */ +static void +rcu_torture_pipe_update(struct rcu_torture *old_rp) +{ + struct rcu_torture *rp; + struct rcu_torture *rp1; + + if (old_rp) + list_add(&old_rp->rtort_free, &rcu_torture_removed); + list_for_each_entry_safe(rp, rp1, &rcu_torture_removed, rtort_free) { + if (rcu_torture_pipe_update_one(rp)) { + list_del(&rp->rtort_free); + rcu_torture_free(rp); + } + } +} + static void rcu_torture_cb(struct rcu_head *p) { - int i; struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu); if (torture_must_stop_irq()) { @@ -284,16 +339,10 @@ rcu_torture_cb(struct rcu_head *p) /* The next initialization will pick up the pieces. */ return; } - i = rp->rtort_pipe_count; - if (i > RCU_TORTURE_PIPE_LEN) - i = RCU_TORTURE_PIPE_LEN; - atomic_inc(&rcu_torture_wcount[i]); - if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { - rp->rtort_mbtest = 0; + if (rcu_torture_pipe_update_one(rp)) rcu_torture_free(rp); - } else { + else cur_ops->deferred_free(rp); - } } static int rcu_no_completed(void) @@ -312,6 +361,7 @@ static void rcu_sync_torture_init(void) } static struct rcu_torture_ops rcu_ops = { + .ttype = RCU_FLAVOR, .init = rcu_sync_torture_init, .readlock = rcu_torture_read_lock, .read_delay = rcu_read_delay, @@ -320,6 +370,8 @@ static struct rcu_torture_ops rcu_ops = { .deferred_free = rcu_torture_deferred_free, .sync = synchronize_rcu, .exp_sync = synchronize_rcu_expedited, + .get_state = get_state_synchronize_rcu, + .cond_sync = cond_synchronize_rcu, .call = call_rcu, .cb_barrier = rcu_barrier, .fqs = rcu_force_quiescent_state, @@ -355,6 +407,7 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p) } static struct rcu_torture_ops rcu_bh_ops = { + .ttype = RCU_BH_FLAVOR, .init = rcu_sync_torture_init, .readlock = rcu_bh_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ @@ -397,6 +450,7 @@ call_rcu_busted(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } static struct rcu_torture_ops rcu_busted_ops = { + .ttype = INVALID_RCU_FLAVOR, .init = rcu_sync_torture_init, .readlock = rcu_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ @@ -479,9 +533,11 @@ static void srcu_torture_stats(char *page) page += sprintf(page, "%s%s per-CPU(idx=%d):", torture_type, TORTURE_FLAG, idx); for_each_possible_cpu(cpu) { - page += sprintf(page, " %d(%lu,%lu)", cpu, - per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx], - per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]); + long c0, c1; + + c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx]; + c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]; + page += sprintf(page, " %d(%ld,%ld)", cpu, c0, c1); } sprintf(page, "\n"); } @@ -492,6 +548,7 @@ static void srcu_torture_synchronize_expedited(void) } static struct rcu_torture_ops srcu_ops = { + .ttype = SRCU_FLAVOR, .init = rcu_sync_torture_init, .readlock = srcu_torture_read_lock, .read_delay = srcu_read_delay, @@ -527,6 +584,7 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p) } static struct rcu_torture_ops sched_ops = { + .ttype = RCU_SCHED_FLAVOR, .init = rcu_sync_torture_init, .readlock = sched_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ @@ -688,23 +746,59 @@ rcu_torture_fqs(void *arg) static int rcu_torture_writer(void *arg) { - bool exp; + unsigned long gp_snap; + bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal; + bool gp_sync1 = gp_sync; int i; struct rcu_torture *rp; - struct rcu_torture *rp1; struct rcu_torture *old_rp; static DEFINE_TORTURE_RANDOM(rand); + int synctype[] = { RTWS_DEF_FREE, RTWS_EXP_SYNC, + RTWS_COND_GET, RTWS_SYNC }; + int nsynctypes = 0; VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); - set_user_nice(current, MAX_NICE); + + /* Initialize synctype[] array. If none set, take default. */ + if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync) + gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true; + if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) + synctype[nsynctypes++] = RTWS_COND_GET; + else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync)) + pr_alert("rcu_torture_writer: gp_cond without primitives.\n"); + if (gp_exp1 && cur_ops->exp_sync) + synctype[nsynctypes++] = RTWS_EXP_SYNC; + else if (gp_exp && !cur_ops->exp_sync) + pr_alert("rcu_torture_writer: gp_exp without primitives.\n"); + if (gp_normal1 && cur_ops->deferred_free) + synctype[nsynctypes++] = RTWS_DEF_FREE; + else if (gp_normal && !cur_ops->deferred_free) + pr_alert("rcu_torture_writer: gp_normal without primitives.\n"); + if (gp_sync1 && cur_ops->sync) + synctype[nsynctypes++] = RTWS_SYNC; + else if (gp_sync && !cur_ops->sync) + pr_alert("rcu_torture_writer: gp_sync without primitives.\n"); + if (WARN_ONCE(nsynctypes == 0, + "rcu_torture_writer: No update-side primitives.\n")) { + /* + * No updates primitives, so don't try updating. + * The resulting test won't be testing much, hence the + * above WARN_ONCE(). + */ + rcu_torture_writer_state = RTWS_STOPPING; + torture_kthread_stopping("rcu_torture_writer"); + } do { + rcu_torture_writer_state = RTWS_FIXED_DELAY; schedule_timeout_uninterruptible(1); rp = rcu_torture_alloc(); if (rp == NULL) continue; rp->rtort_pipe_count = 0; + rcu_torture_writer_state = RTWS_DELAY; udelay(torture_random(&rand) & 0x3ff); + rcu_torture_writer_state = RTWS_REPLACE; old_rp = rcu_dereference_check(rcu_torture_current, current == writer_task); rp->rtort_mbtest = 1; @@ -716,35 +810,42 @@ rcu_torture_writer(void *arg) i = RCU_TORTURE_PIPE_LEN; atomic_inc(&rcu_torture_wcount[i]); old_rp->rtort_pipe_count++; - if (gp_normal == gp_exp) - exp = !!(torture_random(&rand) & 0x80); - else - exp = gp_exp; - if (!exp) { + switch (synctype[torture_random(&rand) % nsynctypes]) { + case RTWS_DEF_FREE: + rcu_torture_writer_state = RTWS_DEF_FREE; cur_ops->deferred_free(old_rp); - } else { + break; + case RTWS_EXP_SYNC: + rcu_torture_writer_state = RTWS_EXP_SYNC; cur_ops->exp_sync(); - list_add(&old_rp->rtort_free, - &rcu_torture_removed); - list_for_each_entry_safe(rp, rp1, - &rcu_torture_removed, - rtort_free) { - i = rp->rtort_pipe_count; - if (i > RCU_TORTURE_PIPE_LEN) - i = RCU_TORTURE_PIPE_LEN; - atomic_inc(&rcu_torture_wcount[i]); - if (++rp->rtort_pipe_count >= - RCU_TORTURE_PIPE_LEN) { - rp->rtort_mbtest = 0; - list_del(&rp->rtort_free); - rcu_torture_free(rp); - } - } + rcu_torture_pipe_update(old_rp); + break; + case RTWS_COND_GET: + rcu_torture_writer_state = RTWS_COND_GET; + gp_snap = cur_ops->get_state(); + i = torture_random(&rand) % 16; + if (i != 0) + schedule_timeout_interruptible(i); + udelay(torture_random(&rand) % 1000); + rcu_torture_writer_state = RTWS_COND_SYNC; + cur_ops->cond_sync(gp_snap); + rcu_torture_pipe_update(old_rp); + break; + case RTWS_SYNC: + rcu_torture_writer_state = RTWS_SYNC; + cur_ops->sync(); + rcu_torture_pipe_update(old_rp); + break; + default: + WARN_ON_ONCE(1); + break; } } rcutorture_record_progress(++rcu_torture_current_version); + rcu_torture_writer_state = RTWS_STUTTER; stutter_wait("rcu_torture_writer"); } while (!torture_must_stop()); + rcu_torture_writer_state = RTWS_STOPPING; torture_kthread_stopping("rcu_torture_writer"); return 0; } @@ -784,7 +885,7 @@ rcu_torture_fakewriter(void *arg) return 0; } -void rcutorture_trace_dump(void) +static void rcutorture_trace_dump(void) { static atomic_t beenhere = ATOMIC_INIT(0); @@ -918,11 +1019,13 @@ rcu_torture_reader(void *arg) __this_cpu_inc(rcu_torture_batch[completed]); preempt_enable(); cur_ops->readunlock(idx); - schedule(); + cond_resched(); stutter_wait("rcu_torture_reader"); } while (!torture_must_stop()); - if (irqreader && cur_ops->irq_capable) + if (irqreader && cur_ops->irq_capable) { del_timer_sync(&t); + destroy_timer_on_stack(&t); + } torture_kthread_stopping("rcu_torture_reader"); return 0; } @@ -937,6 +1040,7 @@ rcu_torture_printk(char *page) int i; long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; + static unsigned long rtcv_snap = ULONG_MAX; for_each_possible_cpu(cpu) { for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { @@ -997,6 +1101,22 @@ rcu_torture_printk(char *page) page += sprintf(page, "\n"); if (cur_ops->stats) cur_ops->stats(page); + if (rtcv_snap == rcu_torture_current_version && + rcu_torture_current != NULL) { + int __maybe_unused flags; + unsigned long __maybe_unused gpnum; + unsigned long __maybe_unused completed; + + rcutorture_get_gp_data(cur_ops->ttype, + &flags, &gpnum, &completed); + page += sprintf(page, + "??? Writer stall state %d g%lu c%lu f%#x\n", + rcu_torture_writer_state, + gpnum, completed, flags); + show_rcu_gp_kthreads(); + rcutorture_trace_dump(); + } + rtcv_snap = rcu_torture_current_version; } /* @@ -1146,7 +1266,7 @@ static int __init rcu_torture_stall_init(void) } /* Callback function for RCU barrier testing. */ -void rcu_torture_barrier_cbf(struct rcu_head *rcu) +static void rcu_torture_barrier_cbf(struct rcu_head *rcu) { atomic_inc(&barrier_cbs_invoked); } @@ -1416,7 +1536,8 @@ rcu_torture_init(void) &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, }; - torture_init_begin(torture_type, verbose, &rcutorture_runnable); + if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable)) + return -EBUSY; /* Process args and tell the world that the torturer is on the job. */ for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { @@ -1441,10 +1562,13 @@ rcu_torture_init(void) if (cur_ops->init) cur_ops->init(); /* no "goto unwind" prior to this point!!! */ - if (nreaders >= 0) + if (nreaders >= 0) { nrealreaders = nreaders; - else - nrealreaders = 2 * num_online_cpus(); + } else { + nrealreaders = num_online_cpus() - 1; + if (nrealreaders <= 0) + nrealreaders = 1; + } rcu_torture_print_module_parms(cur_ops, "Start of test"); /* Set up the freelist. */ @@ -1533,7 +1657,8 @@ rcu_torture_init(void) fqs_duration = 0; if (fqs_duration) { /* Create the fqs thread */ - torture_create_kthread(rcu_torture_fqs, NULL, fqs_task); + firsterr = torture_create_kthread(rcu_torture_fqs, NULL, + fqs_task); if (firsterr) goto unwind; } diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index 431528520562..858c56569127 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h @@ -144,7 +144,7 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp) return; rcp->ticks_this_gp++; j = jiffies; - js = rcp->jiffies_stall; + js = ACCESS_ONCE(rcp->jiffies_stall); if (*rcp->curtail && ULONG_CMP_GE(j, js)) { pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n", rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting, @@ -152,17 +152,17 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp) dump_stack(); } if (*rcp->curtail && ULONG_CMP_GE(j, js)) - rcp->jiffies_stall = jiffies + + ACCESS_ONCE(rcp->jiffies_stall) = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; else if (ULONG_CMP_GE(j, js)) - rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check(); + ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check(); } static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp) { rcp->ticks_this_gp = 0; rcp->gp_start = jiffies; - rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check(); + ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check(); } static void check_cpu_stalls(void) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 0c47e300210a..f1ba77363fbb 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -101,7 +101,7 @@ DEFINE_PER_CPU(struct rcu_data, sname##_data) RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); -static struct rcu_state *rcu_state; +static struct rcu_state *rcu_state_p; LIST_HEAD(rcu_struct_flavors); /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ @@ -243,7 +243,7 @@ static ulong jiffies_till_next_fqs = ULONG_MAX; module_param(jiffies_till_first_fqs, ulong, 0644); module_param(jiffies_till_next_fqs, ulong, 0644); -static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, +static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp); static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp, bool *isidle, @@ -271,6 +271,15 @@ long rcu_batches_completed_bh(void) EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); /* + * Force a quiescent state. + */ +void rcu_force_quiescent_state(void) +{ + force_quiescent_state(rcu_state_p); +} +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); + +/* * Force a quiescent state for RCU BH. */ void rcu_bh_force_quiescent_state(void) @@ -280,6 +289,21 @@ void rcu_bh_force_quiescent_state(void) EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); /* + * Show the state of the grace-period kthreads. + */ +void show_rcu_gp_kthreads(void) +{ + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) { + pr_info("%s: wait state: %d ->state: %#lx\n", + rsp->name, rsp->gp_state, rsp->gp_kthread->state); + /* sched_show_task(rsp->gp_kthread); */ + } +} +EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads); + +/* * Record the number of times rcutorture tests have been initiated and * terminated. This information allows the debugfs tracing stats to be * correlated to the rcutorture messages, even when the rcutorture module @@ -294,6 +318,39 @@ void rcutorture_record_test_transition(void) EXPORT_SYMBOL_GPL(rcutorture_record_test_transition); /* + * Send along grace-period-related data for rcutorture diagnostics. + */ +void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, + unsigned long *gpnum, unsigned long *completed) +{ + struct rcu_state *rsp = NULL; + + switch (test_type) { + case RCU_FLAVOR: + rsp = rcu_state_p; + break; + case RCU_BH_FLAVOR: + rsp = &rcu_bh_state; + break; + case RCU_SCHED_FLAVOR: + rsp = &rcu_sched_state; + break; + default: + break; + } + if (rsp != NULL) { + *flags = ACCESS_ONCE(rsp->gp_flags); + *gpnum = ACCESS_ONCE(rsp->gpnum); + *completed = ACCESS_ONCE(rsp->completed); + return; + } + *flags = 0; + *gpnum = 0; + *completed = 0; +} +EXPORT_SYMBOL_GPL(rcutorture_get_gp_data); + +/* * Record the number of writer passes through the current rcutorture test. * This is also used to correlate debugfs tracing stats with the rcutorture * messages. @@ -324,6 +381,28 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) } /* + * Return the root node of the specified rcu_state structure. + */ +static struct rcu_node *rcu_get_root(struct rcu_state *rsp) +{ + return &rsp->node[0]; +} + +/* + * Is there any need for future grace periods? + * Interrupts must be disabled. If the caller does not hold the root + * rnp_node structure's ->lock, the results are advisory only. + */ +static int rcu_future_needs_gp(struct rcu_state *rsp) +{ + struct rcu_node *rnp = rcu_get_root(rsp); + int idx = (ACCESS_ONCE(rnp->completed) + 1) & 0x1; + int *fp = &rnp->need_future_gp[idx]; + + return ACCESS_ONCE(*fp); +} + +/* * Does the current CPU require a not-yet-started grace period? * The caller must have disabled interrupts to prevent races with * normal callback registry. @@ -335,7 +414,7 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) if (rcu_gp_in_progress(rsp)) return 0; /* No, a grace period is already in progress. */ - if (rcu_nocb_needs_gp(rsp)) + if (rcu_future_needs_gp(rsp)) return 1; /* Yes, a no-CBs CPU needs one. */ if (!rdp->nxttail[RCU_NEXT_TAIL]) return 0; /* No, this is a no-CBs (or offline) CPU. */ @@ -350,14 +429,6 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) } /* - * Return the root node of the specified rcu_state structure. - */ -static struct rcu_node *rcu_get_root(struct rcu_state *rsp) -{ - return &rsp->node[0]; -} - -/* * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state * * If the new value of the ->dynticks_nesting counter now is zero, @@ -387,9 +458,9 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, } rcu_prepare_for_idle(smp_processor_id()); /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ - smp_mb__before_atomic_inc(); /* See above. */ + smp_mb__before_atomic(); /* See above. */ atomic_inc(&rdtp->dynticks); - smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ + smp_mb__after_atomic(); /* Force ordering with next sojourn. */ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); /* @@ -507,10 +578,10 @@ void rcu_irq_exit(void) static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, int user) { - smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ + smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */ atomic_inc(&rdtp->dynticks); /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ - smp_mb__after_atomic_inc(); /* See above. */ + smp_mb__after_atomic(); /* See above. */ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); rcu_cleanup_after_idle(smp_processor_id()); trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); @@ -635,10 +706,10 @@ void rcu_nmi_enter(void) (atomic_read(&rdtp->dynticks) & 0x1)) return; rdtp->dynticks_nmi_nesting++; - smp_mb__before_atomic_inc(); /* Force delay from prior write. */ + smp_mb__before_atomic(); /* Force delay from prior write. */ atomic_inc(&rdtp->dynticks); /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ - smp_mb__after_atomic_inc(); /* See above. */ + smp_mb__after_atomic(); /* See above. */ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); } @@ -657,9 +728,9 @@ void rcu_nmi_exit(void) --rdtp->dynticks_nmi_nesting != 0) return; /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ - smp_mb__before_atomic_inc(); /* See above. */ + smp_mb__before_atomic(); /* See above. */ atomic_inc(&rdtp->dynticks); - smp_mb__after_atomic_inc(); /* Force delay to next write. */ + smp_mb__after_atomic(); /* Force delay to next write. */ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); } @@ -758,7 +829,12 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp, { rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); rcu_sysidle_check_cpu(rdp, isidle, maxj); - return (rdp->dynticks_snap & 0x1) == 0; + if ((rdp->dynticks_snap & 0x1) == 0) { + trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); + return 1; + } else { + return 0; + } } /* @@ -834,7 +910,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, * we will beat on the first one until it gets unstuck, then move * to the next. Only do this for the primary flavor of RCU. */ - if (rdp->rsp == rcu_state && + if (rdp->rsp == rcu_state_p && ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { rdp->rsp->jiffies_resched += 5; resched_cpu(rdp->cpu); @@ -851,7 +927,7 @@ static void record_gp_stall_check_time(struct rcu_state *rsp) rsp->gp_start = j; smp_wmb(); /* Record start time before stall time. */ j1 = rcu_jiffies_till_stall_check(); - rsp->jiffies_stall = j + j1; + ACCESS_ONCE(rsp->jiffies_stall) = j + j1; rsp->jiffies_resched = j + j1 / 2; } @@ -890,12 +966,12 @@ static void print_other_cpu_stall(struct rcu_state *rsp) /* Only let one CPU complain about others per time interval. */ raw_spin_lock_irqsave(&rnp->lock, flags); - delta = jiffies - rsp->jiffies_stall; + delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall); if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } - rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; + ACCESS_ONCE(rsp->jiffies_stall) = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; raw_spin_unlock_irqrestore(&rnp->lock, flags); /* @@ -932,9 +1008,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp) print_cpu_stall_info_end(); for_each_possible_cpu(cpu) totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; - pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n", + pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", smp_processor_id(), (long)(jiffies - rsp->gp_start), - rsp->gpnum, rsp->completed, totqlen); + (long)rsp->gpnum, (long)rsp->completed, totqlen); if (ndetected == 0) pr_err("INFO: Stall ended before state dump start\n"); else if (!trigger_all_cpu_backtrace()) @@ -947,12 +1023,6 @@ static void print_other_cpu_stall(struct rcu_state *rsp) force_quiescent_state(rsp); /* Kick them all. */ } -/* - * This function really isn't for public consumption, but RCU is special in - * that context switches can allow the state machine to make progress. - */ -extern void resched_cpu(int cpu); - static void print_cpu_stall(struct rcu_state *rsp) { int cpu; @@ -971,14 +1041,15 @@ static void print_cpu_stall(struct rcu_state *rsp) print_cpu_stall_info_end(); for_each_possible_cpu(cpu) totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; - pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n", - jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen); + pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n", + jiffies - rsp->gp_start, + (long)rsp->gpnum, (long)rsp->completed, totqlen); if (!trigger_all_cpu_backtrace()) dump_stack(); raw_spin_lock_irqsave(&rnp->lock, flags); - if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) - rsp->jiffies_stall = jiffies + + if (ULONG_CMP_GE(jiffies, ACCESS_ONCE(rsp->jiffies_stall))) + ACCESS_ONCE(rsp->jiffies_stall) = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; raw_spin_unlock_irqrestore(&rnp->lock, flags); @@ -1062,7 +1133,7 @@ void rcu_cpu_stall_reset(void) struct rcu_state *rsp; for_each_rcu_flavor(rsp) - rsp->jiffies_stall = jiffies + ULONG_MAX / 2; + ACCESS_ONCE(rsp->jiffies_stall) = jiffies + ULONG_MAX / 2; } /* @@ -1123,15 +1194,18 @@ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, /* * Start some future grace period, as needed to handle newly arrived * callbacks. The required future grace periods are recorded in each - * rcu_node structure's ->need_future_gp field. + * rcu_node structure's ->need_future_gp field. Returns true if there + * is reason to awaken the grace-period kthread. * * The caller must hold the specified rcu_node structure's ->lock. */ -static unsigned long __maybe_unused -rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) +static bool __maybe_unused +rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, + unsigned long *c_out) { unsigned long c; int i; + bool ret = false; struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); /* @@ -1142,7 +1216,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf")); if (rnp->need_future_gp[c & 0x1]) { trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf")); - return c; + goto out; } /* @@ -1156,7 +1230,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) { rnp->need_future_gp[c & 0x1]++; trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf")); - return c; + goto out; } /* @@ -1197,12 +1271,15 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot")); } else { trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot")); - rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp); + ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp); } unlock_out: if (rnp != rnp_root) raw_spin_unlock(&rnp_root->lock); - return c; +out: + if (c_out != NULL) + *c_out = c; + return ret; } /* @@ -1226,25 +1303,43 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) } /* + * Awaken the grace-period kthread for the specified flavor of RCU. + * Don't do a self-awaken, and don't bother awakening when there is + * nothing for the grace-period kthread to do (as in several CPUs + * raced to awaken, and we lost), and finally don't try to awaken + * a kthread that has not yet been created. + */ +static void rcu_gp_kthread_wake(struct rcu_state *rsp) +{ + if (current == rsp->gp_kthread || + !ACCESS_ONCE(rsp->gp_flags) || + !rsp->gp_kthread) + return; + wake_up(&rsp->gp_wq); +} + +/* * If there is room, assign a ->completed number to any callbacks on * this CPU that have not already been assigned. Also accelerate any * callbacks that were previously assigned a ->completed number that has * since proven to be too conservative, which can happen if callbacks get * assigned a ->completed number while RCU is idle, but with reference to * a non-root rcu_node structure. This function is idempotent, so it does - * not hurt to call it repeatedly. + * not hurt to call it repeatedly. Returns an flag saying that we should + * awaken the RCU grace-period kthread. * * The caller must hold rnp->lock with interrupts disabled. */ -static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, +static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { unsigned long c; int i; + bool ret; /* If the CPU has no callbacks, nothing to do. */ if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) - return; + return false; /* * Starting from the sublist containing the callbacks most @@ -1273,7 +1368,7 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, * be grouped into. */ if (++i >= RCU_NEXT_TAIL) - return; + return false; /* * Assign all subsequent callbacks' ->completed number to the next @@ -1285,13 +1380,14 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, rdp->nxtcompleted[i] = c; } /* Record any needed additional grace periods. */ - rcu_start_future_gp(rnp, rdp); + ret = rcu_start_future_gp(rnp, rdp, NULL); /* Trace depending on how much we were able to accelerate. */ if (!*rdp->nxttail[RCU_WAIT_TAIL]) trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB")); else trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB")); + return ret; } /* @@ -1300,17 +1396,18 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL * sublist. This function is idempotent, so it does not hurt to * invoke it repeatedly. As long as it is not invoked -too- often... + * Returns true if the RCU grace-period kthread needs to be awakened. * * The caller must hold rnp->lock with interrupts disabled. */ -static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, +static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { int i, j; /* If the CPU has no callbacks, nothing to do. */ if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) - return; + return false; /* * Find all callbacks whose ->completed numbers indicate that they @@ -1334,26 +1431,30 @@ static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, } /* Classify any remaining callbacks. */ - rcu_accelerate_cbs(rsp, rnp, rdp); + return rcu_accelerate_cbs(rsp, rnp, rdp); } /* * Update CPU-local rcu_data state to record the beginnings and ends of * grace periods. The caller must hold the ->lock of the leaf rcu_node * structure corresponding to the current CPU, and must have irqs disabled. + * Returns true if the grace-period kthread needs to be awakened. */ -static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) +static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, + struct rcu_data *rdp) { + bool ret; + /* Handle the ends of any preceding grace periods first. */ if (rdp->completed == rnp->completed) { /* No grace period end, so just accelerate recent callbacks. */ - rcu_accelerate_cbs(rsp, rnp, rdp); + ret = rcu_accelerate_cbs(rsp, rnp, rdp); } else { /* Advance callbacks. */ - rcu_advance_cbs(rsp, rnp, rdp); + ret = rcu_advance_cbs(rsp, rnp, rdp); /* Remember that we saw this grace-period completion. */ rdp->completed = rnp->completed; @@ -1372,11 +1473,13 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); zero_cpu_stall_ticks(rdp); } + return ret; } static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) { unsigned long flags; + bool needwake; struct rcu_node *rnp; local_irq_save(flags); @@ -1388,8 +1491,10 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) return; } smp_mb__after_unlock_lock(); - __note_gp_changes(rsp, rnp, rdp); + needwake = __note_gp_changes(rsp, rnp, rdp); raw_spin_unlock_irqrestore(&rnp->lock, flags); + if (needwake) + rcu_gp_kthread_wake(rsp); } /* @@ -1403,12 +1508,12 @@ static int rcu_gp_init(struct rcu_state *rsp) rcu_bind_gp_kthread(); raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); - if (rsp->gp_flags == 0) { + if (!ACCESS_ONCE(rsp->gp_flags)) { /* Spurious wakeup, tell caller to go back to sleep. */ raw_spin_unlock_irq(&rnp->lock); return 0; } - rsp->gp_flags = 0; /* Clear all flags: New grace period. */ + ACCESS_ONCE(rsp->gp_flags) = 0; /* Clear all flags: New grace period. */ if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) { /* @@ -1453,7 +1558,7 @@ static int rcu_gp_init(struct rcu_state *rsp) WARN_ON_ONCE(rnp->completed != rsp->completed); ACCESS_ONCE(rnp->completed) = rsp->completed; if (rnp == rdp->mynode) - __note_gp_changes(rsp, rnp, rdp); + (void)__note_gp_changes(rsp, rnp, rdp); rcu_preempt_boost_start_gp(rnp); trace_rcu_grace_period_init(rsp->name, rnp->gpnum, rnp->level, rnp->grplo, @@ -1501,7 +1606,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); - rsp->gp_flags &= ~RCU_GP_FLAG_FQS; + ACCESS_ONCE(rsp->gp_flags) &= ~RCU_GP_FLAG_FQS; raw_spin_unlock_irq(&rnp->lock); } return fqs_state; @@ -1513,6 +1618,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) static void rcu_gp_cleanup(struct rcu_state *rsp) { unsigned long gp_duration; + bool needgp = false; int nocb = 0; struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(rsp); @@ -1548,7 +1654,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) ACCESS_ONCE(rnp->completed) = rsp->gpnum; rdp = this_cpu_ptr(rsp->rda); if (rnp == rdp->mynode) - __note_gp_changes(rsp, rnp, rdp); + needgp = __note_gp_changes(rsp, rnp, rdp) || needgp; /* smp_mb() provided by prior unlock-lock pair. */ nocb += rcu_future_gp_cleanup(rsp, rnp); raw_spin_unlock_irq(&rnp->lock); @@ -1564,9 +1670,10 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); rsp->fqs_state = RCU_GP_IDLE; rdp = this_cpu_ptr(rsp->rda); - rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */ - if (cpu_needs_another_gp(rsp, rdp)) { - rsp->gp_flags = RCU_GP_FLAG_INIT; + /* Advance CBs to reduce false positives below. */ + needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp; + if (needgp || cpu_needs_another_gp(rsp, rdp)) { + ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT; trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), TPS("newreq")); @@ -1593,6 +1700,7 @@ static int __noreturn rcu_gp_kthread(void *arg) trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), TPS("reqwait")); + rsp->gp_state = RCU_GP_WAIT_GPS; wait_event_interruptible(rsp->gp_wq, ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_INIT); @@ -1620,6 +1728,7 @@ static int __noreturn rcu_gp_kthread(void *arg) trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), TPS("fqswait")); + rsp->gp_state = RCU_GP_WAIT_FQS; ret = wait_event_interruptible_timeout(rsp->gp_wq, ((gf = ACCESS_ONCE(rsp->gp_flags)) & RCU_GP_FLAG_FQS) || @@ -1665,14 +1774,6 @@ static int __noreturn rcu_gp_kthread(void *arg) } } -static void rsp_wakeup(struct irq_work *work) -{ - struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work); - - /* Wake up rcu_gp_kthread() to start the grace period. */ - wake_up(&rsp->gp_wq); -} - /* * Start a new RCU grace period if warranted, re-initializing the hierarchy * in preparation for detecting the next grace period. The caller must hold @@ -1681,8 +1782,10 @@ static void rsp_wakeup(struct irq_work *work) * Note that it is legal for a dying CPU (which is marked as offline) to * invoke this function. This can happen when the dying CPU reports its * quiescent state. + * + * Returns true if the grace-period kthread must be awakened. */ -static void +static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { @@ -1693,20 +1796,18 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, * or a grace period is already in progress. * Either way, don't start a new grace period. */ - return; + return false; } - rsp->gp_flags = RCU_GP_FLAG_INIT; + ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT; trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), TPS("newreq")); /* * We can't do wakeups while holding the rnp->lock, as that * could cause possible deadlocks with the rq->lock. Defer - * the wakeup to interrupt context. And don't bother waking - * up the running kthread. + * the wakeup to our caller. */ - if (current != rsp->gp_kthread) - irq_work_queue(&rsp->wakeup_work); + return true; } /* @@ -1715,12 +1816,14 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, * is invoked indirectly from rcu_advance_cbs(), which would result in * endless recursion -- or would do so if it wasn't for the self-deadlock * that is encountered beforehand. + * + * Returns true if the grace-period kthread needs to be awakened. */ -static void -rcu_start_gp(struct rcu_state *rsp) +static bool rcu_start_gp(struct rcu_state *rsp) { struct rcu_data *rdp = this_cpu_ptr(rsp->rda); struct rcu_node *rnp = rcu_get_root(rsp); + bool ret = false; /* * If there is no grace period in progress right now, any @@ -1730,8 +1833,9 @@ rcu_start_gp(struct rcu_state *rsp) * resulting in pointless grace periods. So, advance callbacks * then start the grace period! */ - rcu_advance_cbs(rsp, rnp, rdp); - rcu_start_gp_advanced(rsp, rnp, rdp); + ret = rcu_advance_cbs(rsp, rnp, rdp) || ret; + ret = rcu_start_gp_advanced(rsp, rnp, rdp) || ret; + return ret; } /* @@ -1820,6 +1924,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) { unsigned long flags; unsigned long mask; + bool needwake; struct rcu_node *rnp; rnp = rdp->mynode; @@ -1848,9 +1953,11 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) * This GP can't end until cpu checks in, so all of our * callbacks can be processed during the next GP. */ - rcu_accelerate_cbs(rsp, rnp, rdp); + needwake = rcu_accelerate_cbs(rsp, rnp, rdp); rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ + if (needwake) + rcu_gp_kthread_wake(rsp); } } @@ -1951,7 +2058,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) { int i; - struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); + struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); /* No-CBs CPUs are handled specially. */ if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) @@ -2320,7 +2427,7 @@ static void force_quiescent_state(struct rcu_state *rsp) raw_spin_unlock_irqrestore(&rnp_old->lock, flags); return; /* Someone beat us to it. */ } - rsp->gp_flags |= RCU_GP_FLAG_FQS; + ACCESS_ONCE(rsp->gp_flags) |= RCU_GP_FLAG_FQS; raw_spin_unlock_irqrestore(&rnp_old->lock, flags); wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ } @@ -2334,7 +2441,8 @@ static void __rcu_process_callbacks(struct rcu_state *rsp) { unsigned long flags; - struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); + bool needwake; + struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); WARN_ON_ONCE(rdp->beenonline == 0); @@ -2345,8 +2453,10 @@ __rcu_process_callbacks(struct rcu_state *rsp) local_irq_save(flags); if (cpu_needs_another_gp(rsp, rdp)) { raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */ - rcu_start_gp(rsp); + needwake = rcu_start_gp(rsp); raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); + if (needwake) + rcu_gp_kthread_wake(rsp); } else { local_irq_restore(flags); } @@ -2404,6 +2514,8 @@ static void invoke_rcu_core(void) static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, struct rcu_head *head, unsigned long flags) { + bool needwake; + /* * If called from an extended quiescent state, invoke the RCU * core in order to force a re-evaluation of RCU's idleness. @@ -2433,8 +2545,10 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, raw_spin_lock(&rnp_root->lock); smp_mb__after_unlock_lock(); - rcu_start_gp(rsp); + needwake = rcu_start_gp(rsp); raw_spin_unlock(&rnp_root->lock); + if (needwake) + rcu_gp_kthread_wake(rsp); } else { /* Give the grace period a kick. */ rdp->blimit = LONG_MAX; @@ -2537,6 +2651,20 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) EXPORT_SYMBOL_GPL(call_rcu_bh); /* + * Queue an RCU callback for lazy invocation after a grace period. + * This will likely be later named something like "call_rcu_lazy()", + * but this change will require some way of tagging the lazy RCU + * callbacks in the list of pending callbacks. Until then, this + * function may only be called from __kfree_rcu(). + */ +void kfree_call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, rcu_state_p, -1, 1); +} +EXPORT_SYMBOL_GPL(kfree_call_rcu); + +/* * Because a context switch is a grace period for RCU-sched and RCU-bh, * any blocking grace-period wait automatically implies a grace period * if there is only one CPU online at any point time during execution @@ -2659,7 +2787,7 @@ unsigned long get_state_synchronize_rcu(void) * time-consuming work between get_state_synchronize_rcu() * and cond_synchronize_rcu(). */ - return smp_load_acquire(&rcu_state->gpnum); + return smp_load_acquire(&rcu_state_p->gpnum); } EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); @@ -2685,7 +2813,7 @@ void cond_synchronize_rcu(unsigned long oldstate) * Ensure that this load happens before any RCU-destructive * actions the caller might carry out after we return. */ - newstate = smp_load_acquire(&rcu_state->completed); + newstate = smp_load_acquire(&rcu_state_p->completed); if (ULONG_CMP_GE(oldstate, newstate)) synchronize_rcu(); } @@ -2790,7 +2918,7 @@ void synchronize_sched_expedited(void) s = atomic_long_read(&rsp->expedited_done); if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) { /* ensure test happens before caller kfree */ - smp_mb__before_atomic_inc(); /* ^^^ */ + smp_mb__before_atomic(); /* ^^^ */ atomic_long_inc(&rsp->expedited_workdone1); return; } @@ -2808,7 +2936,7 @@ void synchronize_sched_expedited(void) s = atomic_long_read(&rsp->expedited_done); if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) { /* ensure test happens before caller kfree */ - smp_mb__before_atomic_inc(); /* ^^^ */ + smp_mb__before_atomic(); /* ^^^ */ atomic_long_inc(&rsp->expedited_workdone2); return; } @@ -2837,7 +2965,7 @@ void synchronize_sched_expedited(void) s = atomic_long_read(&rsp->expedited_done); if (ULONG_CMP_GE((ulong)s, (ulong)snap)) { /* ensure test happens before caller kfree */ - smp_mb__before_atomic_inc(); /* ^^^ */ + smp_mb__before_atomic(); /* ^^^ */ atomic_long_inc(&rsp->expedited_done_lost); break; } @@ -2988,7 +3116,7 @@ static void rcu_barrier_callback(struct rcu_head *rhp) static void rcu_barrier_func(void *type) { struct rcu_state *rsp = type; - struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); + struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done); atomic_inc(&rsp->barrier_cpu_count); @@ -3160,7 +3288,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) * that this CPU cannot possibly have any RCU callbacks in flight yet. */ static void -rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) +rcu_init_percpu_data(int cpu, struct rcu_state *rsp) { unsigned long flags; unsigned long mask; @@ -3173,7 +3301,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) /* Set up local state, ensuring consistent view of global state. */ raw_spin_lock_irqsave(&rnp->lock, flags); rdp->beenonline = 1; /* We have now been online. */ - rdp->preemptible = preemptible; rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; @@ -3217,8 +3344,7 @@ static void rcu_prepare_cpu(int cpu) struct rcu_state *rsp; for_each_rcu_flavor(rsp) - rcu_init_percpu_data(cpu, rsp, - strcmp(rsp->name, "rcu_preempt") == 0); + rcu_init_percpu_data(cpu, rsp); } /* @@ -3228,7 +3354,7 @@ static int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); + struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu); struct rcu_node *rnp = rdp->mynode; struct rcu_state *rsp; @@ -3402,8 +3528,8 @@ static void __init rcu_init_one(struct rcu_state *rsp, rnp->qsmaskinit = 0; rnp->grplo = j * cpustride; rnp->grphi = (j + 1) * cpustride - 1; - if (rnp->grphi >= NR_CPUS) - rnp->grphi = NR_CPUS - 1; + if (rnp->grphi >= nr_cpu_ids) + rnp->grphi = nr_cpu_ids - 1; if (i == 0) { rnp->grpnum = 0; rnp->grpmask = 0; @@ -3422,7 +3548,6 @@ static void __init rcu_init_one(struct rcu_state *rsp, rsp->rda = rda; init_waitqueue_head(&rsp->gp_wq); - init_irq_work(&rsp->wakeup_work, rsp_wakeup); rnp = rsp->level[rcu_num_lvls - 1]; for_each_possible_cpu(i) { while (i > rnp->grphi) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 75dc3c39a02a..bf2c1e669691 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -252,7 +252,6 @@ struct rcu_data { bool passed_quiesce; /* User-mode/idle loop etc. */ bool qs_pending; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ - bool preemptible; /* Preemptible RCU? */ struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ unsigned long grpmask; /* Mask to apply to leaf qsmask. */ #ifdef CONFIG_RCU_CPU_STALL_INFO @@ -406,7 +405,8 @@ struct rcu_state { unsigned long completed; /* # of last completed gp. */ struct task_struct *gp_kthread; /* Task for grace periods. */ wait_queue_head_t gp_wq; /* Where GP task waits. */ - int gp_flags; /* Commands for GP task. */ + short gp_flags; /* Commands for GP task. */ + short gp_state; /* GP kthread sleep state. */ /* End of fields guarded by root rcu_node's lock. */ @@ -462,13 +462,17 @@ struct rcu_state { const char *name; /* Name of structure. */ char abbr; /* Abbreviated name. */ struct list_head flavors; /* List of RCU flavors. */ - struct irq_work wakeup_work; /* Postponed wakeups */ }; /* Values for rcu_state structure's gp_flags field. */ #define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */ #define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */ +/* Values for rcu_state structure's gp_flags field. */ +#define RCU_GP_WAIT_INIT 0 /* Initial state. */ +#define RCU_GP_WAIT_GPS 1 /* Wait for grace-period start. */ +#define RCU_GP_WAIT_FQS 2 /* Wait for force-quiescent-state time. */ + extern struct list_head rcu_struct_flavors; /* Sequence through rcu_state structures for each RCU flavor. */ @@ -547,7 +551,6 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); static void print_cpu_stall_info_end(void); static void zero_cpu_stall_ticks(struct rcu_data *rdp); static void increment_cpu_stall_ticks(void); -static int rcu_nocb_needs_gp(struct rcu_state *rsp); static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); static void rcu_init_one_nocb(struct rcu_node *rnp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 962d1d589929..cbc2c45265e2 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -116,7 +116,7 @@ static void __init rcu_bootup_announce_oddness(void) #ifdef CONFIG_TREE_PREEMPT_RCU RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); -static struct rcu_state *rcu_state = &rcu_preempt_state; +static struct rcu_state *rcu_state_p = &rcu_preempt_state; static int rcu_preempted_readers_exp(struct rcu_node *rnp); @@ -149,15 +149,6 @@ long rcu_batches_completed(void) EXPORT_SYMBOL_GPL(rcu_batches_completed); /* - * Force a quiescent state for preemptible RCU. - */ -void rcu_force_quiescent_state(void) -{ - force_quiescent_state(&rcu_preempt_state); -} -EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); - -/* * Record a preemptible-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is * not in a quiescent state. There might be any number of tasks blocked @@ -688,20 +679,6 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu); -/* - * Queue an RCU callback for lazy invocation after a grace period. - * This will likely be later named something like "call_rcu_lazy()", - * but this change will require some way of tagging the lazy RCU - * callbacks in the list of pending callbacks. Until then, this - * function may only be called from __kfree_rcu(). - */ -void kfree_call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) -{ - __call_rcu(head, func, &rcu_preempt_state, -1, 1); -} -EXPORT_SYMBOL_GPL(kfree_call_rcu); - /** * synchronize_rcu - wait until a grace period has elapsed. * @@ -970,7 +947,7 @@ void exit_rcu(void) #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -static struct rcu_state *rcu_state = &rcu_sched_state; +static struct rcu_state *rcu_state_p = &rcu_sched_state; /* * Tell them what RCU they are running. @@ -991,16 +968,6 @@ long rcu_batches_completed(void) EXPORT_SYMBOL_GPL(rcu_batches_completed); /* - * Force a quiescent state for RCU, which, because there is no preemptible - * RCU, becomes the same as rcu-sched. - */ -void rcu_force_quiescent_state(void) -{ - rcu_sched_force_quiescent_state(); -} -EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); - -/* * Because preemptible RCU does not exist, we never have to check for * CPUs being in quiescent states. */ @@ -1080,22 +1047,6 @@ static void rcu_preempt_check_callbacks(int cpu) } /* - * Queue an RCU callback for lazy invocation after a grace period. - * This will likely be later named something like "call_rcu_lazy()", - * but this change will require some way of tagging the lazy RCU - * callbacks in the list of pending callbacks. Until then, this - * function may only be called from __kfree_rcu(). - * - * Because there is no preemptible RCU, we use RCU-sched instead. - */ -void kfree_call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) -{ - __call_rcu(head, func, &rcu_sched_state, -1, 1); -} -EXPORT_SYMBOL_GPL(kfree_call_rcu); - -/* * Wait for an rcu-preempt grace period, but make it happen quickly. * But because preemptible RCU does not exist, map to rcu-sched. */ @@ -1517,11 +1468,11 @@ static int __init rcu_spawn_kthreads(void) for_each_possible_cpu(cpu) per_cpu(rcu_cpu_has_work, cpu) = 0; BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); - rnp = rcu_get_root(rcu_state); - (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); + rnp = rcu_get_root(rcu_state_p); + (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); if (NUM_RCU_NODES > 1) { - rcu_for_each_leaf_node(rcu_state, rnp) - (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); + rcu_for_each_leaf_node(rcu_state_p, rnp) + (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); } return 0; } @@ -1529,12 +1480,12 @@ early_initcall(rcu_spawn_kthreads); static void rcu_prepare_kthreads(int cpu) { - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); + struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu); struct rcu_node *rnp = rdp->mynode; /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ if (rcu_scheduler_fully_active) - (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); + (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); } #else /* #ifdef CONFIG_RCU_BOOST */ @@ -1744,6 +1695,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj) static void rcu_prepare_for_idle(int cpu) { #ifndef CONFIG_RCU_NOCB_CPU_ALL + bool needwake; struct rcu_data *rdp; struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); struct rcu_node *rnp; @@ -1792,8 +1744,10 @@ static void rcu_prepare_for_idle(int cpu) rnp = rdp->mynode; raw_spin_lock(&rnp->lock); /* irqs already disabled. */ smp_mb__after_unlock_lock(); - rcu_accelerate_cbs(rsp, rnp, rdp); + needwake = rcu_accelerate_cbs(rsp, rnp, rdp); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + if (needwake) + rcu_gp_kthread_wake(rsp); } #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ } @@ -1855,7 +1809,7 @@ static void rcu_oom_notify_cpu(void *unused) struct rcu_data *rdp; for_each_rcu_flavor(rsp) { - rdp = __this_cpu_ptr(rsp->rda); + rdp = raw_cpu_ptr(rsp->rda); if (rdp->qlen_lazy != 0) { atomic_inc(&oom_callback_count); rsp->call(&rdp->oom_head, rcu_oom_callback); @@ -1997,7 +1951,7 @@ static void increment_cpu_stall_ticks(void) struct rcu_state *rsp; for_each_rcu_flavor(rsp) - __this_cpu_ptr(rsp->rda)->ticks_this_gp++; + raw_cpu_inc(rsp->rda->ticks_this_gp); } #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ @@ -2068,19 +2022,6 @@ static int __init parse_rcu_nocb_poll(char *arg) early_param("rcu_nocb_poll", parse_rcu_nocb_poll); /* - * Do any no-CBs CPUs need another grace period? - * - * Interrupts must be disabled. If the caller does not hold the root - * rnp_node structure's ->lock, the results are advisory only. - */ -static int rcu_nocb_needs_gp(struct rcu_state *rsp) -{ - struct rcu_node *rnp = rcu_get_root(rsp); - - return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1]; -} - -/* * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended * grace period. */ @@ -2109,7 +2050,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) } #ifndef CONFIG_RCU_NOCB_CPU_ALL -/* Is the specified CPU a no-CPUs CPU? */ +/* Is the specified CPU a no-CBs CPU? */ bool rcu_is_nocb_cpu(int cpu) { if (have_rcu_nocb_mask) @@ -2243,12 +2184,15 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) unsigned long c; bool d; unsigned long flags; + bool needwake; struct rcu_node *rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - c = rcu_start_future_gp(rnp, rdp); + needwake = rcu_start_future_gp(rnp, rdp, &c); raw_spin_unlock_irqrestore(&rnp->lock, flags); + if (needwake) + rcu_gp_kthread_wake(rdp->rsp); /* * Wait for the grace period. Do so interruptibly to avoid messing @@ -2402,11 +2346,6 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) #else /* #ifdef CONFIG_RCU_NOCB_CPU */ -static int rcu_nocb_needs_gp(struct rcu_state *rsp) -{ - return 0; -} - static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) { } @@ -2523,9 +2462,9 @@ static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq) /* Record start of fully idle period. */ j = jiffies; ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j; - smp_mb__before_atomic_inc(); + smp_mb__before_atomic(); atomic_inc(&rdtp->dynticks_idle); - smp_mb__after_atomic_inc(); + smp_mb__after_atomic(); WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1); } @@ -2590,9 +2529,9 @@ static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq) } /* Record end of idle period. */ - smp_mb__before_atomic_inc(); + smp_mb__before_atomic(); atomic_inc(&rdtp->dynticks_idle); - smp_mb__after_atomic_inc(); + smp_mb__after_atomic(); WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1)); /* @@ -2657,20 +2596,6 @@ static bool is_sysidle_rcu_state(struct rcu_state *rsp) } /* - * Bind the grace-period kthread for the sysidle flavor of RCU to the - * timekeeping CPU. - */ -static void rcu_bind_gp_kthread(void) -{ - int cpu = ACCESS_ONCE(tick_do_timer_cpu); - - if (cpu < 0 || cpu >= nr_cpu_ids) - return; - if (raw_smp_processor_id() != cpu) - set_cpus_allowed_ptr(current, cpumask_of(cpu)); -} - -/* * Return a delay in jiffies based on the number of CPUs, rcu_node * leaf fanout, and jiffies tick rate. The idea is to allow larger * systems more time to transition to full-idle state in order to @@ -2734,7 +2659,8 @@ static void rcu_sysidle(unsigned long j) static void rcu_sysidle_cancel(void) { smp_mb(); - ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT; + if (full_sysidle_state > RCU_SYSIDLE_SHORT) + ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT; } /* @@ -2880,10 +2806,6 @@ static bool is_sysidle_rcu_state(struct rcu_state *rsp) return false; } -static void rcu_bind_gp_kthread(void) -{ -} - static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, unsigned long maxj) { @@ -2914,3 +2836,19 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp) #endif /* #ifdef CONFIG_NO_HZ_FULL */ return 0; } + +/* + * Bind the grace-period kthread for the sysidle flavor of RCU to the + * timekeeping CPU. + */ +static void rcu_bind_gp_kthread(void) +{ +#ifdef CONFIG_NO_HZ_FULL + int cpu = ACCESS_ONCE(tick_do_timer_cpu); + + if (cpu < 0 || cpu >= nr_cpu_ids) + return; + if (raw_smp_processor_id() != cpu) + set_cpus_allowed_ptr(current, cpumask_of(cpu)); +#endif /* #ifdef CONFIG_NO_HZ_FULL */ +} diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 4c0a9b0af469..a2aeb4df0f60 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -320,6 +320,18 @@ int rcu_jiffies_till_stall_check(void) return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; } +void rcu_sysrq_start(void) +{ + if (!rcu_cpu_stall_suppress) + rcu_cpu_stall_suppress = 2; +} + +void rcu_sysrq_end(void) +{ + if (rcu_cpu_stall_suppress == 2) + rcu_cpu_stall_suppress = 0; +} + static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) { rcu_cpu_stall_suppress = 1; @@ -338,3 +350,21 @@ static int __init check_cpu_stall_init(void) early_initcall(check_cpu_stall_init); #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ + +/* + * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings. + */ + +DEFINE_PER_CPU(int, rcu_cond_resched_count); + +/* + * Report a set of RCU quiescent states, for use by cond_resched() + * and friends. Out of line due to being called infrequently. + */ +void rcu_resched(void) +{ + preempt_disable(); + __this_cpu_write(rcu_cond_resched_count, 0); + rcu_note_context_switch(smp_processor_id()); + preempt_enable(); +} |