diff options
author | Paul E. McKenney <paulmck@kernel.org> | 2022-03-09 02:45:33 +0300 |
---|---|---|
committer | Paul E. McKenney <paulmck@kernel.org> | 2022-05-03 20:20:57 +0300 |
commit | 282d8998e9979c2186af7f7d22366f2fc3149838 (patch) | |
tree | a89b3241fcd01074c023062015e24de1440f5066 /kernel/rcu | |
parent | c2445d38785086422e56dcbe049b73a53b2ba81f (diff) | |
download | linux-282d8998e9979c2186af7f7d22366f2fc3149838.tar.xz |
srcu: Prevent expedited GPs and blocking readers from consuming CPU

If an SRCU reader blocks while a synchronize_srcu_expedited() waits for
that same reader, then that grace period will spawn an endless series of
workqueue handlers, consuming a full CPU. This quickly gets pointless
because consuming more CPU isn't going to make that reader get done
faster, especially if it is blocked waiting for an external event.

This commit therefore spawns at most one pair of back-to-back workqueue
handlers per expedited grace period phase, instead inserting increasing
delays as that grace period phase grows older, but capped at 10 jiffies.
In any case, if there have been at least 100 back-to-back workqueue
handlers within a single jiffy, regardless of grace period or grace-period
phase, then a one-jiffy delay is inserted.

[ paulmck: Apply feedback from kernel test robot. ]

Cc: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Reported-by: Song Liu <song@kernel.org>
Tested-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>

Diffstat (limited to 'kernel/rcu')
-rw-r--r-- | kernel/rcu/srcutree.c | 44 |
1 files changed, 38 insertions, 6 deletions
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 862008c147b0..6dd44e759f12 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -511,7 +511,10 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
 	return sum;
 }
 
-#define SRCU_INTERVAL		1
+#define SRCU_INTERVAL		1	// Base delay if no expedited GPs pending.
+#define SRCU_MAX_INTERVAL	10	// Maximum incremental delay from slow readers.
+#define SRCU_MAX_NODELAY_PHASE	1	// Maximum per-GP-phase consecutive no-delay instances.
+#define SRCU_MAX_NODELAY	100	// Maximum consecutive no-delay instances.
 
 /*
  * Return grace-period delay, zero if there are expedited grace
@@ -519,9 +522,18 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
  */
 static unsigned long srcu_get_delay(struct srcu_struct *ssp)
 {
+	unsigned long jbase = SRCU_INTERVAL;
+
 	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
-		return 0;
-	return SRCU_INTERVAL;
+		jbase = 0;
+	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
+		jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
+	if (!jbase) {
+		WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
+		if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
+			jbase = 1;
+	}
+	return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
 }
 
 /**
@@ -623,6 +635,8 @@ static void srcu_gp_start(struct srcu_struct *ssp)
 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
 				       rcu_seq_snap(&ssp->srcu_gp_seq));
 	spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */
+	WRITE_ONCE(ssp->srcu_gp_start, jiffies);
+	WRITE_ONCE(ssp->srcu_n_exp_nodelay, 0);
 	smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
 	rcu_seq_start(&ssp->srcu_gp_seq);
 	state = rcu_seq_state(ssp->srcu_gp_seq);
@@ -706,7 +720,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
 	spin_lock_irq_rcu_node(ssp);
 	idx = rcu_seq_state(ssp->srcu_gp_seq);
 	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
-	cbdelay = srcu_get_delay(ssp);
+	cbdelay = !!srcu_get_delay(ssp);
 	WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
 	rcu_seq_end(&ssp->srcu_gp_seq);
 	gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
@@ -893,7 +907,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
 		// the one boot CPU running with interrupts still disabled.
 		if (likely(srcu_init_done))
 			queue_delayed_work(rcu_gp_wq, &ssp->work,
-					   srcu_get_delay(ssp));
+					   !!srcu_get_delay(ssp));
 		else if (list_empty(&ssp->work.work.entry))
 			list_add(&ssp->work.work.entry, &srcu_boot_list);
 	}
@@ -1448,6 +1462,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
 		srcu_flip(ssp);
 		spin_lock_irq_rcu_node(ssp);
 		rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2);
+		ssp->srcu_n_exp_nodelay = 0;
 		spin_unlock_irq_rcu_node(ssp);
 	}
 
@@ -1462,6 +1477,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
 			mutex_unlock(&ssp->srcu_gp_mutex);
 			return; /* readers present, retry later. */
 		}
+		ssp->srcu_n_exp_nodelay = 0;
 		srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. */
 	}
 }
@@ -1552,12 +1568,28 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
  */
 static void process_srcu(struct work_struct *work)
 {
+	unsigned long curdelay;
+	unsigned long j;
 	struct srcu_struct *ssp;
 
 	ssp = container_of(work, struct srcu_struct, work.work);
 
 	srcu_advance_state(ssp);
-	srcu_reschedule(ssp, srcu_get_delay(ssp));
+	curdelay = srcu_get_delay(ssp);
+	if (curdelay) {
+		WRITE_ONCE(ssp->reschedule_count, 0);
+	} else {
+		j = jiffies;
+		if (READ_ONCE(ssp->reschedule_jiffies) == j) {
+			WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1);
+			if (READ_ONCE(ssp->reschedule_count) > SRCU_MAX_NODELAY)
+				curdelay = 1;
+		} else {
+			WRITE_ONCE(ssp->reschedule_count, 1);
+			WRITE_ONCE(ssp->reschedule_jiffies, j);
+		}
+	}
+	srcu_reschedule(ssp, curdelay);
 }
 
 void srcutorture_get_gp_data(enum rcutorture_type test_type,