From c7e88067c1ae89e7bcbed070fb2c4e30bc39b51f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 18 Apr 2017 16:01:46 -0700
Subject: srcu: Exact tracking of srcu_data structures containing callbacks

The current Tree SRCU implementation schedules a workqueue for every
srcu_data covered by a given leaf srcu_node structure having callbacks,
even if only one of those srcu_data structures actually contains
callbacks.  This is clearly inefficient for workloads that don't feature
callbacks everywhere all the time.  This commit therefore adds an array
of masks that are used by the leaf srcu_node structures to track exactly
which srcu_data structures contain callbacks.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Mike Galbraith <efault@gmx.de>
---
 include/linux/srcutree.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 0400e211aa44..94515ff226fb 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -47,6 +47,8 @@ struct srcu_data {
 	struct delayed_work work;		/* Context for CB invoking. */
 	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
 	struct srcu_node *mynode;		/* Leaf srcu_node. */
+	unsigned long grpmask;			/* Mask for leaf srcu_node */
+						/*  ->srcu_data_have_cbs[]. */
 	int cpu;
 	struct srcu_struct *sp;
 };
@@ -59,6 +61,8 @@ struct srcu_node {
 	unsigned long srcu_have_cbs[4];		/* GP seq for children */
 						/*  having CBs, but only */
 						/*  is > ->srcu_gp_seq. */
+	unsigned long srcu_data_have_cbs[4];	/* Which srcu_data structs */
+						/*  have CBs for given GP? */
 	struct srcu_node *srcu_parent;		/* Next up in tree. */
 	int grplo;				/* Least CPU for node. */
 	int grphi;				/* Biggest CPU for node. */
-- 
cgit v1.2.3
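To see the bookkeeping this patch introduces in isolation: the sketch below models ->grpmask and ->srcu_data_have_cbs[] in plain userspace C.  The GRPLO/GRPHI bounds and the two example CPUs are invented for illustration; this is not kernel code.

#include <stdio.h>

#define GRPLO 8   /* first CPU covered by this leaf node (assumed) */
#define GRPHI 15  /* last CPU covered by this leaf node (assumed) */

int main(void)
{
	unsigned long srcu_data_have_cbs = 0;
	int cpu;

	/* CPUs 9 and 12 enqueue callbacks: each sets its per-CPU bit,
	 * as sdp->grpmask does in the patch above. */
	srcu_data_have_cbs |= 1UL << (9 - GRPLO);
	srcu_data_have_cbs |= 1UL << (12 - GRPLO);

	/* At grace-period end, schedule callback invocation only for
	 * CPUs whose bit is set, rather than for every CPU under the
	 * leaf. */
	for (cpu = GRPLO; cpu <= GRPHI; cpu++)
		if (srcu_data_have_cbs & (1UL << (cpu - GRPLO)))
			printf("schedule callbacks for CPU %d\n", cpu);
	return 0;
}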
From 7f6733c3c648ddd6cf459c1b80ad388a95452955 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 18 Apr 2017 17:17:35 -0700
Subject: srcu: Make rcutorture writer stalls print SRCU GP state

In the past, SRCU was simple enough that there was little point in
making the rcutorture writer stall messages print the SRCU grace-period
number state.  With the advent of Tree SRCU, this has changed.  This
commit therefore makes Classic, Tiny, and Tree SRCU report this state
to rcutorture as needed.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Mike Galbraith <efault@gmx.de>
---
 include/linux/srcuclassic.h | 14 ++++++++++++++
 include/linux/srcutiny.h    | 12 ++++++++++++
 include/linux/srcutree.h    |  4 ++++
 kernel/rcu/rcutorture.c     |  8 +++++---
 kernel/rcu/srcutree.c       | 13 +++++++++++++
 kernel/rcu/tree.c           | 12 ++++--------
 6 files changed, 52 insertions(+), 11 deletions(-)

diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h
index 41cf99930f34..5753f7322262 100644
--- a/include/linux/srcuclassic.h
+++ b/include/linux/srcuclassic.h
@@ -98,4 +98,18 @@ void synchronize_srcu_expedited(struct srcu_struct *sp);
 void srcu_barrier(struct srcu_struct *sp);
 unsigned long srcu_batches_completed(struct srcu_struct *sp);
 
+static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
+					   struct srcu_struct *sp, int *flags,
+					   unsigned long *gpnum,
+					   unsigned long *completed)
+{
+	if (test_type != SRCU_FLAVOR)
+		return;
+	*flags = 0;
+	*completed = sp->completed;
+	*gpnum = *completed;
+	if (sp->batch_queue.head || sp->batch_check0.head ||
+	    sp->batch_check1.head)
+		(*gpnum)++;
+}
+
 #endif
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 4f284e4f4d8c..42311ee0334f 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -78,4 +78,16 @@ static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
 	return 0;
 }
 
+static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
+					   struct srcu_struct *sp, int *flags,
+					   unsigned long *gpnum,
+					   unsigned long *completed)
+{
+	if (test_type != SRCU_FLAVOR)
+		return;
+	*flags = 0;
+	*completed = sp->srcu_gp_seq;
+	*gpnum = *completed;
+}
+
 #endif
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 94515ff226fb..3865717df124 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -140,4 +140,8 @@ void synchronize_srcu_expedited(struct srcu_struct *sp);
 void srcu_barrier(struct srcu_struct *sp);
 unsigned long srcu_batches_completed(struct srcu_struct *sp);
 
+void srcutorture_get_gp_data(enum rcutorture_type test_type,
+			     struct srcu_struct *sp, int *flags,
+			     unsigned long *gpnum, unsigned long *completed);
+
 #endif
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index e9d4527cdd43..ae6e574d4cf5 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1360,12 +1360,14 @@ rcu_torture_stats_print(void)
 		cur_ops->stats();
 	if (rtcv_snap == rcu_torture_current_version &&
 	    rcu_torture_current != NULL) {
-		int __maybe_unused flags;
-		unsigned long __maybe_unused gpnum;
-		unsigned long __maybe_unused completed;
+		int __maybe_unused flags = 0;
+		unsigned long __maybe_unused gpnum = 0;
+		unsigned long __maybe_unused completed = 0;
 
 		rcutorture_get_gp_data(cur_ops->ttype,
 				       &flags, &gpnum, &completed);
+		srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
+					&flags, &gpnum, &completed);
 		wtp = READ_ONCE(writer_task);
 		pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx\n",
 			 rcu_torture_writer_state_getname(),
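The reporting contract above is easiest to see in isolation: each per-flavor helper either fills in all three outputs or leaves them untouched, which is why rcutorture now zero-initializes them and can safely call both helpers back to back.  In the userspace model below the helper bodies, values, and the dropped srcu_struct argument are stand-ins, not the kernel's implementations.

#include <stdio.h>

enum rcutorture_type { RCU_FLAVOR, SRCU_FLAVOR };

static void rcutorture_get_gp_data(enum rcutorture_type t, int *flags,
				   unsigned long *gpnum,
				   unsigned long *completed)
{
	if (t != RCU_FLAVOR)
		return;		/* Not our flavor: leave outputs alone. */
	*flags = 0x1;
	*gpnum = 43;
	*completed = 42;
}

static void srcutorture_get_gp_data(enum rcutorture_type t, int *flags,
				    unsigned long *gpnum,
				    unsigned long *completed)
{
	if (t != SRCU_FLAVOR)
		return;
	*flags = 0;
	*gpnum = 8;
	*completed = 8;
}

int main(void)
{
	int flags = 0;
	unsigned long gpnum = 0, completed = 0;

	/* SRCU run: only the SRCU helper writes the outputs, and the
	 * zero initializers guarantee a sane stall report either way. */
	rcutorture_get_gp_data(SRCU_FLAVOR, &flags, &gpnum, &completed);
	srcutorture_get_gp_data(SRCU_FLAVOR, &flags, &gpnum, &completed);
	printf("g%lu c%lu f%#x\n", gpnum, completed, flags);
	return 0;
}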
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 1c2c1004b3b1..72b6cce5f591 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -1011,3 +1011,16 @@ void process_srcu(struct work_struct *work)
 	srcu_reschedule(sp, atomic_read(&sp->srcu_exp_cnt) ? 0 : SRCU_INTERVAL);
 }
 EXPORT_SYMBOL_GPL(process_srcu);
+
+void srcutorture_get_gp_data(enum rcutorture_type test_type,
+			     struct srcu_struct *sp, int *flags,
+			     unsigned long *gpnum,
+			     unsigned long *completed)
+{
+	if (test_type != SRCU_FLAVOR)
+		return;
+	*flags = 0;
+	*completed = rcu_seq_ctr(sp->srcu_gp_seq);
+	*gpnum = rcu_seq_ctr(sp->srcu_gp_seq_needed);
+}
+EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 23aa02587d0f..91fff49d5869 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -704,15 +704,11 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
 	default:
 		break;
 	}
-	if (rsp != NULL) {
-		*flags = READ_ONCE(rsp->gp_flags);
-		*gpnum = READ_ONCE(rsp->gpnum);
-		*completed = READ_ONCE(rsp->completed);
+	if (rsp == NULL)
 		return;
-	}
-	*flags = 0;
-	*gpnum = 0;
-	*completed = 0;
+	*flags = READ_ONCE(rsp->gp_flags);
+	*gpnum = READ_ONCE(rsp->gpnum);
+	*completed = READ_ONCE(rsp->completed);
 }
 EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
-- 
cgit v1.2.3

From 1e9a038b7fe9a8c10ef1238f4e695d5fbe0dd594 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 24 Apr 2017 16:02:09 -0700
Subject: srcu: Expedited grace periods with reduced memory contention

Commit f60d231a87c5 ("srcu: Crude control of expedited grace periods")
introduced a per-srcu_struct atomic counter to track outstanding
requests for grace periods.  This works, but represents a
memory-contention bottleneck.  This commit therefore uses the srcu_node
combining tree to remove this bottleneck.

This commit adds new ->srcu_gp_seq_needed_exp fields to the srcu_data,
srcu_node, and srcu_struct structures, which track the
farthest-in-the-future grace period that must be expedited, which in
turn requires that all nearer-term grace periods also be expedited.
Requests for expediting start with the srcu_data structure, run up
through the srcu_node tree, and end at the srcu_struct structure.
Note that it may be necessary to expedite a grace period that has just
now started, and this is handled by a new srcu_funnel_exp_start()
function, which is invoked when the grace period itself is already on
its way, but when that grace period was not marked as expedited.

A new srcu_get_delay() function returns zero if there is at least one
expedited SRCU grace period in flight, or SRCU_INTERVAL otherwise.
This function is used to calculate delays: Normal grace periods are
allowed to extend in order to cover more requests with a given
grace-period computation, which decreases per-request overhead.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Mike Galbraith <efault@gmx.de>
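Before the diff itself, a toy model of the funnel walk described above: climb from the leaf toward the root, stopping as soon as some ancestor already records a request at least as new as ours, so that contended requests drop out of the walk early instead of all hammering the root.  The struct layout, the absence of locking, and the plain >= comparison (the kernel uses ULONG_CMP_GE()) are all simplifications.

#include <stdio.h>

struct node {
	unsigned long gp_seq_needed_exp;
	struct node *parent;
};

static void funnel_exp_start(struct node *snp, unsigned long s)
{
	for (; snp; snp = snp->parent) {
		if (snp->gp_seq_needed_exp >= s)
			return;		/* Someone beat us to it. */
		snp->gp_seq_needed_exp = s;
	}
}

int main(void)
{
	struct node root = { 0, NULL };
	struct node leaf = { 0, &root };

	funnel_exp_start(&leaf, 8);	/* records 8 at leaf and root */
	funnel_exp_start(&leaf, 4);	/* stops at the leaf: 8 >= 4 */
	printf("leaf=%lu root=%lu\n",
	       leaf.gp_seq_needed_exp, root.gp_seq_needed_exp);
	return 0;
}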
---
 include/linux/srcutree.h |   4 +-
 kernel/rcu/srcutree.c    | 135 +++++++++++++++++++++++++++++++++--------------
 2 files changed, 98 insertions(+), 41 deletions(-)

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 3865717df124..86df48d3e97b 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -43,6 +43,7 @@ struct srcu_data {
 	spinlock_t lock ____cacheline_internodealigned_in_smp;
 	struct rcu_segcblist srcu_cblist;	/* List of callbacks. */
 	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
+	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
 	bool srcu_cblist_invoking;		/* Invoking these CBs? */
 	struct delayed_work work;		/* Context for CB invoking. */
 	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
@@ -63,6 +64,7 @@ struct srcu_node {
 						/*  is > ->srcu_gp_seq. */
 	unsigned long srcu_data_have_cbs[4];	/* Which srcu_data structs */
 						/*  have CBs for given GP? */
+	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
 	struct srcu_node *srcu_parent;		/* Next up in tree. */
 	int grplo;				/* Least CPU for node. */
 	int grphi;				/* Biggest CPU for node. */
@@ -81,7 +83,7 @@ struct srcu_struct {
 	unsigned int srcu_idx;			/* Current rdr array element. */
 	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
 	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
-	atomic_t srcu_exp_cnt;			/* # ongoing expedited GPs. */
+	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
 	struct srcu_data __percpu *sda;		/* Per-CPU srcu_data array. */
 	unsigned long srcu_barrier_seq;		/* srcu_barrier seq #. */
 	struct mutex srcu_barrier_mutex;	/* Serialize barrier ops. */
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 72b6cce5f591..4b98e6f45166 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -72,6 +72,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
 			snp->srcu_have_cbs[i] = 0;
 			snp->srcu_data_have_cbs[i] = 0;
 		}
+		snp->srcu_gp_seq_needed_exp = 0;
 		snp->grplo = -1;
 		snp->grphi = -1;
 		if (snp == &sp->node[0]) {
@@ -102,6 +103,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
 		rcu_segcblist_init(&sdp->srcu_cblist);
 		sdp->srcu_cblist_invoking = false;
 		sdp->srcu_gp_seq_needed = sp->srcu_gp_seq;
+		sdp->srcu_gp_seq_needed_exp = sp->srcu_gp_seq;
 		sdp->mynode = &snp_first[cpu / levelspread[level]];
 		for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
 			if (snp->grplo < 0)
@@ -135,7 +137,6 @@ static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
 	mutex_init(&sp->srcu_gp_mutex);
 	sp->srcu_idx = 0;
 	sp->srcu_gp_seq = 0;
-	atomic_set(&sp->srcu_exp_cnt, 0);
 	sp->srcu_barrier_seq = 0;
 	mutex_init(&sp->srcu_barrier_mutex);
 	atomic_set(&sp->srcu_barrier_cpu_cnt, 0);
@@ -143,6 +144,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
 	if (!is_static)
 		sp->sda = alloc_percpu(struct srcu_data);
 	init_srcu_struct_nodes(sp, is_static);
+	sp->srcu_gp_seq_needed_exp = 0;
 	smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */
 	return sp->sda ? 0 : -ENOMEM;
 }
@@ -307,6 +309,18 @@ static bool srcu_readers_active(struct srcu_struct *sp)
 
 #define SRCU_INTERVAL	1
 
+/*
+ * Return grace-period delay, zero if there are expedited grace
+ * periods pending, SRCU_INTERVAL otherwise.
+ */
+static unsigned long srcu_get_delay(struct srcu_struct *sp)
+{
+	if (ULONG_CMP_LT(READ_ONCE(sp->srcu_gp_seq),
+			 READ_ONCE(sp->srcu_gp_seq_needed_exp)))
+		return 0;
+	return SRCU_INTERVAL;
+}
+
 /**
  * cleanup_srcu_struct - deconstruct a sleep-RCU structure
  * @sp: structure to clean up.
@@ -318,7 +332,8 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
 {
 	int cpu;
 
-	WARN_ON_ONCE(atomic_read(&sp->srcu_exp_cnt));
+	if (WARN_ON(!srcu_get_delay(sp)))
+		return; /* Leakage unless caller handles error. */
 	if (WARN_ON(srcu_readers_active(sp)))
 		return; /* Leakage unless caller handles error. */
 	flush_delayed_work(&sp->work);
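The srcu_get_delay() test just above leans on ULONG_CMP_LT(), the kernel's wraparound-safe sequence comparison.  A compilable demonstration, with the macro bodies copied from include/linux/rcupdate.h:

#include <limits.h>
#include <stdio.h>

#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))

int main(void)
{
	unsigned long cur = ULONG_MAX - 1;	/* counter about to wrap */
	unsigned long needed = cur + 4;		/* wrapped past zero */

	/* A plain "cur < needed" is false here, but the modular
	 * comparison still sees "needed" as in the future, so a
	 * pending expedited request is never lost to wraparound. */
	printf("plain: %d  ULONG_CMP_LT: %d\n",
	       cur < needed, ULONG_CMP_LT(cur, needed));
	return 0;
}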
@@ -444,15 +459,14 @@ static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
  * schedule this invocation on the corresponding CPUs.
  */
 static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp,
-				  unsigned long mask)
+				  unsigned long mask, unsigned long delay)
 {
 	int cpu;
 
 	for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
 		if (!(mask & (1 << (cpu - snp->grplo))))
 			continue;
-		srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu),
-				      atomic_read(&sp->srcu_exp_cnt) ? 0 : SRCU_INTERVAL);
+		srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), delay);
 	}
 }
 
@@ -467,6 +481,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp,
  */
 static void srcu_gp_end(struct srcu_struct *sp)
 {
+	unsigned long cbdelay;
 	bool cbs;
 	unsigned long gpseq;
 	int idx;
@@ -481,8 +496,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	spin_lock_irq(&sp->gp_lock);
 	idx = rcu_seq_state(sp->srcu_gp_seq);
 	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
+	cbdelay = srcu_get_delay(sp);
 	rcu_seq_end(&sp->srcu_gp_seq);
 	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
+	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
+		sp->srcu_gp_seq_needed_exp = gpseq;
 	spin_unlock_irq(&sp->gp_lock);
 	mutex_unlock(&sp->srcu_gp_mutex);
 	/* A new grace period can start at this point.  But only one. */
@@ -497,12 +515,14 @@ static void srcu_gp_end(struct srcu_struct *sp)
 		cbs = snp->srcu_have_cbs[idx] == gpseq;
 		snp->srcu_have_cbs[idx] = gpseq;
 		rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
+		if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq))
+			snp->srcu_gp_seq_needed_exp = gpseq;
 		mask = snp->srcu_data_have_cbs[idx];
 		snp->srcu_data_have_cbs[idx] = 0;
 		spin_unlock_irq(&snp->lock);
 		if (cbs) {
 			smp_mb(); /* GP end before CB invocation. */
-			srcu_schedule_cbs_snp(sp, snp, mask);
+			srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
 		}
 	}
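The srcu_gp_end() code above drives ->srcu_gp_seq through the rcu_seq_*() helpers, whose encoding the rest of the patch assumes: the low two bits hold the grace-period phase and the remaining bits the grace-period counter.  The userspace model below mirrors the shift and mask from kernel/rcu/rcu.h of this era; the values in main() are arbitrary.

#include <stdio.h>

#define RCU_SEQ_CTR_SHIFT	2
#define RCU_SEQ_STATE_MASK	((1 << RCU_SEQ_CTR_SHIFT) - 1)

static unsigned long rcu_seq_ctr(unsigned long s)
{
	return s >> RCU_SEQ_CTR_SHIFT;
}

static unsigned long rcu_seq_state(unsigned long s)
{
	return s & RCU_SEQ_STATE_MASK;
}

static void rcu_seq_end(unsigned long *sp)
{
	/* Advance to the next counter value with the phase cleared,
	 * which is what makes gpseq a pure "GP number" afterward. */
	*sp = (rcu_seq_ctr(*sp) + 1) << RCU_SEQ_CTR_SHIFT;
}

int main(void)
{
	unsigned long seq = (7 << RCU_SEQ_CTR_SHIFT) | 2; /* GP 7, SCAN2 */

	printf("ctr=%lu state=%lu\n", rcu_seq_ctr(seq), rcu_seq_state(seq));
	rcu_seq_end(&seq);
	printf("after end: ctr=%lu state=%lu\n",
	       rcu_seq_ctr(seq), rcu_seq_state(seq));
	return 0;
}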
@@ -517,15 +537,43 @@ static void srcu_gp_end(struct srcu_struct *sp)
 		srcu_gp_start(sp);
 		spin_unlock_irq(&sp->gp_lock);
 		/* Throttle expedited grace periods: Should be rare! */
-		srcu_reschedule(sp, atomic_read(&sp->srcu_exp_cnt) &&
-				    rcu_seq_ctr(gpseq) & 0xf
-				    ? 0
-				    : SRCU_INTERVAL);
+		srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
+				    ? 0 : SRCU_INTERVAL);
 	} else {
 		spin_unlock_irq(&sp->gp_lock);
 	}
 }
 
+/*
+ * Funnel-locking scheme to scalably mediate many concurrent expedited
+ * grace-period requests.  This function is invoked for the first known
+ * expedited request for a grace period that has already been requested,
+ * but without expediting.  To start a completely new grace period,
+ * whether expedited or not, use srcu_funnel_gp_start() instead.
+ */
+static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
+				  unsigned long s)
+{
+	unsigned long flags;
+
+	for (; snp != NULL; snp = snp->srcu_parent) {
+		if (rcu_seq_done(&sp->srcu_gp_seq, s) ||
+		    ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
+			return;
+		spin_lock_irqsave(&snp->lock, flags);
+		if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
+			spin_unlock_irqrestore(&snp->lock, flags);
+			return;
+		}
+		WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
+		spin_unlock_irqrestore(&snp->lock, flags);
+	}
+	spin_lock_irqsave(&sp->gp_lock, flags);
+	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
+		sp->srcu_gp_seq_needed_exp = s;
+	spin_unlock_irqrestore(&sp->gp_lock, flags);
+}
+
 /*
  * Funnel-locking scheme to scalably mediate many concurrent grace-period
  * requests.  The winner has to do the work of actually starting grace
@@ -533,9 +581,8 @@ static void srcu_gp_end(struct srcu_struct *sp)
  * number is recorded on at least their leaf srcu_node structure, or they
  * must take steps to invoke their own callbacks.
  */
-static void srcu_funnel_gp_start(struct srcu_struct *sp,
-				 struct srcu_data *sdp,
-				 unsigned long s)
+static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
+				 unsigned long s, bool do_norm)
 {
 	unsigned long flags;
 	int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
@@ -554,13 +601,20 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp,
 		spin_unlock_irqrestore(&snp->lock, flags);
 		if (snp == sdp->mynode && snp_seq != s) {
 			smp_mb(); /* CBs after GP! */
-			srcu_schedule_cbs_sdp(sdp, 0);
+			srcu_schedule_cbs_sdp(sdp, do_norm
+						   ? SRCU_INTERVAL
+						   : 0);
+			return;
 		}
+		if (!do_norm)
+			srcu_funnel_exp_start(sp, snp, s);
 		return;
 	}
 	snp->srcu_have_cbs[idx] = s;
 	if (snp == sdp->mynode)
 		snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
+	if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
+		snp->srcu_gp_seq_needed_exp = s;
 	spin_unlock_irqrestore(&snp->lock, flags);
 }
@@ -573,6 +627,8 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp,
 	 */
 	smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/
 	}
+	if (!do_norm && ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
+		sp->srcu_gp_seq_needed_exp = s;
 
 	/* If grace period not already done and none in progress, start it. */
 	if (!rcu_seq_done(&sp->srcu_gp_seq, s) &&
@@ -580,9 +636,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp,
 		WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
 		srcu_gp_start(sp);
 		queue_delayed_work(system_power_efficient_wq, &sp->work,
-				   atomic_read(&sp->srcu_exp_cnt)
-				   ? 0
-				   : SRCU_INTERVAL);
+				   srcu_get_delay(sp));
 	}
 	spin_unlock_irqrestore(&sp->gp_lock, flags);
 }
@@ -597,7 +651,7 @@ static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
 	for (;;) {
 		if (srcu_readers_active_idx_check(sp, idx))
 			return true;
-		if (--trycount + !!atomic_read(&sp->srcu_exp_cnt) <= 0)
+		if (--trycount + !srcu_get_delay(sp) <= 0)
 			return false;
 		udelay(SRCU_RETRY_CHECK_DELAY);
 	}
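The next hunk splits call_srcu() into a do_norm-aware __call_srcu() plus a thin wrapper, so existing callers are unaffected.  As a usage-level view, a fragment of a hypothetical module (my_data, my_srcu, and the helpers are invented names) might look like this:

#include <linux/slab.h>
#include <linux/srcu.h>

struct my_data {
	struct rcu_head rh;
	int payload;
};

DEFINE_STATIC_SRCU(my_srcu);	/* hypothetical srcu_struct */

static void my_free_cb(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct my_data, rh));
}

static void my_retire(struct my_data *p)
{
	/* The callback runs after a normal SRCU grace period;
	 * expedited requests reach the funnel only through the new
	 * __call_srcu(..., do_norm=false) path used internally by
	 * synchronize_srcu_expedited(). */
	call_srcu(&my_srcu, &p->rh, my_free_cb);
}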
@@ -650,10 +704,11 @@ static void srcu_flip(struct srcu_struct *sp)
  * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
  * srcu_struct structure.
  */
-void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
-	       rcu_callback_t func)
+void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
+		 rcu_callback_t func, bool do_norm)
 {
 	unsigned long flags;
+	bool needexp = false;
 	bool needgp = false;
 	unsigned long s;
 	struct srcu_data *sdp;
@@ -672,16 +727,28 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 		sdp->srcu_gp_seq_needed = s;
 		needgp = true;
 	}
+	if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) {
+		sdp->srcu_gp_seq_needed_exp = s;
+		needexp = true;
+	}
 	spin_unlock_irqrestore(&sdp->lock, flags);
 	if (needgp)
-		srcu_funnel_gp_start(sp, sdp, s);
+		srcu_funnel_gp_start(sp, sdp, s, do_norm);
+	else if (needexp)
+		srcu_funnel_exp_start(sp, sdp->mynode, s);
+}
+
+void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
+	       rcu_callback_t func)
+{
+	__call_srcu(sp, rhp, func, true);
 }
 EXPORT_SYMBOL_GPL(call_srcu);
 
 /*
  * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
  */
-static void __synchronize_srcu(struct srcu_struct *sp)
+static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm)
 {
 	struct rcu_synchronize rcu;
 
@@ -697,7 +764,7 @@ static void __synchronize_srcu(struct srcu_struct *sp)
 	check_init_srcu_struct(sp);
 	init_completion(&rcu.completion);
 	init_rcu_head_on_stack(&rcu.head);
-	call_srcu(sp, &rcu.head, wakeme_after_rcu);
+	__call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm);
 	wait_for_completion(&rcu.completion);
 	destroy_rcu_head_on_stack(&rcu.head);
 }
@@ -714,18 +781,7 @@ static void __synchronize_srcu(struct srcu_struct *sp)
  */
 void synchronize_srcu_expedited(struct srcu_struct *sp)
 {
-	bool do_norm = rcu_gp_is_normal();
-
-	check_init_srcu_struct(sp);
-	if (!do_norm) {
-		atomic_inc(&sp->srcu_exp_cnt);
-		smp_mb__after_atomic(); /* increment before GP. */
-	}
-	__synchronize_srcu(sp);
-	if (!do_norm) {
-		smp_mb__before_atomic(); /* GP before decrement. */
-		WARN_ON_ONCE(atomic_dec_return(&sp->srcu_exp_cnt) < 0);
-	}
+	__synchronize_srcu(sp, rcu_gp_is_normal());
 }
 EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
 
@@ -773,7 +829,7 @@ void synchronize_srcu(struct srcu_struct *sp)
 	if (rcu_gp_is_expedited())
 		synchronize_srcu_expedited(sp);
 	else
-		__synchronize_srcu(sp);
+		__synchronize_srcu(sp, true);
 }
 EXPORT_SYMBOL_GPL(synchronize_srcu);
 
@@ -1008,14 +1064,13 @@ void process_srcu(struct work_struct *work)
 
 	sp = container_of(work, struct srcu_struct, work.work);
 	srcu_advance_state(sp);
-	srcu_reschedule(sp, atomic_read(&sp->srcu_exp_cnt) ? 0 : SRCU_INTERVAL);
+	srcu_reschedule(sp, srcu_get_delay(sp));
 }
 EXPORT_SYMBOL_GPL(process_srcu);
 
 void srcutorture_get_gp_data(enum rcutorture_type test_type,
-			     struct srcu_struct *sp, int *flags,
-			     unsigned long *gpnum,
-			     unsigned long *completed)
+			     struct srcu_struct *sp, int *flags,
+			     unsigned long *gpnum, unsigned long *completed)
 {
 	if (test_type != SRCU_FLAVOR)
 		return;
-- 
cgit v1.2.3
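After this patch, both synchronization entry points reduce to __call_srcu() with a do_norm flag.  A hedged usage sketch of the resulting caller-visible choice (my_srcu and wait_for_readers() are hypothetical; the real decision also honors the rcupdate.rcu_normal and rcupdate.rcu_expedited boot parameters via rcu_gp_is_normal()/rcu_gp_is_expedited()):

#include <linux/srcu.h>

DEFINE_STATIC_SRCU(my_srcu);	/* hypothetical srcu_struct */

static void wait_for_readers(bool urgent)
{
	if (urgent)
		/* do_norm=false: requests funnel up as expedited. */
		synchronize_srcu_expedited(&my_srcu);
	else
		/* do_norm=true unless booted with rcupdate.rcu_expedited. */
		synchronize_srcu(&my_srcu);
}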
From 22607d66bbc3e81140d3bcf08894f4378eb36428 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 25 Apr 2017 14:03:11 -0700
Subject: srcu: Specify auto-expedite holdoff time

On small systems, in the absence of readers, expedited SRCU grace
periods can complete in less than a microsecond.  This means that an
eight-CPU system can have all CPUs doing synchronize_srcu() in a tight
loop and almost always expedite.  This might actually be desirable in
some situations, but in general it is a good way to needlessly burn CPU
cycles.  And in those situations where it is desirable, your friend is
the function synchronize_srcu_expedited().

For other situations, this commit adds a kernel parameter that specifies
a holdoff between completing the last SRCU grace period and
auto-expediting the next.  If the next grace period starts before the
holdoff expires, auto-expediting is disabled.  The holdoff is 50
microseconds by default, and can be tuned to the desired number of
nanoseconds.  A value of zero disables auto-expediting.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Mike Galbraith <efault@gmx.de>
---
 Documentation/admin-guide/kernel-parameters.txt |  8 ++++++++
 include/linux/srcutree.h                        |  1 +
 kernel/rcu/srcutree.c                           | 18 +++++++++++++++++-
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index facc20a3f962..4a4b9266c4de 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3779,6 +3779,14 @@
 	spia_pedr=
 	spia_peddr=
 
+	srcutree.exp_holdoff [KNL]
+			Specifies how many nanoseconds must elapse
+			since the end of the last SRCU grace period for
+			a given srcu_struct until the next normal SRCU
+			grace period will be considered for automatic
+			expediting.  Set to zero to disable automatic
+			expediting.
+
 	stacktrace	[FTRACE]
 			Enables the stack tracer on boot up.
 
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 86df48d3e97b..32e86d85fd11 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -84,6 +84,7 @@ struct srcu_struct {
 	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
 	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
 	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
+	unsigned long srcu_last_gp_end;		/* Last GP end timestamp (ns). */
 	struct srcu_data __percpu *sda;		/* Per-CPU srcu_data array. */
 	unsigned long srcu_barrier_seq;		/* srcu_barrier seq #. */
 	struct mutex srcu_barrier_mutex;	/* Serialize barrier ops. */
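For example, on the kernel command line (illustrative values; only the zero case is called out by the documentation above):

	srcutree.exp_holdoff=0		# never auto-expedite
	srcutree.exp_holdoff=1000000	# hold off for a full millisecond

Since module_param() in the next hunk uses mode 0444, the current value should also be readable, but not writable, under /sys/module/srcutree/parameters/exp_holdoff.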
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 2286e06fd159..74c283f9d15e 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -34,10 +34,14 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/delay.h>
+#include <linux/module.h>
 #include <linux/srcu.h>
 
 #include "rcu.h"
 
+ulong exp_holdoff = 50 * 1000;	/* Holdoff (ns) for auto-expediting. */
+module_param(exp_holdoff, ulong, 0444);
+
 static void srcu_invoke_callbacks(struct work_struct *work);
 static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
 
@@ -145,6 +149,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
 		sp->sda = alloc_percpu(struct srcu_data);
 	init_srcu_struct_nodes(sp, is_static);
 	sp->srcu_gp_seq_needed_exp = 0;
+	sp->srcu_last_gp_end = ktime_get_mono_fast_ns();
 	smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */
 	return sp->sda ? 0 : -ENOMEM;
 }
@@ -498,6 +503,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	idx = rcu_seq_state(sp->srcu_gp_seq);
 	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
 	cbdelay = srcu_get_delay(sp);
+	sp->srcu_last_gp_end = ktime_get_mono_fast_ns();
 	rcu_seq_end(&sp->srcu_gp_seq);
 	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
 	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
@@ -700,9 +706,10 @@ static void srcu_flip(struct srcu_struct *sp)
  */
 static bool srcu_might_be_idle(struct srcu_struct *sp)
 {
+	unsigned long curseq;
 	unsigned long flags;
 	struct srcu_data *sdp;
-	unsigned long curseq;
+	unsigned long t;
 
 	/* If the local srcu_data structure has callbacks, not idle. */
 	local_irq_save(flags);
@@ -718,6 +725,15 @@ static bool srcu_might_be_idle(struct srcu_struct *sp)
 	 * Exact information would require acquiring locks, which would
 	 * kill scalability, hence the probabilistic nature of the probe.
 	 */
+
+	/* First, see if enough time has passed since the last GP. */
+	t = ktime_get_mono_fast_ns();
+	if (exp_holdoff == 0 ||
+	    time_in_range_open(t, sp->srcu_last_gp_end,
+			       sp->srcu_last_gp_end + exp_holdoff))
+		return false; /* Too soon after last GP. */
+
+	/* Next, check for probable idleness. */
 	curseq = rcu_seq_current(&sp->srcu_gp_seq);
 	smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
 	if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed)))
-- 
cgit v1.2.3
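To close, a userspace model of the holdoff test just added to srcu_might_be_idle(): a new grace period that starts within exp_holdoff nanoseconds of the previous one's end is not considered for auto-expediting.  The clock argument and the range check are simplified stand-ins for ktime_get_mono_fast_ns() and time_in_range_open().

#include <stdbool.h>
#include <stdio.h>

static unsigned long exp_holdoff = 50 * 1000;	/* 50 us, the default */

static bool may_auto_expedite(unsigned long now_ns,
			      unsigned long last_gp_end_ns)
{
	if (exp_holdoff == 0)
		return false;	/* Zero disables auto-expediting. */
	if (now_ns - last_gp_end_ns < exp_holdoff)
		return false;	/* Too soon after the last GP. */
	return true;		/* Idle long enough to consider it. */
}

int main(void)
{
	printf("%d\n", may_auto_expedite(100000, 90000));  /* 0: 10 us gap */
	printf("%d\n", may_auto_expedite(200000, 90000));  /* 1: 110 us gap */
	return 0;
}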