diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2019-12-18 12:40:57 +0300 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2019-12-18 20:38:56 +0300 |
commit | 01f624f01845a1685b6070bfa6be9a13ed03b712 (patch) | |
tree | 42a417468c03a48428af7a0a9d03bdb521b5b083 /drivers/gpu/drm/i915 | |
parent | 54400257ae523fa7fff11fe4209e7f9dcafdefa0 (diff) | |
download | linux-01f624f01845a1685b6070bfa6be9a13ed03b712.tar.xz |
drm/i915: Ratelimit i915_globals_park
When doing our global park, we like to be a good citizen and shrink our
slab caches (of which we have quite a few now), but each
kmem_cache_shrink() incurs a stop_machine() and so ends up being quite
expensive, causing machine-wide stalls. While ideally we would like to
throw away unused pages in our slab caches whenever it appears that we
are idling, doing so will require a much cheaper mechanism. In the
meantime use a delayed worked to impose a rate-limit that means we have
to have been idle for more than 2 seconds before we start shrinking.
References: https://gitlab.freedesktop.org/drm/intel/issues/848
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191218094057.3510459-1-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915')
-rw-r--r-- | drivers/gpu/drm/i915/i915_globals.c | 53 |
1 files changed, 44 insertions, 9 deletions
diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index be127cd28931..3aa213684293 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -20,7 +20,10 @@ static LIST_HEAD(globals); static atomic_t active; static atomic_t epoch; static struct park_work { - struct rcu_work work; + struct delayed_work work; + struct rcu_head rcu; + unsigned long flags; +#define PENDING 0 int epoch; } park; @@ -37,11 +40,33 @@ static void i915_globals_shrink(void) global->shrink(); } +static void __i915_globals_grace(struct rcu_head *rcu) +{ + /* Ratelimit parking as shrinking is quite slow */ + schedule_delayed_work(&park.work, round_jiffies_up_relative(2 * HZ)); +} + +static void __i915_globals_queue_rcu(void) +{ + park.epoch = atomic_inc_return(&epoch); + if (!atomic_read(&active)) { + init_rcu_head(&park.rcu); + call_rcu(&park.rcu, __i915_globals_grace); + } +} + static void __i915_globals_park(struct work_struct *work) { + destroy_rcu_head(&park.rcu); + /* Confirm nothing woke up in the last grace period */ - if (park.epoch == atomic_read(&epoch)) - i915_globals_shrink(); + if (park.epoch != atomic_read(&epoch)) { + __i915_globals_queue_rcu(); + return; + } + + clear_bit(PENDING, &park.flags); + i915_globals_shrink(); } void __init i915_global_register(struct i915_global *global) @@ -85,7 +110,7 @@ int __init i915_globals_init(void) } } - INIT_RCU_WORK(&park.work, __i915_globals_park); + INIT_DELAYED_WORK(&park.work, __i915_globals_park); return 0; } @@ -103,8 +128,9 @@ void i915_globals_park(void) if (!atomic_dec_and_test(&active)) return; - park.epoch = atomic_inc_return(&epoch); - queue_rcu_work(system_wq, &park.work); + /* Queue cleanup after the next RCU grace period has freed slabs */ + if (!test_and_set_bit(PENDING, &park.flags)) + __i915_globals_queue_rcu(); } void i915_globals_unpark(void) @@ -113,12 +139,21 @@ void i915_globals_unpark(void) atomic_inc(&active); } +static void __exit __i915_globals_flush(void) +{ + atomic_inc(&active); /* skip shrinking */ + + rcu_barrier(); /* wait for the work to be queued */ + flush_delayed_work(&park.work); + + atomic_dec(&active); +} + void __exit i915_globals_exit(void) { - /* Flush any residual park_work */ - atomic_inc(&epoch); - flush_rcu_work(&park.work); + GEM_BUG_ON(atomic_read(&active)); + __i915_globals_flush(); __i915_globals_cleanup(); /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ |