summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Clark <robdclark@gmail.com>2015-06-07 20:46:04 +0300
committerRob Clark <robdclark@gmail.com>2015-06-11 20:11:06 +0300
commit1a370be9ac51129e40b0ed7fa71d2b2b92bc47e5 (patch)
tree678bad05754937920c7d09fb66dd5a356ed9f172
parent56c2da8338d5cdfc0695eeed96ebe03cf2ac0321 (diff)
downloadlinux-1a370be9ac51129e40b0ed7fa71d2b2b92bc47e5.tar.xz
drm/msm: restart queued submits after hang
Track the list of in-flight submits. If the gpu hangs, retire up to and including the offending submit, and then re-submit the remainder. This way, for concurrently running piglit tests (for example), one failing test doesn't cause unrelated tests to fail simply because its submit was queued up after one that triggered a hang. Signed-off-by: Rob Clark <robdclark@gmail.com>
-rw-r--r--drivers/gpu/drm/msm/msm_gem.h1
-rw-r--r--drivers/gpu/drm/msm/msm_gem_submit.c1
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c49
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.h2
4 files changed, 49 insertions, 4 deletions
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 85d481e29276..6fc59bfeedeb 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -96,6 +96,7 @@ static inline uint32_t msm_gem_fence(struct msm_gem_object *msm_obj,
struct msm_gem_submit {
struct drm_device *dev;
struct msm_gpu *gpu;
+ struct list_head node; /* node in gpu submit_list */
struct list_head bo_list;
struct ww_acquire_ctx ticket;
uint32_t fence;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index cd0554f68316..6d7cd3fe21e7 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -314,7 +314,6 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool fail)
}
ww_acquire_fini(&submit->ticket);
- kfree(submit);
}
int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 4016aef56c50..8f70d9248ac5 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -265,6 +265,8 @@ static void inactive_start(struct msm_gpu *gpu)
* Hangcheck detection for locked gpu:
*/
+static void retire_submits(struct msm_gpu *gpu, uint32_t fence);
+
static void recover_worker(struct work_struct *work)
{
struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
@@ -274,8 +276,19 @@ static void recover_worker(struct work_struct *work)
mutex_lock(&dev->struct_mutex);
if (msm_gpu_active(gpu)) {
+ struct msm_gem_submit *submit;
+ uint32_t fence = gpu->funcs->last_fence(gpu);
+
+ /* retire completed submits, plus the one that hung: */
+ retire_submits(gpu, fence + 1);
+
inactive_cancel(gpu);
gpu->funcs->recover(gpu);
+
+ /* replay the remaining submits after the one that hung: */
+ list_for_each_entry(submit, &gpu->submit_list, node) {
+ gpu->funcs->submit(gpu, submit, NULL);
+ }
}
mutex_unlock(&dev->struct_mutex);
@@ -418,6 +431,27 @@ out:
* Cmdstream submission/retirement:
*/
+static void retire_submits(struct msm_gpu *gpu, uint32_t fence)
+{
+ struct drm_device *dev = gpu->dev;
+
+ WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
+ while (!list_empty(&gpu->submit_list)) {
+ struct msm_gem_submit *submit;
+
+ submit = list_first_entry(&gpu->submit_list,
+ struct msm_gem_submit, node);
+
+ if (submit->fence <= fence) {
+ list_del(&submit->node);
+ kfree(submit);
+ } else {
+ break;
+ }
+ }
+}
+
static void retire_worker(struct work_struct *work)
{
struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
@@ -428,6 +462,8 @@ static void retire_worker(struct work_struct *work)
mutex_lock(&dev->struct_mutex);
+ retire_submits(gpu, fence);
+
while (!list_empty(&gpu->active_list)) {
struct msm_gem_object *obj;
@@ -467,21 +503,22 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
struct msm_drm_private *priv = dev->dev_private;
int i, ret;
+ WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
submit->fence = ++priv->next_fence;
gpu->submitted_fence = submit->fence;
inactive_cancel(gpu);
+ list_add_tail(&submit->node, &gpu->submit_list);
+
msm_rd_dump_submit(submit);
gpu->submitted_fence = submit->fence;
update_sw_cntrs(gpu);
- ret = gpu->funcs->submit(gpu, submit, ctx);
- priv->lastctx = ctx;
-
for (i = 0; i < submit->nr_bos; i++) {
struct msm_gem_object *msm_obj = submit->bos[i].obj;
@@ -505,6 +542,10 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
}
+
+ ret = gpu->funcs->submit(gpu, submit, ctx);
+ priv->lastctx = ctx;
+
hangcheck_timer_reset(gpu);
return ret;
@@ -545,6 +586,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
INIT_WORK(&gpu->inactive_work, inactive_worker);
INIT_WORK(&gpu->recover_work, recover_worker);
+ INIT_LIST_HEAD(&gpu->submit_list);
+
setup_timer(&gpu->inactive_timer, inactive_handler,
(unsigned long)gpu);
setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 7b3ec21d5d77..2bbe85a3d6f6 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -119,6 +119,8 @@ struct msm_gpu {
struct timer_list hangcheck_timer;
uint32_t hangcheck_fence;
struct work_struct recover_work;
+
+ struct list_head submit_list;
};
static inline bool msm_gpu_active(struct msm_gpu *gpu)