drm/xe/ct: prevent UAF in send_recv()

commit db7f92af626178ba59dbbcdd5dee9ec24a987a88 upstream.

Ensure we serialize with completion side to prevent UAF with fence going
out of scope on the stack, since we have no clue if it will fire after
the timeout before we can erase from the xa. Also we have some dependent
loads and stores for which we need the correct ordering, and we lack the
needed barriers. Fix this by grabbing the ct->lock after the wait, which
is also held by the completion side.

v2 (Badal):
 - Also print done after acquiring the lock and seeing timeout.

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Badal Nilawar <badal.nilawar@intel.com>
Cc: <stable@vger.kernel.org> # v6.8+
Reviewed-by: Badal Nilawar <badal.nilawar@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241001084346.98516-5-matthew.auld@intel.com
(cherry picked from commit 52789ce35c55ccd30c4b67b9cc5b2af55e0122ea)
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Matthew Auld <matthew.auld@intel.com> 2024-10-01 11:43:47 +0300
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2024-10-17 16:26:57 +0300
commit: 8ed7dd4c55e4fb21531a9645aeb66a30eaf43a46 (patch)
tree: d245be15c40daa59ce972809d96971e6f01d2433
parent: ea15e5072f997f7b7a0776dab86b64269148d740 (diff)
download: linux-8ed7dd4c55e4fb21531a9645aeb66a30eaf43a46.tar.xz
1 files changed, 18 insertions, 3 deletions
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 64afc90ad2c5..ef5059db8aaa 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -894,16 +894,26 @@ retry_same_fence:
 	}
 
 	ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
+
+	/*
+	 * Ensure we serialize with completion side to prevent UAF with fence going out of scope on
+	 * the stack, since we have no clue if it will fire after the timeout before we can erase
+	 * from the xa. Also we have some dependent loads and stores below for which we need the
+	 * correct ordering, and we lack the needed barriers.
+	 */
+	mutex_lock(&ct->lock);
 	if (!ret) {
-		xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x",
-			  g2h_fence.seqno, action[0]);
+		xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
+			  g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
 		xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+		mutex_unlock(&ct->lock);
 		return -ETIME;
 	}
 
 	if (g2h_fence.retry) {
 		xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
 			  action[0], g2h_fence.reason);
+		mutex_unlock(&ct->lock);
 		goto retry;
 	}
 	if (g2h_fence.fail) {
@@ -912,7 +922,12 @@ retry_same_fence:
 		ret = -EIO;
 	}
 
-	return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret;
+	if (ret > 0)
+		ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
+
+	mutex_unlock(&ct->lock);
+
+	return ret;
 }
 
 /**
author	Matthew Auld <matthew.auld@intel.com>	2024-10-01 11:43:47 +0300
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2024-10-17 16:26:57 +0300
commit	8ed7dd4c55e4fb21531a9645aeb66a30eaf43a46 (patch)
tree	d245be15c40daa59ce972809d96971e6f01d2433
parent	ea15e5072f997f7b7a0776dab86b64269148d740 (diff)
download	linux-8ed7dd4c55e4fb21531a9645aeb66a30eaf43a46.tar.xz