drm/i915/execlists: Move reset_active() from schedule-out to schedule-in

The gem_ctx_persistence/smoketest was detecting an odd coherency issue inside the LRC context image; that the address of the ring buffer did not match our associated struct intel_ring. As we set the address into the context image when we pin the ring buffer into place before the context is active, that leaves the question of where did it get overwritten. Either the HW context save occurred after our pin which would imply that our idle barriers are broken, or we overwrote the context image ourselves. It is only in reset_active() where we dabble inside the context image outside of a serialised path from schedule-out; but we could equally perform the operation inside schedule-in which is then fully serialised with the context pin -- and remains serialised by the engine pulse with kill_context(). (The only downside, aside from doing more work inside the engine->active.lock, was the plan to merge all the reset paths into doing their context scrubbing on schedule-out needs more thought.) Fixes: d12acee84f ("drm/i915/execlists: Cancel banned contexts on schedule-out") Testcase: igt/gem_ctx_persistence/smoketest Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20191111133205.11590-3-chris@chris-wilson.co.uk (cherry picked from commit 31b61f0ef9) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
2024-11-18 17:54:13 +08:00 · 2019-11-11 13:32:05 +00:00 · 2019-11-11 13:32:05 +00:00 · 98ae6fb3f1
commit 98ae6fb3f1
parent cee7fb437e
1 changed files with 66 additions and 60 deletions
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@ -990,63 +990,6 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
 }

-static inline struct intel_engine_cs *
-__execlists_schedule_in(struct i915_request *rq)
-{
-	struct intel_engine_cs * const engine = rq->engine;
-	struct intel_context * const ce = rq->hw_context;
-
-	intel_context_get(ce);
-
-	if (ce->tag) {
-		/* Use a fixed tag for OA and friends */
-		ce->lrc_desc |= (u64)ce->tag << 32;
-	} else {
-		/* We don't need a strict matching tag, just different values */
-		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
-		ce->lrc_desc |=
-			(u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
-			GEN11_SW_CTX_ID_SHIFT;
-		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
-	}
-
-	intel_gt_pm_get(engine->gt);
-	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-	intel_engine_context_in(engine);
-
-	return engine;
-}
-
-static inline struct i915_request *
-execlists_schedule_in(struct i915_request *rq, int idx)
-{
-	struct intel_context * const ce = rq->hw_context;
-	struct intel_engine_cs *old;
-
-	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
-	trace_i915_request_in(rq, idx);
-
-	old = READ_ONCE(ce->inflight);
-	do {
-		if (!old) {
-			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
-			break;
-		}
-	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
-
-	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
-	return i915_request_get(rq);
-}
-
-static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
-{
-	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
-	struct i915_request *next = READ_ONCE(ve->request);
-
-	if (next && next->execution_mask & ~rq->execution_mask)
-		tasklet_schedule(&ve->base.execlists.tasklet);
-}
-
 static void restore_default_state(struct intel_context *ce,
 				  struct intel_engine_cs *engine)
 {
@ -1100,19 +1043,82 @@ static void reset_active(struct i915_request *rq,
 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 }

+static inline struct intel_engine_cs *
+__execlists_schedule_in(struct i915_request *rq)
+{
+	struct intel_engine_cs * const engine = rq->engine;
+	struct intel_context * const ce = rq->hw_context;
+
+	intel_context_get(ce);
+
+	if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
+		reset_active(rq, engine);
+
+	if (ce->tag) {
+		/* Use a fixed tag for OA and friends */
+		ce->lrc_desc |= (u64)ce->tag << 32;
+	} else {
+		/* We don't need a strict matching tag, just different values */
+		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
+		ce->lrc_desc |=
+			(u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
+			GEN11_SW_CTX_ID_SHIFT;
+		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
+	}
+
+	intel_gt_pm_get(engine->gt);
+	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+	intel_engine_context_in(engine);
+
+	return engine;
+}
+
+static inline struct i915_request *
+execlists_schedule_in(struct i915_request *rq, int idx)
+{
+	struct intel_context * const ce = rq->hw_context;
+	struct intel_engine_cs *old;
+
+	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
+	trace_i915_request_in(rq, idx);
+
+	old = READ_ONCE(ce->inflight);
+	do {
+		if (!old) {
+			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
+			break;
+		}
+	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
+
+	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
+	return i915_request_get(rq);
+}
+
+static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	struct i915_request *next = READ_ONCE(ve->request);
+
+	if (next && next->execution_mask & ~rq->execution_mask)
+		tasklet_schedule(&ve->base.execlists.tasklet);
+}
+
 static inline void
 __execlists_schedule_out(struct i915_request *rq,
 			 struct intel_engine_cs * const engine)
 {
 	struct intel_context * const ce = rq->hw_context;

+	/*
+	 * NB process_csb() is not under the engine->active.lock and hence
+	 * schedule_out can race with schedule_in meaning that we should
+	 * refrain from doing non-trivial work here.
+	 */
+
 	intel_engine_context_out(engine);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	intel_gt_pm_put(engine->gt);

-	if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
-		reset_active(rq, engine);
-
 	/*
 	 * If this is part of a virtual engine, its next request may
 	 * have been blocked waiting for access to the active context.