drm/i915/tgl: Extend MI_SEMAPHORE_WAIT

On Tigerlake, MI_SEMAPHORE_WAIT grew an extra dword, so be sure to
update the length field and emit that extra parameter and any padding
noop as required.

v2: Define the token shift while we are adding the updated MI_SEMAPHORE_WAIT
v3: Use int instead of bool in the addition so that readers are not left
wondering about the intricacies of the C spec. Now they just have to
worry what the integer value of a boolean operation is...

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190917123055.28965-1-chris@chris-wilson.co.uk
This commit is contained in:
Chris Wilson 2019-09-17 13:30:55 +01:00
parent 80fa64d620
commit c210e85b8f
4 changed files with 84 additions and 12 deletions

View File

@ -112,6 +112,7 @@
#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ #define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */
#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */
#define MI_SEMAPHORE_POLL (1 << 15) #define MI_SEMAPHORE_POLL (1 << 15)
#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
@ -119,6 +120,8 @@
#define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12) #define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12)
#define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12) #define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12)
#define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) #define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12)
#define MI_SEMAPHORE_TOKEN_MASK REG_GENMASK(9, 5)
#define MI_SEMAPHORE_TOKEN_SHIFT 5
#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) #define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2)
#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */

View File

@ -2879,6 +2879,22 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
return gen8_emit_fini_breadcrumb_footer(request, cs); return gen8_emit_fini_breadcrumb_footer(request, cs);
} }
/*
 * Final breadcrumb for the gen11 render engine.
 *
 * Writes the request's fence seqno to its timeline's hwsp_offset through
 * gen8_emit_ggtt_write_rcs(), passing the PIPE_CONTROL flags collected below,
 * then hands the remainder of the sequence to
 * gen8_emit_fini_breadcrumb_footer().
 *
 * Returns the command-stream pointer produced by the footer helper.
 */
static u32 *
gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
	/* Flags handed to the PIPE_CONTROL-based seqno write. */
	const u32 flush_flags =
		PIPE_CONTROL_CS_STALL |
		PIPE_CONTROL_TILE_CACHE_FLUSH |
		PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		PIPE_CONTROL_DC_FLUSH_ENABLE |
		PIPE_CONTROL_FLUSH_ENABLE;

	cs = gen8_emit_ggtt_write_rcs(cs,
				      request->fence.seqno,
				      request->timeline->hwsp_offset,
				      flush_flags);

	return gen8_emit_fini_breadcrumb_footer(request, cs);
}
/* /*
* Note that the CS instruction pre-parser will not stall on the breadcrumb * Note that the CS instruction pre-parser will not stall on the breadcrumb
* flush and will continue pre-fetching the instructions after it before the * flush and will continue pre-fetching the instructions after it before the
@ -2897,8 +2913,49 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
* All the above applies only to the instructions themselves. Non-inline data * All the above applies only to the instructions themselves. Non-inline data
* used by the instructions is not pre-fetched. * used by the instructions is not pre-fetched.
*/ */
static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
u32 *cs) static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
{
/*
 * Emit a gen12 MI_SEMAPHORE_WAIT that polls the engine's preempt address
 * (from intel_hws_preempt_address()) in the global GTT until it compares
 * equal (SAD_EQ_SDD) to the inline data dword written below.
 *
 * The gen12 form of the command (MI_SEMAPHORE_WAIT_TOKEN) is one dword
 * longer than the gen8 one, so six dwords are written here in total:
 * five for the command plus one MI_NOOP of padding.
 *
 * Returns the advanced command-stream pointer.
 */
/* Command header: gen12 length encoding, GGTT address, poll mode, "equal" compare. */
*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
MI_SEMAPHORE_GLOBAL_GTT |
MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_EQ_SDD;
/* Inline semaphore data; presumably the value the polled dword is compared against -- confirm. */
*cs++ = 0;
/* Address of the dword to poll. */
*cs++ = intel_hws_preempt_address(request->engine);
/* Presumably the upper address bits, unused here -- confirm against the command layout. */
*cs++ = 0;
/* The extra dword added on gen12 (the token parameter), left as zero. */
*cs++ = 0;
/* Padding noop after the now odd-length command. */
*cs++ = MI_NOOP;
return cs;
}
/*
 * Common tail of the gen12 final breadcrumb.
 *
 * Emits MI_USER_INTERRUPT followed by MI_ARB_ON_OFF | MI_ARB_ENABLE and,
 * when the request's engine reports semaphore support, the gen12 preempt
 * busywait. The resulting ring offset is stored in request->tail and
 * validated before gen8_emit_wa_tail() emits whatever follows.
 *
 * Returns the command-stream pointer returned by gen8_emit_wa_tail().
 */
static __always_inline u32*
gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
{
	u32 *out = cs;

	out[0] = MI_USER_INTERRUPT;
	out[1] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	out += 2;

	/* The busywait is only emitted for engines that have semaphores. */
	if (intel_engine_has_semaphores(request->engine))
		out = gen12_emit_preempt_busywait(request, out);

	/* Record where the request ends before the wa tail is appended. */
	request->tail = intel_ring_offset(request, out);
	assert_ring_tail_valid(request->ring, request->tail);

	return gen8_emit_wa_tail(request, out);
}
/*
 * Final breadcrumb for gen12 engines that do not override it.
 *
 * Writes the request's fence seqno to its timeline's hwsp_offset via
 * gen8_emit_ggtt_write() with no extra flags, then appends the gen12
 * breadcrumb footer.
 *
 * Returns the command-stream pointer produced by the footer.
 */
static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
{
	u32 *after_write;

	after_write = gen8_emit_ggtt_write(cs,
					   request->fence.seqno,
					   request->timeline->hwsp_offset,
					   0);

	return gen12_emit_fini_breadcrumb_footer(request, after_write);
}
static u32 *
gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{ {
cs = gen8_emit_ggtt_write_rcs(cs, cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno, request->fence.seqno,
@ -2910,7 +2967,7 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE); PIPE_CONTROL_FLUSH_ENABLE);
return gen8_emit_fini_breadcrumb_footer(request, cs); return gen12_emit_fini_breadcrumb_footer(request, cs);
} }
static void execlists_park(struct intel_engine_cs *engine) static void execlists_park(struct intel_engine_cs *engine)
@ -2939,9 +2996,6 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->flags |= I915_ENGINE_HAS_PREEMPTION; engine->flags |= I915_ENGINE_HAS_PREEMPTION;
} }
if (INTEL_GEN(engine->i915) >= 12) /* XXX disabled for debugging */
engine->flags &= ~I915_ENGINE_HAS_SEMAPHORES;
if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12) if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12)
engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
} }
@ -2971,6 +3025,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_flush = gen8_emit_flush; engine->emit_flush = gen8_emit_flush;
engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
if (INTEL_GEN(engine->i915) >= 12)
engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
engine->set_default_submission = intel_execlists_set_default_submission; engine->set_default_submission = intel_execlists_set_default_submission;
@ -3016,6 +3072,9 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
{ {
switch (INTEL_GEN(engine->i915)) { switch (INTEL_GEN(engine->i915)) {
case 12: case 12:
engine->emit_flush = gen11_emit_flush_render;
engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
break;
case 11: case 11:
engine->emit_flush = gen11_emit_flush_render; engine->emit_flush = gen11_emit_flush_render;
engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;

View File

@ -797,7 +797,6 @@ static const struct intel_device_info intel_tigerlake_12_info = {
.display.has_modular_fia = 1, .display.has_modular_fia = 1,
.engine_mask = .engine_mask =
BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
.has_logical_ring_preemption = false, /* XXX disabled for debugging */
.engine_mask = BIT(RCS0), /* XXX reduced for debugging */ .engine_mask = BIT(RCS0), /* XXX reduced for debugging */
}; };

View File

@ -783,7 +783,9 @@ emit_semaphore_wait(struct i915_request *to,
struct i915_request *from, struct i915_request *from,
gfp_t gfp) gfp_t gfp)
{ {
const int has_token = INTEL_GEN(to->i915) >= 12;
u32 hwsp_offset; u32 hwsp_offset;
int len;
u32 *cs; u32 *cs;
int err; int err;
@ -810,7 +812,11 @@ emit_semaphore_wait(struct i915_request *to,
if (err) if (err)
return err; return err;
cs = intel_ring_begin(to, 4); len = 4;
if (has_token)
len += 2;
cs = intel_ring_begin(to, len);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -822,13 +828,18 @@ emit_semaphore_wait(struct i915_request *to,
* (post-wrap) values than they were expecting (and so wait * (post-wrap) values than they were expecting (and so wait
* forever). * forever).
*/ */
*cs++ = MI_SEMAPHORE_WAIT | *cs++ = (MI_SEMAPHORE_WAIT |
MI_SEMAPHORE_GLOBAL_GTT | MI_SEMAPHORE_GLOBAL_GTT |
MI_SEMAPHORE_POLL | MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_GTE_SDD; MI_SEMAPHORE_SAD_GTE_SDD) +
has_token;
*cs++ = from->fence.seqno; *cs++ = from->fence.seqno;
*cs++ = hwsp_offset; *cs++ = hwsp_offset;
*cs++ = 0; *cs++ = 0;
if (has_token) {
*cs++ = 0;
*cs++ = MI_NOOP;
}
intel_ring_advance(to, cs); intel_ring_advance(to, cs);
to->sched.semaphores |= from->engine->mask; to->sched.semaphores |= from->engine->mask;