mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-12-17 07:54:54 +08:00
drm/i915/perf: Configure OAR for specific context
Gen12 supports saving/restoring render counters per context. Apply OAR
configuration only for the context that is passed in to perf.
v2:
- Fix OACTXCONTROL value to only stop/resume counters.
- Remove gen12_update_reg_state_unlocked as power state is already
applied by the caller.
v3: (Lionel)
- Move register initialization into the array
- Assume a valid oa_config in enable_metric_set
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Fixes: 00a7f0d715
("drm/i915/tgl: Add perf support on TGL")
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191206194339.31356-2-umesh.nerlige.ramappa@intel.com
This commit is contained in:
parent
322d56aa31
commit
ccdeed4970
@ -2082,20 +2082,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
|
||||
u32 *reg_state = ce->lrc_reg_state;
|
||||
int i;
|
||||
|
||||
if (IS_GEN(stream->perf->i915, 12)) {
|
||||
u32 format = stream->oa_buffer.format;
|
||||
|
||||
reg_state[ctx_oactxctrl + 1] =
|
||||
(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
|
||||
(stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
|
||||
} else {
|
||||
reg_state[ctx_oactxctrl + 1] =
|
||||
(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
|
||||
(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
|
||||
GEN8_OA_COUNTER_RESUME;
|
||||
}
|
||||
|
||||
for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
|
||||
for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
|
||||
reg_state[ctx_flexeu0 + i * 2 + 1] =
|
||||
oa_config_flex_reg(stream->oa_config, flex_regs[i]);
|
||||
|
||||
@ -2228,34 +2220,51 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
|
||||
return err;
|
||||
}
|
||||
|
||||
static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
|
||||
static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
|
||||
{
|
||||
struct i915_request *rq;
|
||||
u32 *cs;
|
||||
int err = 0;
|
||||
|
||||
rq = i915_request_create(ce);
|
||||
if (IS_ERR(rq))
|
||||
return PTR_ERR(rq);
|
||||
|
||||
cs = intel_ring_begin(rq, 4);
|
||||
if (IS_ERR(cs)) {
|
||||
err = PTR_ERR(cs);
|
||||
goto out;
|
||||
}
|
||||
|
||||
*cs++ = MI_LOAD_REGISTER_IMM(1);
|
||||
*cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
|
||||
*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
|
||||
enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
|
||||
*cs++ = MI_NOOP;
|
||||
|
||||
intel_ring_advance(rq, cs);
|
||||
|
||||
out:
|
||||
i915_request_add(rq);
|
||||
int err;
|
||||
struct intel_context *ce = stream->pinned_ctx;
|
||||
u32 format = stream->oa_buffer.format;
|
||||
struct flex regs_context[] = {
|
||||
{
|
||||
GEN8_OACTXCONTROL,
|
||||
stream->perf->ctx_oactxctrl_offset + 1,
|
||||
enable ? GEN8_OA_COUNTER_RESUME : 0,
|
||||
},
|
||||
};
|
||||
/* Offsets in regs_lri are not used since this configuration is only
|
||||
* applied using LRI. Initialize the correct offsets for posterity.
|
||||
*/
|
||||
#define GEN12_OAR_OACONTROL_OFFSET 0x5B0
|
||||
struct flex regs_lri[] = {
|
||||
{
|
||||
GEN12_OAR_OACONTROL,
|
||||
GEN12_OAR_OACONTROL_OFFSET + 1,
|
||||
(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
|
||||
(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
|
||||
},
|
||||
{
|
||||
RING_CONTEXT_CONTROL(ce->engine->mmio_base),
|
||||
CTX_CONTEXT_CONTROL,
|
||||
_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
|
||||
enable ?
|
||||
GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
|
||||
0)
|
||||
},
|
||||
};
|
||||
|
||||
/* Modify the context image of pinned context with regs_context*/
|
||||
err = intel_context_lock_pinned(ce);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
|
||||
intel_context_unlock_pinned(ce);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* Apply regs_lri using LRI with pinned context */
|
||||
return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2281,53 +2290,16 @@ out:
|
||||
* per-context OA state.
|
||||
*
|
||||
* Note: it's only the RCS/Render context that has any OA state.
|
||||
* Note: the first flex register passed must always be R_PWR_CLK_STATE
|
||||
*/
|
||||
static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
|
||||
const struct i915_oa_config *oa_config)
|
||||
static int oa_configure_all_contexts(struct i915_perf_stream *stream,
|
||||
struct flex *regs,
|
||||
size_t num_regs)
|
||||
{
|
||||
struct drm_i915_private *i915 = stream->perf->i915;
|
||||
/* The MMIO offsets for Flex EU registers aren't contiguous */
|
||||
const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
|
||||
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
|
||||
struct flex regs[] = {
|
||||
{
|
||||
GEN8_R_PWR_CLK_STATE,
|
||||
CTX_R_PWR_CLK_STATE,
|
||||
},
|
||||
{
|
||||
IS_GEN(i915, 12) ?
|
||||
GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL,
|
||||
stream->perf->ctx_oactxctrl_offset + 1,
|
||||
},
|
||||
{ EU_PERF_CNTL0, ctx_flexeuN(0) },
|
||||
{ EU_PERF_CNTL1, ctx_flexeuN(1) },
|
||||
{ EU_PERF_CNTL2, ctx_flexeuN(2) },
|
||||
{ EU_PERF_CNTL3, ctx_flexeuN(3) },
|
||||
{ EU_PERF_CNTL4, ctx_flexeuN(4) },
|
||||
{ EU_PERF_CNTL5, ctx_flexeuN(5) },
|
||||
{ EU_PERF_CNTL6, ctx_flexeuN(6) },
|
||||
};
|
||||
#undef ctx_flexeuN
|
||||
struct intel_engine_cs *engine;
|
||||
struct i915_gem_context *ctx, *cn;
|
||||
size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs);
|
||||
int i, err;
|
||||
|
||||
if (IS_GEN(i915, 12)) {
|
||||
u32 format = stream->oa_buffer.format;
|
||||
|
||||
regs[1].value =
|
||||
(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
|
||||
(oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
|
||||
} else {
|
||||
regs[1].value =
|
||||
(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
|
||||
(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
|
||||
GEN8_OA_COUNTER_RESUME;
|
||||
}
|
||||
|
||||
for (i = 2; !!ctx_flexeu0 && i < array_size; i++)
|
||||
regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
|
||||
int err;
|
||||
|
||||
lockdep_assert_held(&stream->perf->lock);
|
||||
|
||||
@ -2357,7 +2329,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
|
||||
|
||||
spin_unlock(&i915->gem.contexts.lock);
|
||||
|
||||
err = gen8_configure_context(ctx, regs, array_size);
|
||||
err = gen8_configure_context(ctx, regs, num_regs);
|
||||
if (err) {
|
||||
i915_gem_context_put(ctx);
|
||||
return err;
|
||||
@ -2382,7 +2354,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
|
||||
|
||||
regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
|
||||
|
||||
err = gen8_modify_self(ce, regs, array_size);
|
||||
err = gen8_modify_self(ce, regs, num_regs);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
@ -2390,6 +2362,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
|
||||
const struct i915_oa_config *oa_config)
|
||||
{
|
||||
struct flex regs[] = {
|
||||
{
|
||||
GEN8_R_PWR_CLK_STATE,
|
||||
CTX_R_PWR_CLK_STATE,
|
||||
},
|
||||
};
|
||||
|
||||
return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
|
||||
}
|
||||
|
||||
static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
|
||||
const struct i915_oa_config *oa_config)
|
||||
{
|
||||
/* The MMIO offsets for Flex EU registers aren't contiguous */
|
||||
const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
|
||||
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
|
||||
struct flex regs[] = {
|
||||
{
|
||||
GEN8_R_PWR_CLK_STATE,
|
||||
CTX_R_PWR_CLK_STATE,
|
||||
},
|
||||
{
|
||||
GEN8_OACTXCONTROL,
|
||||
stream->perf->ctx_oactxctrl_offset + 1,
|
||||
},
|
||||
{ EU_PERF_CNTL0, ctx_flexeuN(0) },
|
||||
{ EU_PERF_CNTL1, ctx_flexeuN(1) },
|
||||
{ EU_PERF_CNTL2, ctx_flexeuN(2) },
|
||||
{ EU_PERF_CNTL3, ctx_flexeuN(3) },
|
||||
{ EU_PERF_CNTL4, ctx_flexeuN(4) },
|
||||
{ EU_PERF_CNTL5, ctx_flexeuN(5) },
|
||||
{ EU_PERF_CNTL6, ctx_flexeuN(6) },
|
||||
};
|
||||
#undef ctx_flexeuN
|
||||
int i;
|
||||
|
||||
regs[1].value =
|
||||
(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
|
||||
(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
|
||||
GEN8_OA_COUNTER_RESUME;
|
||||
|
||||
for (i = 2; i < ARRAY_SIZE(regs); i++)
|
||||
regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
|
||||
|
||||
return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
|
||||
}
|
||||
|
||||
static int gen8_enable_metric_set(struct i915_perf_stream *stream)
|
||||
{
|
||||
struct intel_uncore *uncore = stream->uncore;
|
||||
@ -2473,7 +2495,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
|
||||
* to make sure all slices/subslices are ON before writing to NOA
|
||||
* registers.
|
||||
*/
|
||||
ret = lrc_configure_all_contexts(stream, oa_config);
|
||||
ret = gen12_configure_all_contexts(stream, oa_config);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -2483,8 +2505,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
|
||||
* requested this.
|
||||
*/
|
||||
if (stream->ctx) {
|
||||
ret = gen12_emit_oar_config(stream->pinned_ctx,
|
||||
oa_config != NULL);
|
||||
ret = gen12_configure_oar_context(stream, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -2518,11 +2539,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
|
||||
struct intel_uncore *uncore = stream->uncore;
|
||||
|
||||
/* Reset all contexts' slices/subslices configurations. */
|
||||
lrc_configure_all_contexts(stream, NULL);
|
||||
gen12_configure_all_contexts(stream, NULL);
|
||||
|
||||
/* disable the context save/restore or OAR counters */
|
||||
if (stream->ctx)
|
||||
gen12_emit_oar_config(stream->pinned_ctx, false);
|
||||
gen12_configure_oar_context(stream, false);
|
||||
|
||||
/* Make sure we disable noa to save power. */
|
||||
intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
|
||||
@ -2864,7 +2885,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
|
||||
return;
|
||||
|
||||
stream = engine->i915->perf.exclusive_stream;
|
||||
if (stream)
|
||||
/*
|
||||
* For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
|
||||
* is already doing that, so nothing to be done for gen12 here.
|
||||
*/
|
||||
if (stream && INTEL_GEN(stream->perf->i915) < 12)
|
||||
gen8_update_reg_state_unlocked(ce, stream);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user