mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-27 14:14:24 +08:00
drm/xe/oa: Add OAR support
Add OAR support to allow userspace to execute MI_REPORT_PERF_COUNT on render engines. Configuration batches are used to program the OAR unit, as well as to modify the render engine context image of a specified exec queue (so that it has the correct register values when that context switches in). v2: Rename/refactor xe_oa_modify_self (Umesh) v3: Move IS_MI_LRI_CMD() into xe_oa.c (Michal) Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-11-ashutosh.dixit@intel.com
This commit is contained in:
parent
efb315d0a0
commit
2f4a730fcd
@ -45,6 +45,7 @@
|
||||
#define MI_LRI_MMIO_REMAP_EN REG_BIT(17)
|
||||
#define MI_LRI_NUM_REGS(x) XE_INSTR_NUM_DW(2 * (x) + 1)
|
||||
#define MI_LRI_FORCE_POSTED REG_BIT(12)
|
||||
#define MI_LRI_LEN(x) (((x) & 0xff) + 1)
|
||||
|
||||
#define MI_FLUSH_DW __MI_INSTR(0x26)
|
||||
#define MI_FLUSH_DW_STORE_INDEX REG_BIT(21)
|
||||
|
@ -129,6 +129,7 @@
|
||||
#define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4)
|
||||
|
||||
#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
|
||||
#define CTX_CTRL_OAC_CONTEXT_ENABLE REG_BIT(8)
|
||||
#define CTX_CTRL_INDIRECT_RING_STATE_ENABLE REG_BIT(4)
|
||||
#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3)
|
||||
#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0)
|
||||
|
@ -651,6 +651,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
|
||||
|
||||
/* Make the magic macros work */
|
||||
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
|
||||
#define __xe_lrc_regs_offset xe_lrc_regs_offset
|
||||
|
||||
#define LRC_SEQNO_PPHWSP_OFFSET 512
|
||||
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
|
||||
@ -658,6 +659,11 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
|
||||
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
|
||||
#define LRC_PPHWSP_SIZE SZ_4K
|
||||
|
||||
u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
|
||||
{
|
||||
return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
|
||||
}
|
||||
|
||||
static size_t lrc_reg_size(struct xe_device *xe)
|
||||
{
|
||||
if (GRAPHICS_VERx100(xe) >= 1250)
|
||||
@ -695,11 +701,6 @@ static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
|
||||
return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
|
||||
}
|
||||
|
||||
static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
|
||||
{
|
||||
return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
|
||||
}
|
||||
|
||||
static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
|
||||
{
|
||||
return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
|
||||
|
@ -52,6 +52,7 @@ static inline void xe_lrc_put(struct xe_lrc *lrc)
|
||||
|
||||
size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class);
|
||||
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc);
|
||||
u32 xe_lrc_regs_offset(struct xe_lrc *lrc);
|
||||
|
||||
void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail);
|
||||
u32 xe_lrc_ring_tail(struct xe_lrc *lrc);
|
||||
|
@ -13,7 +13,9 @@
|
||||
#include <drm/xe_drm.h>
|
||||
|
||||
#include "instructions/xe_mi_commands.h"
|
||||
#include "regs/xe_engine_regs.h"
|
||||
#include "regs/xe_gt_regs.h"
|
||||
#include "regs/xe_lrc_layout.h"
|
||||
#include "regs/xe_oa_regs.h"
|
||||
#include "xe_assert.h"
|
||||
#include "xe_bb.h"
|
||||
@ -24,6 +26,7 @@
|
||||
#include "xe_gt.h"
|
||||
#include "xe_gt_mcr.h"
|
||||
#include "xe_gt_printk.h"
|
||||
#include "xe_lrc.h"
|
||||
#include "xe_macros.h"
|
||||
#include "xe_mmio.h"
|
||||
#include "xe_oa.h"
|
||||
@ -58,6 +61,12 @@ struct xe_oa_config {
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
struct flex {
|
||||
struct xe_reg reg;
|
||||
u32 offset;
|
||||
u32 value;
|
||||
};
|
||||
|
||||
struct xe_oa_open_param {
|
||||
u32 oa_unit_id;
|
||||
bool sample;
|
||||
@ -598,6 +607,93 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream)
|
||||
free_oa_config_bo(oa_bo);
|
||||
}
|
||||
|
||||
/*
 * Append one MI_STORE_DATA_IMM command per @flex entry to @bb. Each command
 * writes flex[i].value to the GGTT address of @lrc's buffer object plus
 * flex[i].offset dwords.
 *
 * @stream: OA stream (not used by this helper; kept for interface stability)
 * @lrc: LRC whose buffer object is the store target
 * @bb: batch buffer being built; the caller must have reserved 4 dwords
 *	per entry
 * @flex: array of @count entries
 * @count: number of entries; 0 emits nothing
 */
static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
			     struct xe_bb *bb, const struct flex *flex, u32 count)
{
	u32 offset = xe_bo_ggtt_addr(lrc->bo);
	u32 i;

	/*
	 * A bounded for loop rather than do/while (--count): the latter runs
	 * the body once and then wraps the counter when count == 0, writing
	 * far beyond the space reserved in bb->cs.
	 */
	for (i = 0; i < count; i++) {
		bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2;
		bb->cs[bb->len++] = offset + flex[i].offset * sizeof(u32);
		bb->cs[bb->len++] = 0; /* upper address dword */
		bb->cs[bb->len++] = flex[i].value;
	}
}
|
||||
|
||||
/*
 * Build and submit a batch that stores each of the @count @flex values into
 * the context image backing @lrc.
 *
 * Return: 0 on success, the PTR_ERR() of a failed xe_bb_new(), or the error
 * returned by xe_oa_submit_bb().
 */
static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc,
				  const struct flex *flex, u32 count)
{
	struct xe_bb *batch;
	int ret;

	/* xe_oa_store_flex() emits 4 dwords per entry */
	batch = xe_bb_new(stream->gt, 4 * count, false);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	xe_oa_store_flex(stream, lrc, batch, flex, count);

	ret = xe_oa_submit_bb(stream, batch);
	xe_bb_free(batch, NULL);

	return ret;
}
|
||||
|
||||
static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
|
||||
{
|
||||
struct xe_bb *bb;
|
||||
int err;
|
||||
|
||||
bb = xe_bb_new(stream->gt, 3, false);
|
||||
if (IS_ERR(bb)) {
|
||||
err = PTR_ERR(bb);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
write_cs_mi_lri(bb, reg_lri, 1);
|
||||
|
||||
err = xe_oa_submit_bb(stream, bb);
|
||||
xe_bb_free(bb, NULL);
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Enable or disable OAR for the stream's exec queue.
 *
 * Two steps are performed:
 *  1. the context image of the exec queue's first LRC is patched so that
 *     OACTXCONTROL and RING_CONTEXT_CONTROL carry the requested state;
 *  2. OAR_OACONTROL is loaded with the format/enable bits through an LRI
 *     batch.
 *
 * @stream: OA stream; stream->exec_q must be non-NULL
 * @enable: true to enable OAR counters, false to disable them
 *
 * Return: 0 on success or a negative error code from either step.
 */
static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable)
{
	const struct xe_oa_format *format = stream->oa_buffer.format;
	struct xe_lrc *lrc = stream->exec_q->lrc[0];
	u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
	u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
		(enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);

	struct flex regs_context[] = {
		{
			OACTXCONTROL(stream->hwe->mmio_base),
			/*
			 * The cached offset is the dword holding the register
			 * address inside the LRI; its value is the next dword.
			 */
			stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1,
			enable ? OA_COUNTER_RESUME : 0,
		},
		{
			RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
			regs_offset + CTX_CONTEXT_CONTROL,
			_MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
				      enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0)
		},
	};
	struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol };
	int err;

	/* Modify stream hwe context image with regs_context */
	err = xe_oa_modify_ctx_image(stream, lrc,
				     regs_context, ARRAY_SIZE(regs_context));
	if (err)
		return err;

	/* Apply reg_lri using LRI */
	return xe_oa_load_with_lri(stream, &reg_lri);
}
|
||||
|
||||
#define HAS_OA_BPC_REPORTING(xe) (GRAPHICS_VERx100(xe) >= 1255)
|
||||
|
||||
static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
|
||||
@ -615,6 +711,10 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
|
||||
_MASKED_BIT_DISABLE(DISABLE_DOP_GATING));
|
||||
}
|
||||
|
||||
/* disable the context save/restore or OAR counters */
|
||||
if (stream->exec_q)
|
||||
xe_oa_configure_oar_context(stream, false);
|
||||
|
||||
/* Make sure we disable noa to save power. */
|
||||
xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0);
|
||||
|
||||
@ -743,6 +843,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream)
|
||||
static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
|
||||
{
|
||||
u32 oa_debug, sqcnt1;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Wa_1508761755:xehpsdv, dg2
|
||||
@ -780,6 +881,12 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
|
||||
|
||||
xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, 0, sqcnt1);
|
||||
|
||||
if (stream->exec_q) {
|
||||
ret = xe_oa_configure_oar_context(stream, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return xe_oa_emit_oa_config(stream);
|
||||
}
|
||||
|
||||
@ -949,6 +1056,81 @@ static const struct file_operations xe_oa_fops = {
|
||||
.unlocked_ioctl = xe_oa_ioctl,
|
||||
};
|
||||
|
||||
static bool engine_supports_mi_query(struct xe_hw_engine *hwe)
|
||||
{
|
||||
return hwe->class == XE_ENGINE_CLASS_RENDER ||
|
||||
hwe->class == XE_ENGINE_CLASS_COMPUTE;
|
||||
}
|
||||
|
||||
/*
 * Search the payload of the MI_LRI command whose header is at state[*offset]
 * for register address @reg, never looking at or beyond index @end.
 *
 * The payload is walked in steps of two dwords starting right after the
 * header, so only every other dword is compared against @reg.
 *
 * On return *offset is the index of the matching dword when found,
 * otherwise the index at which the walk stopped.
 *
 * Return: true if @reg was found, false otherwise.
 */
static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
{
	u32 header = *offset;
	u32 limit = min(MI_LRI_LEN(state[header]) + header, end);
	u32 pos;

	for (pos = header + 1; pos < limit; pos += 2) {
		if (state[pos] == reg) {
			*offset = pos;
			return true;
		}
	}

	*offset = pos;
	return false;
}
|
||||
|
||||
/* True when the MI_OPCODE field of dword @x matches that of MI_LOAD_REGISTER_IMM */
#define IS_MI_LRI_CMD(x) \
	(REG_FIELD_GET(MI_OPCODE, (x)) == \
	 REG_FIELD_GET(MI_OPCODE, MI_LOAD_REGISTER_IMM))
|
||||
|
||||
/*
 * Scan the CPU mapping of the stream's first LRC, from the register state
 * offset up to (LRC size + ring size), for an MI_LRI command that loads
 * register address @reg.
 *
 * Return: dword index of the matching register-address dword, or U32_MAX if
 * the LRC is not mapped or @reg is not found.
 */
static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg)
{
	struct xe_lrc *lrc = stream->exec_q->lrc[0];
	u32 end = (xe_gt_lrc_size(stream->gt, stream->hwe->class) +
		   lrc->ring.size) / sizeof(u32);
	u32 pos = xe_lrc_regs_offset(lrc) / sizeof(u32);
	u32 *state = (u32 *)lrc->bo->vmap.vaddr;

	if (drm_WARN_ON(&stream->oa->xe->drm, !state))
		return U32_MAX;

	while (pos < end) {
		if (!IS_MI_LRI_CMD(state[pos])) {
			pos++;
			continue;
		}

		/*
		 * We expect reg-value pairs in MI_LRI command, so
		 * MI_LRI_LEN() should be even
		 */
		drm_WARN_ON(&stream->oa->xe->drm,
			    MI_LRI_LEN(state[pos]) & 0x1);

		/* On a miss, pos has been moved past the scanned payload */
		if (xe_oa_find_reg_in_lri(state, reg, &pos, end))
			return pos;
	}

	return U32_MAX;
}
|
||||
|
||||
static int xe_oa_set_ctx_ctrl_offset(struct xe_oa_stream *stream)
|
||||
{
|
||||
struct xe_reg reg = OACTXCONTROL(stream->hwe->mmio_base);
|
||||
u32 offset = stream->oa->ctx_oactxctrl_offset[stream->hwe->class];
|
||||
|
||||
/* Do this only once. Failure is stored as offset of U32_MAX */
|
||||
if (offset)
|
||||
goto exit;
|
||||
|
||||
offset = xe_oa_context_image_offset(stream, reg.addr);
|
||||
stream->oa->ctx_oactxctrl_offset[stream->hwe->class] = offset;
|
||||
|
||||
drm_dbg(&stream->oa->xe->drm, "%s oa ctx control at 0x%08x dword offset\n",
|
||||
stream->hwe->name, offset);
|
||||
exit:
|
||||
return offset && offset != U32_MAX ? 0 : -ENODEV;
|
||||
}
|
||||
|
||||
static int xe_oa_stream_init(struct xe_oa_stream *stream,
|
||||
struct xe_oa_open_param *param)
|
||||
{
|
||||
@ -966,6 +1148,17 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
|
||||
stream->periodic = param->period_exponent > 0;
|
||||
stream->period_exponent = param->period_exponent;
|
||||
|
||||
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
|
||||
/* If we don't find the context offset, just return error */
|
||||
ret = xe_oa_set_ctx_ctrl_offset(stream);
|
||||
if (ret) {
|
||||
drm_err(&stream->oa->xe->drm,
|
||||
"xe_oa_set_ctx_ctrl_offset failed for %s\n",
|
||||
stream->hwe->name);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set);
|
||||
if (!stream->oa_config) {
|
||||
drm_dbg(&stream->oa->xe->drm, "Invalid OA config id=%i\n", param->metric_set);
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
#include <drm/xe_drm.h>
|
||||
#include "regs/xe_reg_defs.h"
|
||||
#include "xe_hw_engine_types.h"
|
||||
|
||||
#define XE_OA_BUFFER_SIZE SZ_16M
|
||||
|
||||
@ -137,6 +138,9 @@ struct xe_oa {
|
||||
/** @metrics_idr: List of dynamic configurations (struct xe_oa_config) */
|
||||
struct idr metrics_idr;
|
||||
|
||||
/** @ctx_oactxctrl_offset: offset of OACTXCONTROL register in context image */
|
||||
u32 ctx_oactxctrl_offset[XE_ENGINE_CLASS_MAX];
|
||||
|
||||
/** @oa_formats: tracks all OA formats across platforms */
|
||||
const struct xe_oa_format *oa_formats;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user