drm next for 5.19-rc1 (part 2/fixes)
Merge tag 'drm-next-2022-06-03-1' of git://anongit.freedesktop.org/drm/drm

Pull more drm updates from Dave Airlie:
 "This is mostly regular fixes, msm and amdgpu. There is a tegra patch
  that is a bit of prep work for a 5.20 feature to avoid some inter-tree
  syncs, and a couple of late-addition amdgpu uAPI changes; it is best
  to get those in early, and the userspace pieces are ready.

  msm:
   - Limit WB modes to the max sspp linewidth
   - Fix the supported rotations to add 180 back for IGT
   - Handle pm_runtime_get_sync() errors to avoid unclocked access in
     the bind() path for the dpu driver
   - Fix the irq_free()-without-request issue which was a big-time
     hitter in the CI runs

  amdgpu:
   - Update fdinfo to the common drm format
   - uapi:
      - Add VM_NOALLOC GPUVM attribute to prevent buffers from going
        into the MALL
      - Add AMDGPU_GEM_CREATE_DISCARDABLE flag to create buffers that
        can be discarded on eviction
      - Mesa code which uses these:
        https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16466
   - Link training fixes
   - DPIA fixes
   - Misc code cleanups
   - Aux fixes
   - Hotplug fixes
   - More FP clean up
   - Misc GFX9/10 fixes
   - Fix a possible memory leak in SMU shutdown
   - SMU 13 updates
   - RAS fixes
   - TMZ fixes
   - GC 11 updates
   - SMU 11 metrics fixes
   - Fix coverage blend mode for overlay plane
   - Note DDR vs LPDDR memory
   - Fuzz fix for the CS IOCTL
   - Add a new PCI DID

  amdkfd:
   - Clean up hive setup
   - Misc fixes

  tegra:
   - Add some preliminary 5.20 work to avoid an inter-tree mess"

* tag 'drm-next-2022-06-03-1' of git://anongit.freedesktop.org/drm/drm: (57 commits)
  drm/msm/dpu: Move min BW request and full BW disable back to mdss
  drm/msm/dpu: Fix pointer dereferenced before checking
  drm/msm/dpu: Remove unused code
  drm/msm/disp/dpu1: remove superfluous init
  drm/msm/dp: Always clear mask bits to disable interrupts at dp_ctrl_reset_irq_ctrl()
  gpu: host1x: Add context bus
  drm/amdgpu: add drm-client-id to fdinfo v2
  drm/amdgpu: Convert to common fdinfo format v5
  drm/amdgpu: bump minor version number
  drm/amdgpu: add AMDGPU_VM_NOALLOC v2
  drm/amdgpu: add AMDGPU_GEM_CREATE_DISCARDABLE
  drm/amdgpu: add beige goby PCI ID
  drm/amd/pm: Return auto perf level, if unsupported
  drm/amdkfd: fix typo in comment
  drm/amdgpu/gfx: fix typos in comments
  drm/amdgpu/cs: make commands with 0 chunks illegal behaviour.
  drm/amdgpu: differentiate between LP and non-LP DDR memory
  drm/amdgpu: Resolve pcie_bif RAS recovery bug
  drm/amdgpu: clean up asd on the ta_firmware_header_v2_0
  drm/amdgpu/discovery: validate VCN and SDMA instances
  ...
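For the two new uAPI flags called out above, here is a rough userspace sketch (not taken from the Mesa MR; the helper name and values are hypothetical, error handling elided) of how a client might allocate a discardable VRAM buffer and map it with NOALLOC so it bypasses the MALL:

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <drm/amdgpu_drm.h>   /* include path depends on the build setup */

	/* Hypothetical helper: create a VRAM BO the kernel may discard on
	 * eviction, then GPUVM-map it with NOALLOC. */
	static int create_discardable_bo(int fd, uint64_t size, uint64_t gpu_va)
	{
		union drm_amdgpu_gem_create create;
		struct drm_amdgpu_gem_va va;

		memset(&create, 0, sizeof(create));
		create.in.bo_size = size;
		create.in.alignment = 4096;
		create.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
		create.in.domain_flags = AMDGPU_GEM_CREATE_DISCARDABLE;
		if (ioctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &create))
			return -1;

		memset(&va, 0, sizeof(va));
		va.handle = create.out.handle;
		va.operation = AMDGPU_VA_OP_MAP;
		va.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
			   AMDGPU_VM_NOALLOC;
		va.va_address = gpu_va;
		va.map_size = size;
		return ioctl(fd, DRM_IOCTL_AMDGPU_GEM_VA, &va);
	}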
commit ab18b7b36a
@@ -2,7 +2,6 @@
 # drm/tegra depends on host1x, so if both drivers are built-in care must be
 # taken to initialize them in the correct order. Link order is the only way
 # to ensure this currently.
-obj-$(CONFIG_TEGRA_HOST1X) += host1x/
-obj-y += drm/ vga/
+obj-y += host1x/ drm/ vga/
 obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/
 obj-$(CONFIG_TRACE_GPU_MEM) += trace/
@@ -1621,7 +1621,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(

	mutex_lock(&mem->lock);

-	/* Unpin MMIO/DOORBELL BO's that were pinnned during allocation */
+	/* Unpin MMIO/DOORBELL BO's that were pinned during allocation */
	if (mem->alloc_flags &
	    (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
	     KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
@@ -188,13 +188,17 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
		vram_type = AMDGPU_VRAM_TYPE_DDR3;
		break;
	case Ddr4MemType:
-	case LpDdr4MemType:
		vram_type = AMDGPU_VRAM_TYPE_DDR4;
		break;
+	case LpDdr4MemType:
+		vram_type = AMDGPU_VRAM_TYPE_LPDDR4;
+		break;
	case Ddr5MemType:
-	case LpDdr5MemType:
		vram_type = AMDGPU_VRAM_TYPE_DDR5;
		break;
+	case LpDdr5MemType:
+		vram_type = AMDGPU_VRAM_TYPE_LPDDR5;
+		break;
	default:
		vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
		break;
@@ -116,7 +116,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
	int ret;

	if (cs->in.num_chunks == 0)
-		return 0;
+		return -EINVAL;

	chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (!chunk_array)
@@ -1252,7 +1252,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,

	p->fence = dma_fence_get(&job->base.s_fence->finished);

-	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
+	seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence);
	amdgpu_cs_post_dependencies(p);

	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
@@ -135,9 +135,9 @@ static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_

 static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
 {
-	struct amdgpu_device *adev = ctx->adev;
-	int32_t ctx_prio;
+	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
+	int32_t ctx_prio;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
		ctx->init_priority : ctx->override_priority;
@@ -162,17 +162,50 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
	return hw_prio;
 }

+/* Calculate the time spend on the hw */
+static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
+{
+	struct drm_sched_fence *s_fence;
+
+	if (!fence)
+		return ns_to_ktime(0);
+
+	/* When the fence is not even scheduled it can't have spend time */
+	s_fence = to_drm_sched_fence(fence);
+	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
+		return ns_to_ktime(0);
+
+	/* When it is still running account how much already spend */
+	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
+		return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);
+
+	return ktime_sub(s_fence->finished.timestamp,
+			 s_fence->scheduled.timestamp);
+}
+
+static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
+				      struct amdgpu_ctx_entity *centity)
+{
+	ktime_t res = ns_to_ktime(0);
+	uint32_t i;
+
+	spin_lock(&ctx->ring_lock);
+	for (i = 0; i < amdgpu_sched_jobs; i++) {
+		res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
+	}
+	spin_unlock(&ctx->ring_lock);
+	return res;
+}
+
 static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
				  const u32 ring)
 {
-	struct amdgpu_device *adev = ctx->adev;
-	struct amdgpu_ctx_entity *entity;
	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
-	unsigned num_scheds = 0;
-	int32_t ctx_prio;
-	unsigned int hw_prio;
+	struct amdgpu_device *adev = ctx->mgr->adev;
+	struct amdgpu_ctx_entity *entity;
	enum drm_sched_priority drm_prio;
+	unsigned int hw_prio, num_scheds;
+	int32_t ctx_prio;
	int r;

	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
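The accounting introduced above credits a fence with finished-minus-scheduled time, or now-minus-scheduled while it is still running. A toy standalone model of that rule (illustrative only, not kernel code; all names here are made up):

	#include <stdbool.h>

	/* Toy model of the amdgpu_ctx_fence_time() rule above. */
	struct toy_fence {
		bool has_scheduled;          /* scheduled timestamp recorded */
		bool has_finished;           /* finished timestamp recorded */
		long long scheduled_ns;
		long long finished_ns;
	};

	static long long toy_fence_busy_ns(const struct toy_fence *f, long long now_ns)
	{
		if (!f || !f->has_scheduled)
			return 0;                         /* never ran: no time spent */
		if (!f->has_finished)
			return now_ns - f->scheduled_ns;  /* still running: partial credit */
		return f->finished_ns - f->scheduled_ns;  /* retired: full interval */
	}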
@@ -182,6 +215,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
		ctx->init_priority : ctx->override_priority;
+	entity->hw_ip = hw_ip;
	entity->sequence = 1;
	hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
	drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
@@ -220,10 +254,25 @@ error_free_entity:
	return r;
 }

-static int amdgpu_ctx_init(struct amdgpu_device *adev,
-			   int32_t priority,
-			   struct drm_file *filp,
-			   struct amdgpu_ctx *ctx)
+static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
 {
+	ktime_t res = ns_to_ktime(0);
+	int i;
+
+	if (!entity)
+		return res;
+
+	for (i = 0; i < amdgpu_sched_jobs; ++i) {
+		res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
+		dma_fence_put(entity->fences[i]);
+	}
+
+	kfree(entity);
+	return res;
+}
+
+static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
+			   struct drm_file *filp, struct amdgpu_ctx *ctx)
+{
	int r;

@@ -233,15 +282,14 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,

	memset(ctx, 0, sizeof(*ctx));

-	ctx->adev = adev;
-
	kref_init(&ctx->refcount);
+	ctx->mgr = mgr;
	spin_lock_init(&ctx->ring_lock);
	mutex_init(&ctx->lock);

-	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+	ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
-	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+	ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
	ctx->init_priority = priority;
	ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
	ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
@@ -249,24 +297,10 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
	return 0;
 }

-static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
-{
-
-	int i;
-
-	if (!entity)
-		return;
-
-	for (i = 0; i < amdgpu_sched_jobs; ++i)
-		dma_fence_put(entity->fences[i]);
-
-	kfree(entity);
-}
-
 static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
					u32 *stable_pstate)
 {
-	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level current_level;

	current_level = amdgpu_dpm_get_performance_level(adev);
@@ -294,7 +328,7 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
 static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
					u32 stable_pstate)
 {
-	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level level;
	u32 current_stable_pstate;
	int r;
@@ -345,7 +379,8 @@ done:
 static void amdgpu_ctx_fini(struct kref *ref)
 {
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_ctx_mgr *mgr = ctx->mgr;
+	struct amdgpu_device *adev = mgr->adev;
	unsigned i, j, idx;

	if (!adev)
@@ -353,8 +388,10 @@ static void amdgpu_ctx_fini(struct kref *ref)

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
-			amdgpu_ctx_fini_entity(ctx->entities[i][j]);
-			ctx->entities[i][j] = NULL;
+			ktime_t spend;
+
+			spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+			atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
		}
	}

@@ -421,7 +458,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
	}

	*id = (uint32_t)r;
-	r = amdgpu_ctx_init(adev, priority, filp, ctx);
+	r = amdgpu_ctx_init(mgr, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
@@ -671,9 +708,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
	return 0;
 }

-void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
-			  struct drm_sched_entity *entity,
-			  struct dma_fence *fence, uint64_t *handle)
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+			      struct drm_sched_entity *entity,
+			      struct dma_fence *fence)
 {
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
@@ -682,8 +719,7 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
-	if (other)
-		BUG_ON(!dma_fence_is_signaled(other));
+	WARN_ON(other && !dma_fence_is_signaled(other));

	dma_fence_get(fence);

@@ -692,9 +728,11 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

+	atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
+		     &ctx->mgr->time_spend[centity->hw_ip]);
+
	dma_fence_put(other);
-	if (handle)
-		*handle = seq;
+	return seq;
 }

 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
@@ -731,7 +769,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
					    int hw_ip,
					    int32_t priority)
 {
-	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	struct drm_gpu_scheduler **scheds = NULL;
	unsigned num_scheds;
@@ -796,10 +834,17 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
	return r;
 }

-void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
+			 struct amdgpu_device *adev)
 {
+	unsigned int i;
+
+	mgr->adev = adev;
	mutex_init(&mgr->lock);
	idr_init(&mgr->ctx_handles);
+
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+		atomic64_set(&mgr->time_spend[i], 0);
 }

 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
@@ -875,80 +920,38 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
	mutex_destroy(&mgr->lock);
 }

-static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
-		struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max)
+void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
+			  ktime_t usage[AMDGPU_HW_IP_NUM])
 {
-	ktime_t now, t1;
-	uint32_t i;
-
-	*total = *max = 0;
-
-	now = ktime_get();
-	for (i = 0; i < amdgpu_sched_jobs; i++) {
-		struct dma_fence *fence;
-		struct drm_sched_fence *s_fence;
-
-		spin_lock(&ctx->ring_lock);
-		fence = dma_fence_get(centity->fences[i]);
-		spin_unlock(&ctx->ring_lock);
-		if (!fence)
-			continue;
-		s_fence = to_drm_sched_fence(fence);
-		if (!dma_fence_is_signaled(&s_fence->scheduled)) {
-			dma_fence_put(fence);
-			continue;
-		}
-		t1 = s_fence->scheduled.timestamp;
-		if (!ktime_before(t1, now)) {
-			dma_fence_put(fence);
-			continue;
-		}
-		if (dma_fence_is_signaled(&s_fence->finished) &&
-			s_fence->finished.timestamp < now)
-			*total += ktime_sub(s_fence->finished.timestamp, t1);
-		else
-			*total += ktime_sub(now, t1);
-		t1 = ktime_sub(now, t1);
-		dma_fence_put(fence);
-		*max = max(t1, *max);
-	}
-}
-
-ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
-		uint32_t idx, uint64_t *elapsed)
-{
-	struct idr *idp;
	struct amdgpu_ctx *ctx;
+	unsigned int hw_ip, i;
	uint32_t id;
-	struct amdgpu_ctx_entity *centity;
-	ktime_t total = 0, max = 0;

-	if (idx >= AMDGPU_MAX_ENTITY_NUM)
-		return 0;
-	idp = &mgr->ctx_handles;
+	/*
+	 * This is a little bit racy because it can be that a ctx or a fence are
+	 * destroyed just in the moment we try to account them. But that is ok
+	 * since exactly that case is explicitely allowed by the interface.
+	 */
	mutex_lock(&mgr->lock);
-	idr_for_each_entry(idp, ctx, id) {
-		ktime_t ttotal, tmax;
+	for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+		uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);

-		if (!ctx->entities[hwip][idx])
-			continue;
-
-		centity = ctx->entities[hwip][idx];
-		amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
-
-		/* Harmonic mean approximation diverges for very small
-		 * values. If ratio < 0.01% ignore
-		 */
-		if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
-			continue;
-
-		total = ktime_add(total, ttotal);
-		max = ktime_after(tmax, max) ? tmax : max;
+		usage[hw_ip] = ns_to_ktime(ns);
	}

-	mutex_unlock(&mgr->lock);
-	if (elapsed)
-		*elapsed = max;
+	idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
+		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+			for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
+				struct amdgpu_ctx_entity *centity;
+				ktime_t spend;

-	return total;
+				centity = ctx->entities[hw_ip][i];
+				if (!centity)
+					continue;
+				spend = amdgpu_ctx_entity_time(ctx, centity);
+				usage[hw_ip] = ktime_add(usage[hw_ip], spend);
+			}
+		}
+	}
+	mutex_unlock(&mgr->lock);
 }
@@ -23,16 +23,20 @@
 #ifndef __AMDGPU_CTX_H__
 #define __AMDGPU_CTX_H__

+#include <linux/ktime.h>
+#include <linux/types.h>
+
 #include "amdgpu_ring.h"

 struct drm_device;
 struct drm_file;
 struct amdgpu_fpriv;
+struct amdgpu_ctx_mgr;

 #define AMDGPU_MAX_ENTITY_NUM 4
-#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) ((max) > 16384ULL*(total))

 struct amdgpu_ctx_entity {
+	uint32_t		hw_ip;
	uint64_t		sequence;
	struct drm_sched_entity	entity;
	struct dma_fence	*fences[];
@@ -40,7 +44,7 @@ struct amdgpu_ctx_entity {

 struct amdgpu_ctx {
	struct kref			refcount;
-	struct amdgpu_device		*adev;
+	struct amdgpu_ctx_mgr		*mgr;
	unsigned			reset_counter;
	unsigned			reset_counter_query;
	uint32_t			vram_lost_counter;
@@ -61,6 +65,7 @@ struct amdgpu_ctx_mgr {
	struct mutex		lock;
	/* protected by lock */
	struct idr		ctx_handles;
+	atomic64_t		time_spend[AMDGPU_HW_IP_NUM];
 };

 extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
@@ -70,9 +75,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx);

 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
			  u32 ring, struct drm_sched_entity **entity);
-void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
-			  struct drm_sched_entity *entity,
-			  struct dma_fence *fence, uint64_t *seq);
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+			      struct drm_sched_entity *entity,
+			      struct dma_fence *fence);
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq);
@@ -85,10 +90,12 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
			       struct drm_sched_entity *entity);

-void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
+			 struct amdgpu_device *adev);
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
-ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
-				   uint32_t idx, uint64_t *elapsed);
+void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
+			  ktime_t usage[AMDGPU_HW_IP_NUM]);

 #endif
@@ -1556,9 +1556,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

-	amdgpu_gmc_tmz_set(adev);
-
-
	return 0;
 }

@@ -3701,6 +3698,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
	if (r)
		return r;

+	/* Enable TMZ based on IP_VERSION */
+	amdgpu_gmc_tmz_set(adev);
+
	amdgpu_gmc_noretry_set(adev);
	/* Need to get xgmi info early to decide the reset behavior*/
	if (adev->gmc.xgmi.supported) {
@@ -5219,6 +5219,10 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
		r = amdgpu_device_reset_sriov(adev, job ? false : true);
		if (r)
			adev->asic_reset_res = r;
+
+		/* Aldebaran supports ras in SRIOV, so need resume ras during reset */
+		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+			amdgpu_ras_resume(adev);
	} else {
		r = amdgpu_do_asic_reset(device_list_handle, &reset_context);
		if (r && r == -EAGAIN)
@@ -1130,13 +1130,24 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
				adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
					ip->revision & 0xc0;
				ip->revision &= ~0xc0;
-				adev->vcn.num_vcn_inst++;
+				if (adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES)
+					adev->vcn.num_vcn_inst++;
+				else
+					dev_err(adev->dev, "Too many VCN instances: %d vs %d\n",
+						adev->vcn.num_vcn_inst + 1,
+						AMDGPU_MAX_VCN_INSTANCES);
			}
			if (le16_to_cpu(ip->hw_id) == SDMA0_HWID ||
			    le16_to_cpu(ip->hw_id) == SDMA1_HWID ||
			    le16_to_cpu(ip->hw_id) == SDMA2_HWID ||
-			    le16_to_cpu(ip->hw_id) == SDMA3_HWID)
-				adev->sdma.num_instances++;
+			    le16_to_cpu(ip->hw_id) == SDMA3_HWID) {
+				if (adev->sdma.num_instances < AMDGPU_MAX_SDMA_INSTANCES)
+					adev->sdma.num_instances++;
+				else
+					dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n",
+						adev->sdma.num_instances + 1,
+						AMDGPU_MAX_SDMA_INSTANCES);
+			}

			if (le16_to_cpu(ip->hw_id) == UMC_HWID)
				adev->gmc.num_umc++;
@@ -1361,7 +1372,7 @@ union mall_info {
	struct mall_info_v1_0 v1;
 };

-int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
+static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
 {
	struct binary_header *bhdr;
	union mall_info *mall_info;
@@ -99,10 +99,11 @@
 * - 3.43.0 - Add device hot plug/unplug support
 * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B
 * - 3.45.0 - Add context ioctl stable pstate interface
- * * 3.46.0 - To enable hot plug amdgpu tests in libdrm
+ * - 3.46.0 - To enable hot plug amdgpu tests in libdrm
+ * * 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags
 */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	46
+#define KMS_DRIVER_MINOR	47
 #define KMS_DRIVER_PATCHLEVEL	0

 int amdgpu_vram_limit;
@@ -1940,6 +1941,7 @@ static const struct pci_device_id pciidlist[] = {
	{0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
	{0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
	{0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+	{0x1002, 0x7424, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
	{0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},

	{ PCI_DEVICE(0x1002, PCI_ANY_ID),
@@ -32,6 +32,7 @@

 #include <drm/amdgpu_drm.h>
 #include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>

 #include "amdgpu.h"
 #include "amdgpu_vm.h"
@@ -54,58 +55,49 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {

 void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
 {
-	struct amdgpu_fpriv *fpriv;
-	uint32_t bus, dev, fn, i, domain;
-	uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
	struct drm_file *file = f->private_data;
	struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
-	struct amdgpu_bo *root;
+	struct amdgpu_fpriv *fpriv = file->driver_priv;
+	struct amdgpu_vm *vm = &fpriv->vm;
+
+	uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
+	ktime_t usage[AMDGPU_HW_IP_NUM];
+	uint32_t bus, dev, fn, domain;
+	unsigned int hw_ip;
	int ret;

-	ret = amdgpu_file_to_fpriv(f, &fpriv);
-	if (ret)
-		return;
	bus = adev->pdev->bus->number;
	domain = pci_domain_nr(adev->pdev->bus);
	dev = PCI_SLOT(adev->pdev->devfn);
	fn = PCI_FUNC(adev->pdev->devfn);

-	root = amdgpu_bo_ref(fpriv->vm.root.bo);
-	if (!root)
+	ret = amdgpu_bo_reserve(vm->root.bo, false);
+	if (ret)
		return;

-	ret = amdgpu_bo_reserve(root, false);
-	if (ret) {
-		DRM_ERROR("Fail to reserve bo\n");
-		return;
-	}
-	amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, &gtt_mem, &cpu_mem);
-	amdgpu_bo_unreserve(root);
-	amdgpu_bo_unref(&root);
+	amdgpu_vm_get_memory(vm, &vram_mem, &gtt_mem, &cpu_mem);
+	amdgpu_bo_unreserve(vm->root.bo);

-	seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus,
-			dev, fn, fpriv->vm.pasid);
-	seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL);
-	seq_printf(m, "gtt mem:\t%llu kB\n", gtt_mem/1024UL);
-	seq_printf(m, "cpu mem:\t%llu kB\n", cpu_mem/1024UL);
-	for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
-		uint32_t count = amdgpu_ctx_num_entities[i];
-		int idx = 0;
-		uint64_t total = 0, min = 0;
-		uint32_t perc, frac;
+	amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);

-		for (idx = 0; idx < count; idx++) {
-			total = amdgpu_ctx_mgr_fence_usage(&fpriv->ctx_mgr,
-				i, idx, &min);
-			if ((total == 0) || (min == 0))
-				continue;
+	/*
+	 * ******************************************************************
+	 * For text output format description please see drm-usage-stats.rst!
+	 * ******************************************************************
+	 */

-			perc = div64_u64(10000 * total, min);
-			frac = perc % 100;
+	seq_printf(m, "pasid:\t%u\n", fpriv->vm.pasid);
+	seq_printf(m, "drm-driver:\t%s\n", file->minor->dev->driver->name);
+	seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn);
+	seq_printf(m, "drm-client-id:\t%Lu\n", vm->immediate.fence_context);
+	seq_printf(m, "drm-memory-vram:\t%llu KiB\n", vram_mem/1024UL);
+	seq_printf(m, "drm-memory-gtt: \t%llu KiB\n", gtt_mem/1024UL);
+	seq_printf(m, "drm-memory-cpu: \t%llu KiB\n", cpu_mem/1024UL);
+	for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+		if (!usage[hw_ip])
+			continue;

-			seq_printf(m, "%s%d:\t%d.%d%%\n",
-					amdgpu_ip_name[i],
-					idx, perc/100, frac);
-		}
+		seq_printf(m, "drm-engine-%s:\t%Ld ns\n", amdgpu_ip_name[hw_ip],
+			   ktime_to_ns(usage[hw_ip]));
	}
 }
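With the hunk above, the per-client fdinfo text switches to the common keys from Documentation/gpu/drm-usage-stats.rst. Reading /proc/<pid>/fdinfo/<fd> for an amdgpu render node would then produce output roughly like the following (all values here are invented for illustration):

	pasid:	32770
	drm-driver:	amdgpu
	drm-pdev:	0000:0b:00.0
	drm-client-id:	42
	drm-memory-vram:	524288 KiB
	drm-memory-gtt: 	8192 KiB
	drm-memory-cpu: 	4096 KiB
	drm-engine-gfx:	1234567890 ns
	drm-engine-compute:	98765 ns

Tools such as gputop can aggregate the drm-engine-* counters across samples to derive per-engine utilisation percentages.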
@@ -296,8 +296,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
		      AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
		      AMDGPU_GEM_CREATE_EXPLICIT_SYNC |
-		      AMDGPU_GEM_CREATE_ENCRYPTED))
-
+		      AMDGPU_GEM_CREATE_ENCRYPTED |
+		      AMDGPU_GEM_CREATE_DISCARDABLE))
		return -EINVAL;

	/* reject invalid gem domains */
@@ -645,6 +645,8 @@ uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
		pte_flag |= AMDGPU_PTE_WRITEABLE;
	if (flags & AMDGPU_VM_PAGE_PRT)
		pte_flag |= AMDGPU_PTE_PRT;
+	if (flags & AMDGPU_VM_PAGE_NOALLOC)
+		pte_flag |= AMDGPU_PTE_NOALLOC;

	if (adev->gmc.gmc_funcs->map_mtype)
		pte_flag |= amdgpu_gmc_map_mtype(adev,
@@ -658,7 +660,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 {
	const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE |
		AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
-		AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK;
+		AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK |
+		AMDGPU_VM_PAGE_NOALLOC;
	const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE |
		AMDGPU_VM_PAGE_PRT;

@@ -512,9 +512,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
 */
 void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
 {
-	switch (adev->asic_type) {
-	case CHIP_RAVEN:
-	case CHIP_RENOIR:
+	switch (adev->ip_versions[GC_HWIP][0]) {
+	/* RAVEN */
+	case IP_VERSION(9, 2, 2):
+	case IP_VERSION(9, 1, 0):
+	/* RENOIR looks like RAVEN */
+	case IP_VERSION(9, 3, 0):
		if (amdgpu_tmz == 0) {
			adev->gmc.tmz_enabled = false;
			dev_info(adev->dev,
@@ -525,12 +528,20 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
				 "Trusted Memory Zone (TMZ) feature enabled\n");
		}
		break;
-	case CHIP_NAVI10:
-	case CHIP_NAVI14:
-	case CHIP_NAVI12:
-	case CHIP_VANGOGH:
-	case CHIP_YELLOW_CARP:
-	case CHIP_IP_DISCOVERY:
+	case IP_VERSION(10, 1, 10):
+	case IP_VERSION(10, 1, 1):
+	case IP_VERSION(10, 1, 2):
+	case IP_VERSION(10, 1, 3):
+	case IP_VERSION(10, 3, 0):
+	case IP_VERSION(10, 3, 2):
+	case IP_VERSION(10, 3, 4):
+	case IP_VERSION(10, 3, 5):
+	/* VANGOGH */
+	case IP_VERSION(10, 3, 1):
+	/* YELLOW_CARP*/
+	case IP_VERSION(10, 3, 3):
+	/* GC 10.3.7 */
+	case IP_VERSION(10, 3, 7):
		/* Don't enable it by default yet.
		 */
		if (amdgpu_tmz < 1) {
@@ -1152,7 +1152,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
	mutex_init(&fpriv->bo_list_lock);
	idr_init(&fpriv->bo_list_handles);

-	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
+	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);

	file_priv->driver_priv = fpriv;
	goto out_suspend;
@@ -567,6 +567,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
		bp->domain;
	bo->allowed_domains = bo->preferred_domains;
	if (bp->type != ttm_bo_type_kernel &&
+	    !(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE) &&
	    bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;

@@ -1018,7 +1019,9 @@ static const char *amdgpu_vram_names[] = {
	"DDR3",
	"DDR4",
	"GDDR6",
-	"DDR5"
+	"DDR5",
+	"LPDDR4",
+	"LPDDR5"
 };

 /**
@@ -41,7 +41,6 @@

 /* BO flag to indicate a KFD userptr BO */
 #define AMDGPU_AMDKFD_CREATE_USERPTR_BO	(1ULL << 63)
-#define AMDGPU_AMDKFD_CREATE_SVM_BO	(1ULL << 62)

 #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
 #define to_amdgpu_bo_vm(abo) container_of((abo), struct amdgpu_bo_vm, bo)
@@ -1177,7 +1177,7 @@ int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool lo
	psp->xgmi_context.context.mem_context.shared_mem_size = PSP_XGMI_SHARED_MEM_SIZE;
	psp->xgmi_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;

-	if (!psp->xgmi_context.context.initialized) {
+	if (!psp->xgmi_context.context.mem_context.shared_buf) {
		ret = psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context);
		if (ret)
			return ret;
@@ -726,7 +726,9 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
	/* Do not enable if it is not allowed. */
	WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));

-	if (!amdgpu_ras_intr_triggered()) {
+	/* Only enable ras feature operation handle on host side */
+	if (!amdgpu_sriov_vf(adev) &&
+	    !amdgpu_ras_intr_triggered()) {
		ret = psp_ras_enable_features(&adev->psp, info, enable);
		if (ret) {
			dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
@@ -1523,7 +1525,9 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
 */
 void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
 {
-	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))
+	/* Fatal error events are handled on host side */
+	if (amdgpu_sriov_vf(adev) ||
+	    !amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))
		return;

	if (adev->nbio.ras &&
@@ -2270,10 +2274,14 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
 {
	adev->ras_hw_enabled = adev->ras_enabled = 0;

-	if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
+	if (!adev->is_atom_fw ||
	    !amdgpu_ras_asic_supported(adev))
		return;

+	if (!(amdgpu_sriov_vf(adev) &&
+	      (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2))))
+		return;
+
	if (!adev->gmc.xgmi.connected_to_cpu) {
		if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
			dev_info(adev->dev, "MEM ECC is active.\n");
@@ -2285,15 +2293,21 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)

		if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
			dev_info(adev->dev, "SRAM ECC is active.\n");
-			adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
-						  1 << AMDGPU_RAS_BLOCK__DF);
-
-			if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0))
-				adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
-							1 << AMDGPU_RAS_BLOCK__JPEG);
-			else
-				adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
-							1 << AMDGPU_RAS_BLOCK__JPEG);
+			if (!amdgpu_sriov_vf(adev)) {
+				adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+							  1 << AMDGPU_RAS_BLOCK__DF);
+
+				if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0))
+					adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
+							1 << AMDGPU_RAS_BLOCK__JPEG);
+				else
+					adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
+							1 << AMDGPU_RAS_BLOCK__JPEG);
+			} else {
+				adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
+							1 << AMDGPU_RAS_BLOCK__SDMA |
+							1 << AMDGPU_RAS_BLOCK__GFX);
+			}
		} else {
			dev_info(adev->dev, "SRAM ECC is not presented.\n");
		}
@@ -2637,6 +2651,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
	struct amdgpu_ras_block_object *obj;
	int r;

+	/* Guest side doesn't need init ras feature */
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
	list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
		if (!node->ras_obj) {
			dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
@@ -124,6 +124,10 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
		struct amdgpu_iv_entry *entry)
 {
	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);

+	if (amdgpu_sriov_vf(adev))
+		return AMDGPU_RAS_SUCCESS;
+
	amdgpu_ras_reset_gpu(adev);

	return AMDGPU_RAS_SUCCESS;
@@ -117,7 +117,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
	}

	abo = ttm_to_amdgpu_bo(bo);
-	if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
+	if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) {
		placement->num_placement = 0;
		placement->num_busy_placement = 0;
		return;
@@ -5111,7 +5111,7 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
-	   acccess. These should be enabled by FW for target VMIDs. */
+	   access. These should be enabled by FW for target VMIDs. */
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
@@ -6898,7 +6898,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(prop->queue_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
-			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
 #ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
 #endif
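The same one-line fix recurs below for gfx8, gfx9, and gfx11. The reason dropping "<< 8" is correct: REG_SET_FIELD() already shifts the supplied value into the field's bit position (RPTR_BLOCK_SIZE starts at bit 8 of CP_HQD_PQ_CONTROL), so pre-shifting by 8 applied the shift twice. A simplified sketch of the macro's behaviour (the real definition derives mask and shift from generated register headers; the field width used here is an assumption):

	/* Simplified model of the kernel's REG_SET_FIELD() idiom. */
	#define FIELD_SHIFT 8                      /* RPTR_BLOCK_SIZE bit offset */
	#define FIELD_MASK  (0x3fu << FIELD_SHIFT) /* assumed field width */

	static unsigned int set_rptr_block_size(unsigned int reg, unsigned int val)
	{
		/* The macro masks and shifts the value for the caller ... */
		return (reg & ~FIELD_MASK) | ((val << FIELD_SHIFT) & FIELD_MASK);
		/* ... so passing "val << 8" here would shift twice and clobber
		 * the field, which is exactly what the removed lines did. */
	}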
@@ -6919,23 +6919,6 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

-	tmp = 0;
-	/* enable the doorbell if requested */
-	if (prop->use_doorbell) {
-		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-				    DOORBELL_OFFSET, prop->doorbell_index);
-
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-				    DOORBELL_EN, 1);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-				    DOORBELL_SOURCE, 0);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-				    DOORBELL_HIT, 0);
-	}
-
-	mqd->cp_hqd_pq_doorbell_control = tmp;
-
	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
@@ -6973,20 +6956,6 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
	/* disable wptr polling */
	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);

-	/* write the EOP addr */
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
-	       mqd->cp_hqd_eop_base_addr_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
-	       mqd->cp_hqd_eop_base_addr_hi);
-
-	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
-	       mqd->cp_hqd_eop_control);
-
-	/* enable doorbell? */
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
-	       mqd->cp_hqd_pq_doorbell_control);
-
	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
@@ -7005,6 +6974,19 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
			       mqd->cp_hqd_pq_wptr_hi);
	}

+	/* disable doorbells */
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+	/* write the EOP addr */
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
+	       mqd->cp_hqd_eop_base_addr_lo);
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
+	       mqd->cp_hqd_eop_base_addr_hi);
+
+	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
+	       mqd->cp_hqd_eop_control);
+
	/* set the pointer to the MQD */
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
	       mqd->cp_mqd_base_addr_lo);
@@ -4082,7 +4082,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(prop->queue_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
-			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
@@ -3714,7 +3714,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
-	   acccess. These should be enabled by FW for target VMIDs. */
+	   access. These should be enabled by FW for target VMIDs. */
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
@@ -4490,7 +4490,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
-			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
 #ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
 #endif
@@ -5815,7 +5815,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
	/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
	gfx_v8_0_wait_for_rlc_serdes(adev);

-	/* write cmd to Set CGCG Overrride */
+	/* write cmd to Set CGCG Override */
	gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

	/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
@@ -2535,7 +2535,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
-	   acccess. These should be enabled by FW for target VMIDs. */
+	   access. These should be enabled by FW for target VMIDs. */
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
@@ -3514,7 +3514,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			(order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
-			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
 #ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
 #endif
@@ -3535,23 +3535,6 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

-	tmp = 0;
-	/* enable the doorbell if requested */
-	if (ring->use_doorbell) {
-		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-				DOORBELL_OFFSET, ring->doorbell_index);
-
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-				 DOORBELL_EN, 1);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-				 DOORBELL_SOURCE, 0);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-				 DOORBELL_HIT, 0);
-	}
-
-	mqd->cp_hqd_pq_doorbell_control = tmp;
-
	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
@@ -613,6 +613,9 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
	*flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
	*flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);

+	*flags &= ~AMDGPU_PTE_NOALLOC;
+	*flags |= (mapping->flags & AMDGPU_PTE_NOALLOC);
+
	if (mapping->flags & AMDGPU_PTE_PRT) {
		*flags |= AMDGPU_PTE_PRT;
		*flags |= AMDGPU_PTE_SNOOPED;
@@ -500,6 +500,9 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
	*flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
	*flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);

+	*flags &= ~AMDGPU_PTE_NOALLOC;
+	*flags |= (mapping->flags & AMDGPU_PTE_NOALLOC);
+
	if (mapping->flags & AMDGPU_PTE_PRT) {
		*flags |= AMDGPU_PTE_PRT;
		*flags |= AMDGPU_PTE_SNOOPED;
@@ -32,13 +32,10 @@
 MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin");
 MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin");
 MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin");
-MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin");
-MODULE_FIRMWARE("amdgpu/psp_13_0_5_asd.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_5_toc.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
-MODULE_FIRMWARE("amdgpu/psp_13_0_8_asd.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
@@ -85,17 +82,17 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
		err = psp_init_sos_microcode(psp, chip_name);
		if (err)
			return err;
-		err = psp_init_ta_microcode(&adev->psp, chip_name);
-		if (err)
-			return err;
+		/* It's not necessary to load ras ta on Guest side */
+		if (!amdgpu_sriov_vf(adev)) {
+			err = psp_init_ta_microcode(&adev->psp, chip_name);
+			if (err)
+				return err;
+		}
		break;
	case IP_VERSION(13, 0, 1):
	case IP_VERSION(13, 0, 3):
	case IP_VERSION(13, 0, 5):
	case IP_VERSION(13, 0, 8):
-		err = psp_init_asd_microcode(psp, chip_name);
-		if (err)
-			return err;
		err = psp_init_toc_microcode(psp, chip_name);
		if (err)
			return err;
@@ -42,6 +42,7 @@

 #include "soc15.h"
 #include "soc15_common.h"
+#include "soc21.h"

 static const struct amd_ip_funcs soc21_common_ip_funcs;

File diff suppressed because it is too large
@ -23,37 +23,52 @@
|
||||
/* To compile this assembly code:
|
||||
*
|
||||
* Navi1x:
|
||||
* cpp -DASIC_TARGET_NAVI1X=1 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3
|
||||
* sp3-nv1x nv1x.sp3 -hex nv1x.hex
|
||||
* cpp -DASIC_FAMILY=CHIP_NAVI10 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3
|
||||
* sp3 nv1x.sp3 -hex nv1x.hex
|
||||
*
|
||||
* Others:
|
||||
* cpp -DASIC_TARGET_NAVI1X=0 cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3
|
||||
* sp3-gfx10 gfx10.sp3 -hex gfx10.hex
|
||||
* gfx10:
|
||||
* cpp -DASIC_FAMILY=CHIP_SIENNA_CICHLID cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3
|
||||
* sp3 gfx10.sp3 -hex gfx10.hex
|
||||
*
|
||||
* gfx11:
|
||||
* cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3
|
||||
* sp3 gfx11.sp3 -hex gfx11.hex
|
||||
*/
|
||||
|
||||
#define NO_SQC_STORE !ASIC_TARGET_NAVI1X
|
||||
#define CHIP_NAVI10 26
|
||||
#define CHIP_SIENNA_CICHLID 30
|
||||
#define CHIP_PLUM_BONITO 36
|
||||
|
||||
#define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID)
|
||||
#define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID)
|
||||
#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
|
||||
#define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
|
||||
|
||||
var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
|
||||
|
||||
var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
|
||||
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
|
||||
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
|
||||
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
|
||||
var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
|
||||
|
||||
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
|
||||
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
|
||||
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
|
||||
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6
|
||||
var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24
|
||||
var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 4
|
||||
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8
|
||||
var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24
|
||||
var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4
|
||||
var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11
|
||||
var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1
|
||||
|
||||
#if ASIC_FAMILY < CHIP_PLUM_BONITO
|
||||
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
|
||||
#else
|
||||
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
|
||||
#endif
|
||||
|
||||
var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
|
||||
var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF
|
||||
var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF
|
||||
var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
|
||||
var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80
|
||||
var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7
|
||||
var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
|
||||
var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
|
||||
var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
|
||||
@ -63,46 +78,37 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
|
||||
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
|
||||
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
|
||||
var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
|
||||
var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000
|
||||
|
||||
var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12
|
||||
var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19
|
||||
|
||||
var SQ_WAVE_IB_STS_RCNT_SHIFT = 16
|
||||
var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15
|
||||
var SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT = 25
|
||||
var SQ_WAVE_IB_STS_REPLAY_W64H_SIZE = 1
|
||||
var SQ_WAVE_IB_STS_REPLAY_W64H_MASK = 0x02000000
|
||||
var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE = 1
|
||||
var SQ_WAVE_IB_STS_RCNT_SIZE = 6
|
||||
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000
|
||||
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF
|
||||
|
||||
var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
|
||||
|
||||
var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
|
||||
var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
|
||||
|
||||
// bits [31:24] unused by SPI debug data
|
||||
var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31
|
||||
var TTMP11_SAVE_REPLAY_W64H_MASK = 0x80000000
|
||||
var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 24
|
||||
var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0x7F000000
|
||||
var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23
|
||||
var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000
|
||||
|
||||
// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14]
|
||||
// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
|
||||
var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000
|
||||
var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC
|
||||
|
||||
var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000
|
||||
var S_SAVE_SPI_INIT_ATC_SHIFT = 27
|
||||
var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000
|
||||
var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
|
||||
var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
|
||||
var S_SAVE_PC_HI_HT_MASK = 0x01000000
|
||||
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
|
||||
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
|
||||
|
||||
var S_SAVE_PC_HI_RCNT_SHIFT = 26
|
||||
var S_SAVE_PC_HI_RCNT_MASK = 0xFC000000
|
||||
var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 25
|
||||
var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x02000000
|
||||
var S_SAVE_PC_HI_REPLAY_W64H_SHIFT = 24
|
||||
var S_SAVE_PC_HI_REPLAY_W64H_MASK = 0x01000000
|
||||
var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000
|
||||
var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31
|
||||
|
||||
var s_sgpr_save_num = 108
|
||||
|
||||
@ -130,19 +136,10 @@ var s_save_ttmps_hi = s_save_trapsts
|
||||
var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
|
||||
var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
|
||||
|
||||
var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000
|
||||
var S_RESTORE_SPI_INIT_ATC_SHIFT = 27
|
||||
var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000
|
||||
var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28
|
||||
var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
|
||||
var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
|
||||
var S_WAVE_SIZE = 25
|
||||
|
||||
var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT
|
||||
var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK
|
||||
var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
|
||||
var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK
|
||||
|
||||
var s_restore_spi_init_lo = exec_lo
|
||||
var s_restore_spi_init_hi = exec_hi
|
||||
var s_restore_mem_offset = ttmp12
|
||||
@@ -179,84 +176,133 @@ L_JUMP_TO_RESTORE:

L_SKIP_RESTORE:
s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK

if SINGLE_STEP_MISSED_WORKAROUND
// No single step exceptions if MODE.DEBUG_EN=0.
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND

// Second-level trap already handled exception if STATUS.HALT=1.
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK

// Prioritize single step exception over context save.
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
s_cbranch_scc0 L_FETCH_2ND_TRAP

L_NO_SINGLE_STEP_WORKAROUND:
end

// Clear SPI_PRIO: do not save with elevated priority.
// Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK

s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save

s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
s_cbranch_scc0 L_NOT_HALTED

L_HALTED:
// Host trap may occur while wave is halted.
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP

L_CHECK_SAVE:
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
s_cbranch_scc1 L_SAVE

// If STATUS.MEM_VIOL is asserted then halt the wave to prevent
// the exception raising again and blocking context save.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
s_cbranch_scc0 L_FETCH_2ND_TRAP
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
// Wave is halted but neither host trap nor SAVECTX is raised.
// Caused by instruction fetch memory violation.
// Spin wait until context saved to prevent interrupt storm.
s_sleep 0x10
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_branch L_CHECK_SAVE

L_NOT_HALTED:
// Let second-level handle non-SAVECTX exception or trap.
// Any concurrent SAVECTX will be handled upon re-entry once halted.

// Check non-maskable exceptions. memory_violation, illegal_instruction
// and xnack_error exceptions always cause the wave to enter the trap
// handler.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP

// Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
// Maskable exceptions only cause the wave to enter the trap handler if
// their respective bit in mode.excp_en is set.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
s_cbranch_scc0 L_CHECK_TRAP_ID

s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
s_cbranch_scc0 L_NOT_ADDR_WATCH
s_bitset1_b32 ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch

L_NOT_ADDR_WATCH:
s_getreg_b32 ttmp3, hwreg(HW_REG_MODE)
s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
s_and_b32 ttmp2, ttmp2, ttmp3
s_cbranch_scc1 L_FETCH_2ND_TRAP

L_CHECK_TRAP_ID:
// Check trap_id != 0
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP

if SINGLE_STEP_MISSED_WORKAROUND
// Prioritize single step exception over context save.
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
end

s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
s_cbranch_scc1 L_SAVE

L_FETCH_2ND_TRAP:

#if ASIC_TARGET_NAVI1X
// Preserve and clear scalar XNACK state before issuing scalar loads.
// Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
// unused space ttmp11[31:24].
s_andn2_b32 ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK)
s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS)
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
s_or_b32 ttmp11, ttmp11, ttmp3
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_or_b32 ttmp11, ttmp11, ttmp3
s_andn2_b32 ttmp2, ttmp2, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK)
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
#if HAVE_XNACK
save_and_clear_ib_sts(ttmp14, ttmp15)
#endif

// Read second-level TBA/TMA from first-level TMA and jump if available.
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
// ttmp12 holds SQ_WAVE_STATUS
#if HAVE_SENDMSG_RTN
s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
s_waitcnt lgkmcnt(0)
#else
s_getreg_b32 ttmp14, hwreg(HW_REG_SHADER_TMA_LO)
s_getreg_b32 ttmp15, hwreg(HW_REG_SHADER_TMA_HI)
#endif
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8

s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
s_or_b32 ttmp11, ttmp11, ttmp2

s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
s_waitcnt lgkmcnt(0)
s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
s_waitcnt lgkmcnt(0)

s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler

L_NO_NEXT_TRAP:
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK
s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly.
s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0
s_addc_u32 ttmp1, ttmp1, 0
L_EXCP_CASE:
// If not caused by trap then halt wave to prevent re-entry.
s_and_b32 ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK)
s_cbranch_scc1 L_TRAP_CASE
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK

// If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
// Rewind the PC to prevent this from occurring.
s_sub_u32 ttmp0, ttmp0, 0x8
s_subb_u32 ttmp1, ttmp1, 0x0

s_branch L_EXIT_TRAP

L_TRAP_CASE:
// Host trap will not cause trap re-entry.
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
s_cbranch_scc1 L_EXIT_TRAP

// Advance past trap instruction to prevent re-entry.
s_add_u32 ttmp0, ttmp0, 0x4
s_addc_u32 ttmp1, ttmp1, 0x0

L_EXIT_TRAP:
s_and_b32 ttmp1, ttmp1, 0xFFFF

#if ASIC_TARGET_NAVI1X
// Restore SQ_WAVE_IB_STS.
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
s_or_b32 ttmp2, ttmp2, ttmp3
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
#if HAVE_XNACK
restore_ib_sts(ttmp14, ttmp15)
#endif

// Restore SQ_WAVE_STATUS.
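Aside for readers following L_FETCH_2ND_TRAP above: the handler shifts the TMA base left by 8 and then reads a small record through it (second-level TBA at 0x0, second-level TMA at 0x8, a debug-trap-enabled flag at 0x10), jumping to the second level only when the TBA is non-zero. A minimal host-side C sketch of that null test, with hypothetical struct and function names that are not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical mirror of the record the handler reads via TMA:
 * 0x00: second-level TBA, 0x08: second-level TMA, 0x10: debug flag. */
struct tma_record {
	uint64_t second_level_tba;   /* entry point, 0 = not installed */
	uint64_t second_level_tma;   /* memory argument for the second level */
	uint32_t debug_trap_enabled; /* latched into ttmp11 by the handler */
};

/* Mimics the s_and_b64/s_cbranch_scc0 null test before s_setpc_b64. */
static int has_second_level(const struct tma_record *r)
{
	return r->second_level_tba != 0;
}

int main(void)
{
	struct tma_record r = { .second_level_tba = 0 };

	printf("jump to second level? %s\n",
	       has_second_level(&r) ? "yes" : "no (L_NO_NEXT_TRAP)");
	return 0;
}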
@@ -271,20 +317,8 @@ L_SAVE:
s_mov_b32 s_save_tmp, 0
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit

#if ASIC_TARGET_NAVI1X
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE)
s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE)
s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT, SQ_WAVE_IB_STS_REPLAY_W64H_SIZE)
s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY and REPLAY_W64H in IB_STS
s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG

s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
#if HAVE_XNACK
save_and_clear_ib_sts(s_save_tmp, s_save_trapsts)
#endif

/* inform SPI the readiness and wait for SPI's go signal */
@@ -292,9 +326,13 @@ L_SAVE:
s_mov_b32 s_save_exec_hi, exec_hi
s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive

#if HAVE_SENDMSG_RTN
s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE)
#else
s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
#endif

#if ASIC_TARGET_NAVI1X
#if ASIC_FAMILY < CHIP_SIENNA_CICHLID
L_SLEEP:
// sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause
// SQ hang, since the 7,8th wave could not get arbit to exec inst, while
@@ -305,16 +343,57 @@ L_SLEEP:
s_waitcnt lgkmcnt(0)
#endif

// Save first_wave flag so we can clear high bits of save address.
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT)
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp

#if NO_SQC_STORE
// Trap temporaries must be saved via VGPR but all VGPRs are in use.
// There is no ttmp space to hold the resource constant for VGPR save.
// Save v0 by itself since it requires only two SGPRs.
s_mov_b32 s_save_ttmps_lo, exec_lo
s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF
s_mov_b32 exec_lo, 0xFFFFFFFF
s_mov_b32 exec_hi, 0xFFFFFFFF
global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] slc:1 glc:1
v_mov_b32 v0, 0x0
s_mov_b32 exec_lo, s_save_ttmps_lo
s_mov_b32 exec_hi, s_save_ttmps_hi
#endif

// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
// ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
get_wave_size(s_save_ttmps_hi)
get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
get_svgpr_size_bytes(s_save_ttmps_hi)
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0

#if ASIC_TARGET_NAVI1X
#if NO_SQC_STORE
v_writelane_b32 v0, ttmp4, 0x4
v_writelane_b32 v0, ttmp5, 0x5
v_writelane_b32 v0, ttmp6, 0x6
v_writelane_b32 v0, ttmp7, 0x7
v_writelane_b32 v0, ttmp8, 0x8
v_writelane_b32 v0, ttmp9, 0x9
v_writelane_b32 v0, ttmp10, 0xA
v_writelane_b32 v0, ttmp11, 0xB
v_writelane_b32 v0, ttmp13, 0xD
v_writelane_b32 v0, exec_lo, 0xE
v_writelane_b32 v0, exec_hi, 0xF

s_mov_b32 exec_lo, 0x3FFF
s_mov_b32 exec_hi, 0x0
global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] inst_offset:0x40 slc:1 glc:1
v_readlane_b32 ttmp14, v0, 0xE
v_readlane_b32 ttmp15, v0, 0xF
s_mov_b32 exec_lo, ttmp14
s_mov_b32 exec_hi, ttmp15
#else
s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
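The "ttmp SR memory offset" comments above encode a simple sum. A C sketch of the same arithmetic, with invented input sizes (only the formula mirrors the listing):

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: mirrors the comment
 * "ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40".
 * The byte sizes below are made-up inputs, not values from the patch. */
static uint32_t ttmp_sr_offset(uint32_t vgpr_bytes, uint32_t svgpr_bytes,
			       uint32_t sgpr_bytes)
{
	return vgpr_bytes + svgpr_bytes + sgpr_bytes + 0x40;
}

int main(void)
{
	/* e.g. 64 VGPRs x 32 lanes x 4 bytes, no shared VGPRs, 112 SGPRs */
	printf("ttmp offset = 0x%x\n", ttmp_sr_offset(64 * 32 * 4, 0, 112 * 4));
	return 0;
}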
@@ -326,12 +405,6 @@ L_SLEEP:
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not necessarily inited
s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE

s_mov_b32 s_save_m0, m0

@@ -339,7 +412,7 @@ L_SLEEP:
s_mov_b32 s_save_mem_offset, 0x0
get_wave_size(s_wave_size)

#if ASIC_TARGET_NAVI1X
#if HAVE_XNACK
// Save and clear vector XNACK state late to free up SGPRs.
s_getreg_b32 s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK)
s_setreg_imm32_b32 hwreg(HW_REG_SHADER_XNACK_MASK), 0x0
@@ -361,7 +434,9 @@ L_SAVE_4VGPR_WAVE32:

// VGPR Allocated in 4-GPR granularity

#if !NO_SQC_STORE
buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
#endif
buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
@@ -372,7 +447,9 @@ L_SAVE_4VGPR_WAVE64:

// VGPR Allocated in 4-GPR granularity

#if !NO_SQC_STORE
buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
#endif
buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
@@ -397,7 +474,8 @@ L_SAVE_HWREG:

write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
@@ -418,9 +496,13 @@ L_SAVE_HWREG:
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)

#if NO_SQC_STORE
// Write HWREG/SGPRs with 32 VGPR lanes, wave32 is common case.
// Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
s_mov_b32 exec_lo, 0xFFFF
s_mov_b32 exec_hi, 0x0
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1

// Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
s_mov_b32 exec_lo, 0xFFFFFFFF
#endif

/* save SGPRs */
@@ -506,7 +588,7 @@ L_SAVE_LDS_NORMAL:
s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE

s_barrier //LDS is used? wait for other waves in the same TG
s_and_b32 s_save_tmp, s_wave_size, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
s_cbranch_scc0 L_SAVE_LDS_DONE

// first wave do LDS save;
@@ -628,7 +710,7 @@ L_SAVE_VGPR_WAVE64:
// VGPR store using dw burst
s_mov_b32 m0, 0x4 //VGPR initial index value =4
s_cmp_lt_u32 m0, s_save_alloc_size
s_cbranch_scc0 L_SAVE_VGPR_END
s_cbranch_scc0 L_SAVE_SHARED_VGPR

L_SAVE_VGPR_W64_LOOP:
v_movrels_b32 v0, v0 //v0 = v[0+m0]
@@ -646,6 +728,7 @@ L_SAVE_VGPR_W64_LOOP:
s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete?

L_SAVE_SHARED_VGPR:
//Below part will be the save shared vgpr part (new for gfx10)
s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
@@ -674,12 +757,7 @@ L_RESTORE:
s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE

//determine it is wave32 or wave64
get_wave_size(s_restore_size)

@@ -722,7 +800,13 @@ L_RESTORE_LDS_NORMAL:
s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64

L_RESTORE_LDS_LOOP_W32:
#if HAVE_BUFFER_LDS_LOAD
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
#else
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
s_waitcnt vmcnt(0)
ds_store_addtid_b32 v0
#endif
s_add_u32 m0, m0, 128 // 128 DW
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128DW
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
@@ -730,7 +814,13 @@ L_RESTORE_LDS_LOOP_W32:
s_branch L_RESTORE_VGPR

L_RESTORE_LDS_LOOP_W64:
#if HAVE_BUFFER_LDS_LOAD
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
#else
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
s_waitcnt vmcnt(0)
ds_store_addtid_b32 v0
#endif
s_add_u32 m0, m0, 256 // 256 DW
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256DW
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
@@ -765,6 +855,8 @@ L_RESTORE_VGPR_NORMAL:
s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4
s_mov_b32 m0, 4 //VGPR initial index value = 4
s_cmp_lt_u32 m0, s_restore_alloc_size
s_cbranch_scc0 L_RESTORE_SGPR

L_RESTORE_VGPR_WAVE32_LOOP:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
@@ -786,6 +878,7 @@ L_RESTORE_VGPR_WAVE32_LOOP:
buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128
buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*2
buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*3
s_waitcnt vmcnt(0)

s_branch L_RESTORE_SGPR

@@ -796,6 +889,8 @@ L_RESTORE_VGPR_WAVE64:
s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
s_mov_b32 m0, 4 //VGPR initial index value = 4
s_cmp_lt_u32 m0, s_restore_alloc_size
s_cbranch_scc0 L_RESTORE_SHARED_VGPR

L_RESTORE_VGPR_WAVE64_LOOP:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
@@ -812,6 +907,7 @@ L_RESTORE_VGPR_WAVE64_LOOP:
s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?

L_RESTORE_SHARED_VGPR:
//Below part will be the restore shared vgpr part (new for gfx10)
s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size
s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
@@ -935,7 +1031,7 @@ L_RESTORE_HWREG:
s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0

#if ASIC_TARGET_NAVI1X
#if HAVE_XNACK
s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
#endif

@@ -945,8 +1041,10 @@ L_RESTORE_HWREG:
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode

// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
// ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
get_svgpr_size_bytes(s_restore_ttmps_hi)
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
@@ -956,24 +1054,8 @@ L_RESTORE_HWREG:
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
s_waitcnt lgkmcnt(0)

#if ASIC_TARGET_NAVI1X
s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
s_mov_b32 s_restore_tmp, 0x0
s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_REPLAY_W64H_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT
s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0

s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
#if HAVE_XNACK
restore_ib_sts(s_restore_tmp, s_restore_m0)
#endif

s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
@@ -1089,5 +1171,29 @@ end
function get_wave_size(s_reg)
s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
s_or_b32 s_reg, s_save_spi_init_hi, s_reg //share with exec_hi, it's at bit25
end

function save_and_clear_ib_sts(tmp1, tmp2)
// Preserve and clear scalar XNACK state before issuing scalar loads.
// Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
// unused space ttmp11[31:24].
s_andn2_b32 ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK)
s_getreg_b32 tmp1, hwreg(HW_REG_IB_STS)
s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
s_lshl_b32 tmp2, tmp2, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
s_or_b32 ttmp11, ttmp11, tmp2
s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_lshl_b32 tmp2, tmp2, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_or_b32 ttmp11, ttmp11, tmp2
s_andn2_b32 tmp1, tmp1, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK)
s_setreg_b32 hwreg(HW_REG_IB_STS), tmp1
end

function restore_ib_sts(tmp1, tmp2)
s_lshr_b32 tmp1, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_lshr_b32 tmp1, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
s_and_b32 tmp1, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
s_or_b32 tmp1, tmp1, tmp2
s_setreg_b32 hwreg(HW_REG_IB_STS), tmp1
end
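The save_and_clear_ib_sts()/restore_ib_sts() pair above is a mask-and-shift round trip: the replay bits are parked in the unused high end of ttmp11 and later shifted back into IB_STS. A self-contained C check of that round trip using the RCNT/FIRST_REPLAY constants from this listing (the REPLAY_W64H leg works the same way):

#include <stdint.h>
#include <assert.h>

/* Constants copied from the listing; the test value is arbitrary. */
#define IB_STS_RCNT_FIRST_REPLAY_MASK  0x1F8000u  /* IB_STS[20:15] */
#define IB_STS_FIRST_REPLAY_SHIFT      15
#define TTMP11_SAVE_SHIFT              26         /* ttmp11[31:26] */

static uint32_t save_bits(uint32_t ib_sts)
{
	uint32_t t = ib_sts & IB_STS_RCNT_FIRST_REPLAY_MASK;
	return t << (TTMP11_SAVE_SHIFT - IB_STS_FIRST_REPLAY_SHIFT);
}

static uint32_t restore_bits(uint32_t ttmp11)
{
	uint32_t t = ttmp11 >> (TTMP11_SAVE_SHIFT - IB_STS_FIRST_REPLAY_SHIFT);
	return t & IB_STS_RCNT_FIRST_REPLAY_MASK;
}

int main(void)
{
	uint32_t ib_sts = 0x2Au << IB_STS_FIRST_REPLAY_SHIFT; /* arbitrary */

	assert(restore_bits(save_bits(ib_sts)) == ib_sts);
	return 0;
}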
@@ -46,8 +46,6 @@ var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN
/**************************************************************************/
/* variables */
/**************************************************************************/
var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
@@ -56,6 +54,7 @@ var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1
var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3
var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29
var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000
var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000

var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
@@ -72,8 +71,10 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
#endif

var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask
var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF
var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80
var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7
var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
@@ -83,37 +84,30 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000
var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK = 0x10000000

var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME
var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12
var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19

var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME

var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800

var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27

var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data
var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000
var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23
var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000

/* Save */
var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes
var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE

var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
var S_SAVE_SPI_INIT_ATC_SHIFT = 27
var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
var S_SAVE_PC_HI_HT_MASK = 0x01000000
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26

var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME
var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME
var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME

var s_save_spi_init_lo = exec_lo
var s_save_spi_init_hi = exec_hi

@@ -140,18 +134,9 @@ var s_save_ttmps_hi = s_save_trapsts //no conflict
var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC

var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
var S_RESTORE_SPI_INIT_ATC_SHIFT = 27
var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28
var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26

var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT
var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK
var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK

var s_restore_spi_init_lo = exec_lo
var s_restore_spi_init_hi = exec_hi

@@ -199,71 +184,77 @@ L_JUMP_TO_RESTORE:
L_SKIP_RESTORE:

s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save

if SINGLE_STEP_MISSED_WORKAROUND
// No single step exceptions if MODE.DEBUG_EN=0.
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND

// Second-level trap already handled exception if STATUS.HALT=1.
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK

// Prioritize single step exception over context save.
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
s_cbranch_scc0 L_FETCH_2ND_TRAP

L_NO_SINGLE_STEP_WORKAROUND:
end
// Clear SPI_PRIO: do not save with elevated priority.
// Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK

s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)

s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
s_cbranch_scc0 L_NOT_HALTED

L_HALTED:
// Host trap may occur while wave is halted.
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP

L_CHECK_SAVE:
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
s_cbranch_scc1 L_SAVE //this is the operation for save

// ********* Handle non-CWSR traps *******************

// Illegal instruction is a non-maskable exception which blocks context save.
// Halt the wavefront and return from the trap.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
s_cbranch_scc1 L_HALT_WAVE

// If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA.
// Instead, halt the wavefront and return from the trap.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
s_cbranch_scc0 L_FETCH_2ND_TRAP

L_HALT_WAVE:
// If STATUS.HALT is set then this fault must come from SQC instruction fetch.
// We cannot prevent further faults. Spin wait until context saved.
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
s_cbranch_scc0 L_NOT_ALREADY_HALTED

L_WAIT_CTX_SAVE:
// Wave is halted but neither host trap nor SAVECTX is raised.
// Caused by instruction fetch memory violation.
// Spin wait until context saved to prevent interrupt storm.
s_sleep 0x10
s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK
s_cbranch_scc0 L_WAIT_CTX_SAVE
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_branch L_CHECK_SAVE

L_NOT_ALREADY_HALTED:
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
L_NOT_HALTED:
// Let second-level handle non-SAVECTX exception or trap.
// Any concurrent SAVECTX will be handled upon re-entry once halted.

// If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
// Rewind the PC to prevent this from occurring. The debugger compensates for this.
s_sub_u32 ttmp0, ttmp0, 0x8
s_subb_u32 ttmp1, ttmp1, 0x0
// Check non-maskable exceptions. memory_violation, illegal_instruction
// and xnack_error exceptions always cause the wave to enter the trap
// handler.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP

// Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
// Maskable exceptions only cause the wave to enter the trap handler if
// their respective bit in mode.excp_en is set.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
s_cbranch_scc0 L_CHECK_TRAP_ID

s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
s_cbranch_scc0 L_NOT_ADDR_WATCH
s_bitset1_b32 ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch

L_NOT_ADDR_WATCH:
s_getreg_b32 ttmp3, hwreg(HW_REG_MODE)
s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
s_and_b32 ttmp2, ttmp2, ttmp3
s_cbranch_scc1 L_FETCH_2ND_TRAP

L_CHECK_TRAP_ID:
// Check trap_id != 0
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP

if SINGLE_STEP_MISSED_WORKAROUND
// Prioritize single step exception over context save.
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
end

s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
s_cbranch_scc1 L_SAVE

L_FETCH_2ND_TRAP:
// Preserve and clear scalar XNACK state before issuing scalar reads.
// Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26].
s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS)
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK
s_or_b32 ttmp11, ttmp11, ttmp3

s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
save_and_clear_ib_sts(ttmp14)

// Read second-level TBA/TMA from first-level TMA and jump if available.
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
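The L_WAIT_CTX_SAVE loop above is a classic sleep-and-poll guard against an interrupt storm: the wave cannot make progress, so it sleeps briefly and re-checks TRAPSTS.SAVECTX until the context-save request arrives. The shape of that loop in C, with a stand-in poll function (nothing here reads real registers):

#include <stdio.h>

/* Fake poll: pretend SAVECTX shows up on the third re-read. */
static int read_trapsts_savectx(int iter) { return iter >= 3; }

int main(void)
{
	for (int i = 0; ; i++) {
		if (read_trapsts_savectx(i)) {
			printf("SAVECTX raised after %d polls\n", i);
			break;
		}
		/* s_sleep 0x10 analogue: back off instead of hot-spinning */
	}
	return 0;
}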
@@ -271,27 +262,48 @@ L_FETCH_2ND_TRAP:
s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8

s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
s_or_b32 ttmp11, ttmp11, ttmp2

s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
s_waitcnt lgkmcnt(0)
s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
s_waitcnt lgkmcnt(0)

s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler

L_NO_NEXT_TRAP:
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception
s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly.
s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0
s_addc_u32 ttmp1, ttmp1, 0
L_EXCP_CASE:
// If not caused by trap then halt wave to prevent re-entry.
s_and_b32 ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK)
s_cbranch_scc1 L_TRAP_CASE
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK

// If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
// Rewind the PC to prevent this from occurring.
s_sub_u32 ttmp0, ttmp0, 0x8
s_subb_u32 ttmp1, ttmp1, 0x0

s_branch L_EXIT_TRAP

L_TRAP_CASE:
// Host trap will not cause trap re-entry.
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
s_cbranch_scc1 L_EXIT_TRAP

// Advance past trap instruction to prevent re-entry.
s_add_u32 ttmp0, ttmp0, 0x4
s_addc_u32 ttmp1, ttmp1, 0x0

L_EXIT_TRAP:
s_and_b32 ttmp1, ttmp1, 0xFFFF

// Restore SQ_WAVE_IB_STS.
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
restore_ib_sts(ttmp14)

// Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
@@ -312,16 +324,7 @@ L_SAVE:
s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit

s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT
s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY
s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS
s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG

s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
save_and_clear_ib_sts(s_save_tmp)

/* inform SPI the readiness and wait for SPI's go signal */
s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
@@ -360,12 +363,6 @@ L_SAVE:
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not necessarily inited
s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE

//FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?)
s_mov_b32 s_save_m0, m0 //save M0
@@ -690,12 +687,6 @@ L_RESTORE:
s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE

/* global mem offset */
// s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0
@@ -889,19 +880,7 @@ L_RESTORE:
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
s_waitcnt lgkmcnt(0)

//reuse s_restore_m0 as a temp register
s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero
s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
restore_ib_sts(s_restore_tmp)

s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
@@ -910,8 +889,7 @@ L_RESTORE:

s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time

// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc
s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution


/**************************************************************************/
@@ -1078,3 +1056,19 @@ function set_status_without_spi_prio(status, tmp)
s_nop 0x2 // avoid S_SETREG => S_SETREG hazard
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status
end

function save_and_clear_ib_sts(tmp)
// Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26].
s_getreg_b32 tmp, hwreg(HW_REG_IB_STS)
s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_lshl_b32 tmp, tmp, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK
s_or_b32 ttmp11, ttmp11, tmp
s_setreg_imm32_b32 hwreg(HW_REG_IB_STS), 0x0
end

function restore_ib_sts(tmp)
s_lshr_b32 tmp, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_setreg_b32 hwreg(HW_REG_IB_STS), tmp
end

@@ -441,10 +441,14 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_nv1x_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
} else {
} else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx10_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
} else {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx11_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
}

kfd->cwsr_enabled = true;

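The kfd_cwsr_init() hunk above extends the handler selection to three cases (nv1x, gfx10, gfx11), each guarded by a BUILD_BUG_ON() so a trap-handler blob cannot silently outgrow the single page reserved for it. A userspace rendition of that compile-time guard, with a placeholder blob that is not from the patch:

#include <stdint.h>

/* Fail the build if a blob outgrows one 4 KiB page; same condition the
 * kernel's BUILD_BUG_ON(sizeof(blob) > PAGE_SIZE) enforces. */
#define PAGE_SIZE 4096
#define STATIC_ASSERT_FITS_PAGE(blob) \
	_Static_assert(sizeof(blob) <= PAGE_SIZE, #blob " exceeds one page")

static const uint32_t fake_trap_handler_hex[256] = { 0 }; /* placeholder */
STATIC_ASSERT_FITS_PAGE(fake_trap_handler_hex);

int main(void) { return 0; }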
@@ -531,7 +531,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0;
bp.flags |= AMDGPU_AMDKFD_CREATE_SVM_BO;
bp.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;
bp.type = ttm_bo_type_device;
bp.resv = NULL;

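The svm_range_vram_node_new() change above simply adds AMDGPU_GEM_CREATE_DISCARDABLE to the buffer-creation flags, so an SVM BO evicted from VRAM may be discarded rather than copied out. A reduced sketch of the same flag build-up, with invented flag values — only the composition pattern mirrors the patch:

#include <stdint.h>
#include <stdio.h>

#define GEM_CREATE_NO_CPU_ACCESS  (1u << 0)  /* invented stand-ins */
#define GEM_CREATE_VRAM_CLEARED   (1u << 1)
#define GEM_CREATE_SVM_BO         (1u << 2)
#define GEM_CREATE_DISCARDABLE    (1u << 3)

static uint32_t svm_bo_flags(int clear)
{
	uint32_t flags = GEM_CREATE_NO_CPU_ACCESS;

	flags |= clear ? GEM_CREATE_VRAM_CLEARED : 0;
	flags |= GEM_CREATE_SVM_BO;
	flags |= GEM_CREATE_DISCARDABLE; /* eviction may drop the contents */
	return flags;
}

int main(void)
{
	printf("flags=0x%x\n", svm_bo_flags(1));
	return 0;
}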
@@ -1271,6 +1271,12 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
if (!peer_dev)
continue;

/* Include the CPU peer in GPU hive if connected over xGMI. */
if (!peer_dev->gpu && !peer_dev->node_props.hive_id &&
dev->node_props.hive_id &&
dev->gpu->adev->gmc.xgmi.connected_to_cpu)
peer_dev->node_props.hive_id = dev->node_props.hive_id;

list_for_each_entry(inbound_link, &peer_dev->io_link_props,
list) {
if (inbound_link->node_to != link->node_from)
@@ -1302,22 +1308,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu)

pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);

/* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */
if (gpu->hive_id && gpu->adev->gmc.xgmi.connected_to_cpu) {
struct kfd_topology_device *top_dev;

down_read(&topology_lock);

list_for_each_entry(top_dev, &topology_device_list, list) {
if (top_dev->gpu)
break;

top_dev->node_props.hive_id = gpu->hive_id;
}

up_read(&topology_lock);
}

/* Check to see if this gpu device exists in the topology_device_list.
* If so, assign the gpu to that device,
* else create a Virtual CRAT for this gpu device and then parse that

@@ -769,7 +769,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params)

do {
dc_stat_get_dmub_notification(adev->dm.dc, &notify);
if (notify.type > ARRAY_SIZE(dm->dmub_thread_offload)) {
if (notify.type >= ARRAY_SIZE(dm->dmub_thread_offload)) {
DRM_ERROR("DM: notify type %d invalid!", notify.type);
continue;
}
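The one-character fix above ('>' to '>=') is the standard array-bounds rule: an N-entry table accepts indices 0..N-1, so index N must be rejected as well. A tiny standalone illustration:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

int main(void)
{
	int table[4] = { 0 };
	int type = 4; /* one past the end */

	if (type >= (int)ARRAY_SIZE(table)) /* '>' alone lets 4 through */
		printf("type %d invalid\n", type);
	return 0;
}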
@@ -5381,17 +5381,19 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev,

static void
fill_blending_from_plane_state(const struct drm_plane_state *plane_state,
bool *per_pixel_alpha, bool *global_alpha,
int *global_alpha_value)
bool *per_pixel_alpha, bool *pre_multiplied_alpha,
bool *global_alpha, int *global_alpha_value)
{
*per_pixel_alpha = false;
*pre_multiplied_alpha = true;
*global_alpha = false;
*global_alpha_value = 0xff;

if (plane_state->plane->type != DRM_PLANE_TYPE_OVERLAY)
return;

if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI) {
if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ||
plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) {
static const uint32_t alpha_formats[] = {
DRM_FORMAT_ARGB8888,
DRM_FORMAT_RGBA8888,
@@ -5406,6 +5408,9 @@ fill_blending_from_plane_state(const struct drm_plane_state *plane_state,
break;
}
}

if (per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE)
*pre_multiplied_alpha = false;
}

if (plane_state->alpha < 0xffff) {
@@ -5568,7 +5573,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
return ret;

fill_blending_from_plane_state(
plane_state, &plane_info->per_pixel_alpha,
plane_state, &plane_info->per_pixel_alpha, &plane_info->pre_multiplied_alpha,
&plane_info->global_alpha, &plane_info->global_alpha_value);

return 0;
@@ -5615,6 +5620,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
dc_plane_state->tiling_info = plane_info.tiling_info;
dc_plane_state->visible = plane_info.visible;
dc_plane_state->per_pixel_alpha = plane_info.per_pixel_alpha;
dc_plane_state->pre_multiplied_alpha = plane_info.pre_multiplied_alpha;
dc_plane_state->global_alpha = plane_info.global_alpha;
dc_plane_state->global_alpha_value = plane_info.global_alpha_value;
dc_plane_state->dcc = plane_info.dcc;
@@ -7911,7 +7917,8 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
if (plane->type == DRM_PLANE_TYPE_OVERLAY &&
plane_cap && plane_cap->per_pixel_alpha) {
unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) |
BIT(DRM_MODE_BLEND_PREMULTI);
BIT(DRM_MODE_BLEND_PREMULTI) |
BIT(DRM_MODE_BLEND_COVERAGE);

drm_plane_create_alpha_property(plane);
drm_plane_create_blend_mode_property(plane, blend_caps);
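For context on the coverage blend mode the plane now advertises: with straight (coverage) alpha the source color is scaled by alpha at blend time, while a premultiplied source already carries that scaling. A standalone sketch of the two standard equations (example values only, not driver data):

#include <stdio.h>

/*   PREMULTI:  out = src + dst * (1 - a)   (src already times a)
 *   COVERAGE:  out = src * a + dst * (1 - a) */
static float blend_premulti(float src, float dst, float a)
{
	return src + dst * (1.0f - a);
}

static float blend_coverage(float src, float dst, float a)
{
	return src * a + dst * (1.0f - a);
}

int main(void)
{
	printf("premulti: %.2f\n", blend_premulti(0.25f, 0.8f, 0.5f));
	printf("coverage: %.2f\n", blend_coverage(0.50f, 0.8f, 0.5f));
	return 0;
}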
@@ -122,7 +122,7 @@ static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
dpp_inst = clk_mgr->base.ctx->dc->res_pool->dpps[i]->inst;
dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;

prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[i];
prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[dpp_inst];

if (safe_to_lower || prev_dppclk_khz < dppclk_khz)
clk_mgr->dccg->funcs->update_dpp_dto(
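The rn_update_clocks_update_dpp_dto() fix above replaces a loop-counter lookup with a lookup by hardware instance id; the two only coincide when pipes happen to map to instances in order. A reduced illustration with made-up data:

#include <stdio.h>

int main(void)
{
	int inst_of_pipe[3] = { 2, 0, 1 };  /* pipe i drives DPP instance */
	int clk_of_inst[3] = { 100, 200, 300 };

	for (int i = 0; i < 3; i++) {
		int inst = inst_of_pipe[i];
		/* buggy form: clk_of_inst[i]; fixed form: clk_of_inst[inst] */
		printf("pipe %d -> inst %d -> %d kHz\n",
		       i, inst, clk_of_inst[inst]);
	}
	return 0;
}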
@@ -91,7 +91,8 @@ static void dcn315_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable)

if (pipe->top_pipe || pipe->prev_odm_pipe)
continue;
if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) {
if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
dc_is_virtual_signal(pipe->stream->signal))) {
if (disable)
pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
else
@@ -122,7 +122,8 @@ static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable)

if (pipe->top_pipe || pipe->prev_odm_pipe)
continue;
if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) {
if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
dc_is_virtual_signal(pipe->stream->signal))) {
if (disable)
pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
else
@@ -2901,14 +2901,15 @@ static void commit_planes_for_stream(struct dc *dc,
top_pipe_to_program->stream_res.tg);
}

if (should_lock_all_pipes && dc->hwss.interdependent_update_lock)
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
dc->hwss.interdependent_update_lock(dc, context, true);
else
} else {
/* Lock the top pipe while updating plane addrs, since freesync requires
* plane addr update event triggers to be synchronized.
* top_pipe_to_program is expected to never be NULL
*/
dc->hwss.pipe_control_lock(dc, top_pipe_to_program, true);
}

// Stream updates
if (stream_update)
@@ -2924,10 +2925,11 @@ static void commit_planes_for_stream(struct dc *dc,
if (dc->hwss.program_front_end_for_ctx)
dc->hwss.program_front_end_for_ctx(dc, context);

if (should_lock_all_pipes && dc->hwss.interdependent_update_lock)
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
dc->hwss.interdependent_update_lock(dc, context, false);
else
} else {
dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false);
}
dc->hwss.post_unlock_program_front_end(dc, context);
return;
}
@@ -3052,10 +3054,11 @@ static void commit_planes_for_stream(struct dc *dc,

}

if (should_lock_all_pipes && dc->hwss.interdependent_update_lock)
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
dc->hwss.interdependent_update_lock(dc, context, false);
else
} else {
dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false);
}

if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
@@ -33,6 +33,7 @@
#include "gpio_service_interface.h"
#include "core_status.h"
#include "dc_link_dp.h"
#include "dc_link_dpia.h"
#include "dc_link_ddc.h"
#include "link_hwss.h"
#include "opp.h"
@@ -240,7 +241,7 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type)

/* Link may not have physical HPD pin. */
if (link->ep_type != DISPLAY_ENDPOINT_PHY) {
if (link->is_hpd_pending || !link->hpd_status)
if (link->is_hpd_pending || !dc_link_dpia_query_hpd_status(link))
*type = dc_connection_none;
else
*type = dc_connection_single;
@@ -1604,8 +1605,25 @@ static bool dc_link_construct_legacy(struct dc_link *link,
if (link->hpd_gpio) {
if (!link->dc->config.allow_edp_hotplug_detection)
link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
link->irq_source_hpd_rx =
dal_irq_get_rx_source(link->hpd_gpio);

switch (link->dc->config.allow_edp_hotplug_detection) {
case 1: // only the 1st eDP handles hotplug
if (link->link_index == 0)
link->irq_source_hpd_rx =
dal_irq_get_rx_source(link->hpd_gpio);
else
link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
break;
case 2: // only the 2nd eDP handles hotplug
if (link->link_index == 1)
link->irq_source_hpd_rx =
dal_irq_get_rx_source(link->hpd_gpio);
else
link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
break;
default:
break;
}
}

break;
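The dc_link_construct_legacy() hunk turns allow_edp_hotplug_detection from a boolean into a selector: value 1 routes hotplug to the first eDP, value 2 to the second. A condensed sketch of that policy, with invented names:

#include <stdio.h>

static int edp_handles_hotplug(int allow_cfg, int link_index)
{
	switch (allow_cfg) {
	case 1: return link_index == 0; /* only the 1st eDP */
	case 2: return link_index == 1; /* only the 2nd eDP */
	default: return 0;
	}
}

int main(void)
{
	printf("cfg=2, link 1: %d\n", edp_handles_hotplug(2, 1));
	return 0;
}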
@@ -2783,31 +2783,37 @@ bool perform_link_training_with_retries(
struct dc_link *link = stream->link;
enum dp_panel_mode panel_mode = dp_get_panel_mode(link);
enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0;
struct dc_link_settings current_setting = *link_setting;
struct dc_link_settings cur_link_settings = *link_setting;
const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res);
int fail_count = 0;
bool is_link_bw_low = false; /* link bandwidth < stream bandwidth */
bool is_link_bw_min = /* RBR x 1 */
(cur_link_settings.link_rate <= LINK_RATE_LOW) &&
(cur_link_settings.lane_count <= LANE_COUNT_ONE);

dp_trace_commit_lt_init(link);


if (dp_get_link_encoding_format(&current_setting) == DP_8b_10b_ENCODING)
if (dp_get_link_encoding_format(&cur_link_settings) == DP_8b_10b_ENCODING)
/* We need to do this before the link training to ensure the idle
* pattern in SST mode will be sent right after the link training
*/
link_hwss->setup_stream_encoder(pipe_ctx);

dp_trace_set_lt_start_timestamp(link, false);
for (j = 0; j < attempts; ++j) {
j = 0;
while (j < attempts && fail_count < (attempts * 10)) {

DC_LOG_HW_LINK_TRAINING("%s: Beginning link training attempt %u of %d\n",
__func__, (unsigned int)j + 1, attempts);
DC_LOG_HW_LINK_TRAINING("%s: Beginning link training attempt %u of %d @ rate(%d) x lane(%d)\n",
__func__, (unsigned int)j + 1, attempts, cur_link_settings.link_rate,
cur_link_settings.lane_count);

dp_enable_link_phy(
link,
&pipe_ctx->link_res,
signal,
pipe_ctx->clock_source->id,
&current_setting);
&cur_link_settings);

if (stream->sink_patches.dppowerup_delay > 0) {
int delay_dp_power_up_in_ms = stream->sink_patches.dppowerup_delay;
@@ -2832,30 +2838,30 @@ bool perform_link_training_with_retries(
dp_set_panel_mode(link, panel_mode);

if (link->aux_access_disabled) {
dc_link_dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &current_setting);
dc_link_dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &cur_link_settings);
return true;
} else {
/** @todo Consolidate USB4 DP and DPx.x training. */
if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
status = dc_link_dpia_perform_link_training(link,
&pipe_ctx->link_res,
&current_setting,
&cur_link_settings,
skip_video_pattern);

/* Transmit idle pattern once training successful. */
if (status == LINK_TRAINING_SUCCESS)
if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low)
dp_set_hw_test_pattern(link, &pipe_ctx->link_res, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0);
} else {
status = dc_link_dp_perform_link_training(link,
&pipe_ctx->link_res,
&current_setting,
&cur_link_settings,
skip_video_pattern);
}

dp_trace_lt_total_count_increment(link, false);
dp_trace_lt_result_update(link, status, false);
dp_trace_set_lt_end_timestamp(link, false);
if (status == LINK_TRAINING_SUCCESS)
if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low)
return true;
}

@@ -2866,8 +2872,9 @@ bool perform_link_training_with_retries(
if (j == (attempts - 1) && link->ep_type == DISPLAY_ENDPOINT_PHY)
break;

DC_LOG_WARNING("%s: Link training attempt %u of %d failed\n",
__func__, (unsigned int)j + 1, attempts);
DC_LOG_WARNING("%s: Link training attempt %u of %d failed @ rate(%d) x lane(%d)\n",
__func__, (unsigned int)j + 1, attempts, cur_link_settings.link_rate,
cur_link_settings.lane_count);

dp_disable_link_phy(link, &pipe_ctx->link_res, signal);

@@ -2876,27 +2883,49 @@ bool perform_link_training_with_retries(
enum dc_connection_type type = dc_connection_none;

dc_link_detect_sink(link, &type);
if (type == dc_connection_none)
if (type == dc_connection_none) {
DC_LOG_HW_LINK_TRAINING("%s: Aborting training because sink unplugged\n", __func__);
break;
} else if (do_fallback) {
}
}

/* Try to train again at original settings if:
* - not falling back between training attempts;
* - aborted previous attempt due to reasons other than sink unplug;
* - successfully trained but at a link rate lower than that required by stream;
* - reached minimum link bandwidth.
*/
if (!do_fallback || (status == LINK_TRAINING_ABORT) ||
(status == LINK_TRAINING_SUCCESS && is_link_bw_low) ||
is_link_bw_min) {
j++;
cur_link_settings = *link_setting;
delay_between_attempts += LINK_TRAINING_RETRY_DELAY;
is_link_bw_low = false;
is_link_bw_min = (cur_link_settings.link_rate <= LINK_RATE_LOW) &&
(cur_link_settings.lane_count <= LANE_COUNT_ONE);

} else if (do_fallback) { /* Try training at lower link bandwidth if doing fallback. */
uint32_t req_bw;
uint32_t link_bw;

decide_fallback_link_setting(link, *link_setting, &current_setting, status);
/* Fail link training if reduced link bandwidth no longer meets
* stream requirements.
decide_fallback_link_setting(link, *link_setting, &cur_link_settings, status);
/* Flag if reduced link bandwidth no longer meets stream requirements or fallen back to
* minimum link bandwidth.
*/
req_bw = dc_bandwidth_in_kbps_from_timing(&stream->timing);
link_bw = dc_link_bandwidth_kbps(link, &current_setting);
if (req_bw > link_bw)
break;
link_bw = dc_link_bandwidth_kbps(link, &cur_link_settings);
is_link_bw_low = (req_bw > link_bw);
is_link_bw_min = ((cur_link_settings.link_rate <= LINK_RATE_LOW) &&
(cur_link_settings.lane_count <= LANE_COUNT_ONE));

if (is_link_bw_low)
DC_LOG_WARNING("%s: Link bandwidth too low after fallback req_bw(%d) > link_bw(%d)\n",
__func__, req_bw, link_bw);
|
||||
}
|
||||
|
||||
msleep(delay_between_attempts);
|
||||
|
||||
delay_between_attempts += LINK_TRAINING_RETRY_DELAY;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
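The reworked loop above replaces a fixed `for` over the caller's attempts with a `while` that only consumes an attempt (and restores the original settings) when there is nothing left to fall back to; otherwise it retries at a reduced rate/lane count for free, bounded by `fail_count`. The control-flow idea, reduced to a self-contained sketch — the types and helpers here are invented stand-ins, not DC code:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the driver's link settings and helpers. */
struct settings { int rate; int lanes; };

static bool is_minimum(struct settings s)
{
    return s.rate <= 1 && s.lanes <= 1; /* analogue of "RBR x 1 lane" */
}

static void fall_back(struct settings *s)
{
    if (s->lanes > 1)
        s->lanes /= 2;
    else if (s->rate > 1)
        s->rate--;
}

/* Pretend training only succeeds at the lowest configuration. */
static bool try_train(const struct settings *s)
{
    return s->rate == 1 && s->lanes == 1;
}

static bool train_with_retries(struct settings original, int attempts)
{
    struct settings cur = original;
    int j = 0, fail_count = 0;
    bool bw_min = is_minimum(cur);

    while (j < attempts && fail_count < attempts * 10) {
        if (try_train(&cur))
            return true;
        fail_count++;

        if (bw_min) {
            /* Nothing left to fall back to: consume an attempt and
             * restart from the caller's original settings. */
            j++;
            cur = original;
        } else {
            /* Fall back to a lower configuration without consuming
             * an attempt. */
            fall_back(&cur);
        }
        bw_min = is_minimum(cur);
    }
    return false;
}

int main(void)
{
    struct settings s = { .rate = 3, .lanes = 4 };
    printf("trained: %d\n", train_with_retries(s, 4));
    return 0;
}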
@@ -5097,13 +5126,16 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link)
    return true;
}

void dp_retrieve_lttpr_cap(struct dc_link *link)
bool dp_retrieve_lttpr_cap(struct dc_link *link)
{
    uint8_t lttpr_dpcd_data[8];
    bool allow_lttpr_non_transparent_mode = 0;
    bool vbios_lttpr_enable = link->dc->caps.vbios_lttpr_enable;
    bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware;
    enum dc_status status = DC_ERROR_UNEXPECTED;
    bool is_lttpr_present = false;

    memset(link->lttpr_dpcd_data, '\0', sizeof(link->lttpr_dpcd_data));
    memset(lttpr_dpcd_data, '\0', sizeof(lttpr_dpcd_data));

    if ((link->dc->config.allow_lttpr_non_transparent_mode.bits.DP2_0 &&
            link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED)) {
@@ -5113,116 +5145,82 @@ void dp_retrieve_lttpr_cap(struct dc_link *link)
        allow_lttpr_non_transparent_mode = 1;
    }

    link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
    link->lttpr_support = LTTPR_UNSUPPORTED;

    /*
     * Logic to determine LTTPR support
     * Logic to determine LTTPR mode
     */
    if (vbios_lttpr_interop)
        link->lttpr_support = LTTPR_SUPPORTED;
    else if (link->dc->config.allow_lttpr_non_transparent_mode.raw == 0
            || !link->dc->caps.extended_aux_timeout_support)
        link->lttpr_support = LTTPR_UNSUPPORTED;
    else
        link->lttpr_support = LTTPR_CHECK_EXT_SUPPORT;
    link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
    if (vbios_lttpr_enable && vbios_lttpr_interop)
        link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
    else if (!vbios_lttpr_enable && vbios_lttpr_interop) {
        if (allow_lttpr_non_transparent_mode)
            link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
        else
            link->lttpr_mode = LTTPR_MODE_TRANSPARENT;
    } else if (!vbios_lttpr_enable && !vbios_lttpr_interop) {
        if (!allow_lttpr_non_transparent_mode || !link->dc->caps.extended_aux_timeout_support)
            link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
        else
            link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
    }

#if defined(CONFIG_DRM_AMD_DC_DCN)
    /* Check DP tunnel LTTPR mode debug option. */
    if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
            link->dc->debug.dpia_debug.bits.force_non_lttpr)
        link->lttpr_support = LTTPR_UNSUPPORTED;
        link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
#endif

    if (link->lttpr_support > LTTPR_UNSUPPORTED) {
    if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT || link->lttpr_mode == LTTPR_MODE_TRANSPARENT) {
        /* By reading LTTPR capability, RX assumes that we will enable
         * LTTPR extended aux timeout if LTTPR is present.
         */
        status = core_link_read_dpcd(
                link,
                DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV,
                link->lttpr_dpcd_data,
                sizeof(link->lttpr_dpcd_data));
                lttpr_dpcd_data,
                sizeof(lttpr_dpcd_data));

        link->dpcd_caps.lttpr_caps.revision.raw =
                lttpr_dpcd_data[DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV -
                        DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

        link->dpcd_caps.lttpr_caps.max_link_rate =
                lttpr_dpcd_data[DP_MAX_LINK_RATE_PHY_REPEATER -
                        DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

        link->dpcd_caps.lttpr_caps.phy_repeater_cnt =
                lttpr_dpcd_data[DP_PHY_REPEATER_CNT -
                        DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

        link->dpcd_caps.lttpr_caps.max_lane_count =
                lttpr_dpcd_data[DP_MAX_LANE_COUNT_PHY_REPEATER -
                        DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

        link->dpcd_caps.lttpr_caps.mode =
                lttpr_dpcd_data[DP_PHY_REPEATER_MODE -
                        DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

        link->dpcd_caps.lttpr_caps.max_ext_timeout =
                lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT -
                        DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
        link->dpcd_caps.lttpr_caps.main_link_channel_coding.raw =
                lttpr_dpcd_data[DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER -
                        DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

        link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.raw =
                lttpr_dpcd_data[DP_PHY_REPEATER_128B132B_RATES -
                        DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

        /* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */
        is_lttpr_present = (link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
                link->dpcd_caps.lttpr_caps.max_lane_count <= 4 &&
                link->dpcd_caps.lttpr_caps.revision.raw >= 0x14);
        if (is_lttpr_present) {
            CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: ");
            configure_lttpr_mode_transparent(link);
        } else
            link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
    }
}

bool dp_parse_lttpr_mode(struct dc_link *link)
{
    bool dpcd_allow_lttpr_non_transparent_mode = false;
    bool is_lttpr_present = false;

    bool vbios_lttpr_enable = link->dc->caps.vbios_lttpr_enable;

    if ((link->dc->config.allow_lttpr_non_transparent_mode.bits.DP2_0 &&
            link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED)) {
        dpcd_allow_lttpr_non_transparent_mode = true;
    } else if (link->dc->config.allow_lttpr_non_transparent_mode.bits.DP1_4A &&
            !link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) {
        dpcd_allow_lttpr_non_transparent_mode = true;
    }

    /*
     * Logic to determine LTTPR mode
     */
    if (link->lttpr_support == LTTPR_SUPPORTED)
        if (vbios_lttpr_enable)
            link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
        else if (dpcd_allow_lttpr_non_transparent_mode)
            link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
        else
            link->lttpr_mode = LTTPR_MODE_TRANSPARENT;
    else // lttpr_support == LTTPR_CHECK_EXT_SUPPORT
        if (dpcd_allow_lttpr_non_transparent_mode) {
            link->lttpr_support = LTTPR_SUPPORTED;
            link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
        } else {
            link->lttpr_support = LTTPR_UNSUPPORTED;
        }

    if (link->lttpr_support == LTTPR_UNSUPPORTED)
        return false;

    link->dpcd_caps.lttpr_caps.revision.raw =
            link->lttpr_dpcd_data[DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV -
                    DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

    link->dpcd_caps.lttpr_caps.max_link_rate =
            link->lttpr_dpcd_data[DP_MAX_LINK_RATE_PHY_REPEATER -
                    DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

    link->dpcd_caps.lttpr_caps.phy_repeater_cnt =
            link->lttpr_dpcd_data[DP_PHY_REPEATER_CNT -
                    DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

    link->dpcd_caps.lttpr_caps.max_lane_count =
            link->lttpr_dpcd_data[DP_MAX_LANE_COUNT_PHY_REPEATER -
                    DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

    link->dpcd_caps.lttpr_caps.mode =
            link->lttpr_dpcd_data[DP_PHY_REPEATER_MODE -
                    DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

    link->dpcd_caps.lttpr_caps.max_ext_timeout =
            link->lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT -
                    DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

    link->dpcd_caps.lttpr_caps.main_link_channel_coding.raw =
            link->lttpr_dpcd_data[DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER -
                    DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

    link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.raw =
            link->lttpr_dpcd_data[DP_PHY_REPEATER_128B132B_RATES -
                    DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];


    /* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */
    is_lttpr_present = (link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
            link->dpcd_caps.lttpr_caps.max_lane_count <= 4 &&
            link->dpcd_caps.lttpr_caps.revision.raw >= 0x14);
    if (is_lttpr_present) {
        CONN_DATA_DETECT(link, link->lttpr_dpcd_data, sizeof(link->lttpr_dpcd_data), "LTTPR Caps: ");
        configure_lttpr_mode_transparent(link);
    } else
        link->lttpr_mode = LTTPR_MODE_NON_LTTPR;

    return is_lttpr_present;
}
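Both variants above read all eight LTTPR capability registers in one AUX burst starting at DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV, then pick each field out of the buffer by `register_address - base_address`. The pattern in isolation, as a self-contained sketch with invented register names (not the DC helpers or real DPCD addresses):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical contiguous register block, mirroring how the LTTPR
 * caps sit in one run of DPCD addresses. */
enum {
    REG_BASE      = 0xF0000, /* structure revision */
    REG_MAX_RATE  = 0xF0001,
    REG_CNT       = 0xF0002,
    REG_MAX_LANES = 0xF0004,
};

/* Stand-in for a single burst read over AUX. */
static void read_block(uint32_t base, uint8_t *buf, size_t len)
{
    static const uint8_t fake_dpcd[8] = { 0x14, 0x1e, 0x80, 0, 0x04, 0, 0, 0 };

    (void)base;
    memcpy(buf, fake_dpcd, len); /* pretend this came from the sink */
}

int main(void)
{
    uint8_t caps[8];

    /* One read covers the whole register run... */
    read_block(REG_BASE, caps, sizeof(caps));

    /* ...and each field is recovered by offsetting from the base,
     * exactly the "REG - REG_BASE" indexing used above. */
    printf("rev=0x%02x max_rate=0x%02x lanes=%d\n",
           caps[REG_BASE - REG_BASE],
           caps[REG_MAX_RATE - REG_BASE],
           (int)caps[REG_MAX_LANES - REG_BASE]);
    return 0;
}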
@@ -5374,8 +5372,7 @@ static bool retrieve_link_cap(struct dc_link *link)
        status = wa_try_to_wake_dprx(link, timeout_ms);
    }

    dp_retrieve_lttpr_cap(link);

    is_lttpr_present = dp_retrieve_lttpr_cap(link);
    /* Read DP tunneling information. */
    status = dpcd_get_tunneling_device_data(link);

@@ -5411,9 +5408,6 @@ static bool retrieve_link_cap(struct dc_link *link)
        return false;
    }

    if (link->lttpr_support > LTTPR_UNSUPPORTED)
        is_lttpr_present = dp_parse_lttpr_mode(link);

    if (!is_lttpr_present)
        dc_link_aux_try_to_configure_timeout(link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD);

@@ -34,6 +34,7 @@
#include "dm_helpers.h"
#include "dmub/inc/dmub_cmd.h"
#include "inc/link_dpcd.h"
#include "dc_dmub_srv.h"

#define DC_LOGGER \
    link->ctx->logger
@@ -69,6 +70,24 @@ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link)
    return status;
}

bool dc_link_dpia_query_hpd_status(struct dc_link *link)
{
    union dmub_rb_cmd cmd = {0};
    struct dc_dmub_srv *dmub_srv = link->ctx->dmub_srv;
    bool is_hpd_high = false;

    /* prepare QUERY_HPD command */
    cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE;
    cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1;
    cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA;

    /* Return HPD status reported by DMUB if query successfully executed. */
    if (dc_dmub_srv_cmd_with_reply_data(dmub_srv, &cmd) && cmd.query_hpd.data.status == AUX_RET_SUCCESS)
        is_hpd_high = cmd.query_hpd.data.result;

    return is_hpd_high;
}

/* Configure link as prescribed in link_setting; set LTTPR mode; and
 * Initialize link training settings.
 * Abort link training if sink unplug detected.
@@ -61,6 +61,8 @@ static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *pl
        plane_state->blend_tf->type = TF_TYPE_BYPASS;
    }

    plane_state->pre_multiplied_alpha = true;

}

static void dc_plane_destruct(struct dc_plane_state *plane_state)

@@ -47,7 +47,7 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;

#define DC_VER "3.2.185"
#define DC_VER "3.2.186"

#define MAX_SURFACES 3
#define MAX_PLANES 6
@@ -329,7 +329,7 @@ struct dc_config {
    bool disable_dmcu;
    bool enable_4to1MPC;
    bool enable_windowed_mpo_odm;
    bool allow_edp_hotplug_detection;
    uint32_t allow_edp_hotplug_detection;
    bool clamp_min_dcfclk;
    uint64_t vblank_alignment_dto_params;
    uint8_t vblank_alignment_max_frame_time_diff;
@@ -1011,6 +1011,7 @@ struct dc_plane_state {

    bool is_tiling_rotated;
    bool per_pixel_alpha;
    bool pre_multiplied_alpha;
    bool global_alpha;
    int global_alpha_value;
    bool visible;
@@ -1045,6 +1046,7 @@ struct dc_plane_info {
    bool horizontal_mirror;
    bool visible;
    bool per_pixel_alpha;
    bool pre_multiplied_alpha;
    bool global_alpha;
    int global_alpha_value;
    bool input_csc_enabled;

@@ -129,8 +129,6 @@ struct dc_link {
    bool link_state_valid;
    bool aux_access_disabled;
    bool sync_lt_in_progress;
    uint8_t lttpr_dpcd_data[8];
    enum lttpr_support lttpr_support;
    enum lttpr_mode lttpr_mode;
    bool is_internal_display;

@@ -87,7 +87,8 @@ static void release_engine(

    engine->ddc = NULL;

    REG_UPDATE(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, 1);
    REG_UPDATE_2(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, 1,
        AUX_SW_USE_AUX_REG_REQ, 0);
}

#define SW_CAN_ACCESS_AUX 1

@@ -1101,9 +1101,12 @@ static bool get_pixel_clk_frequency_100hz(
         * not be programmed equal to DPREFCLK
         */
        modulo_hz = REG_READ(MODULO[inst]);
        *pixel_clk_khz = div_u64((uint64_t)clock_hz*
            clock_source->ctx->dc->clk_mgr->dprefclk_khz*10,
            modulo_hz);
        if (modulo_hz)
            *pixel_clk_khz = div_u64((uint64_t)clock_hz*
                clock_source->ctx->dc->clk_mgr->dprefclk_khz*10,
                modulo_hz);
        else
            *pixel_clk_khz = 0;
    } else {
        /* NOTE: There is agreement with VBIOS here that MODULO is
         * programmed equal to DPREFCLK, in which case PHASE will be
@@ -2550,12 +2550,21 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
    blnd_cfg.overlap_only = false;
    blnd_cfg.global_gain = 0xff;

    if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) {
        blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
        blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
    } else if (per_pixel_alpha) {
        blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
    if (per_pixel_alpha) {
        /* DCN1.0 has output CM before MPC which seems to screw with
         * pre-multiplied alpha.
         */
        blnd_cfg.pre_multiplied_alpha = (is_rgb_cspace(
                pipe_ctx->stream->output_color_space)
                && pipe_ctx->plane_state->pre_multiplied_alpha);
        if (pipe_ctx->plane_state->global_alpha) {
            blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
            blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
        } else {
            blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
        }
    } else {
        blnd_cfg.pre_multiplied_alpha = false;
        blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
    }

@@ -2564,14 +2573,6 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
    else
        blnd_cfg.global_alpha = 0xff;

    /* DCN1.0 has output CM before MPC which seems to screw with
     * pre-multiplied alpha.
     */
    blnd_cfg.pre_multiplied_alpha = is_rgb_cspace(
            pipe_ctx->stream->output_color_space)
            && per_pixel_alpha;


    /*
     * TODO: remove hack
     * Note: currently there is a bug in init_hw such that

@@ -1773,7 +1773,6 @@ void dcn20_post_unlock_program_front_end(
     */
    for (i = 0; i < dc->res_pool->pipe_count; i++) {
        struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];

        if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable) {
            struct hubp *hubp = pipe->plane_res.hubp;
            int j = 0;
@@ -2346,12 +2345,16 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
    blnd_cfg.overlap_only = false;
    blnd_cfg.global_gain = 0xff;

    if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) {
        blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
        blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
    } else if (per_pixel_alpha) {
        blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
    if (per_pixel_alpha) {
        blnd_cfg.pre_multiplied_alpha = pipe_ctx->plane_state->pre_multiplied_alpha;
        if (pipe_ctx->plane_state->global_alpha) {
            blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
            blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
        } else {
            blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
        }
    } else {
        blnd_cfg.pre_multiplied_alpha = false;
        blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
    }

@@ -2365,7 +2368,7 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
    blnd_cfg.top_gain = 0x1f000;
    blnd_cfg.bottom_inside_gain = 0x1f000;
    blnd_cfg.bottom_outside_gain = 0x1f000;
    blnd_cfg.pre_multiplied_alpha = per_pixel_alpha;

    if (pipe_ctx->plane_state->format
            == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA)
        blnd_cfg.pre_multiplied_alpha = false;
@@ -28,6 +28,8 @@
#include "dc.h"
#include "dcn_calc_math.h"

#include "dml/dcn30/dcn30_fpu.h"

#define REG(reg)\
    optc1->tg_regs->reg

@@ -184,6 +186,14 @@ void optc3_set_dsc_config(struct timing_generator *optc,

}

void optc3_set_vrr_m_const(struct timing_generator *optc,
        double vtotal_avg)
{
    DC_FP_START();
    optc3_fpu_set_vrr_m_const(optc, vtotal_avg);
    DC_FP_END();
}

void optc3_set_odm_bypass(struct timing_generator *optc,
        const struct dc_crtc_timing *dc_crtc_timing)
{
@@ -84,6 +84,7 @@
#include "dce/dce_aux.h"
#include "dce/dce_i2c.h"

#include "dml/dcn30/dcn30_fpu.h"
#include "dml/dcn30/display_mode_vba_30.h"
#include "vm_helper.h"
#include "dcn20/dcn20_vmid.h"
@@ -91,137 +92,6 @@

#define DC_LOGGER_INIT(logger)

struct _vcs_dpi_ip_params_st dcn3_0_ip = {
    .use_min_dcfclk = 0,
    .clamp_min_dcfclk = 0,
    .odm_capable = 1,
    .gpuvm_enable = 0,
    .hostvm_enable = 0,
    .gpuvm_max_page_table_levels = 4,
    .hostvm_max_page_table_levels = 4,
    .hostvm_cached_page_table_levels = 0,
    .pte_group_size_bytes = 2048,
    .num_dsc = 6,
    .rob_buffer_size_kbytes = 184,
    .det_buffer_size_kbytes = 184,
    .dpte_buffer_size_in_pte_reqs_luma = 84,
    .pde_proc_buffer_size_64k_reqs = 48,
    .dpp_output_buffer_pixels = 2560,
    .opp_output_buffer_lines = 1,
    .pixel_chunk_size_kbytes = 8,
    .pte_enable = 1,
    .max_page_table_levels = 2,
    .pte_chunk_size_kbytes = 2, // ?
    .meta_chunk_size_kbytes = 2,
    .writeback_chunk_size_kbytes = 8,
    .line_buffer_size_bits = 789504,
    .is_line_buffer_bpp_fixed = 0, // ?
    .line_buffer_fixed_bpp = 0, // ?
    .dcc_supported = true,
    .writeback_interface_buffer_size_kbytes = 90,
    .writeback_line_buffer_buffer_size = 0,
    .max_line_buffer_lines = 12,
    .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640
    .writeback_chroma_buffer_size_kbytes = 8,
    .writeback_chroma_line_buffer_width_pixels = 4,
    .writeback_max_hscl_ratio = 1,
    .writeback_max_vscl_ratio = 1,
    .writeback_min_hscl_ratio = 1,
    .writeback_min_vscl_ratio = 1,
    .writeback_max_hscl_taps = 1,
    .writeback_max_vscl_taps = 1,
    .writeback_line_buffer_luma_buffer_size = 0,
    .writeback_line_buffer_chroma_buffer_size = 14643,
    .cursor_buffer_size = 8,
    .cursor_chunk_size = 2,
    .max_num_otg = 6,
    .max_num_dpp = 6,
    .max_num_wb = 1,
    .max_dchub_pscl_bw_pix_per_clk = 4,
    .max_pscl_lb_bw_pix_per_clk = 2,
    .max_lb_vscl_bw_pix_per_clk = 4,
    .max_vscl_hscl_bw_pix_per_clk = 4,
    .max_hscl_ratio = 6,
    .max_vscl_ratio = 6,
    .hscl_mults = 4,
    .vscl_mults = 4,
    .max_hscl_taps = 8,
    .max_vscl_taps = 8,
    .dispclk_ramp_margin_percent = 1,
    .underscan_factor = 1.11,
    .min_vblank_lines = 32,
    .dppclk_delay_subtotal = 46,
    .dynamic_metadata_vm_enabled = true,
    .dppclk_delay_scl_lb_only = 16,
    .dppclk_delay_scl = 50,
    .dppclk_delay_cnvc_formatter = 27,
    .dppclk_delay_cnvc_cursor = 6,
    .dispclk_delay_subtotal = 119,
    .dcfclk_cstate_latency = 5.2, // SRExitTime
    .max_inter_dcn_tile_repeaters = 8,
    .odm_combine_4to1_supported = true,

    .xfc_supported = false,
    .xfc_fill_bw_overhead_percent = 10.0,
    .xfc_fill_constant_bytes = 0,
    .gfx7_compat_tiling_supported = 0,
    .number_of_cursors = 1,
};

struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = {
    .clock_limits = {
        {
            .state = 0,
            .dispclk_mhz = 562.0,
            .dppclk_mhz = 300.0,
            .phyclk_mhz = 300.0,
            .phyclk_d18_mhz = 667.0,
            .dscclk_mhz = 405.6,
        },
    },
    .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
    .num_states = 1,
    .sr_exit_time_us = 15.5,
    .sr_enter_plus_exit_time_us = 20,
    .urgent_latency_us = 4.0,
    .urgent_latency_pixel_data_only_us = 4.0,
    .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
    .urgent_latency_vm_data_only_us = 4.0,
    .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
    .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
    .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
    .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
    .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
    .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
    .max_avg_sdp_bw_use_normal_percent = 60.0,
    .max_avg_dram_bw_use_normal_percent = 40.0,
    .writeback_latency_us = 12.0,
    .max_request_size_bytes = 256,
    .fabric_datapath_to_dcn_data_return_bytes = 64,
    .dcn_downspread_percent = 0.5,
    .downspread_percent = 0.38,
    .dram_page_open_time_ns = 50.0,
    .dram_rw_turnaround_time_ns = 17.5,
    .dram_return_buffer_per_channel_bytes = 8192,
    .round_trip_ping_latency_dcfclk_cycles = 191,
    .urgent_out_of_order_return_per_channel_bytes = 4096,
    .channel_interleave_bytes = 256,
    .num_banks = 8,
    .gpuvm_min_page_size_bytes = 4096,
    .hostvm_min_page_size_bytes = 4096,
    .dram_clock_change_latency_us = 404,
    .dummy_pstate_latency_us = 5,
    .writeback_dram_clock_change_latency_us = 23.0,
    .return_bus_width_bytes = 64,
    .dispclk_dppclk_vco_speed_mhz = 3650,
    .xfc_bus_transport_time_us = 20, // ?
    .xfc_xbuf_latency_tolerance_us = 4, // ?
    .use_urgent_burst_bw = 1, // ?
    .do_urgent_latency_adjustment = true,
    .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
    .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
};

enum dcn30_clk_src_array_id {
    DCN30_CLK_SRC_PLL0,
    DCN30_CLK_SRC_PLL1,
@@ -1480,90 +1350,9 @@ int dcn30_populate_dml_pipes_from_context(
void dcn30_populate_dml_writeback_from_context(
    struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
{
    int pipe_cnt, i, j;
    double max_calc_writeback_dispclk;
    double writeback_dispclk;
    struct writeback_st dout_wb;

    for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
        struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream;

        if (!stream)
            continue;
        max_calc_writeback_dispclk = 0;

        /* Set writeback information */
        pipes[pipe_cnt].dout.wb_enable = 0;
        pipes[pipe_cnt].dout.num_active_wb = 0;
        for (j = 0; j < stream->num_wb_info; j++) {
            struct dc_writeback_info *wb_info = &stream->writeback_info[j];

            if (wb_info->wb_enabled && wb_info->writeback_source_plane &&
                    (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) {
                pipes[pipe_cnt].dout.wb_enable = 1;
                pipes[pipe_cnt].dout.num_active_wb++;
                dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ?
                    wb_info->dwb_params.cnv_params.crop_height :
                    wb_info->dwb_params.cnv_params.src_height;
                dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ?
                    wb_info->dwb_params.cnv_params.crop_width :
                    wb_info->dwb_params.cnv_params.src_width;
                dout_wb.wb_dst_width = wb_info->dwb_params.dest_width;
                dout_wb.wb_dst_height = wb_info->dwb_params.dest_height;

                /* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */
                if (dc->dml.ip.writeback_max_hscl_taps > 1) {
                    dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps;
                    dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps;
                } else {
                    dout_wb.wb_htaps_luma = 1;
                    dout_wb.wb_vtaps_luma = 1;
                }
                dout_wb.wb_htaps_chroma = 0;
                dout_wb.wb_vtaps_chroma = 0;
                dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ?
                    (double)wb_info->dwb_params.cnv_params.crop_width /
                        (double)wb_info->dwb_params.dest_width :
                    (double)wb_info->dwb_params.cnv_params.src_width /
                        (double)wb_info->dwb_params.dest_width;
                dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ?
                    (double)wb_info->dwb_params.cnv_params.crop_height /
                        (double)wb_info->dwb_params.dest_height :
                    (double)wb_info->dwb_params.cnv_params.src_height /
                        (double)wb_info->dwb_params.dest_height;
                if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB ||
                        wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA)
                    dout_wb.wb_pixel_format = dm_444_64;
                else
                    dout_wb.wb_pixel_format = dm_444_32;

                /* Workaround for cases where multiple writebacks are connected to same plane
                 * In which case, need to compute worst case and set the associated writeback parameters
                 * This workaround is necessary due to DML computation assuming only 1 set of writeback
                 * parameters per pipe
                 */
                writeback_dispclk = dml30_CalculateWriteBackDISPCLK(
                        dout_wb.wb_pixel_format,
                        pipes[pipe_cnt].pipe.dest.pixel_rate_mhz,
                        dout_wb.wb_hratio,
                        dout_wb.wb_vratio,
                        dout_wb.wb_htaps_luma,
                        dout_wb.wb_vtaps_luma,
                        dout_wb.wb_src_width,
                        dout_wb.wb_dst_width,
                        pipes[pipe_cnt].pipe.dest.htotal,
                        dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size);

                if (writeback_dispclk > max_calc_writeback_dispclk) {
                    max_calc_writeback_dispclk = writeback_dispclk;
                    pipes[pipe_cnt].dout.wb = dout_wb;
                }
            }
        }

        pipe_cnt++;
    }

    DC_FP_START();
    dcn30_fpu_populate_dml_writeback_from_context(dc, res_ctx, pipes);
    DC_FP_END();
}

unsigned int dcn30_calc_max_scaled_time(
@@ -1598,7 +1387,7 @@ void dcn30_set_mcif_arb_params(
    enum mmhubbub_wbif_mode wbif_mode;
    struct display_mode_lib *dml = &context->bw_ctx.dml;
    struct mcif_arb_params *wb_arb_params;
    int i, j, k, dwb_pipe;
    int i, j, dwb_pipe;

    /* Writeback MCIF_WB arbitration parameters */
    dwb_pipe = 0;
@@ -1622,17 +1411,15 @@ void dcn30_set_mcif_arb_params(
        else
            wbif_mode = PACKED_444;

        for (k = 0; k < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); k++) {
            wb_arb_params->cli_watermark[k] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000;
            wb_arb_params->pstate_watermark[k] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
        }
        DC_FP_START();
        dcn30_fpu_set_mcif_arb_params(wb_arb_params, dml, pipes, pipe_cnt, j);
        DC_FP_END();
        wb_arb_params->time_per_pixel = (1000000 << 6) / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk; /* time_per_pixel should be in u6.6 format */
        wb_arb_params->slice_lines = 32;
        wb_arb_params->arbitration_slice = 2; /* irrelevant since there is no YUV output */
        wb_arb_params->max_scaled_time = dcn30_calc_max_scaled_time(wb_arb_params->time_per_pixel,
                wbif_mode,
                wb_arb_params->cli_watermark[0]); /* assume 4 watermark sets have the same value */
        wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[j] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */

        dwb_pipe++;

@@ -2111,178 +1898,11 @@ validate_out:
    return out;
}

/*
 * This must be noinline to ensure anything that deals with FP registers
 * is contained within this call; previously our compiling with hard-float
 * would result in fp instructions being emitted outside of the boundaries
 * of the DC_FP_START/END macros, which makes sense as the compiler has no
 * idea about what is wrapped and what is not
 *
 * This is largely just a workaround to avoid breakage introduced with 5.6,
 * ideally all fp-using code should be moved into its own file, only that
 * should be compiled with hard-float, and all code exported from there
 * should be strictly wrapped with DC_FP_START/END
 */
static noinline void dcn30_calculate_wm_and_dlg_fp(
        struct dc *dc, struct dc_state *context,
        display_e2e_pipe_params_st *pipes,
        int pipe_cnt,
        int vlevel)
{
    int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
    int i, pipe_idx;
    double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb];
    bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;

    if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
        dcfclk = context->bw_ctx.dml.soc.min_dcfclk;

    pipes[0].clks_cfg.voltage = vlevel;
    pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
    pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;

    /* Set B:
     * DCFCLK: 1GHz or min required above 1GHz
     * FCLK/UCLK: Max
     */
    if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
        if (vlevel == 0) {
            pipes[0].clks_cfg.voltage = 1;
            pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz;
        }
        context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
        context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
        context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
    }
    context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;

    pipes[0].clks_cfg.voltage = vlevel;
    pipes[0].clks_cfg.dcfclk_mhz = dcfclk;

    /* Set D:
     * DCFCLK: Min Required
     * FCLK(proportional to UCLK): 1GHz or Max
     * MALL stutter, sr_enter_exit = 4, sr_exit = 2us
     */
    /*
    if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
        context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us;
        context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us;
        context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us;
    }
    context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    */

    /* Set C:
     * DCFCLK: Min Required
     * FCLK(proportional to UCLK): 1GHz or Max
     * pstate latency overridden to 5us
     */
    if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
        unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
        unsigned int min_dram_speed_mts_margin = 160;

        if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported)
            min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16;

        /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */
        for (i = 3; i > 0; i--)
            if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts)
                break;

        context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us;
        context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
        context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
    }

    context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;

    if (!pstate_en) {
        /* The only difference between A and C is p-state latency, if p-state is not supported we want to
         * calculate DLG based on dummy p-state latency, and max out the set A p-state watermark
         */
        context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c;
        context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0;
    } else {
        /* Set A:
         * DCFCLK: Min Required
         * FCLK(proportional to UCLK): 1GHz or Max
         *
         * Set A calculated last so that following calculations are based on Set A
         */
        dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
        context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
        context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
        context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
        context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
        context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
        context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
        context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
        context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
    }

    context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;

    /* Make set D = set A until set D is enabled */
    context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;

    for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
        if (!context->res_ctx.pipe_ctx[i].stream)
            continue;

        pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
        pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);

        if (dc->config.forced_clocks) {
            pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
            pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
        }
        if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
            pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
        if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
            pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;

        pipe_idx++;
    }

    DC_FP_START();
    dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
    DC_FP_END();

    if (!pstate_en)
        /* Restore full p-state latency */
        context->bw_ctx.dml.soc.dram_clock_change_latency_us =
                dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
}

void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
{
    if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) {
        context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
        context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;
        context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;
    }
    DC_FP_START();
    dcn30_fpu_update_soc_for_wm_a(dc, context);
    DC_FP_END();
}

void dcn30_calculate_wm_and_dlg(
@@ -2292,7 +1912,7 @@ void dcn30_calculate_wm_and_dlg(
        int vlevel)
{
    DC_FP_START();
    dcn30_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel);
    dcn30_fpu_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel);
    DC_FP_END();
}

@@ -2351,40 +1971,6 @@ validate_out:
    return out;
}

/*
 * This must be noinline to ensure anything that deals with FP registers
 * is contained within this call; previously our compiling with hard-float
 * would result in fp instructions being emitted outside of the boundaries
 * of the DC_FP_START/END macros, which makes sense as the compiler has no
 * idea about what is wrapped and what is not
 *
 * This is largely just a workaround to avoid breakage introduced with 5.6,
 * ideally all fp-using code should be moved into its own file, only that
 * should be compiled with hard-float, and all code exported from there
 * should be strictly wrapped with DC_FP_START/END
 */
static noinline void dcn30_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
        unsigned int *optimal_dcfclk,
        unsigned int *optimal_fclk)
{
    double bw_from_dram, bw_from_dram1, bw_from_dram2;

    bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans *
        dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100);
    bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans *
        dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100);

    bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;

    if (optimal_fclk)
        *optimal_fclk = bw_from_dram /
            (dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));

    if (optimal_dcfclk)
        *optimal_dcfclk = bw_from_dram /
            (dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
}

void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
{
    unsigned int i, j;
@@ -2399,47 +1985,43 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
    unsigned int num_dcfclk_sta_targets = 4;
    unsigned int num_uclk_states;

    struct dc_bounding_box_max_clk dcn30_bb_max_clk;

    memset(&dcn30_bb_max_clk, 0, sizeof(dcn30_bb_max_clk));

    if (dc->ctx->dc_bios->vram_info.num_chans)
        dcn3_0_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;

    if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
        dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;

    dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
    dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
    DC_FP_START();
    dcn30_fpu_update_dram_channel_width_bytes(dc);
    DC_FP_END();

    if (bw_params->clk_table.entries[0].memclk_mhz) {
        int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;

        for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
            if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
                max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
            if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
                max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
            if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
                max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
            if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
                max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
            if (bw_params->clk_table.entries[i].dcfclk_mhz > dcn30_bb_max_clk.max_dcfclk_mhz)
                dcn30_bb_max_clk.max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
            if (bw_params->clk_table.entries[i].dispclk_mhz > dcn30_bb_max_clk.max_dispclk_mhz)
                dcn30_bb_max_clk.max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
            if (bw_params->clk_table.entries[i].dppclk_mhz > dcn30_bb_max_clk.max_dppclk_mhz)
                dcn30_bb_max_clk.max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
            if (bw_params->clk_table.entries[i].phyclk_mhz > dcn30_bb_max_clk.max_phyclk_mhz)
                dcn30_bb_max_clk.max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
        }

        if (!max_dcfclk_mhz)
            max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz;
        if (!max_dispclk_mhz)
            max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz;
        if (!max_dppclk_mhz)
            max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz;
        if (!max_phyclk_mhz)
            max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz;
        DC_FP_START();
        dcn30_fpu_update_max_clk(&dcn30_bb_max_clk);
        DC_FP_END();

        if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
        if (dcn30_bb_max_clk.max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
            // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array
            dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
            dcfclk_sta_targets[num_dcfclk_sta_targets] = dcn30_bb_max_clk.max_dcfclk_mhz;
            num_dcfclk_sta_targets++;
        } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
        } else if (dcn30_bb_max_clk.max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
            // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates
            for (i = 0; i < num_dcfclk_sta_targets; i++) {
                if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
                    dcfclk_sta_targets[i] = max_dcfclk_mhz;
                if (dcfclk_sta_targets[i] > dcn30_bb_max_clk.max_dcfclk_mhz) {
                    dcfclk_sta_targets[i] = dcn30_bb_max_clk.max_dcfclk_mhz;
                    break;
                }
            }
@@ -2452,7 +2034,7 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
        // Calculate optimal dcfclk for each uclk
        for (i = 0; i < num_uclk_states; i++) {
            DC_FP_START();
            dcn30_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
            dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
                    &optimal_dcfclk_for_uclk[i], NULL);
            DC_FP_END();
            if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) {
@@ -2479,7 +2061,7 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
                dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
                dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
            } else {
                if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
                if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) {
                    dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
                    dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
                } else {
@@ -2494,33 +2076,15 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
        }

        while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
                optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
                optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) {
            dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
            dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
        }

        dcn3_0_soc.num_states = num_states;
        for (i = 0; i < dcn3_0_soc.num_states; i++) {
            dcn3_0_soc.clock_limits[i].state = i;
            dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
            dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
            dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];

            /* Fill all states with max values of all other clocks */
            dcn3_0_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
            dcn3_0_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
            dcn3_0_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
            dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz;
            /* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */
            /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
            dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz;
            dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz;
            dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz;
        }
        /* re-init DML with updated bb */
        dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
        if (dc->current_state)
            dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
        DC_FP_START();
        dcn30_fpu_update_bw_bounding_box(dc, bw_params, &dcn30_bb_max_clk, dcfclk_mhz, dram_speed_mts);
        DC_FP_END();
    }
}

@@ -35,6 +35,9 @@ struct dc;
struct resource_pool;
struct _vcs_dpi_display_pipe_params_st;

extern struct _vcs_dpi_ip_params_st dcn3_0_ip;
extern struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc;

struct dcn30_resource_pool {
    struct resource_pool base;
};
@@ -96,4 +99,6 @@ enum dc_status dcn30_add_stream_to_ctx(

void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);

void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);

#endif /* _DCN30_RESOURCE_H_ */

@@ -81,6 +81,8 @@
#include "dce/dce_aux.h"
#include "dce/dce_i2c.h"

#include "dml/dcn30/dcn30_fpu.h"

#include "dml/dcn30/display_mode_vba_30.h"
#include "dml/dcn301/dcn301_fpu.h"
#include "vm_helper.h"

@@ -43,6 +43,8 @@
#include "dcn20/dcn20_dsc.h"
#include "dcn20/dcn20_resource.h"

#include "dml/dcn30/dcn30_fpu.h"

#include "dcn10/dcn10_resource.h"

#include "dce/dce_abm.h"

@@ -25,6 +25,8 @@
#include "dcn20/dcn20_dsc.h"
#include "dcn20/dcn20_resource.h"

#include "dml/dcn30/dcn30_fpu.h"

#include "dcn10/dcn10_resource.h"

#include "dc_link_ddc.h"

@@ -36,6 +36,8 @@
#include "dcn20/dcn20_resource.h"
#include "dcn30/dcn30_resource.h"

#include "dml/dcn30/dcn30_fpu.h"

#include "dcn10/dcn10_ipp.h"
#include "dcn30/dcn30_hubbub.h"
#include "dcn31/dcn31_hubbub.h"

@@ -71,6 +71,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags)
@@ -113,7 +114,7 @@ DML += dcn20/dcn20_fpu.o
DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o
DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o
DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o
DML += dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o
DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o
DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o
DML += dcn31/dcn31_fpu.o
DML += dcn301/dcn301_fpu.o
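The `dcn30_get_optimal_dcfclk_fclk_for_uclk()` helper above (now hidden behind `dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk()`) derives clock targets from achievable DRAM bandwidth: take the DRAM speed times channel count and width, derate it by the lower of the DRAM- and SDP-side normal-use percentages, and divide by each return path's bus width at the same derating. A worked standalone version with illustrative SoC constants — these are stand-ins, not the real `dcn3_0_soc` values for any board:

#include <stdio.h>

/* Illustrative SoC parameters (assumptions, not real dcn3_0_soc values). */
#define NUM_CHANS                 8
#define DRAM_CHANNEL_WIDTH_BYTES  4
#define MAX_AVG_DRAM_BW_PCT       40.0
#define MAX_AVG_SDP_BW_PCT        60.0
#define FABRIC_RETURN_BYTES       64
#define RETURN_BUS_WIDTH_BYTES    64

static void optimal_clocks_for_uclk(unsigned int uclk_mts,
                                    unsigned int *optimal_dcfclk,
                                    unsigned int *optimal_fclk)
{
    /* Achievable bandwidth is the lower of the DRAM-side and SDP-side
     * derated figures, as in the driver function. */
    double bw1 = uclk_mts * NUM_CHANS * DRAM_CHANNEL_WIDTH_BYTES * (MAX_AVG_DRAM_BW_PCT / 100);
    double bw2 = uclk_mts * NUM_CHANS * DRAM_CHANNEL_WIDTH_BYTES * (MAX_AVG_SDP_BW_PCT / 100);
    double bw = bw1 < bw2 ? bw1 : bw2;

    if (optimal_fclk)
        *optimal_fclk = bw / (FABRIC_RETURN_BYTES * (MAX_AVG_SDP_BW_PCT / 100));
    if (optimal_dcfclk)
        *optimal_dcfclk = bw / (RETURN_BUS_WIDTH_BYTES * (MAX_AVG_SDP_BW_PCT / 100));
}

int main(void)
{
    unsigned int dcfclk, fclk;

    /* e.g. a 16000 MT/s memory clock, as passed in by the caller above */
    optimal_clocks_for_uclk(16000, &dcfclk, &fclk);
    printf("dcfclk=%u MHz fclk=%u MHz\n", dcfclk, fclk);
    return 0;
}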
617
drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
Normal file
617
drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
Normal file
@ -0,0 +1,617 @@
/*
 * Copyright 2020-2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: AMD
 *
 */
#include "resource.h"
#include "clk_mgr.h"
#include "reg_helper.h"
#include "dcn_calc_math.h"
#include "dcn20/dcn20_resource.h"
#include "dcn30/dcn30_resource.h"

#include "display_mode_vba_30.h"
#include "dcn30_fpu.h"

#define REG(reg)\
	optc1->tg_regs->reg

#define CTX \
	optc1->base.ctx

#undef FN
#define FN(reg_name, field_name) \
	optc1->tg_shift->field_name, optc1->tg_mask->field_name

struct _vcs_dpi_ip_params_st dcn3_0_ip = {
	.use_min_dcfclk = 0,
	.clamp_min_dcfclk = 0,
	.odm_capable = 1,
	.gpuvm_enable = 0,
	.hostvm_enable = 0,
	.gpuvm_max_page_table_levels = 4,
	.hostvm_max_page_table_levels = 4,
	.hostvm_cached_page_table_levels = 0,
	.pte_group_size_bytes = 2048,
	.num_dsc = 6,
	.rob_buffer_size_kbytes = 184,
	.det_buffer_size_kbytes = 184,
	.dpte_buffer_size_in_pte_reqs_luma = 84,
	.pde_proc_buffer_size_64k_reqs = 48,
	.dpp_output_buffer_pixels = 2560,
	.opp_output_buffer_lines = 1,
	.pixel_chunk_size_kbytes = 8,
	.pte_enable = 1,
	.max_page_table_levels = 2,
	.pte_chunk_size_kbytes = 2,  // ?
	.meta_chunk_size_kbytes = 2,
	.writeback_chunk_size_kbytes = 8,
	.line_buffer_size_bits = 789504,
	.is_line_buffer_bpp_fixed = 0,  // ?
	.line_buffer_fixed_bpp = 0,     // ?
	.dcc_supported = true,
	.writeback_interface_buffer_size_kbytes = 90,
	.writeback_line_buffer_buffer_size = 0,
	.max_line_buffer_lines = 12,
	.writeback_luma_buffer_size_kbytes = 12,  // writeback_line_buffer_buffer_size = 656640
	.writeback_chroma_buffer_size_kbytes = 8,
	.writeback_chroma_line_buffer_width_pixels = 4,
	.writeback_max_hscl_ratio = 1,
	.writeback_max_vscl_ratio = 1,
	.writeback_min_hscl_ratio = 1,
	.writeback_min_vscl_ratio = 1,
	.writeback_max_hscl_taps = 1,
	.writeback_max_vscl_taps = 1,
	.writeback_line_buffer_luma_buffer_size = 0,
	.writeback_line_buffer_chroma_buffer_size = 14643,
	.cursor_buffer_size = 8,
	.cursor_chunk_size = 2,
	.max_num_otg = 6,
	.max_num_dpp = 6,
	.max_num_wb = 1,
	.max_dchub_pscl_bw_pix_per_clk = 4,
	.max_pscl_lb_bw_pix_per_clk = 2,
	.max_lb_vscl_bw_pix_per_clk = 4,
	.max_vscl_hscl_bw_pix_per_clk = 4,
	.max_hscl_ratio = 6,
	.max_vscl_ratio = 6,
	.hscl_mults = 4,
	.vscl_mults = 4,
	.max_hscl_taps = 8,
	.max_vscl_taps = 8,
	.dispclk_ramp_margin_percent = 1,
	.underscan_factor = 1.11,
	.min_vblank_lines = 32,
	.dppclk_delay_subtotal = 46,
	.dynamic_metadata_vm_enabled = true,
	.dppclk_delay_scl_lb_only = 16,
	.dppclk_delay_scl = 50,
	.dppclk_delay_cnvc_formatter = 27,
	.dppclk_delay_cnvc_cursor = 6,
	.dispclk_delay_subtotal = 119,
	.dcfclk_cstate_latency = 5.2,  // SRExitTime
	.max_inter_dcn_tile_repeaters = 8,
	.max_num_hdmi_frl_outputs = 1,
	.odm_combine_4to1_supported = true,

	.xfc_supported = false,
	.xfc_fill_bw_overhead_percent = 10.0,
	.xfc_fill_constant_bytes = 0,
	.gfx7_compat_tiling_supported = 0,
	.number_of_cursors = 1,
};

struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = {
	.clock_limits = {
		{
			.state = 0,
			.dispclk_mhz = 562.0,
			.dppclk_mhz = 300.0,
			.phyclk_mhz = 300.0,
			.phyclk_d18_mhz = 667.0,
			.dscclk_mhz = 405.6,
		},
	},

	.min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
	.num_states = 1,
	.sr_exit_time_us = 15.5,
	.sr_enter_plus_exit_time_us = 20,
	.urgent_latency_us = 4.0,
	.urgent_latency_pixel_data_only_us = 4.0,
	.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
	.urgent_latency_vm_data_only_us = 4.0,
	.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
	.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
	.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
	.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
	.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
	.pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
	.max_avg_sdp_bw_use_normal_percent = 60.0,
	.max_avg_dram_bw_use_normal_percent = 40.0,
	.writeback_latency_us = 12.0,
	.max_request_size_bytes = 256,
	.fabric_datapath_to_dcn_data_return_bytes = 64,
	.dcn_downspread_percent = 0.5,
	.downspread_percent = 0.38,
	.dram_page_open_time_ns = 50.0,
	.dram_rw_turnaround_time_ns = 17.5,
	.dram_return_buffer_per_channel_bytes = 8192,
	.round_trip_ping_latency_dcfclk_cycles = 191,
	.urgent_out_of_order_return_per_channel_bytes = 4096,
	.channel_interleave_bytes = 256,
	.num_banks = 8,
	.gpuvm_min_page_size_bytes = 4096,
	.hostvm_min_page_size_bytes = 4096,
	.dram_clock_change_latency_us = 404,
	.dummy_pstate_latency_us = 5,
	.writeback_dram_clock_change_latency_us = 23.0,
	.return_bus_width_bytes = 64,
	.dispclk_dppclk_vco_speed_mhz = 3650,
	.xfc_bus_transport_time_us = 20,      // ?
	.xfc_xbuf_latency_tolerance_us = 4,   // ?
	.use_urgent_burst_bw = 1,             // ?
	.do_urgent_latency_adjustment = true,
	.urgent_latency_adjustment_fabric_clock_component_us = 1.0,
	.urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
};

void optc3_fpu_set_vrr_m_const(struct timing_generator *optc,
		double vtotal_avg)
{
	struct optc *optc1 = DCN10TG_FROM_TG(optc);
	double vtotal_min, vtotal_max;
	double ratio, modulo, phase;
	uint32_t vblank_start;
	uint32_t v_total_mask_value = 0;

	dc_assert_fp_enabled();

	/* Compute VTOTAL_MIN and VTOTAL_MAX, so that
	 * VTOTAL_MAX - VTOTAL_MIN = 1
	 */
	v_total_mask_value = 16;
	vtotal_min = dcn_bw_floor(vtotal_avg);
	vtotal_max = dcn_bw_ceil(vtotal_avg);

	/* Check that bottom VBLANK is at least 2 lines tall when running with
	 * VTOTAL_MIN. Note that VTOTAL registers are defined as 'total number
	 * of lines in a frame - 1'.
	 */
	REG_GET(OTG_V_BLANK_START_END, OTG_V_BLANK_START,
		&vblank_start);
	ASSERT(vtotal_min >= vblank_start + 1);

	/* Special case where the average frame rate can be achieved
	 * without using the DTO
	 */
	if (vtotal_min == vtotal_max) {
		REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min);

		optc->funcs->set_vtotal_min_max(optc, 0, 0);
		REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0);
		REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0);
		REG_UPDATE_3(OTG_V_TOTAL_CONTROL,
				OTG_V_TOTAL_MIN_SEL, 0,
				OTG_V_TOTAL_MAX_SEL, 0,
				OTG_SET_V_TOTAL_MIN_MASK_EN, 0);
		return;
	}

	ratio = vtotal_max - vtotal_avg;
	modulo = 65536.0 * 65536.0 - 1.0; /* 2^32 - 1 */
	phase = ratio * modulo;

	/* Special cases where the DTO phase gets rounded to 0 or
	 * to DTO modulo
	 */
	if (phase <= 0 || phase >= modulo) {
		REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL,
			phase <= 0 ?
				(uint32_t)vtotal_max : (uint32_t)vtotal_min);
		REG_SET(OTG_V_TOTAL_MIN, 0, OTG_V_TOTAL_MIN, 0);
		REG_SET(OTG_V_TOTAL_MAX, 0, OTG_V_TOTAL_MAX, 0);
		REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0);
		REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0);
		REG_UPDATE_3(OTG_V_TOTAL_CONTROL,
				OTG_V_TOTAL_MIN_SEL, 0,
				OTG_V_TOTAL_MAX_SEL, 0,
				OTG_SET_V_TOTAL_MIN_MASK_EN, 0);
		return;
	}
	REG_UPDATE_6(OTG_V_TOTAL_CONTROL,
			OTG_V_TOTAL_MIN_SEL, 1,
			OTG_V_TOTAL_MAX_SEL, 1,
			OTG_SET_V_TOTAL_MIN_MASK_EN, 1,
			OTG_SET_V_TOTAL_MIN_MASK, v_total_mask_value,
			OTG_VTOTAL_MID_REPLACING_MIN_EN, 0,
			OTG_VTOTAL_MID_REPLACING_MAX_EN, 0);
	REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min);
	optc->funcs->set_vtotal_min_max(optc, vtotal_min, vtotal_max);
	REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, (uint32_t)phase);
	REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, (uint32_t)modulo);
}
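
A standalone sketch of the arithmetic above, not driver code (the fractional VTOTAL value is made up): the DTO dithers between VTOTAL_MIN and VTOTAL_MAX so that the long-run average lands on vtotal_avg, with phase/modulo setting the duty cycle.

#include <math.h>
#include <stdio.h>

int main(void)
{
	double vtotal_avg = 1124.25;	/* hypothetical fractional VTOTAL */
	double vtotal_min = floor(vtotal_avg);
	double vtotal_max = ceil(vtotal_avg);
	double modulo = 65536.0 * 65536.0 - 1.0;	/* 2^32 - 1, as above */
	double ratio = vtotal_max - vtotal_avg;	/* fraction of frames at VTOTAL_MIN */
	double phase = ratio * modulo;

	/* Average over many frames: min*ratio + max*(1 - ratio) == vtotal_avg */
	printf("min=%u max=%u phase/modulo=%f\n",
	       (unsigned)vtotal_min, (unsigned)vtotal_max, phase / modulo);
	return 0;
}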

void dcn30_fpu_populate_dml_writeback_from_context(
		struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
{
	int pipe_cnt, i, j;
	double max_calc_writeback_dispclk;
	double writeback_dispclk;
	struct writeback_st dout_wb;

	dc_assert_fp_enabled();

	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
		struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream;

		if (!stream)
			continue;
		max_calc_writeback_dispclk = 0;

		/* Set writeback information */
		pipes[pipe_cnt].dout.wb_enable = 0;
		pipes[pipe_cnt].dout.num_active_wb = 0;
		for (j = 0; j < stream->num_wb_info; j++) {
			struct dc_writeback_info *wb_info = &stream->writeback_info[j];

			if (wb_info->wb_enabled && wb_info->writeback_source_plane &&
					(wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) {
				pipes[pipe_cnt].dout.wb_enable = 1;
				pipes[pipe_cnt].dout.num_active_wb++;
				dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ?
					wb_info->dwb_params.cnv_params.crop_height :
					wb_info->dwb_params.cnv_params.src_height;
				dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ?
					wb_info->dwb_params.cnv_params.crop_width :
					wb_info->dwb_params.cnv_params.src_width;
				dout_wb.wb_dst_width = wb_info->dwb_params.dest_width;
				dout_wb.wb_dst_height = wb_info->dwb_params.dest_height;

				/* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */
				if (dc->dml.ip.writeback_max_hscl_taps > 1) {
					dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps;
					dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps;
				} else {
					dout_wb.wb_htaps_luma = 1;
					dout_wb.wb_vtaps_luma = 1;
				}
				dout_wb.wb_htaps_chroma = 0;
				dout_wb.wb_vtaps_chroma = 0;
				dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ?
					(double)wb_info->dwb_params.cnv_params.crop_width /
						(double)wb_info->dwb_params.dest_width :
					(double)wb_info->dwb_params.cnv_params.src_width /
						(double)wb_info->dwb_params.dest_width;
				dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ?
					(double)wb_info->dwb_params.cnv_params.crop_height /
						(double)wb_info->dwb_params.dest_height :
					(double)wb_info->dwb_params.cnv_params.src_height /
						(double)wb_info->dwb_params.dest_height;
				if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB ||
						wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA)
					dout_wb.wb_pixel_format = dm_444_64;
				else
					dout_wb.wb_pixel_format = dm_444_32;

				/* Workaround for cases where multiple writebacks are connected to same plane
				 * In which case, need to compute worst case and set the associated writeback parameters
				 * This workaround is necessary due to DML computation assuming only 1 set of writeback
				 * parameters per pipe
				 */
				writeback_dispclk = dml30_CalculateWriteBackDISPCLK(
						dout_wb.wb_pixel_format,
						pipes[pipe_cnt].pipe.dest.pixel_rate_mhz,
						dout_wb.wb_hratio,
						dout_wb.wb_vratio,
						dout_wb.wb_htaps_luma,
						dout_wb.wb_vtaps_luma,
						dout_wb.wb_src_width,
						dout_wb.wb_dst_width,
						pipes[pipe_cnt].pipe.dest.htotal,
						dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size);

				if (writeback_dispclk > max_calc_writeback_dispclk) {
					max_calc_writeback_dispclk = writeback_dispclk;
					pipes[pipe_cnt].dout.wb = dout_wb;
				}
			}
		}

		pipe_cnt++;
	}
}

void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params,
		struct display_mode_lib *dml,
		display_e2e_pipe_params_st *pipes,
		int pipe_cnt,
		int cur_pipe)
{
	int i;

	dc_assert_fp_enabled();

	for (i = 0; i < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); i++) {
		wb_arb_params->cli_watermark[i] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000;
		wb_arb_params->pstate_watermark[i] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
	}

	wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[cur_pipe] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */
}

void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
{
	dc_assert_fp_enabled();

	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) {
		context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;
		context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;
	}
}

void dcn30_fpu_calculate_wm_and_dlg(
		struct dc *dc, struct dc_state *context,
		display_e2e_pipe_params_st *pipes,
		int pipe_cnt,
		int vlevel)
{
	int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
	int i, pipe_idx;
	double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb];
	bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;

	dc_assert_fp_enabled();

	if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
		dcfclk = context->bw_ctx.dml.soc.min_dcfclk;

	pipes[0].clks_cfg.voltage = vlevel;
	pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
	pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;

	/* Set B:
	 * DCFCLK: 1GHz or min required above 1GHz
	 * FCLK/UCLK: Max
	 */
	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
		if (vlevel == 0) {
			pipes[0].clks_cfg.voltage = 1;
			pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz;
		}
		context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
		context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
	}
	context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;

	pipes[0].clks_cfg.voltage = vlevel;
	pipes[0].clks_cfg.dcfclk_mhz = dcfclk;

	/* Set D:
	 * DCFCLK: Min Required
	 * FCLK(proportional to UCLK): 1GHz or Max
	 * MALL stutter, sr_enter_exit = 4, sr_exit = 2us
	 */
	/*
	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
		context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us;
		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us;
		context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us;
	}
	context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	*/

	/* Set C:
	 * DCFCLK: Min Required
	 * FCLK(proportional to UCLK): 1GHz or Max
	 * pstate latency overridden to 5us
	 */
	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
		unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
		unsigned int min_dram_speed_mts_margin = 160;

		if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported)
			min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16;

		/* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */
		for (i = 3; i > 0; i--)
			if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts)
				break;

		context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us;

		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
		context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
	}

	context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;

	if (!pstate_en) {
		/* The only difference between A and C is p-state latency, if p-state is not supported we want to
		 * calculate DLG based on dummy p-state latency, and max out the set A p-state watermark
		 */
		context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c;
		context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0;
	} else {
		/* Set A:
		 * DCFCLK: Min Required
		 * FCLK(proportional to UCLK): 1GHz or Max
		 *
		 * Set A calculated last so that following calculations are based on Set A
		 */
		dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
		context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
		context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
		context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
		context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
		context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
		context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
		context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
		context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
	}

	context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;

	/* Make set D = set A until set D is enabled */
	context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;

	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
		if (!context->res_ctx.pipe_ctx[i].stream)
			continue;

		pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
		pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);

		if (dc->config.forced_clocks) {
			pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
			pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
		}
		if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
			pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
		if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
			pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;

		pipe_idx++;
	}

	DC_FP_START();
	dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
	DC_FP_END();

	if (!pstate_en)
		/* Restore full p-state latency */
		context->bw_ctx.dml.soc.dram_clock_change_latency_us =
				dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
}

void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc)
{
	dc_assert_fp_enabled();

	if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
		dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
}

void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk)
{
	dc_assert_fp_enabled();

	if (!dcn30_bb_max_clk->max_dcfclk_mhz)
		dcn30_bb_max_clk->max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz;
	if (!dcn30_bb_max_clk->max_dispclk_mhz)
		dcn30_bb_max_clk->max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz;
	if (!dcn30_bb_max_clk->max_dppclk_mhz)
		dcn30_bb_max_clk->max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz;
	if (!dcn30_bb_max_clk->max_phyclk_mhz)
		dcn30_bb_max_clk->max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz;
}

void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
		unsigned int *optimal_dcfclk,
		unsigned int *optimal_fclk)
{
	double bw_from_dram, bw_from_dram1, bw_from_dram2;

	dc_assert_fp_enabled();

	bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans *
		dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100);
	bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans *
		dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100);

	bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;

	if (optimal_fclk)
		*optimal_fclk = bw_from_dram /
			(dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));

	if (optimal_dcfclk)
		*optimal_dcfclk = bw_from_dram /
			(dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
}
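
The helper above sizes DCFCLK and FCLK so the display engine can absorb the DRAM bandwidth implied by a given UCLK. A minimal arithmetic sketch with made-up SoC parameters (the real values come from the dcn3_0_soc table earlier in this file):

#include <stdio.h>

int main(void)
{
	double uclk_mts = 1600.0;		/* hypothetical memory speed, MT/s */
	double num_chans = 8.0, chan_width_bytes = 2.0;	/* illustrative only */
	double max_avg_dram_pct = 40.0, max_avg_sdp_pct = 60.0;
	double return_bus_width_bytes = 64.0, fabric_bytes = 64.0;

	/* Usable DRAM bandwidth is the tighter of the DRAM and SDP derates. */
	double bw1 = uclk_mts * num_chans * chan_width_bytes * (max_avg_dram_pct / 100.0);
	double bw2 = uclk_mts * num_chans * chan_width_bytes * (max_avg_sdp_pct / 100.0);
	double bw = bw1 < bw2 ? bw1 : bw2;	/* MB/s */

	/* The clock must return that many bytes per cycle through its bus. */
	printf("fclk   = %.0f MHz\n", bw / (fabric_bytes * (max_avg_sdp_pct / 100.0)));
	printf("dcfclk = %.0f MHz\n", bw / (return_bus_width_bytes * (max_avg_sdp_pct / 100.0)));
	return 0;
}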

void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
		struct clk_bw_params *bw_params,
		struct dc_bounding_box_max_clk *dcn30_bb_max_clk,
		unsigned int *dcfclk_mhz,
		unsigned int *dram_speed_mts)
{
	unsigned int i;

	dc_assert_fp_enabled();

	dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
	dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;

	for (i = 0; i < dcn3_0_soc.num_states; i++) {
		dcn3_0_soc.clock_limits[i].state = i;
		dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
		dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
		dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];

		/* Fill all states with max values of all other clocks */
		dcn3_0_soc.clock_limits[i].dispclk_mhz = dcn30_bb_max_clk->max_dispclk_mhz;
		dcn3_0_soc.clock_limits[i].dppclk_mhz = dcn30_bb_max_clk->max_dppclk_mhz;
		dcn3_0_soc.clock_limits[i].phyclk_mhz = dcn30_bb_max_clk->max_phyclk_mhz;
		dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz;
		/* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */
		/* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
		dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz;
		dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz;
		dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz;
	}
	/* re-init DML with updated bb */
	dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
	if (dc->current_state)
		dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
}

drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h (new file, 67 lines)
@@ -0,0 +1,67 @@
/*
 * Copyright 2020-2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: AMD
 *
 */

#ifndef __DCN30_FPU_H__
#define __DCN30_FPU_H__

#include "core_types.h"
#include "dcn20/dcn20_optc.h"

void optc3_fpu_set_vrr_m_const(struct timing_generator *optc,
		double vtotal_avg);

void dcn30_fpu_populate_dml_writeback_from_context(
		struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes);

void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params,
		struct display_mode_lib *dml,
		display_e2e_pipe_params_st *pipes,
		int pipe_cnt,
		int cur_pipe);

void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);

void dcn30_fpu_calculate_wm_and_dlg(
		struct dc *dc, struct dc_state *context,
		display_e2e_pipe_params_st *pipes,
		int pipe_cnt,
		int vlevel);

void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc);

void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk);

void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
		unsigned int *optimal_dcfclk,
		unsigned int *optimal_fclk);

void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
		struct clk_bw_params *bw_params,
		struct dc_bounding_box_max_clk *dcn30_bb_max_clk,
		unsigned int *dcfclk_mhz,
		unsigned int *dram_speed_mts);

#endif /* __DCN30_FPU_H__ */
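
Every dcn30_fpu_* helper declared here calls dc_assert_fp_enabled(), so callers outside the DML are expected to bracket them with the kernel FPU guards, exactly as the resource code at the top of this series does. A minimal sketch of that calling convention (the wrapper name is illustrative, not from the patch):

/* DC_FP_START()/DC_FP_END() disable preemption and save/restore the
 * kernel FPU state around floating-point DML code.
 */
static void example_update_bw(struct dc *dc, struct clk_bw_params *bw_params,
			      struct dc_bounding_box_max_clk *max_clk,
			      unsigned int *dcfclk_mhz,
			      unsigned int *dram_speed_mts)
{
	DC_FP_START();
	dcn30_fpu_update_bw_bounding_box(dc, bw_params, max_clk,
					 dcfclk_mhz, dram_speed_mts);
	DC_FP_END();
}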

@@ -486,4 +486,11 @@ struct dc_state {
	} perf_params;
};

struct dc_bounding_box_max_clk {
	int max_dcfclk_mhz;
	int max_dispclk_mhz;
	int max_dppclk_mhz;
	int max_phyclk_mhz;
};

#endif /* _CORE_TYPES_H_ */
@@ -217,8 +217,7 @@ void disable_dp_hpo_output(struct dc_link *link,
void setup_dp_hpo_stream(struct pipe_ctx *pipe_ctx, bool enable);
bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx);

void dp_retrieve_lttpr_cap(struct dc_link *link);
bool dp_apply_lttpr_mode(struct dc_link *link);
bool dp_retrieve_lttpr_cap(struct dc_link *link);
void edp_panel_backlight_power_on(struct dc_link *link);
void dp_receiver_power_ctrl(struct dc_link *link, bool on);
void dp_source_sequence_trace(struct dc_link *link, uint8_t dp_test_mode);
@@ -87,6 +87,11 @@ union dpia_set_config_data {
 */
enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link);

/* Query hot plug status of USB4 DP tunnel.
 * Returns true if HPD high.
 */
bool dc_link_dpia_query_hpd_status(struct dc_link *link);

/* Train DP tunneling link for USB4 DPIA display endpoint.
 * DPIA equivalent of dc_link_dp_perform_link_training.
 * Aborts link training upon detection of sink unplug.
@@ -80,12 +80,6 @@ enum link_training_result {
	DP_128b_132b_CDS_DONE_TIMEOUT,
};

enum lttpr_support {
	LTTPR_UNSUPPORTED,
	LTTPR_CHECK_EXT_SUPPORT,
	LTTPR_SUPPORTED,
};

enum lttpr_mode {
	LTTPR_MODE_NON_LTTPR,
	LTTPR_MODE_TRANSPARENT,
@@ -6070,6 +6070,8 @@
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1

@@ -6058,6 +6058,8 @@
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1

@@ -7142,6 +7142,8 @@
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1

@@ -37285,12 +37285,14 @@
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L
//DIG0_HDMI_INFOFRAME_CONTROL0
#define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND__SHIFT 0x0

@@ -5584,6 +5584,8 @@
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1

@@ -30357,12 +30357,14 @@
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L
//DIG0_HDMI_INFOFRAME_CONTROL0
#define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT 0x4

@@ -39439,12 +39439,14 @@
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L
//DIG0_HDMI_INFOFRAME_CONTROL0
#define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT 0x4

@@ -16956,7 +16956,7 @@
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9

#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10

#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L
@@ -16964,7 +16964,7 @@
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L

#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L

//DIG0_HDMI_INFOFRAME_CONTROL0

@@ -35487,12 +35487,14 @@
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L
//DIG0_HDMI_INFOFRAME_CONTROL0
#define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT 0x4
@@ -770,6 +770,9 @@ enum amd_dpm_forced_level amdgpu_dpm_get_performance_level(struct amdgpu_device
	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
	enum amd_dpm_forced_level level;

	if (!pp_funcs)
		return AMD_DPM_FORCED_LEVEL_AUTO;

	mutex_lock(&adev->pm.mutex);
	if (pp_funcs->get_performance_level)
		level = pp_funcs->get_performance_level(adev->powerplay.pp_handle);
@@ -1436,6 +1436,7 @@ static int smu_disable_dpms(struct smu_context *smu)
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 5):
	case IP_VERSION(11, 0, 9):
	case IP_VERSION(13, 0, 0):
		return 0;
	default:
		break;
@@ -671,8 +671,8 @@ typedef struct {
	uint16_t reserved[2];

	//Frequency changes
	uint16_t GfxclkFmin; // MHz
	uint16_t GfxclkFmax; // MHz
	int16_t GfxclkFmin;  // MHz
	int16_t GfxclkFmax;  // MHz
	uint16_t UclkFmin;   // MHz
	uint16_t UclkFmax;   // MHz

@@ -683,15 +683,14 @@ typedef struct {
	//Fan control
	uint8_t FanLinearPwmPoints[NUM_OD_FAN_MAX_POINTS];
	uint8_t FanLinearTempPoints[NUM_OD_FAN_MAX_POINTS];
	uint16_t FanMaximumRpm;
	uint16_t FanMinimumPwm;
	uint16_t FanAcousticLimitRpm;
	uint16_t AcousticTargetRpmThreshold;
	uint16_t AcousticLimitRpmThreshold;
	uint16_t FanTargetTemperature; // Degree Celsius
	uint8_t FanZeroRpmEnable;
	uint8_t FanZeroRpmStopTemp;
	uint8_t FanMode;
	uint8_t Padding[1];

	uint8_t MaxOpTemp;

	uint32_t Spare[13];
	uint32_t MmHubPadding[8]; // SMU internal use. Adding here instead of external as a workaround
@@ -719,15 +718,14 @@ typedef struct {

	uint8_t FanLinearPwmPoints;
	uint8_t FanLinearTempPoints;
	uint16_t FanMaximumRpm;
	uint16_t FanMinimumPwm;
	uint16_t FanAcousticLimitRpm;
	uint16_t AcousticTargetRpmThreshold;
	uint16_t AcousticLimitRpmThreshold;
	uint16_t FanTargetTemperature; // Degree Celsius
	uint8_t FanZeroRpmEnable;
	uint8_t FanZeroRpmStopTemp;
	uint8_t FanMode;
	uint8_t Padding[1];

	uint8_t MaxOpTemp;

	uint32_t Spare[13];

@@ -997,7 +995,8 @@ typedef struct {
	uint16_t SocketPowerLimitAcTau[PPT_THROTTLER_COUNT]; // Time constant of LPF in ms
	uint16_t SocketPowerLimitDcTau[PPT_THROTTLER_COUNT]; // Time constant of LPF in ms

	uint32_t SpareVmin[12];
	QuadraticInt_t Vmin_droop;
	uint32_t SpareVmin[9];

	//SECTION: DPM Configuration 1
@@ -1286,7 +1285,6 @@ typedef struct {
	uint32_t PostVoltageSetBacoDelay; // in microseconds. Amount of time FW will wait after power good is established or PSI0 command is issued
	uint32_t BacoEntryDelay; // in milliseconds. Amount of time FW will wait to trigger BACO entry after receiving entry notification from OS

	// SECTION: Board Reserved
	uint32_t BoardSpare[64];
@@ -30,7 +30,7 @@
#define SMU13_DRIVER_IF_VERSION_ALDE 0x08
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x27
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x28
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x28

#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
@@ -697,12 +697,28 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu,
	uint32_t apu_percent = 0;
	uint32_t dgpu_percent = 0;

	if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
	    (smu->smc_fw_version >= 0x3A4900))
		use_metrics_v3 = true;
	else if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
	    (smu->smc_fw_version >= 0x3A4300))
		use_metrics_v2 = true;
	switch (smu->adev->ip_versions[MP1_HWIP][0]) {
	case IP_VERSION(11, 0, 7):
		if (smu->smc_fw_version >= 0x3A4900)
			use_metrics_v3 = true;
		else if (smu->smc_fw_version >= 0x3A4300)
			use_metrics_v2 = true;
		break;
	case IP_VERSION(11, 0, 11):
		if (smu->smc_fw_version >= 0x412D00)
			use_metrics_v2 = true;
		break;
	case IP_VERSION(11, 0, 12):
		if (smu->smc_fw_version >= 0x3B2300)
			use_metrics_v2 = true;
		break;
	case IP_VERSION(11, 0, 13):
		if (smu->smc_fw_version >= 0x491100)
			use_metrics_v2 = true;
		break;
	default:
		break;
	}

	ret = smu_cmn_get_metrics_table(smu,
					NULL,
@@ -3833,13 +3849,28 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
	uint16_t average_gfx_activity;
	int ret = 0;

	if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
	    (smu->smc_fw_version >= 0x3A4900))
		use_metrics_v3 = true;
	else if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
	    (smu->smc_fw_version >= 0x3A4300))
		use_metrics_v2 = true;

	switch (smu->adev->ip_versions[MP1_HWIP][0]) {
	case IP_VERSION(11, 0, 7):
		if (smu->smc_fw_version >= 0x3A4900)
			use_metrics_v3 = true;
		else if (smu->smc_fw_version >= 0x3A4300)
			use_metrics_v2 = true;
		break;
	case IP_VERSION(11, 0, 11):
		if (smu->smc_fw_version >= 0x412D00)
			use_metrics_v2 = true;
		break;
	case IP_VERSION(11, 0, 12):
		if (smu->smc_fw_version >= 0x3B2300)
			use_metrics_v2 = true;
		break;
	case IP_VERSION(11, 0, 13):
		if (smu->smc_fw_version >= 0x491100)
			use_metrics_v2 = true;
		break;
	default:
		break;
	}

	ret = smu_cmn_get_metrics_table(smu,
					&metrics_external,
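
Both sienna_cichlid functions now gate the metrics-table layout on the MP1 IP version plus a minimum SMC firmware version. A hypothetical table-driven equivalent of that switch, using only the thresholds visible in the diff (the helper and table are illustrative, not part of the driver):

struct metrics_rev {
	unsigned int ip_ver;	/* IP_VERSION(major, minor, rev) value */
	uint32_t min_fw;	/* first SMC firmware using this layout */
	int version;		/* metrics table version, 2 or 3 */
};

static const struct metrics_rev metrics_revs[] = {
	{ IP_VERSION(11, 0, 7),  0x3A4900, 3 },
	{ IP_VERSION(11, 0, 7),  0x3A4300, 2 },
	{ IP_VERSION(11, 0, 11), 0x412D00, 2 },
	{ IP_VERSION(11, 0, 12), 0x3B2300, 2 },
	{ IP_VERSION(11, 0, 13), 0x491100, 2 },
};

static int metrics_version(unsigned int ip_ver, uint32_t fw_ver)
{
	int i;

	/* Entries for the same IP are ordered newest-first, so the v3
	 * threshold is checked before the v2 one.
	 */
	for (i = 0; i < ARRAY_SIZE(metrics_revs); i++)
		if (metrics_revs[i].ip_ver == ip_ver &&
		    fw_ver >= metrics_revs[i].min_fw)
			return metrics_revs[i].version;
	return 1;	/* legacy layout */
}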
@@ -1666,6 +1666,7 @@ static const struct throttling_logging_label {
	uint32_t feature_mask;
	const char *label;
} logging_label[] = {
	{(1U << THROTTLER_TEMP_GPU_BIT), "GPU"},
	{(1U << THROTTLER_TEMP_MEM_BIT), "HBM"},
	{(1U << THROTTLER_TEMP_VR_GFX_BIT), "VR of GFX rail"},
	{(1U << THROTTLER_TEMP_VR_MEM_BIT), "VR of HBM rail"},
@@ -218,13 +218,25 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu)
		    pptable_id == 3688)
			pptable_id = 36881;
		/*
		 * Temporary solution for SMU V13.0.0:
		 * - use 99991 signed pptable when SCPM enabled
		 * TODO: drop this when the pptable carried in vbios
		 * is ready.
		 * Temporary solution for SMU V13.0.0 with SCPM enabled:
		 * - use 36831 signed pptable when pp_table_id is 3683
		 * - use 36641 signed pptable when pp_table_id is 3664 or 0
		 * TODO: drop these when the pptable carried in vbios is ready.
		 */
		if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0))
			pptable_id = 99991;
		if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) {
			switch (pptable_id) {
			case 0:
			case 3664:
				pptable_id = 36641;
				break;
			case 3683:
				pptable_id = 36831;
				break;
			default:
				dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id);
				return -EINVAL;
			}
		}
	}

	/* "pptable_id == 0" means vbios carries the pptable. */
@@ -448,13 +460,24 @@ int smu_v13_0_setup_pptable(struct smu_context *smu)
		pptable_id = smu->smu_table.boot_values.pp_table_id;

		/*
		 * Temporary solution for SMU V13.0.0:
		 * - use 9999 unsigned pptable when SCPM disabled
		 * TODO: drop this when the pptable carried in vbios
		 * is ready.
		 * Temporary solution for SMU V13.0.0 with SCPM disabled:
		 * - use 3664 or 3683 on request
		 * - use 3664 when pptable_id is 0
		 * TODO: drop these when the pptable carried in vbios is ready.
		 */
		if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0))
			pptable_id = 9999;
		if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) {
			switch (pptable_id) {
			case 0:
				pptable_id = 3664;
				break;
			case 3664:
			case 3683:
				break;
			default:
				dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id);
				return -EINVAL;
			}
		}
	}

	/* force using vbios pptable in sriov mode */
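
Both hunks encode the same idea: on SMU v13.0.0 a requested pptable id is translated to the id the driver actually ships (the signed variant appears to follow the pattern id * 10 + 1, with id 0 falling back to 3664). A hypothetical condensation of the SCPM-enabled mapping above, not driver code:

static int scpm_signed_pptable_id(int requested)
{
	switch (requested) {
	case 0:		/* 0 means "vbios would carry the pptable" */
	case 3664:
		return 36641;
	case 3683:
		return 36831;
	default:
		return -EINVAL;	/* unsupported pptable id */
	}
}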
@@ -275,9 +275,7 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu,
		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_VDDIO_MEM_SCALING_BIT);
	}

#if 0
	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_MEM_TEMP_READ_BIT);
#endif

	if (adev->pm.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_GFXCLK_BIT);
@@ -296,6 +294,12 @@

	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_BIT);

	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_MP0CLK_BIT);
	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FW_DSTATE_BIT);

	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT);
	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_SOC_CG_BIT);

	return 0;
}
@@ -644,42 +644,40 @@ static int smu_v13_0_4_set_watermarks_table(struct smu_context *smu,
	if (!table || !clock_ranges)
		return -EINVAL;

	if (clock_ranges) {
		if (clock_ranges->num_reader_wm_sets > NUM_WM_RANGES ||
		    clock_ranges->num_writer_wm_sets > NUM_WM_RANGES)
			return -EINVAL;
	if (clock_ranges->num_reader_wm_sets > NUM_WM_RANGES ||
	    clock_ranges->num_writer_wm_sets > NUM_WM_RANGES)
		return -EINVAL;

		for (i = 0; i < clock_ranges->num_reader_wm_sets; i++) {
			table->WatermarkRow[WM_DCFCLK][i].MinClock =
				clock_ranges->reader_wm_sets[i].min_drain_clk_mhz;
			table->WatermarkRow[WM_DCFCLK][i].MaxClock =
				clock_ranges->reader_wm_sets[i].max_drain_clk_mhz;
			table->WatermarkRow[WM_DCFCLK][i].MinMclk =
				clock_ranges->reader_wm_sets[i].min_fill_clk_mhz;
			table->WatermarkRow[WM_DCFCLK][i].MaxMclk =
				clock_ranges->reader_wm_sets[i].max_fill_clk_mhz;
	for (i = 0; i < clock_ranges->num_reader_wm_sets; i++) {
		table->WatermarkRow[WM_DCFCLK][i].MinClock =
			clock_ranges->reader_wm_sets[i].min_drain_clk_mhz;
		table->WatermarkRow[WM_DCFCLK][i].MaxClock =
			clock_ranges->reader_wm_sets[i].max_drain_clk_mhz;
		table->WatermarkRow[WM_DCFCLK][i].MinMclk =
			clock_ranges->reader_wm_sets[i].min_fill_clk_mhz;
		table->WatermarkRow[WM_DCFCLK][i].MaxMclk =
			clock_ranges->reader_wm_sets[i].max_fill_clk_mhz;

		table->WatermarkRow[WM_DCFCLK][i].WmSetting =
			clock_ranges->reader_wm_sets[i].wm_inst;
	}

	for (i = 0; i < clock_ranges->num_writer_wm_sets; i++) {
		table->WatermarkRow[WM_SOCCLK][i].MinClock =
			clock_ranges->writer_wm_sets[i].min_fill_clk_mhz;
		table->WatermarkRow[WM_SOCCLK][i].MaxClock =
			clock_ranges->writer_wm_sets[i].max_fill_clk_mhz;
		table->WatermarkRow[WM_SOCCLK][i].MinMclk =
			clock_ranges->writer_wm_sets[i].min_drain_clk_mhz;
		table->WatermarkRow[WM_SOCCLK][i].MaxMclk =
			clock_ranges->writer_wm_sets[i].max_drain_clk_mhz;

		table->WatermarkRow[WM_SOCCLK][i].WmSetting =
			clock_ranges->writer_wm_sets[i].wm_inst;
	}

	smu->watermarks_bitmap |= WATERMARKS_EXIST;
			table->WatermarkRow[WM_DCFCLK][i].WmSetting =
				clock_ranges->reader_wm_sets[i].wm_inst;
		}

		for (i = 0; i < clock_ranges->num_writer_wm_sets; i++) {
			table->WatermarkRow[WM_SOCCLK][i].MinClock =
				clock_ranges->writer_wm_sets[i].min_fill_clk_mhz;
			table->WatermarkRow[WM_SOCCLK][i].MaxClock =
				clock_ranges->writer_wm_sets[i].max_fill_clk_mhz;
			table->WatermarkRow[WM_SOCCLK][i].MinMclk =
				clock_ranges->writer_wm_sets[i].min_drain_clk_mhz;
			table->WatermarkRow[WM_SOCCLK][i].MaxMclk =
				clock_ranges->writer_wm_sets[i].max_drain_clk_mhz;

			table->WatermarkRow[WM_SOCCLK][i].WmSetting =
				clock_ranges->writer_wm_sets[i].wm_inst;
		}

		smu->watermarks_bitmap |= WATERMARKS_EXIST;

	/* pass data to smu controller */
	if ((smu->watermarks_bitmap & WATERMARKS_EXIST) &&
	    !(smu->watermarks_bitmap & WATERMARKS_LOADED)) {
@@ -190,6 +190,9 @@ static int yellow_carp_fini_smc_tables(struct smu_context *smu)
	kfree(smu_table->watermarks_table);
	smu_table->watermarks_table = NULL;

	kfree(smu_table->gpu_metrics_table);
	smu_table->gpu_metrics_table = NULL;

	return 0;
}
@@ -541,7 +541,6 @@ static int dpu_encoder_virt_atomic_check(
	struct dpu_encoder_virt *dpu_enc;
	struct msm_drm_private *priv;
	struct dpu_kms *dpu_kms;
	const struct drm_display_mode *mode;
	struct drm_display_mode *adj_mode;
	struct msm_display_topology topology;
	struct dpu_global_state *global_state;
@@ -559,7 +558,6 @@ static int dpu_encoder_virt_atomic_check(

	priv = drm_enc->dev->dev_private;
	dpu_kms = to_dpu_kms(priv->kms);
	mode = &crtc_state->mode;
	adj_mode = &crtc_state->adjusted_mode;
	global_state = dpu_kms_get_global_state(crtc_state->state);
	if (IS_ERR(global_state))
@@ -1814,7 +1812,6 @@ static void dpu_encoder_prep_dsc(struct dpu_encoder_virt *dpu_enc,
		}
	}

	dsc_common_mode = 0;
	pic_width = dsc->drm->pic_width;

	dsc_common_mode = DSC_MODE_MULTIPLEX | DSC_MODE_SPLIT_PANEL;
@@ -574,11 +574,11 @@ static void dpu_encoder_phys_wb_disable(struct dpu_encoder_phys *phys_enc)
 */
static void dpu_encoder_phys_wb_destroy(struct dpu_encoder_phys *phys_enc)
{
	DPU_DEBUG("[wb:%d]\n", phys_enc->wb_idx - WB_0);

	if (!phys_enc)
		return;

	DPU_DEBUG("[wb:%d]\n", phys_enc->wb_idx - WB_0);

	kfree(phys_enc);
}
@@ -49,8 +49,6 @@
#define DPU_DEBUGFS_DIR "msm_dpu"
#define DPU_DEBUGFS_HWMASKNAME "hw_log_mask"

#define MIN_IB_BW 400000000ULL /* Min ib vote 400MB */

static int dpu_kms_hw_init(struct msm_kms *kms);
static void _dpu_kms_mmu_destroy(struct dpu_kms *dpu_kms);

@@ -1305,15 +1303,9 @@ static int __maybe_unused dpu_runtime_resume(struct device *dev)
	struct dpu_kms *dpu_kms = to_dpu_kms(priv->kms);
	struct drm_encoder *encoder;
	struct drm_device *ddev;
	int i;

	ddev = dpu_kms->dev;

	WARN_ON(!(dpu_kms->num_paths));
	/* Min vote of BW is required before turning on AXI clk */
	for (i = 0; i < dpu_kms->num_paths; i++)
		icc_set_bw(dpu_kms->path[i], 0, Bps_to_icc(MIN_IB_BW));

	rc = clk_bulk_prepare_enable(dpu_kms->num_clocks, dpu_kms->clocks);
	if (rc) {
		DPU_ERROR("clock enable failed rc:%d\n", rc);
@@ -1390,8 +1390,13 @@ void dp_ctrl_reset_irq_ctrl(struct dp_ctrl *dp_ctrl, bool enable)
 
 	dp_catalog_ctrl_reset(ctrl->catalog);
 
-	if (enable)
-		dp_catalog_ctrl_enable_irq(ctrl->catalog, enable);
+	/*
+	 * all dp controller programmable registers will not
+	 * be reset to default value after DP_SW_RESET
+	 * therefore interrupt mask bits have to be updated
+	 * to enable/disable interrupts
+	 */
+	dp_catalog_ctrl_enable_irq(ctrl->catalog, enable);
 }
 
 void dp_ctrl_phy_init(struct dp_ctrl *dp_ctrl)
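The point of the hunk above is that the interrupt mask write must happen on both the enable and the disable path, because DP_SW_RESET leaves the programmable registers, including the mask bits, untouched. A condensed before/after sketch; write_irq_mask() and ALL_IRQS are hypothetical stand-ins for the catalog helper and its mask:

	/* before: disabling skipped the mask update, leaving stale bits armed */
	if (enable)
		write_irq_mask(catalog, ALL_IRQS);

	/* after: the mask always reflects the requested state */
	write_irq_mask(catalog, enable ? ALL_IRQS : 0);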
@@ -5,6 +5,7 @@
 
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/interconnect.h>
 #include <linux/irq.h>
 #include <linux/irqchip.h>
 #include <linux/irqdesc.h>
@@ -25,6 +26,8 @@
 #define UBWC_CTRL_2		0x150
 #define UBWC_PREDICTION_MODE	0x154
 
+#define MIN_IB_BW	400000000UL /* Min ib vote 400MB */
+
 struct msm_mdss {
 	struct device *dev;
@@ -36,8 +39,47 @@ struct msm_mdss {
 		unsigned long enabled_mask;
 		struct irq_domain *domain;
 	} irq_controller;
+	struct icc_path *path[2];
+	u32 num_paths;
 };
 
+static int msm_mdss_parse_data_bus_icc_path(struct device *dev,
+					    struct msm_mdss *msm_mdss)
+{
+	struct icc_path *path0 = of_icc_get(dev, "mdp0-mem");
+	struct icc_path *path1 = of_icc_get(dev, "mdp1-mem");
+
+	if (IS_ERR_OR_NULL(path0))
+		return PTR_ERR_OR_ZERO(path0);
+
+	msm_mdss->path[0] = path0;
+	msm_mdss->num_paths = 1;
+
+	if (!IS_ERR_OR_NULL(path1)) {
+		msm_mdss->path[1] = path1;
+		msm_mdss->num_paths++;
+	}
+
+	return 0;
+}
+
+static void msm_mdss_put_icc_path(void *data)
+{
+	struct msm_mdss *msm_mdss = data;
+	int i;
+
+	for (i = 0; i < msm_mdss->num_paths; i++)
+		icc_put(msm_mdss->path[i]);
+}
+
+static void msm_mdss_icc_request_bw(struct msm_mdss *msm_mdss, unsigned long bw)
+{
+	int i;
+
+	for (i = 0; i < msm_mdss->num_paths; i++)
+		icc_set_bw(msm_mdss->path[i], 0, Bps_to_icc(bw));
+}
+
 static void msm_mdss_irq(struct irq_desc *desc)
 {
 	struct msm_mdss *msm_mdss = irq_desc_get_handler_data(desc);
@@ -136,6 +178,13 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss)
 {
 	int ret;
 
+	/*
+	 * Several components have AXI clocks that can only be turned on if
+	 * the interconnect is enabled (non-zero bandwidth). Let's make sure
+	 * that the interconnects are at least at a minimum amount.
+	 */
+	msm_mdss_icc_request_bw(msm_mdss, MIN_IB_BW);
+
 	ret = clk_bulk_prepare_enable(msm_mdss->num_clocks, msm_mdss->clocks);
 	if (ret) {
 		dev_err(msm_mdss->dev, "clock enable failed, ret:%d\n", ret);
@@ -178,6 +227,7 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss)
 static int msm_mdss_disable(struct msm_mdss *msm_mdss)
 {
 	clk_bulk_disable_unprepare(msm_mdss->num_clocks, msm_mdss->clocks);
+	msm_mdss_icc_request_bw(msm_mdss, 0);
 
 	return 0;
 }
@@ -271,6 +321,13 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5)
 
 	dev_dbg(&pdev->dev, "mapped mdss address space @%pK\n", msm_mdss->mmio);
 
+	ret = msm_mdss_parse_data_bus_icc_path(&pdev->dev, msm_mdss);
+	if (ret)
+		return ERR_PTR(ret);
+	ret = devm_add_action_or_reset(&pdev->dev, msm_mdss_put_icc_path, msm_mdss);
+	if (ret)
+		return ERR_PTR(ret);
+
 	if (is_mdp5)
 		ret = mdp5_mdss_parse_clock(pdev, &msm_mdss->clocks);
 	else
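Taken together, the msm_mdss.c hunks move the minimum bandwidth vote from dpu_kms into the mdss driver using the standard interconnect consumer API: of_icc_get() looks up a DT-named path, icc_set_bw() votes average and peak bandwidth, and icc_put() releases the path. A condensed consumer sketch under those assumptions; the path name and bandwidth value mirror the patch, but the surrounding function is hypothetical:

	static int example_vote_bus(struct device *dev)
	{
		struct icc_path *path;
		int ret;

		path = of_icc_get(dev, "mdp0-mem");	/* name from the DT binding */
		if (IS_ERR_OR_NULL(path))
			return PTR_ERR_OR_ZERO(path);

		ret = icc_set_bw(path, 0, Bps_to_icc(400000000UL));	/* avg 0, peak 400 MB/s */
		if (ret)
			dev_err(dev, "bw vote failed: %d\n", ret);

		icc_set_bw(path, 0, 0);	/* drop the vote */
		icc_put(path);
		return ret;
	}

Registering msm_mdss_put_icc_path() through devm_add_action_or_reset() in the probe path means the icc_put() calls run automatically on probe failure or device removal, so no explicit unwind is needed.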
@@ -473,6 +473,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encoder)
 	    native_mode->vdisplay != 0 &&
 	    native_mode->clock != 0) {
 		mode = drm_mode_duplicate(dev, native_mode);
+		if (!mode)
+			return NULL;
 		mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
 		drm_mode_set_name(mode);
@@ -487,6 +489,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encoder)
 		 * simpler.
 		 */
 		mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false);
+		if (!mode)
+			return NULL;
 		mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
 		DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name);
 	}
@@ -1,8 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
+config TEGRA_HOST1X_CONTEXT_BUS
+	bool
+
 config TEGRA_HOST1X
 	tristate "NVIDIA Tegra host1x driver"
 	depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
 	select DMA_SHARED_BUFFER
+	select TEGRA_HOST1X_CONTEXT_BUS
 	select IOMMU_IOVA
 	help
 	  Driver for the NVIDIA Tegra host1x hardware.
@@ -18,3 +18,4 @@ host1x-y = \
 	hw/host1x07.o
 
 obj-$(CONFIG_TEGRA_HOST1X) += host1x.o
+obj-$(CONFIG_TEGRA_HOST1X_CONTEXT_BUS) += context_bus.o
drivers/gpu/host1x/context_bus.c (new file, 31 lines)
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA Corporation.
+ */
+
+#include <linux/device.h>
+#include <linux/of.h>
+
+struct bus_type host1x_context_device_bus_type = {
+	.name = "host1x-context",
+};
+EXPORT_SYMBOL_GPL(host1x_context_device_bus_type);
+
+static int __init host1x_context_device_bus_init(void)
+{
+	int err;
+
+	if (!of_machine_is_compatible("nvidia,tegra186") &&
+	    !of_machine_is_compatible("nvidia,tegra194") &&
+	    !of_machine_is_compatible("nvidia,tegra234"))
+		return 0;
+
+	err = bus_register(&host1x_context_device_bus_type);
+	if (err < 0) {
+		pr_err("bus type registration failed: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+postcore_initcall(host1x_context_device_bus_init);
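The new file only registers an initially empty bus type on Tegra186 and later; the 5.20 series this prep work supports will enumerate context devices on it. As a hedged illustration of how a consumer could attach a device to the bus, with the function itself hypothetical:

	#include <linux/host1x_context_bus.h>

	static int example_add_context_device(struct device *dev)
	{
		dev->bus = &host1x_context_device_bus_type;
		return device_register(dev);	/* shows up on the "host1x-context" bus */
	}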
include/linux/host1x_context_bus.h (new file, 15 lines)
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
+ */
+
+#ifndef __LINUX_HOST1X_CONTEXT_BUS_H
+#define __LINUX_HOST1X_CONTEXT_BUS_H
+
+#include <linux/device.h>
+
+#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
+extern struct bus_type host1x_context_device_bus_type;
+#endif
+
+#endif
@@ -140,6 +140,10 @@ extern "C" {
  * not require GTT memory accounting
  */
 #define AMDGPU_GEM_CREATE_PREEMPTIBLE		(1 << 11)
+/* Flag that BO can be discarded under memory pressure without keeping the
+ * content.
+ */
+#define AMDGPU_GEM_CREATE_DISCARDABLE		(1 << 12)
 
 struct drm_amdgpu_gem_create_in {
 	/** the requested memory size */
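From userspace the new flag travels through the existing GEM create ioctl in domain_flags. A hedged sketch; the ioctl and structure are the stock amdgpu uapi, though the header install path can differ between kernel uapi and libdrm:

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <drm/amdgpu_drm.h>	/* libdrm installs this as libdrm/amdgpu_drm.h */

	static int create_discardable_bo(int fd, uint64_t size, uint32_t *handle)
	{
		union drm_amdgpu_gem_create args;

		memset(&args, 0, sizeof(args));
		args.in.bo_size = size;
		args.in.alignment = 4096;
		args.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
		args.in.domain_flags = AMDGPU_GEM_CREATE_DISCARDABLE;

		if (ioctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args))
			return -1;

		*handle = args.out.handle;	/* BO content may be discarded on eviction */
		return 0;
	}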
@@ -529,6 +533,8 @@ struct drm_amdgpu_gem_op {
 #define AMDGPU_VM_MTYPE_UC		(4 << 5)
 /* Use Read Write MTYPE instead of default MTYPE */
 #define AMDGPU_VM_MTYPE_RW		(5 << 5)
+/* don't allocate MALL */
+#define AMDGPU_VM_PAGE_NOALLOC		(1 << 9)
 
 struct drm_amdgpu_gem_va {
 	/** GEM object handle */
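The NOALLOC attribute is applied per mapping through the GEM VA ioctl flags, alongside the usual access bits. A hedged sketch in the same vein, reusing the includes from the previous example:

	static int map_bo_bypassing_mall(int fd, uint32_t handle,
					 uint64_t gpu_va, uint64_t size)
	{
		struct drm_amdgpu_gem_va va;

		memset(&va, 0, sizeof(va));
		va.handle = handle;
		va.operation = AMDGPU_VA_OP_MAP;
		va.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
			   AMDGPU_VM_PAGE_NOALLOC;	/* skip MALL for this mapping */
		va.va_address = gpu_va;
		va.offset_in_bo = 0;
		va.map_size = size;

		return ioctl(fd, DRM_IOCTL_AMDGPU_GEM_VA, &va);
	}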
@@ -988,6 +994,8 @@ struct drm_amdgpu_info_vbios {
 #define AMDGPU_VRAM_TYPE_DDR4  8
 #define AMDGPU_VRAM_TYPE_GDDR6 9
 #define AMDGPU_VRAM_TYPE_DDR5  10
+#define AMDGPU_VRAM_TYPE_LPDDR4 11
+#define AMDGPU_VRAM_TYPE_LPDDR5 12
 
 struct drm_amdgpu_info_device {
 	/** PCI Device ID */
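Userspace can tell LPDDR from regular DDR by reading vram_type out of the device info query. A hedged sketch using the stock INFO ioctl, again with the includes from the earlier example:

	static int query_vram_type(int fd, uint32_t *vram_type)
	{
		struct drm_amdgpu_info_device dev_info;
		struct drm_amdgpu_info request;

		memset(&request, 0, sizeof(request));
		memset(&dev_info, 0, sizeof(dev_info));
		request.return_pointer = (uintptr_t)&dev_info;
		request.return_size = sizeof(dev_info);
		request.query = AMDGPU_INFO_DEV_INFO;

		if (ioctl(fd, DRM_IOCTL_AMDGPU_INFO, &request))
			return -1;

		*vram_type = dev_info.vram_type;	/* e.g. AMDGPU_VRAM_TYPE_LPDDR5 */
		return 0;
	}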