

Merge tag 'drm-next-2022-06-03-1' of git://anongit.freedesktop.org/drm/drm

Pull more drm updates from Dave Airlie:
 "This is mostly regular fixes, msm and amdgpu. There is a tegra patch
  that is bit of prep work for a 5.20 feature to avoid some inter-tree
  syncs, and a couple of late addition amdgpu uAPI changes but best to
  get those in early, and the userspace pieces are ready.

  msm:
   - Limiting WB modes to max sspp linewidth
   - Fixing the supported rotations to add 180 back for IGT
   - Fix to handle pm_runtime_get_sync() errors to avoid unclocked
     access in the bind() path for dpu driver
   - Fix the irq_free() without request issue, which was a big-time
     hitter in CI runs.

  amdgpu:
   - Update fdinfo to the common drm format
   - uapi (a usage sketch follows this quoted message):
       - Add VM_NOALLOC GPUVM attribute to prevent buffers from going
         into the MALL
       - Add AMDGPU_GEM_CREATE_DISCARDABLE flag to create buffers that
         can be discarded on eviction
       - Mesa code which uses these:
           https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16466
   - Link training fixes
   - DPIA fixes
   - Misc code cleanups
   - Aux fixes
   - Hotplug fixes
   - More FP clean up
   - Misc GFX9/10 fixes
   - Fix a possible memory leak in SMU shutdown
   - SMU 13 updates
   - RAS fixes
   - TMZ fixes
   - GC 11 updates
   - SMU 11 metrics fixes
   - Fix coverage blend mode for overlay plane
   - Note DDR vs LPDDR memory
   - Fuzz fix for CS IOCTL
   - Add new PCI DID

  amdkfd:
   - Clean up hive setup
   - Misc fixes

  tegra:
   - add some prelim 5.20 work to avoid inter-tree mess"
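
The two uAPI additions are illustrated by the minimal sketch below. It is
not code from this pull: it assumes the updated amdgpu_drm.h uapi header
(which defines AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_PAGE_NOALLOC,
both added in this series), and the render node path, buffer size and GPU
virtual address are placeholder values with error handling omitted.

  #include <fcntl.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <drm/amdgpu_drm.h>   /* updated uapi header from this series */

  int main(void)
  {
          /* Placeholder render node; real code would discover the device. */
          int fd = open("/dev/dri/renderD128", O_RDWR);
          union drm_amdgpu_gem_create create;
          struct drm_amdgpu_gem_va va;

          /* VRAM BO the kernel may simply discard instead of evicting. */
          memset(&create, 0, sizeof(create));
          create.in.bo_size = 1 << 20;
          create.in.alignment = 4096;
          create.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
          create.in.domain_flags = AMDGPU_GEM_CREATE_DISCARDABLE;
          if (ioctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &create))
                  return 1;

          /* Map it with NOALLOC so its accesses do not allocate in the MALL. */
          memset(&va, 0, sizeof(va));
          va.handle = create.out.handle;
          va.operation = AMDGPU_VA_OP_MAP;
          va.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
                     AMDGPU_VM_PAGE_NOALLOC;
          va.va_address = 0x400000;        /* example GPU VA, not queried */
          va.map_size = 1 << 20;
          return ioctl(fd, DRM_IOCTL_AMDGPU_GEM_VA, &va) ? 1 : 0;
  }

Kernels older than DRM minor version 3.47 (bumped in this pull) reject both
flags with -EINVAL, so userspace is expected to check the reported version
before setting them.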

* tag 'drm-next-2022-06-03-1' of git://anongit.freedesktop.org/drm/drm: (57 commits)
  drm/msm/dpu: Move min BW request and full BW disable back to mdss
  drm/msm/dpu: Fix pointer dereferenced before checking
  drm/msm/dpu: Remove unused code
  drm/msm/disp/dpu1: remove superfluous init
  drm/msm/dp: Always clear mask bits to disable interrupts at dp_ctrl_reset_irq_ctrl()
  gpu: host1x: Add context bus
  drm/amdgpu: add drm-client-id to fdinfo v2
  drm/amdgpu: Convert to common fdinfo format v5
  drm/amdgpu: bump minor version number
  drm/amdgpu: add AMDGPU_VM_NOALLOC v2
  drm/amdgpu: add AMDGPU_GEM_CREATE_DISCARDABLE
  drm/amdgpu: add beige goby PCI ID
  drm/amd/pm: Return auto perf level, if unsupported
  drm/amdkfd: fix typo in comment
  drm/amdgpu/gfx: fix typos in comments
  drm/amdgpu/cs: make commands with 0 chunks illegal behaviour.
  drm/amdgpu: differentiate between LP and non-LP DDR memory
  drm/amdgpu: Resolve pcie_bif RAS recovery bug
  drm/amdgpu: clean up asd on the ta_firmware_header_v2_0
  drm/amdgpu/discovery: validate VCN and SDMA instances
  ...
Linus Torvalds 2022-06-03 09:49:29 -07:00
commit ab18b7b36a
92 changed files with 4086 additions and 2972 deletions


@ -2,7 +2,6 @@
# drm/tegra depends on host1x, so if both drivers are built-in care must be
# taken to initialize them in the correct order. Link order is the only way
# to ensure this currently.
obj-$(CONFIG_TEGRA_HOST1X) += host1x/
obj-y += drm/ vga/
obj-y += host1x/ drm/ vga/
obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/
obj-$(CONFIG_TRACE_GPU_MEM) += trace/


@ -1621,7 +1621,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
mutex_lock(&mem->lock);
/* Unpin MMIO/DOORBELL BO's that were pinnned during allocation */
/* Unpin MMIO/DOORBELL BO's that were pinned during allocation */
if (mem->alloc_flags &
(KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {


@ -188,13 +188,17 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
vram_type = AMDGPU_VRAM_TYPE_DDR3;
break;
case Ddr4MemType:
case LpDdr4MemType:
vram_type = AMDGPU_VRAM_TYPE_DDR4;
break;
case LpDdr4MemType:
vram_type = AMDGPU_VRAM_TYPE_LPDDR4;
break;
case Ddr5MemType:
case LpDdr5MemType:
vram_type = AMDGPU_VRAM_TYPE_DDR5;
break;
case LpDdr5MemType:
vram_type = AMDGPU_VRAM_TYPE_LPDDR5;
break;
default:
vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
break;


@ -116,7 +116,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
int ret;
if (cs->in.num_chunks == 0)
return 0;
return -EINVAL;
chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
if (!chunk_array)
@ -1252,7 +1252,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
p->fence = dma_fence_get(&job->base.s_fence->finished);
amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence);
amdgpu_cs_post_dependencies(p);
if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&


@ -135,9 +135,9 @@ static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_
static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
{
struct amdgpu_device *adev = ctx->adev;
int32_t ctx_prio;
struct amdgpu_device *adev = ctx->mgr->adev;
unsigned int hw_prio;
int32_t ctx_prio;
ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
@ -162,17 +162,50 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
return hw_prio;
}
/* Calculate the time spend on the hw */
static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
{
struct drm_sched_fence *s_fence;
if (!fence)
return ns_to_ktime(0);
/* When the fence is not even scheduled it can't have spend time */
s_fence = to_drm_sched_fence(fence);
if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
return ns_to_ktime(0);
/* When it is still running account how much already spend */
if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);
return ktime_sub(s_fence->finished.timestamp,
s_fence->scheduled.timestamp);
}
static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
struct amdgpu_ctx_entity *centity)
{
ktime_t res = ns_to_ktime(0);
uint32_t i;
spin_lock(&ctx->ring_lock);
for (i = 0; i < amdgpu_sched_jobs; i++) {
res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
}
spin_unlock(&ctx->ring_lock);
return res;
}
static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
const u32 ring)
{
struct amdgpu_device *adev = ctx->adev;
struct amdgpu_ctx_entity *entity;
struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
unsigned num_scheds = 0;
int32_t ctx_prio;
unsigned int hw_prio;
struct amdgpu_device *adev = ctx->mgr->adev;
struct amdgpu_ctx_entity *entity;
enum drm_sched_priority drm_prio;
unsigned int hw_prio, num_scheds;
int32_t ctx_prio;
int r;
entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
@ -182,6 +215,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
entity->hw_ip = hw_ip;
entity->sequence = 1;
hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
@ -220,10 +254,25 @@ error_free_entity:
return r;
}
static int amdgpu_ctx_init(struct amdgpu_device *adev,
int32_t priority,
struct drm_file *filp,
struct amdgpu_ctx *ctx)
static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
{
ktime_t res = ns_to_ktime(0);
int i;
if (!entity)
return res;
for (i = 0; i < amdgpu_sched_jobs; ++i) {
res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
dma_fence_put(entity->fences[i]);
}
kfree(entity);
return res;
}
static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
struct drm_file *filp, struct amdgpu_ctx *ctx)
{
int r;
@ -233,15 +282,14 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
memset(ctx, 0, sizeof(*ctx));
ctx->adev = adev;
kref_init(&ctx->refcount);
ctx->mgr = mgr;
spin_lock_init(&ctx->ring_lock);
mutex_init(&ctx->lock);
ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter;
ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
ctx->init_priority = priority;
ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
@ -249,24 +297,10 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
return 0;
}
static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
{
int i;
if (!entity)
return;
for (i = 0; i < amdgpu_sched_jobs; ++i)
dma_fence_put(entity->fences[i]);
kfree(entity);
}
static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
u32 *stable_pstate)
{
struct amdgpu_device *adev = ctx->adev;
struct amdgpu_device *adev = ctx->mgr->adev;
enum amd_dpm_forced_level current_level;
current_level = amdgpu_dpm_get_performance_level(adev);
@ -294,7 +328,7 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
u32 stable_pstate)
{
struct amdgpu_device *adev = ctx->adev;
struct amdgpu_device *adev = ctx->mgr->adev;
enum amd_dpm_forced_level level;
u32 current_stable_pstate;
int r;
@ -345,7 +379,8 @@ done:
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
struct amdgpu_device *adev = ctx->adev;
struct amdgpu_ctx_mgr *mgr = ctx->mgr;
struct amdgpu_device *adev = mgr->adev;
unsigned i, j, idx;
if (!adev)
@ -353,8 +388,10 @@ static void amdgpu_ctx_fini(struct kref *ref)
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
amdgpu_ctx_fini_entity(ctx->entities[i][j]);
ctx->entities[i][j] = NULL;
ktime_t spend;
spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]);
atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
}
}
@ -421,7 +458,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
}
*id = (uint32_t)r;
r = amdgpu_ctx_init(adev, priority, filp, ctx);
r = amdgpu_ctx_init(mgr, priority, filp, ctx);
if (r) {
idr_remove(&mgr->ctx_handles, *id);
*id = 0;
@ -671,9 +708,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
return 0;
}
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity,
struct dma_fence *fence, uint64_t *handle)
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity,
struct dma_fence *fence)
{
struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
uint64_t seq = centity->sequence;
@ -682,8 +719,7 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
idx = seq & (amdgpu_sched_jobs - 1);
other = centity->fences[idx];
if (other)
BUG_ON(!dma_fence_is_signaled(other));
WARN_ON(other && !dma_fence_is_signaled(other));
dma_fence_get(fence);
@ -692,9 +728,11 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
centity->sequence++;
spin_unlock(&ctx->ring_lock);
atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
&ctx->mgr->time_spend[centity->hw_ip]);
dma_fence_put(other);
if (handle)
*handle = seq;
return seq;
}
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
@ -731,7 +769,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
int hw_ip,
int32_t priority)
{
struct amdgpu_device *adev = ctx->adev;
struct amdgpu_device *adev = ctx->mgr->adev;
unsigned int hw_prio;
struct drm_gpu_scheduler **scheds = NULL;
unsigned num_scheds;
@ -796,10 +834,17 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
return r;
}
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
struct amdgpu_device *adev)
{
unsigned int i;
mgr->adev = adev;
mutex_init(&mgr->lock);
idr_init(&mgr->ctx_handles);
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
atomic64_set(&mgr->time_spend[i], 0);
}
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
@ -875,80 +920,38 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
mutex_destroy(&mgr->lock);
}
static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max)
void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
ktime_t usage[AMDGPU_HW_IP_NUM])
{
ktime_t now, t1;
uint32_t i;
*total = *max = 0;
now = ktime_get();
for (i = 0; i < amdgpu_sched_jobs; i++) {
struct dma_fence *fence;
struct drm_sched_fence *s_fence;
spin_lock(&ctx->ring_lock);
fence = dma_fence_get(centity->fences[i]);
spin_unlock(&ctx->ring_lock);
if (!fence)
continue;
s_fence = to_drm_sched_fence(fence);
if (!dma_fence_is_signaled(&s_fence->scheduled)) {
dma_fence_put(fence);
continue;
}
t1 = s_fence->scheduled.timestamp;
if (!ktime_before(t1, now)) {
dma_fence_put(fence);
continue;
}
if (dma_fence_is_signaled(&s_fence->finished) &&
s_fence->finished.timestamp < now)
*total += ktime_sub(s_fence->finished.timestamp, t1);
else
*total += ktime_sub(now, t1);
t1 = ktime_sub(now, t1);
dma_fence_put(fence);
*max = max(t1, *max);
}
}
ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
uint32_t idx, uint64_t *elapsed)
{
struct idr *idp;
struct amdgpu_ctx *ctx;
unsigned int hw_ip, i;
uint32_t id;
struct amdgpu_ctx_entity *centity;
ktime_t total = 0, max = 0;
if (idx >= AMDGPU_MAX_ENTITY_NUM)
return 0;
idp = &mgr->ctx_handles;
/*
* This is a little bit racy because it can be that a ctx or a fence are
* destroyed just in the moment we try to account them. But that is ok
* since exactly that case is explicitely allowed by the interface.
*/
mutex_lock(&mgr->lock);
idr_for_each_entry(idp, ctx, id) {
ktime_t ttotal, tmax;
for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);
if (!ctx->entities[hwip][idx])
continue;
centity = ctx->entities[hwip][idx];
amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
/* Harmonic mean approximation diverges for very small
* values. If ratio < 0.01% ignore
*/
if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
continue;
total = ktime_add(total, ttotal);
max = ktime_after(tmax, max) ? tmax : max;
usage[hw_ip] = ns_to_ktime(ns);
}
mutex_unlock(&mgr->lock);
if (elapsed)
*elapsed = max;
idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
struct amdgpu_ctx_entity *centity;
ktime_t spend;
return total;
centity = ctx->entities[hw_ip][i];
if (!centity)
continue;
spend = amdgpu_ctx_entity_time(ctx, centity);
usage[hw_ip] = ktime_add(usage[hw_ip], spend);
}
}
}
mutex_unlock(&mgr->lock);
}


@ -23,16 +23,20 @@
#ifndef __AMDGPU_CTX_H__
#define __AMDGPU_CTX_H__
#include <linux/ktime.h>
#include <linux/types.h>
#include "amdgpu_ring.h"
struct drm_device;
struct drm_file;
struct amdgpu_fpriv;
struct amdgpu_ctx_mgr;
#define AMDGPU_MAX_ENTITY_NUM 4
#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) ((max) > 16384ULL*(total))
struct amdgpu_ctx_entity {
uint32_t hw_ip;
uint64_t sequence;
struct drm_sched_entity entity;
struct dma_fence *fences[];
@ -40,7 +44,7 @@ struct amdgpu_ctx_entity {
struct amdgpu_ctx {
struct kref refcount;
struct amdgpu_device *adev;
struct amdgpu_ctx_mgr *mgr;
unsigned reset_counter;
unsigned reset_counter_query;
uint32_t vram_lost_counter;
@ -61,6 +65,7 @@ struct amdgpu_ctx_mgr {
struct mutex lock;
/* protected by lock */
struct idr ctx_handles;
atomic64_t time_spend[AMDGPU_HW_IP_NUM];
};
extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
@ -70,9 +75,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity);
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity,
struct dma_fence *fence, uint64_t *seq);
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity,
struct dma_fence *fence);
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity,
uint64_t seq);
@ -85,10 +90,12 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity);
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
struct amdgpu_device *adev);
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
uint32_t idx, uint64_t *elapsed);
void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
ktime_t usage[AMDGPU_HW_IP_NUM]);
#endif


@ -1556,9 +1556,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
amdgpu_gmc_tmz_set(adev);
return 0;
}
@ -3701,6 +3698,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
/* Enable TMZ based on IP_VERSION */
amdgpu_gmc_tmz_set(adev);
amdgpu_gmc_noretry_set(adev);
/* Need to get xgmi info early to decide the reset behavior*/
if (adev->gmc.xgmi.supported) {
@ -5219,6 +5219,10 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
r = amdgpu_device_reset_sriov(adev, job ? false : true);
if (r)
adev->asic_reset_res = r;
/* Aldebaran supports ras in SRIOV, so need resume ras during reset */
if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
amdgpu_ras_resume(adev);
} else {
r = amdgpu_do_asic_reset(device_list_handle, &reset_context);
if (r && r == -EAGAIN)


@ -1130,13 +1130,24 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
ip->revision & 0xc0;
ip->revision &= ~0xc0;
adev->vcn.num_vcn_inst++;
if (adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES)
adev->vcn.num_vcn_inst++;
else
dev_err(adev->dev, "Too many VCN instances: %d vs %d\n",
adev->vcn.num_vcn_inst + 1,
AMDGPU_MAX_VCN_INSTANCES);
}
if (le16_to_cpu(ip->hw_id) == SDMA0_HWID ||
le16_to_cpu(ip->hw_id) == SDMA1_HWID ||
le16_to_cpu(ip->hw_id) == SDMA2_HWID ||
le16_to_cpu(ip->hw_id) == SDMA3_HWID)
adev->sdma.num_instances++;
le16_to_cpu(ip->hw_id) == SDMA3_HWID) {
if (adev->sdma.num_instances < AMDGPU_MAX_SDMA_INSTANCES)
adev->sdma.num_instances++;
else
dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n",
adev->sdma.num_instances + 1,
AMDGPU_MAX_SDMA_INSTANCES);
}
if (le16_to_cpu(ip->hw_id) == UMC_HWID)
adev->gmc.num_umc++;
@ -1361,7 +1372,7 @@ union mall_info {
struct mall_info_v1_0 v1;
};
int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
{
struct binary_header *bhdr;
union mall_info *mall_info;


@ -99,10 +99,11 @@
* - 3.43.0 - Add device hot plug/unplug support
* - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B
* - 3.45.0 - Add context ioctl stable pstate interface
* * 3.46.0 - To enable hot plug amdgpu tests in libdrm
* - 3.46.0 - To enable hot plug amdgpu tests in libdrm
* * 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 46
#define KMS_DRIVER_MINOR 47
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit;
@ -1940,6 +1941,7 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
{0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
{0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
{0x1002, 0x7424, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
{0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
{ PCI_DEVICE(0x1002, PCI_ANY_ID),


@ -32,6 +32,7 @@
#include <drm/amdgpu_drm.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
@ -54,58 +55,49 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {
void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
{
struct amdgpu_fpriv *fpriv;
uint32_t bus, dev, fn, i, domain;
uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
struct drm_file *file = f->private_data;
struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
struct amdgpu_bo *root;
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
ktime_t usage[AMDGPU_HW_IP_NUM];
uint32_t bus, dev, fn, domain;
unsigned int hw_ip;
int ret;
ret = amdgpu_file_to_fpriv(f, &fpriv);
if (ret)
return;
bus = adev->pdev->bus->number;
domain = pci_domain_nr(adev->pdev->bus);
dev = PCI_SLOT(adev->pdev->devfn);
fn = PCI_FUNC(adev->pdev->devfn);
root = amdgpu_bo_ref(fpriv->vm.root.bo);
if (!root)
ret = amdgpu_bo_reserve(vm->root.bo, false);
if (ret)
return;
ret = amdgpu_bo_reserve(root, false);
if (ret) {
DRM_ERROR("Fail to reserve bo\n");
return;
}
amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, &gtt_mem, &cpu_mem);
amdgpu_bo_unreserve(root);
amdgpu_bo_unref(&root);
amdgpu_vm_get_memory(vm, &vram_mem, &gtt_mem, &cpu_mem);
amdgpu_bo_unreserve(vm->root.bo);
seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus,
dev, fn, fpriv->vm.pasid);
seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL);
seq_printf(m, "gtt mem:\t%llu kB\n", gtt_mem/1024UL);
seq_printf(m, "cpu mem:\t%llu kB\n", cpu_mem/1024UL);
for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
uint32_t count = amdgpu_ctx_num_entities[i];
int idx = 0;
uint64_t total = 0, min = 0;
uint32_t perc, frac;
amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
for (idx = 0; idx < count; idx++) {
total = amdgpu_ctx_mgr_fence_usage(&fpriv->ctx_mgr,
i, idx, &min);
if ((total == 0) || (min == 0))
continue;
/*
* ******************************************************************
* For text output format description please see drm-usage-stats.rst!
* ******************************************************************
*/
perc = div64_u64(10000 * total, min);
frac = perc % 100;
seq_printf(m, "pasid:\t%u\n", fpriv->vm.pasid);
seq_printf(m, "drm-driver:\t%s\n", file->minor->dev->driver->name);
seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn);
seq_printf(m, "drm-client-id:\t%Lu\n", vm->immediate.fence_context);
seq_printf(m, "drm-memory-vram:\t%llu KiB\n", vram_mem/1024UL);
seq_printf(m, "drm-memory-gtt: \t%llu KiB\n", gtt_mem/1024UL);
seq_printf(m, "drm-memory-cpu: \t%llu KiB\n", cpu_mem/1024UL);
for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
if (!usage[hw_ip])
continue;
seq_printf(m, "%s%d:\t%d.%d%%\n",
amdgpu_ip_name[i],
idx, perc/100, frac);
}
seq_printf(m, "drm-engine-%s:\t%Ld ns\n", amdgpu_ip_name[hw_ip],
ktime_to_ns(usage[hw_ip]));
}
}
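
For reference, a hypothetical read of /proc/<pid>/fdinfo/<fd> on an amdgpu
render node would now contain the common drm-usage-stats keys emitted by the
function above, roughly like the following (all numbers invented; engines
with no accumulated time are skipped):

  pasid:          32770
  drm-driver:     amdgpu
  drm-pdev:       0000:0b:00.0
  drm-client-id:  42
  drm-memory-vram:        524288 KiB
  drm-memory-gtt:         8192 KiB
  drm-memory-cpu:         4096 KiB
  drm-engine-gfx: 1234567890 ns
  drm-engine-compute:     98765432 ns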


@ -296,8 +296,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
AMDGPU_GEM_CREATE_VRAM_CLEARED |
AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
AMDGPU_GEM_CREATE_EXPLICIT_SYNC |
AMDGPU_GEM_CREATE_ENCRYPTED))
AMDGPU_GEM_CREATE_ENCRYPTED |
AMDGPU_GEM_CREATE_DISCARDABLE))
return -EINVAL;
/* reject invalid gem domains */
@ -645,6 +645,8 @@ uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
pte_flag |= AMDGPU_PTE_WRITEABLE;
if (flags & AMDGPU_VM_PAGE_PRT)
pte_flag |= AMDGPU_PTE_PRT;
if (flags & AMDGPU_VM_PAGE_NOALLOC)
pte_flag |= AMDGPU_PTE_NOALLOC;
if (adev->gmc.gmc_funcs->map_mtype)
pte_flag |= amdgpu_gmc_map_mtype(adev,
@ -658,7 +660,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
{
const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE |
AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK;
AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK |
AMDGPU_VM_PAGE_NOALLOC;
const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE |
AMDGPU_VM_PAGE_PRT;


@ -512,9 +512,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
*/
void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_RAVEN:
case CHIP_RENOIR:
switch (adev->ip_versions[GC_HWIP][0]) {
/* RAVEN */
case IP_VERSION(9, 2, 2):
case IP_VERSION(9, 1, 0):
/* RENOIR looks like RAVEN */
case IP_VERSION(9, 3, 0):
if (amdgpu_tmz == 0) {
adev->gmc.tmz_enabled = false;
dev_info(adev->dev,
@ -525,12 +528,20 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
"Trusted Memory Zone (TMZ) feature enabled\n");
}
break;
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_VANGOGH:
case CHIP_YELLOW_CARP:
case CHIP_IP_DISCOVERY:
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 3):
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
/* VANGOGH */
case IP_VERSION(10, 3, 1):
/* YELLOW_CARP*/
case IP_VERSION(10, 3, 3):
/* GC 10.3.7 */
case IP_VERSION(10, 3, 7):
/* Don't enable it by default yet.
*/
if (amdgpu_tmz < 1) {


@ -1152,7 +1152,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
mutex_init(&fpriv->bo_list_lock);
idr_init(&fpriv->bo_list_handles);
amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
file_priv->driver_priv = fpriv;
goto out_suspend;


@ -567,6 +567,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bp->domain;
bo->allowed_domains = bo->preferred_domains;
if (bp->type != ttm_bo_type_kernel &&
!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE) &&
bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
@ -1018,7 +1019,9 @@ static const char *amdgpu_vram_names[] = {
"DDR3",
"DDR4",
"GDDR6",
"DDR5"
"DDR5",
"LPDDR4",
"LPDDR5"
};
/**


@ -41,7 +41,6 @@
/* BO flag to indicate a KFD userptr BO */
#define AMDGPU_AMDKFD_CREATE_USERPTR_BO (1ULL << 63)
#define AMDGPU_AMDKFD_CREATE_SVM_BO (1ULL << 62)
#define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
#define to_amdgpu_bo_vm(abo) container_of((abo), struct amdgpu_bo_vm, bo)


@ -1177,7 +1177,7 @@ int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool lo
psp->xgmi_context.context.mem_context.shared_mem_size = PSP_XGMI_SHARED_MEM_SIZE;
psp->xgmi_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
if (!psp->xgmi_context.context.initialized) {
if (!psp->xgmi_context.context.mem_context.shared_buf) {
ret = psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context);
if (ret)
return ret;


@ -726,7 +726,9 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
/* Do not enable if it is not allowed. */
WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));
if (!amdgpu_ras_intr_triggered()) {
/* Only enable ras feature operation handle on host side */
if (!amdgpu_sriov_vf(adev) &&
!amdgpu_ras_intr_triggered()) {
ret = psp_ras_enable_features(&adev->psp, info, enable);
if (ret) {
dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
@ -1523,7 +1525,9 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
*/
void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
{
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))
/* Fatal error events are handled on host side */
if (amdgpu_sriov_vf(adev) ||
!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))
return;
if (adev->nbio.ras &&
@ -2270,10 +2274,14 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
{
adev->ras_hw_enabled = adev->ras_enabled = 0;
if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
if (!adev->is_atom_fw ||
!amdgpu_ras_asic_supported(adev))
return;
if (!(amdgpu_sriov_vf(adev) &&
(adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2))))
return;
if (!adev->gmc.xgmi.connected_to_cpu) {
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
dev_info(adev->dev, "MEM ECC is active.\n");
@ -2285,15 +2293,21 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
dev_info(adev->dev, "SRAM ECC is active.\n");
adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
1 << AMDGPU_RAS_BLOCK__DF);
if (!amdgpu_sriov_vf(adev)) {
adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
1 << AMDGPU_RAS_BLOCK__DF);
if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0))
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
1 << AMDGPU_RAS_BLOCK__JPEG);
else
adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
1 << AMDGPU_RAS_BLOCK__JPEG);
if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0))
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
1 << AMDGPU_RAS_BLOCK__JPEG);
else
adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
1 << AMDGPU_RAS_BLOCK__JPEG);
} else {
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
1 << AMDGPU_RAS_BLOCK__SDMA |
1 << AMDGPU_RAS_BLOCK__GFX);
}
} else {
dev_info(adev->dev, "SRAM ECC is not presented.\n");
}
@ -2637,6 +2651,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
struct amdgpu_ras_block_object *obj;
int r;
/* Guest side doesn't need init ras feature */
if (amdgpu_sriov_vf(adev))
return 0;
list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
if (!node->ras_obj) {
dev_warn(adev->dev, "Warning: abnormal ras list node.\n");


@ -124,6 +124,10 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
if (amdgpu_sriov_vf(adev))
return AMDGPU_RAS_SUCCESS;
amdgpu_ras_reset_gpu(adev);
return AMDGPU_RAS_SUCCESS;


@ -117,7 +117,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
}
abo = ttm_to_amdgpu_bo(bo);
if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) {
placement->num_placement = 0;
placement->num_busy_placement = 0;
return;


@ -5111,7 +5111,7 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
/* Initialize all compute VMIDs to have no GDS, GWS, or OA
acccess. These should be enabled by FW for target VMIDs. */
access. These should be enabled by FW for target VMIDs. */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
@ -6898,7 +6898,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(prop->queue_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
@ -6919,23 +6919,6 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
tmp = 0;
/* enable the doorbell if requested */
if (prop->use_doorbell) {
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_OFFSET, prop->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_SOURCE, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_HIT, 0);
}
mqd->cp_hqd_pq_doorbell_control = tmp;
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
@ -6973,20 +6956,6 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
/* disable wptr polling */
WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
/* write the EOP addr */
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
mqd->cp_hqd_eop_base_addr_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
mqd->cp_hqd_eop_base_addr_hi);
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
mqd->cp_hqd_eop_control);
/* enable doorbell? */
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
mqd->cp_hqd_pq_doorbell_control);
/* disable the queue if it's active */
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
@ -7005,6 +6974,19 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_wptr_hi);
}
/* disable doorbells */
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
/* write the EOP addr */
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
mqd->cp_hqd_eop_base_addr_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
mqd->cp_hqd_eop_base_addr_hi);
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
mqd->cp_hqd_eop_control);
/* set the pointer to the MQD */
WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
mqd->cp_mqd_base_addr_lo);


@ -4082,7 +4082,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(prop->queue_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);


@ -3714,7 +3714,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
/* Initialize all compute VMIDs to have no GDS, GWS, or OA
acccess. These should be enabled by FW for target VMIDs. */
access. These should be enabled by FW for target VMIDs. */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
@ -4490,7 +4490,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(ring->ring_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
@ -5815,7 +5815,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
gfx_v8_0_wait_for_rlc_serdes(adev);
/* write cmd to Set CGCG Overrride */
/* write cmd to Set CGCG Override */
gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */


@ -2535,7 +2535,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
/* Initialize all compute VMIDs to have no GDS, GWS, or OA
acccess. These should be enabled by FW for target VMIDs. */
access. These should be enabled by FW for target VMIDs. */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
@ -3514,7 +3514,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(ring->ring_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
@ -3535,23 +3535,6 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
tmp = 0;
/* enable the doorbell if requested */
if (ring->use_doorbell) {
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_OFFSET, ring->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_SOURCE, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_HIT, 0);
}
mqd->cp_hqd_pq_doorbell_control = tmp;
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
ring->wptr = 0;
mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);


@ -613,6 +613,9 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
*flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
*flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
*flags &= ~AMDGPU_PTE_NOALLOC;
*flags |= (mapping->flags & AMDGPU_PTE_NOALLOC);
if (mapping->flags & AMDGPU_PTE_PRT) {
*flags |= AMDGPU_PTE_PRT;
*flags |= AMDGPU_PTE_SNOOPED;


@ -500,6 +500,9 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
*flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
*flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
*flags &= ~AMDGPU_PTE_NOALLOC;
*flags |= (mapping->flags & AMDGPU_PTE_NOALLOC);
if (mapping->flags & AMDGPU_PTE_PRT) {
*flags |= AMDGPU_PTE_PRT;
*flags |= AMDGPU_PTE_SNOOPED;


@ -32,13 +32,10 @@
MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_5_asd.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_5_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_asd.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
@ -85,17 +82,17 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
err = psp_init_sos_microcode(psp, chip_name);
if (err)
return err;
err = psp_init_ta_microcode(&adev->psp, chip_name);
if (err)
return err;
/* It's not necessary to load ras ta on Guest side */
if (!amdgpu_sriov_vf(adev)) {
err = psp_init_ta_microcode(&adev->psp, chip_name);
if (err)
return err;
}
break;
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 3):
case IP_VERSION(13, 0, 5):
case IP_VERSION(13, 0, 8):
err = psp_init_asd_microcode(psp, chip_name);
if (err)
return err;
err = psp_init_toc_microcode(psp, chip_name);
if (err)
return err;


@ -42,6 +42,7 @@
#include "soc15.h"
#include "soc15_common.h"
#include "soc21.h"
static const struct amd_ip_funcs soc21_common_ip_funcs;

File diff suppressed because it is too large.


@ -23,37 +23,52 @@
/* To compile this assembly code:
*
* Navi1x:
* cpp -DASIC_TARGET_NAVI1X=1 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3
* sp3-nv1x nv1x.sp3 -hex nv1x.hex
* cpp -DASIC_FAMILY=CHIP_NAVI10 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3
* sp3 nv1x.sp3 -hex nv1x.hex
*
* Others:
* cpp -DASIC_TARGET_NAVI1X=0 cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3
* sp3-gfx10 gfx10.sp3 -hex gfx10.hex
* gfx10:
* cpp -DASIC_FAMILY=CHIP_SIENNA_CICHLID cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3
* sp3 gfx10.sp3 -hex gfx10.hex
*
* gfx11:
* cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3
* sp3 gfx11.sp3 -hex gfx11.hex
*/
#define NO_SQC_STORE !ASIC_TARGET_NAVI1X
#define CHIP_NAVI10 26
#define CHIP_SIENNA_CICHLID 30
#define CHIP_PLUM_BONITO 36
#define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID)
#define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID)
#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
#define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6
var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24
var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 4
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8
var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24
var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4
var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11
var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1
#if ASIC_FAMILY < CHIP_PLUM_BONITO
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
#else
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
#endif
var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF
var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF
var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80
var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7
var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
@ -63,46 +78,37 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000
var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12
var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19
var SQ_WAVE_IB_STS_RCNT_SHIFT = 16
var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15
var SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT = 25
var SQ_WAVE_IB_STS_REPLAY_W64H_SIZE = 1
var SQ_WAVE_IB_STS_REPLAY_W64H_MASK = 0x02000000
var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE = 1
var SQ_WAVE_IB_STS_RCNT_SIZE = 6
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF
var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
// bits [31:24] unused by SPI debug data
var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31
var TTMP11_SAVE_REPLAY_W64H_MASK = 0x80000000
var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 24
var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0x7F000000
var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23
var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000
// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14]
// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000
var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC
var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000
var S_SAVE_SPI_INIT_ATC_SHIFT = 27
var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000
var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
var S_SAVE_PC_HI_HT_MASK = 0x01000000
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
var S_SAVE_PC_HI_RCNT_SHIFT = 26
var S_SAVE_PC_HI_RCNT_MASK = 0xFC000000
var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 25
var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x02000000
var S_SAVE_PC_HI_REPLAY_W64H_SHIFT = 24
var S_SAVE_PC_HI_REPLAY_W64H_MASK = 0x01000000
var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000
var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31
var s_sgpr_save_num = 108
@ -130,19 +136,10 @@ var s_save_ttmps_hi = s_save_trapsts
var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000
var S_RESTORE_SPI_INIT_ATC_SHIFT = 27
var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000
var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28
var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
var S_WAVE_SIZE = 25
var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT
var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK
var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK
var s_restore_spi_init_lo = exec_lo
var s_restore_spi_init_hi = exec_hi
var s_restore_mem_offset = ttmp12
@ -179,84 +176,133 @@ L_JUMP_TO_RESTORE:
L_SKIP_RESTORE:
s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK
if SINGLE_STEP_MISSED_WORKAROUND
// No single step exceptions if MODE.DEBUG_EN=0.
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND
// Second-level trap already handled exception if STATUS.HALT=1.
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
// Prioritize single step exception over context save.
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
s_cbranch_scc0 L_FETCH_2ND_TRAP
L_NO_SINGLE_STEP_WORKAROUND:
end
// Clear SPI_PRIO: do not save with elevated priority.
// Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
s_cbranch_scc0 L_NOT_HALTED
L_HALTED:
// Host trap may occur while wave is halted.
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
L_CHECK_SAVE:
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
s_cbranch_scc1 L_SAVE
// If STATUS.MEM_VIOL is asserted then halt the wave to prevent
// the exception raising again and blocking context save.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
s_cbranch_scc0 L_FETCH_2ND_TRAP
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
// Wave is halted but neither host trap nor SAVECTX is raised.
// Caused by instruction fetch memory violation.
// Spin wait until context saved to prevent interrupt storm.
s_sleep 0x10
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_branch L_CHECK_SAVE
L_NOT_HALTED:
// Let second-level handle non-SAVECTX exception or trap.
// Any concurrent SAVECTX will be handled upon re-entry once halted.
// Check non-maskable exceptions. memory_violation, illegal_instruction
// and xnack_error exceptions always cause the wave to enter the trap
// handler.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
// Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
// Maskable exceptions only cause the wave to enter the trap handler if
// their respective bit in mode.excp_en is set.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
s_cbranch_scc0 L_CHECK_TRAP_ID
s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
s_cbranch_scc0 L_NOT_ADDR_WATCH
s_bitset1_b32 ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch
L_NOT_ADDR_WATCH:
s_getreg_b32 ttmp3, hwreg(HW_REG_MODE)
s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
s_and_b32 ttmp2, ttmp2, ttmp3
s_cbranch_scc1 L_FETCH_2ND_TRAP
L_CHECK_TRAP_ID:
// Check trap_id != 0
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
if SINGLE_STEP_MISSED_WORKAROUND
// Prioritize single step exception over context save.
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
end
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
s_cbranch_scc1 L_SAVE
L_FETCH_2ND_TRAP:
#if ASIC_TARGET_NAVI1X
// Preserve and clear scalar XNACK state before issuing scalar loads.
// Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
// unused space ttmp11[31:24].
s_andn2_b32 ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK)
s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS)
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
s_or_b32 ttmp11, ttmp11, ttmp3
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_or_b32 ttmp11, ttmp11, ttmp3
s_andn2_b32 ttmp2, ttmp2, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK)
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
#if HAVE_XNACK
save_and_clear_ib_sts(ttmp14, ttmp15)
#endif
// Read second-level TBA/TMA from first-level TMA and jump if available.
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
// ttmp12 holds SQ_WAVE_STATUS
#if HAVE_SENDMSG_RTN
s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
s_waitcnt lgkmcnt(0)
#else
s_getreg_b32 ttmp14, hwreg(HW_REG_SHADER_TMA_LO)
s_getreg_b32 ttmp15, hwreg(HW_REG_SHADER_TMA_HI)
#endif
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
s_or_b32 ttmp11, ttmp11, ttmp2
s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
s_waitcnt lgkmcnt(0)
s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
s_waitcnt lgkmcnt(0)
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
L_NO_NEXT_TRAP:
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK
s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly.
s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0
s_addc_u32 ttmp1, ttmp1, 0
L_EXCP_CASE:
// If not caused by trap then halt wave to prevent re-entry.
s_and_b32 ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK)
s_cbranch_scc1 L_TRAP_CASE
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
// If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
// Rewind the PC to prevent this from occurring.
s_sub_u32 ttmp0, ttmp0, 0x8
s_subb_u32 ttmp1, ttmp1, 0x0
s_branch L_EXIT_TRAP
L_TRAP_CASE:
// Host trap will not cause trap re-entry.
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
s_cbranch_scc1 L_EXIT_TRAP
// Advance past trap instruction to prevent re-entry.
s_add_u32 ttmp0, ttmp0, 0x4
s_addc_u32 ttmp1, ttmp1, 0x0
L_EXIT_TRAP:
s_and_b32 ttmp1, ttmp1, 0xFFFF
#if ASIC_TARGET_NAVI1X
// Restore SQ_WAVE_IB_STS.
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
s_or_b32 ttmp2, ttmp2, ttmp3
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
#if HAVE_XNACK
restore_ib_sts(ttmp14, ttmp15)
#endif
// Restore SQ_WAVE_STATUS.
@ -271,20 +317,8 @@ L_SAVE:
s_mov_b32 s_save_tmp, 0
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
#if ASIC_TARGET_NAVI1X
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE)
s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE)
s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT, SQ_WAVE_IB_STS_REPLAY_W64H_SIZE)
s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY and REPLAY_W64H in IB_STS
s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
#if HAVE_XNACK
save_and_clear_ib_sts(s_save_tmp, s_save_trapsts)
#endif
/* inform SPI the readiness and wait for SPI's go signal */
@ -292,9 +326,13 @@ L_SAVE:
s_mov_b32 s_save_exec_hi, exec_hi
s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
#if HAVE_SENDMSG_RTN
s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE)
#else
s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
#endif
#if ASIC_TARGET_NAVI1X
#if ASIC_FAMILY < CHIP_SIENNA_CICHLID
L_SLEEP:
// sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause
// SQ hang, since the 7,8th wave could not get arbit to exec inst, while
@ -305,16 +343,57 @@ L_SLEEP:
s_waitcnt lgkmcnt(0)
#endif
// Save first_wave flag so we can clear high bits of save address.
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT)
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
#if NO_SQC_STORE
// Trap temporaries must be saved via VGPR but all VGPRs are in use.
// There is no ttmp space to hold the resource constant for VGPR save.
// Save v0 by itself since it requires only two SGPRs.
s_mov_b32 s_save_ttmps_lo, exec_lo
s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF
s_mov_b32 exec_lo, 0xFFFFFFFF
s_mov_b32 exec_hi, 0xFFFFFFFF
global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] slc:1 glc:1
v_mov_b32 v0, 0x0
s_mov_b32 exec_lo, s_save_ttmps_lo
s_mov_b32 exec_hi, s_save_ttmps_hi
#endif
// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
// ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
get_wave_size(s_save_ttmps_hi)
get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
get_svgpr_size_bytes(s_save_ttmps_hi)
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0
#if ASIC_TARGET_NAVI1X
#if NO_SQC_STORE
v_writelane_b32 v0, ttmp4, 0x4
v_writelane_b32 v0, ttmp5, 0x5
v_writelane_b32 v0, ttmp6, 0x6
v_writelane_b32 v0, ttmp7, 0x7
v_writelane_b32 v0, ttmp8, 0x8
v_writelane_b32 v0, ttmp9, 0x9
v_writelane_b32 v0, ttmp10, 0xA
v_writelane_b32 v0, ttmp11, 0xB
v_writelane_b32 v0, ttmp13, 0xD
v_writelane_b32 v0, exec_lo, 0xE
v_writelane_b32 v0, exec_hi, 0xF
s_mov_b32 exec_lo, 0x3FFF
s_mov_b32 exec_hi, 0x0
global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] inst_offset:0x40 slc:1 glc:1
v_readlane_b32 ttmp14, v0, 0xE
v_readlane_b32 ttmp15, v0, 0xF
s_mov_b32 exec_lo, ttmp14
s_mov_b32 exec_hi, ttmp15
#else
s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
@ -326,12 +405,6 @@ L_SLEEP:
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE
s_mov_b32 s_save_m0, m0
@ -339,7 +412,7 @@ L_SLEEP:
s_mov_b32 s_save_mem_offset, 0x0
get_wave_size(s_wave_size)
#if ASIC_TARGET_NAVI1X
#if HAVE_XNACK
// Save and clear vector XNACK state late to free up SGPRs.
s_getreg_b32 s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK)
s_setreg_imm32_b32 hwreg(HW_REG_SHADER_XNACK_MASK), 0x0
@ -361,7 +434,9 @@ L_SAVE_4VGPR_WAVE32:
// VGPR Allocated in 4-GPR granularity
#if !NO_SQC_STORE
buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
#endif
buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
@ -372,7 +447,9 @@ L_SAVE_4VGPR_WAVE64:
// VGPR Allocated in 4-GPR granularity
#if !NO_SQC_STORE
buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
#endif
buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
@ -397,7 +474,8 @@ L_SAVE_HWREG:
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
@ -418,9 +496,13 @@ L_SAVE_HWREG:
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
#if NO_SQC_STORE
// Write HWREG/SGPRs with 32 VGPR lanes, wave32 is common case.
// Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
s_mov_b32 exec_lo, 0xFFFF
s_mov_b32 exec_hi, 0x0
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
// Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
s_mov_b32 exec_lo, 0xFFFFFFFF
#endif
/* save SGPRs */
@ -506,7 +588,7 @@ L_SAVE_LDS_NORMAL:
s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
s_barrier //LDS is used? wait for other waves in the same TG
s_and_b32 s_save_tmp, s_wave_size, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
s_cbranch_scc0 L_SAVE_LDS_DONE
// first wave do LDS save;
@ -628,7 +710,7 @@ L_SAVE_VGPR_WAVE64:
// VGPR store using dw burst
s_mov_b32 m0, 0x4 //VGPR initial index value =4
s_cmp_lt_u32 m0, s_save_alloc_size
s_cbranch_scc0 L_SAVE_VGPR_END
s_cbranch_scc0 L_SAVE_SHARED_VGPR
L_SAVE_VGPR_W64_LOOP:
v_movrels_b32 v0, v0 //v0 = v[0+m0]
@ -646,6 +728,7 @@ L_SAVE_VGPR_W64_LOOP:
s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete?
L_SAVE_SHARED_VGPR:
//Below part will be the save shared vgpr part (new for gfx10)
s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
@ -674,12 +757,7 @@ L_RESTORE:
s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
//determine it is wave32 or wave64
get_wave_size(s_restore_size)
@ -722,7 +800,13 @@ L_RESTORE_LDS_NORMAL:
s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64
L_RESTORE_LDS_LOOP_W32:
#if HAVE_BUFFER_LDS_LOAD
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
#else
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
s_waitcnt vmcnt(0)
ds_store_addtid_b32 v0
#endif
s_add_u32 m0, m0, 128 // 128 DW
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128DW
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
@ -730,7 +814,13 @@ L_RESTORE_LDS_LOOP_W32:
s_branch L_RESTORE_VGPR
L_RESTORE_LDS_LOOP_W64:
#if HAVE_BUFFER_LDS_LOAD
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
#else
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
s_waitcnt vmcnt(0)
ds_store_addtid_b32 v0
#endif
s_add_u32 m0, m0, 256 // 256 DW
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256DW
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
@ -765,6 +855,8 @@ L_RESTORE_VGPR_NORMAL:
s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4
s_mov_b32 m0, 4 //VGPR initial index value = 4
s_cmp_lt_u32 m0, s_restore_alloc_size
s_cbranch_scc0 L_RESTORE_SGPR
L_RESTORE_VGPR_WAVE32_LOOP:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
@ -786,6 +878,7 @@ L_RESTORE_VGPR_WAVE32_LOOP:
buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128
buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*2
buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*3
s_waitcnt vmcnt(0)
s_branch L_RESTORE_SGPR
@ -796,6 +889,8 @@ L_RESTORE_VGPR_WAVE64:
s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
s_mov_b32 m0, 4 //VGPR initial index value = 4
s_cmp_lt_u32 m0, s_restore_alloc_size
s_cbranch_scc0 L_RESTORE_SHARED_VGPR
L_RESTORE_VGPR_WAVE64_LOOP:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
@ -812,6 +907,7 @@ L_RESTORE_VGPR_WAVE64_LOOP:
s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
L_RESTORE_SHARED_VGPR:
//Below part will be the restore shared vgpr part (new for gfx10)
s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size
s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
@ -935,7 +1031,7 @@ L_RESTORE_HWREG:
s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
#if ASIC_TARGET_NAVI1X
#if HAVE_XNACK
s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
#endif
@ -945,8 +1041,10 @@ L_RESTORE_HWREG:
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
// ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
get_svgpr_size_bytes(s_restore_ttmps_hi)
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
@ -956,24 +1054,8 @@ L_RESTORE_HWREG:
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
s_waitcnt lgkmcnt(0)
#if ASIC_TARGET_NAVI1X
s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
s_mov_b32 s_restore_tmp, 0x0
s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_REPLAY_W64H_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT
s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
#if HAVE_XNACK
restore_ib_sts(s_restore_tmp, s_restore_m0)
#endif
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
@ -1089,5 +1171,29 @@ end
function get_wave_size(s_reg)
s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
s_or_b32 s_reg, s_save_spi_init_hi, s_reg //share with exec_hi, it's at bit25
end
function save_and_clear_ib_sts(tmp1, tmp2)
// Preserve and clear scalar XNACK state before issuing scalar loads.
// Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
// unused space ttmp11[31:24].
s_andn2_b32 ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK)
s_getreg_b32 tmp1, hwreg(HW_REG_IB_STS)
s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
s_lshl_b32 tmp2, tmp2, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
s_or_b32 ttmp11, ttmp11, tmp2
s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_lshl_b32 tmp2, tmp2, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_or_b32 ttmp11, ttmp11, tmp2
s_andn2_b32 tmp1, tmp1, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK)
s_setreg_b32 hwreg(HW_REG_IB_STS), tmp1
end
function restore_ib_sts(tmp1, tmp2)
s_lshr_b32 tmp1, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_lshr_b32 tmp1, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
s_and_b32 tmp1, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
s_or_b32 tmp1, tmp1, tmp2
s_setreg_b32 hwreg(HW_REG_IB_STS), tmp1
end
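
The trap-temporary address arithmetic above follows the layout formula noted in the comments, size(VGPR) + size(SVGPR) + size(SGPR) + 0x40, where the 0x40 bytes correspond to the HWREG block written just before the ttmps. A rough host-side C sketch of that formula (illustrative only; the region sizes are passed in here rather than derived from hardware registers as the handler does):

/* Illustrative only, not driver code: mirrors the context-save offset used
 * by the trap handler.  The VGPR region scales with the wave size, e.g.
 * vgpr_bytes = vgpr_count * lanes * 4, ignoring allocation granularity. */
static unsigned int ttmp_save_offset(unsigned int vgpr_bytes,
                                     unsigned int shared_vgpr_bytes,
                                     unsigned int sgpr_bytes)
{
        /* ttmps are stored right after the 0x40-byte HWREG block that
         * follows the VGPR, shared-VGPR and SGPR regions */
        return vgpr_bytes + shared_vgpr_bytes + sgpr_bytes + 0x40;
}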


@ -46,8 +46,6 @@ var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN
/**************************************************************************/
/* variables */
/**************************************************************************/
var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
@ -56,6 +54,7 @@ var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1
var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3
var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29
var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000
var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
@ -72,8 +71,10 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
#endif
var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask
var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF
var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80
var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7
var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
@ -83,37 +84,30 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000
var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK = 0x10000000
var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME
var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12
var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19
var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME
var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data
var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000
var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23
var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000
/* Save */
var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes
var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
var S_SAVE_SPI_INIT_ATC_SHIFT = 27
var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
var S_SAVE_PC_HI_HT_MASK = 0x01000000
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME
var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME
var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME
var s_save_spi_init_lo = exec_lo
var s_save_spi_init_hi = exec_hi
@ -140,18 +134,9 @@ var s_save_ttmps_hi = s_save_trapsts //no conflict
var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
var S_RESTORE_SPI_INIT_ATC_SHIFT = 27
var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28
var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT
var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK
var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK
var s_restore_spi_init_lo = exec_lo
var s_restore_spi_init_hi = exec_hi
@ -199,71 +184,77 @@ L_JUMP_TO_RESTORE:
L_SKIP_RESTORE:
s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save
if SINGLE_STEP_MISSED_WORKAROUND
// No single step exceptions if MODE.DEBUG_EN=0.
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND
// Second-level trap already handled exception if STATUS.HALT=1.
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
// Prioritize single step exception over context save.
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
s_cbranch_scc0 L_FETCH_2ND_TRAP
L_NO_SINGLE_STEP_WORKAROUND:
end
// Clear SPI_PRIO: do not save with elevated priority.
// Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
s_cbranch_scc0 L_NOT_HALTED
L_HALTED:
// Host trap may occur while wave is halted.
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
L_CHECK_SAVE:
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
s_cbranch_scc1 L_SAVE //this is the operation for save
// ********* Handle non-CWSR traps *******************
// Illegal instruction is a non-maskable exception which blocks context save.
// Halt the wavefront and return from the trap.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
s_cbranch_scc1 L_HALT_WAVE
// If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA.
// Instead, halt the wavefront and return from the trap.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
s_cbranch_scc0 L_FETCH_2ND_TRAP
L_HALT_WAVE:
// If STATUS.HALT is set then this fault must come from SQC instruction fetch.
// We cannot prevent further faults. Spin wait until context saved.
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
s_cbranch_scc0 L_NOT_ALREADY_HALTED
L_WAIT_CTX_SAVE:
// Wave is halted but neither host trap nor SAVECTX is raised.
// Caused by instruction fetch memory violation.
// Spin wait until context saved to prevent interrupt storm.
s_sleep 0x10
s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK
s_cbranch_scc0 L_WAIT_CTX_SAVE
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_branch L_CHECK_SAVE
L_NOT_ALREADY_HALTED:
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
L_NOT_HALTED:
// Let second-level handle non-SAVECTX exception or trap.
// Any concurrent SAVECTX will be handled upon re-entry once halted.
// If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
// Rewind the PC to prevent this from occurring. The debugger compensates for this.
s_sub_u32 ttmp0, ttmp0, 0x8
s_subb_u32 ttmp1, ttmp1, 0x0
// Check non-maskable exceptions. memory_violation, illegal_instruction
// and xnack_error exceptions always cause the wave to enter the trap
// handler.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
// Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
// Maskable exceptions only cause the wave to enter the trap handler if
// their respective bit in mode.excp_en is set.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
s_cbranch_scc0 L_CHECK_TRAP_ID
s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
s_cbranch_scc0 L_NOT_ADDR_WATCH
s_bitset1_b32 ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch
L_NOT_ADDR_WATCH:
s_getreg_b32 ttmp3, hwreg(HW_REG_MODE)
s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
s_and_b32 ttmp2, ttmp2, ttmp3
s_cbranch_scc1 L_FETCH_2ND_TRAP
L_CHECK_TRAP_ID:
// Check trap_id != 0
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
if SINGLE_STEP_MISSED_WORKAROUND
// Prioritize single step exception over context save.
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
end
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
s_cbranch_scc1 L_SAVE
L_FETCH_2ND_TRAP:
// Preserve and clear scalar XNACK state before issuing scalar reads.
// Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26].
s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS)
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK
s_or_b32 ttmp11, ttmp11, ttmp3
s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
save_and_clear_ib_sts(ttmp14)
// Read second-level TBA/TMA from first-level TMA and jump if available.
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
@ -271,27 +262,48 @@ L_FETCH_2ND_TRAP:
s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
s_or_b32 ttmp11, ttmp11, ttmp2
s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
s_waitcnt lgkmcnt(0)
s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
s_waitcnt lgkmcnt(0)
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
L_NO_NEXT_TRAP:
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception
s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly.
s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0
s_addc_u32 ttmp1, ttmp1, 0
L_EXCP_CASE:
// If not caused by trap then halt wave to prevent re-entry.
s_and_b32 ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK)
s_cbranch_scc1 L_TRAP_CASE
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
// If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
// Rewind the PC to prevent this from occurring.
s_sub_u32 ttmp0, ttmp0, 0x8
s_subb_u32 ttmp1, ttmp1, 0x0
s_branch L_EXIT_TRAP
L_TRAP_CASE:
// Host trap will not cause trap re-entry.
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
s_cbranch_scc1 L_EXIT_TRAP
// Advance past trap instruction to prevent re-entry.
s_add_u32 ttmp0, ttmp0, 0x4
s_addc_u32 ttmp1, ttmp1, 0x0
L_EXIT_TRAP:
s_and_b32 ttmp1, ttmp1, 0xFFFF
// Restore SQ_WAVE_IB_STS.
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
restore_ib_sts(ttmp14)
// Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
@ -312,16 +324,7 @@ L_SAVE:
s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT
s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY
s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS
s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
save_and_clear_ib_sts(s_save_tmp)
/* inform SPI the readiness and wait for SPI's go signal */
s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
@ -360,12 +363,6 @@ L_SAVE:
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE
//FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?)
s_mov_b32 s_save_m0, m0 //save M0
@ -690,12 +687,6 @@ L_RESTORE:
s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
/* global mem offset */
// s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0
@ -889,19 +880,7 @@ L_RESTORE:
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
s_waitcnt lgkmcnt(0)
//reuse s_restore_m0 as a temp register
s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero
s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
restore_ib_sts(s_restore_tmp)
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
@ -910,8 +889,7 @@ L_RESTORE:
s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc
s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
/**************************************************************************/
@ -1078,3 +1056,19 @@ function set_status_without_spi_prio(status, tmp)
s_nop 0x2 // avoid S_SETREG => S_SETREG hazard
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status
end
function save_and_clear_ib_sts(tmp)
// Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26].
s_getreg_b32 tmp, hwreg(HW_REG_IB_STS)
s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_lshl_b32 tmp, tmp, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK
s_or_b32 ttmp11, ttmp11, tmp
s_setreg_imm32_b32 hwreg(HW_REG_IB_STS), 0x0
end
function restore_ib_sts(tmp)
s_lshr_b32 tmp, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_setreg_b32 hwreg(HW_REG_IB_STS), tmp
end
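
For reference, the ttmp11 packing performed by save_and_clear_ib_sts() and undone by restore_ib_sts() above can be modelled in C as below. This is only a sketch of the bit relocation (RCNT[20:16] and FIRST_REPLAY[15] parked in the otherwise unused ttmp11[31:26]); the constants mirror the ones defined in this file.

/* Host-side model of the packing above, not driver code. */
#define IB_STS_RCNT_FIRST_REPLAY_MASK   0x001F8000u     /* RCNT[20:16] | FIRST_REPLAY[15] */
#define IB_STS_FIRST_REPLAY_SHIFT       15
#define TTMP11_SAVE_SHIFT               26              /* bits [31:26] unused by SPI debug data */

static unsigned int pack_ib_sts_into_ttmp11(unsigned int ttmp11, unsigned int ib_sts)
{
        unsigned int shift = TTMP11_SAVE_SHIFT - IB_STS_FIRST_REPLAY_SHIFT;
        unsigned int saved = (ib_sts & IB_STS_RCNT_FIRST_REPLAY_MASK) << shift;

        ttmp11 &= ~(IB_STS_RCNT_FIRST_REPLAY_MASK << shift);   /* clear ttmp11[31:26] */
        return ttmp11 | saved;
}

static unsigned int unpack_ib_sts_from_ttmp11(unsigned int ttmp11)
{
        unsigned int shift = TTMP11_SAVE_SHIFT - IB_STS_FIRST_REPLAY_SHIFT;

        return (ttmp11 >> shift) & IB_STS_RCNT_FIRST_REPLAY_MASK;
}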


@ -441,10 +441,14 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_nv1x_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
} else {
} else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx10_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
} else {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx11_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
}
kfd->cwsr_enabled = true;


@ -531,7 +531,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0;
bp.flags |= AMDGPU_AMDKFD_CREATE_SVM_BO;
bp.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;
bp.type = ttm_bo_type_device;
bp.resv = NULL;
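
AMDGPU_GEM_CREATE_DISCARDABLE used here is a GEM creation flag, so the same request can also be made from userspace through the GEM_CREATE ioctl. A hypothetical userspace sketch, not part of this patch (error handling and DRM fd setup omitted; the uapi header path may differ per system):

#include <string.h>
#include <sys/ioctl.h>
#include <drm/amdgpu_drm.h>     /* assumes kernel uapi headers are installed */

static int create_discardable_vram_bo(int drm_fd, __u64 size)
{
        union drm_amdgpu_gem_create args;

        memset(&args, 0, sizeof(args));
        args.in.bo_size = size;
        args.in.alignment = 4096;
        args.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
        args.in.domain_flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
                               AMDGPU_GEM_CREATE_DISCARDABLE;

        return ioctl(drm_fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args);
}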


@ -1271,6 +1271,12 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
if (!peer_dev)
continue;
/* Include the CPU peer in GPU hive if connected over xGMI. */
if (!peer_dev->gpu && !peer_dev->node_props.hive_id &&
dev->node_props.hive_id &&
dev->gpu->adev->gmc.xgmi.connected_to_cpu)
peer_dev->node_props.hive_id = dev->node_props.hive_id;
list_for_each_entry(inbound_link, &peer_dev->io_link_props,
list) {
if (inbound_link->node_to != link->node_from)
@ -1302,22 +1308,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
/* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */
if (gpu->hive_id && gpu->adev->gmc.xgmi.connected_to_cpu) {
struct kfd_topology_device *top_dev;
down_read(&topology_lock);
list_for_each_entry(top_dev, &topology_device_list, list) {
if (top_dev->gpu)
break;
top_dev->node_props.hive_id = gpu->hive_id;
}
up_read(&topology_lock);
}
/* Check to see if this gpu device exists in the topology_device_list.
* If so, assign the gpu to that device,
* else create a Virtual CRAT for this gpu device and then parse that


@ -769,7 +769,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params)
do {
dc_stat_get_dmub_notification(adev->dm.dc, &notify);
if (notify.type > ARRAY_SIZE(dm->dmub_thread_offload)) {
if (notify.type >= ARRAY_SIZE(dm->dmub_thread_offload)) {
DRM_ERROR("DM: notify type %d invalid!", notify.type);
continue;
}
@ -5381,17 +5381,19 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev,
static void
fill_blending_from_plane_state(const struct drm_plane_state *plane_state,
bool *per_pixel_alpha, bool *global_alpha,
int *global_alpha_value)
bool *per_pixel_alpha, bool *pre_multiplied_alpha,
bool *global_alpha, int *global_alpha_value)
{
*per_pixel_alpha = false;
*pre_multiplied_alpha = true;
*global_alpha = false;
*global_alpha_value = 0xff;
if (plane_state->plane->type != DRM_PLANE_TYPE_OVERLAY)
return;
if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI) {
if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ||
plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) {
static const uint32_t alpha_formats[] = {
DRM_FORMAT_ARGB8888,
DRM_FORMAT_RGBA8888,
@ -5406,6 +5408,9 @@ fill_blending_from_plane_state(const struct drm_plane_state *plane_state,
break;
}
}
if (per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE)
*pre_multiplied_alpha = false;
}
if (plane_state->alpha < 0xffff) {
@ -5568,7 +5573,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
return ret;
fill_blending_from_plane_state(
plane_state, &plane_info->per_pixel_alpha,
plane_state, &plane_info->per_pixel_alpha, &plane_info->pre_multiplied_alpha,
&plane_info->global_alpha, &plane_info->global_alpha_value);
return 0;
@ -5615,6 +5620,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
dc_plane_state->tiling_info = plane_info.tiling_info;
dc_plane_state->visible = plane_info.visible;
dc_plane_state->per_pixel_alpha = plane_info.per_pixel_alpha;
dc_plane_state->pre_multiplied_alpha = plane_info.pre_multiplied_alpha;
dc_plane_state->global_alpha = plane_info.global_alpha;
dc_plane_state->global_alpha_value = plane_info.global_alpha_value;
dc_plane_state->dcc = plane_info.dcc;
@ -7911,7 +7917,8 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
if (plane->type == DRM_PLANE_TYPE_OVERLAY &&
plane_cap && plane_cap->per_pixel_alpha) {
unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) |
BIT(DRM_MODE_BLEND_PREMULTI);
BIT(DRM_MODE_BLEND_PREMULTI) |
BIT(DRM_MODE_BLEND_COVERAGE);
drm_plane_create_alpha_property(plane);
drm_plane_create_blend_mode_property(plane, blend_caps);
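
The pre_multiplied_alpha flag introduced above distinguishes the two per-pixel DRM blend modes now advertised: with DRM_MODE_BLEND_PREMULTI the framebuffer colour is already scaled by its alpha, while DRM_MODE_BLEND_COVERAGE expects the blender to apply that scaling. A single-channel illustration of the two equations (float maths, global alpha ignored, not driver code):

static float blend_premulti(float src, float src_a, float dst)
{
        return src + (1.0f - src_a) * dst;              /* src already multiplied by alpha */
}

static float blend_coverage(float src, float src_a, float dst)
{
        return src_a * src + (1.0f - src_a) * dst;      /* blender applies the alpha */
}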


@ -122,7 +122,7 @@ static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
dpp_inst = clk_mgr->base.ctx->dc->res_pool->dpps[i]->inst;
dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;
prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[i];
prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[dpp_inst];
if (safe_to_lower || prev_dppclk_khz < dppclk_khz)
clk_mgr->dccg->funcs->update_dpp_dto(


@ -91,7 +91,8 @@ static void dcn315_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable)
if (pipe->top_pipe || pipe->prev_odm_pipe)
continue;
if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) {
if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
dc_is_virtual_signal(pipe->stream->signal))) {
if (disable)
pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
else


@ -122,7 +122,8 @@ static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable)
if (pipe->top_pipe || pipe->prev_odm_pipe)
continue;
if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) {
if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
dc_is_virtual_signal(pipe->stream->signal))) {
if (disable)
pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
else


@ -2901,14 +2901,15 @@ static void commit_planes_for_stream(struct dc *dc,
top_pipe_to_program->stream_res.tg);
}
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock)
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
dc->hwss.interdependent_update_lock(dc, context, true);
else
} else {
/* Lock the top pipe while updating plane addrs, since freesync requires
* plane addr update event triggers to be synchronized.
* top_pipe_to_program is expected to never be NULL
*/
dc->hwss.pipe_control_lock(dc, top_pipe_to_program, true);
}
// Stream updates
if (stream_update)
@ -2924,10 +2925,11 @@ static void commit_planes_for_stream(struct dc *dc,
if (dc->hwss.program_front_end_for_ctx)
dc->hwss.program_front_end_for_ctx(dc, context);
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock)
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
dc->hwss.interdependent_update_lock(dc, context, false);
else
} else {
dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false);
}
dc->hwss.post_unlock_program_front_end(dc, context);
return;
}
@ -3052,10 +3054,11 @@ static void commit_planes_for_stream(struct dc *dc,
}
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock)
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
dc->hwss.interdependent_update_lock(dc, context, false);
else
} else {
dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false);
}
if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {


@ -33,6 +33,7 @@
#include "gpio_service_interface.h"
#include "core_status.h"
#include "dc_link_dp.h"
#include "dc_link_dpia.h"
#include "dc_link_ddc.h"
#include "link_hwss.h"
#include "opp.h"
@ -240,7 +241,7 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type)
/* Link may not have physical HPD pin. */
if (link->ep_type != DISPLAY_ENDPOINT_PHY) {
if (link->is_hpd_pending || !link->hpd_status)
if (link->is_hpd_pending || !dc_link_dpia_query_hpd_status(link))
*type = dc_connection_none;
else
*type = dc_connection_single;
@ -1604,8 +1605,25 @@ static bool dc_link_construct_legacy(struct dc_link *link,
if (link->hpd_gpio) {
if (!link->dc->config.allow_edp_hotplug_detection)
link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
link->irq_source_hpd_rx =
dal_irq_get_rx_source(link->hpd_gpio);
switch (link->dc->config.allow_edp_hotplug_detection) {
case 1: // only the 1st eDP handles hotplug
if (link->link_index == 0)
link->irq_source_hpd_rx =
dal_irq_get_rx_source(link->hpd_gpio);
else
link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
break;
case 2: // only the 2nd eDP handles hotplug
if (link->link_index == 1)
link->irq_source_hpd_rx =
dal_irq_get_rx_source(link->hpd_gpio);
else
link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
break;
default:
break;
}
}
break;


@ -2783,31 +2783,37 @@ bool perform_link_training_with_retries(
struct dc_link *link = stream->link;
enum dp_panel_mode panel_mode = dp_get_panel_mode(link);
enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0;
struct dc_link_settings current_setting = *link_setting;
struct dc_link_settings cur_link_settings = *link_setting;
const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res);
int fail_count = 0;
bool is_link_bw_low = false; /* link bandwidth < stream bandwidth */
bool is_link_bw_min = /* RBR x 1 */
(cur_link_settings.link_rate <= LINK_RATE_LOW) &&
(cur_link_settings.lane_count <= LANE_COUNT_ONE);
dp_trace_commit_lt_init(link);
if (dp_get_link_encoding_format(&current_setting) == DP_8b_10b_ENCODING)
if (dp_get_link_encoding_format(&cur_link_settings) == DP_8b_10b_ENCODING)
/* We need to do this before the link training to ensure the idle
* pattern in SST mode will be sent right after the link training
*/
link_hwss->setup_stream_encoder(pipe_ctx);
dp_trace_set_lt_start_timestamp(link, false);
for (j = 0; j < attempts; ++j) {
j = 0;
while (j < attempts && fail_count < (attempts * 10)) {
DC_LOG_HW_LINK_TRAINING("%s: Beginning link training attempt %u of %d\n",
__func__, (unsigned int)j + 1, attempts);
DC_LOG_HW_LINK_TRAINING("%s: Beginning link training attempt %u of %d @ rate(%d) x lane(%d)\n",
__func__, (unsigned int)j + 1, attempts, cur_link_settings.link_rate,
cur_link_settings.lane_count);
dp_enable_link_phy(
link,
&pipe_ctx->link_res,
signal,
pipe_ctx->clock_source->id,
&current_setting);
&cur_link_settings);
if (stream->sink_patches.dppowerup_delay > 0) {
int delay_dp_power_up_in_ms = stream->sink_patches.dppowerup_delay;
@ -2832,30 +2838,30 @@ bool perform_link_training_with_retries(
dp_set_panel_mode(link, panel_mode);
if (link->aux_access_disabled) {
dc_link_dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &current_setting);
dc_link_dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &cur_link_settings);
return true;
} else {
/** @todo Consolidate USB4 DP and DPx.x training. */
if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
status = dc_link_dpia_perform_link_training(link,
&pipe_ctx->link_res,
&current_setting,
&cur_link_settings,
skip_video_pattern);
/* Transmit idle pattern once training successful. */
if (status == LINK_TRAINING_SUCCESS)
if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low)
dp_set_hw_test_pattern(link, &pipe_ctx->link_res, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0);
} else {
status = dc_link_dp_perform_link_training(link,
&pipe_ctx->link_res,
&current_setting,
&cur_link_settings,
skip_video_pattern);
}
dp_trace_lt_total_count_increment(link, false);
dp_trace_lt_result_update(link, status, false);
dp_trace_set_lt_end_timestamp(link, false);
if (status == LINK_TRAINING_SUCCESS)
if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low)
return true;
}
@ -2866,8 +2872,9 @@ bool perform_link_training_with_retries(
if (j == (attempts - 1) && link->ep_type == DISPLAY_ENDPOINT_PHY)
break;
DC_LOG_WARNING("%s: Link training attempt %u of %d failed\n",
__func__, (unsigned int)j + 1, attempts);
DC_LOG_WARNING("%s: Link training attempt %u of %d failed @ rate(%d) x lane(%d)\n",
__func__, (unsigned int)j + 1, attempts, cur_link_settings.link_rate,
cur_link_settings.lane_count);
dp_disable_link_phy(link, &pipe_ctx->link_res, signal);
@ -2876,27 +2883,49 @@ bool perform_link_training_with_retries(
enum dc_connection_type type = dc_connection_none;
dc_link_detect_sink(link, &type);
if (type == dc_connection_none)
if (type == dc_connection_none) {
DC_LOG_HW_LINK_TRAINING("%s: Aborting training because sink unplugged\n", __func__);
break;
} else if (do_fallback) {
}
}
/* Try to train again at original settings if:
* - not falling back between training attempts;
* - aborted previous attempt due to reasons other than sink unplug;
* - successfully trained but at a link rate lower than that required by stream;
* - reached minimum link bandwidth.
*/
if (!do_fallback || (status == LINK_TRAINING_ABORT) ||
(status == LINK_TRAINING_SUCCESS && is_link_bw_low) ||
is_link_bw_min) {
j++;
cur_link_settings = *link_setting;
delay_between_attempts += LINK_TRAINING_RETRY_DELAY;
is_link_bw_low = false;
is_link_bw_min = (cur_link_settings.link_rate <= LINK_RATE_LOW) &&
(cur_link_settings.lane_count <= LANE_COUNT_ONE);
} else if (do_fallback) { /* Try training at lower link bandwidth if doing fallback. */
uint32_t req_bw;
uint32_t link_bw;
decide_fallback_link_setting(link, *link_setting, &current_setting, status);
/* Fail link training if reduced link bandwidth no longer meets
* stream requirements.
decide_fallback_link_setting(link, *link_setting, &cur_link_settings, status);
/* Flag if reduced link bandwidth no longer meets stream requirements or fallen back to
* minimum link bandwidth.
*/
req_bw = dc_bandwidth_in_kbps_from_timing(&stream->timing);
link_bw = dc_link_bandwidth_kbps(link, &current_setting);
if (req_bw > link_bw)
break;
link_bw = dc_link_bandwidth_kbps(link, &cur_link_settings);
is_link_bw_low = (req_bw > link_bw);
is_link_bw_min = ((cur_link_settings.link_rate <= LINK_RATE_LOW) &&
(cur_link_settings.lane_count <= LANE_COUNT_ONE));
if (is_link_bw_low)
DC_LOG_WARNING("%s: Link bandwidth too low after fallback req_bw(%d) > link_bw(%d)\n",
__func__, req_bw, link_bw);
}
msleep(delay_between_attempts);
delay_between_attempts += LINK_TRAINING_RETRY_DELAY;
}
return false;
}
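
The new is_link_bw_low/is_link_bw_min bookkeeping compares the stream's required bandwidth with what the currently trained link can carry, which scales with link rate times lane count. A rough model, assuming DP link-rate codes in 0.27 Gbps units and ignoring the FEC and channel-coding overhead that dc_link_bandwidth_kbps() accounts for in the real check:

/* Illustrative only; the driver uses dc_link_bandwidth_kbps(). */
static bool stream_exceeds_link_bw(unsigned int req_bw_kbps,
                                   unsigned int link_rate,      /* e.g. LINK_RATE_LOW = 0x06 -> 1.62 Gbps */
                                   unsigned int lane_count)
{
        unsigned long long raw_link_bw_kbps =
                (unsigned long long)link_rate * 270000ULL * lane_count;

        return req_bw_kbps > raw_link_bw_kbps;
}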
@ -5097,13 +5126,16 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link)
return true;
}
void dp_retrieve_lttpr_cap(struct dc_link *link)
bool dp_retrieve_lttpr_cap(struct dc_link *link)
{
uint8_t lttpr_dpcd_data[8];
bool allow_lttpr_non_transparent_mode = 0;
bool vbios_lttpr_enable = link->dc->caps.vbios_lttpr_enable;
bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware;
enum dc_status status = DC_ERROR_UNEXPECTED;
bool is_lttpr_present = false;
memset(link->lttpr_dpcd_data, '\0', sizeof(link->lttpr_dpcd_data));
memset(lttpr_dpcd_data, '\0', sizeof(lttpr_dpcd_data));
if ((link->dc->config.allow_lttpr_non_transparent_mode.bits.DP2_0 &&
link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED)) {
@ -5113,116 +5145,82 @@ void dp_retrieve_lttpr_cap(struct dc_link *link)
allow_lttpr_non_transparent_mode = 1;
}
link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
link->lttpr_support = LTTPR_UNSUPPORTED;
/*
* Logic to determine LTTPR support
* Logic to determine LTTPR mode
*/
if (vbios_lttpr_interop)
link->lttpr_support = LTTPR_SUPPORTED;
else if (link->dc->config.allow_lttpr_non_transparent_mode.raw == 0
|| !link->dc->caps.extended_aux_timeout_support)
link->lttpr_support = LTTPR_UNSUPPORTED;
else
link->lttpr_support = LTTPR_CHECK_EXT_SUPPORT;
link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
if (vbios_lttpr_enable && vbios_lttpr_interop)
link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
else if (!vbios_lttpr_enable && vbios_lttpr_interop) {
if (allow_lttpr_non_transparent_mode)
link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
else
link->lttpr_mode = LTTPR_MODE_TRANSPARENT;
} else if (!vbios_lttpr_enable && !vbios_lttpr_interop) {
if (!allow_lttpr_non_transparent_mode || !link->dc->caps.extended_aux_timeout_support)
link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
else
link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
}
#if defined(CONFIG_DRM_AMD_DC_DCN)
/* Check DP tunnel LTTPR mode debug option. */
if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
link->dc->debug.dpia_debug.bits.force_non_lttpr)
link->lttpr_support = LTTPR_UNSUPPORTED;
link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
#endif
if (link->lttpr_support > LTTPR_UNSUPPORTED) {
if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT || link->lttpr_mode == LTTPR_MODE_TRANSPARENT) {
/* By reading LTTPR capability, RX assumes that we will enable
* LTTPR extended aux timeout if LTTPR is present.
*/
status = core_link_read_dpcd(
link,
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV,
link->lttpr_dpcd_data,
sizeof(link->lttpr_dpcd_data));
lttpr_dpcd_data,
sizeof(lttpr_dpcd_data));
link->dpcd_caps.lttpr_caps.revision.raw =
lttpr_dpcd_data[DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
link->dpcd_caps.lttpr_caps.max_link_rate =
lttpr_dpcd_data[DP_MAX_LINK_RATE_PHY_REPEATER -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
link->dpcd_caps.lttpr_caps.phy_repeater_cnt =
lttpr_dpcd_data[DP_PHY_REPEATER_CNT -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
link->dpcd_caps.lttpr_caps.max_lane_count =
lttpr_dpcd_data[DP_MAX_LANE_COUNT_PHY_REPEATER -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
link->dpcd_caps.lttpr_caps.mode =
lttpr_dpcd_data[DP_PHY_REPEATER_MODE -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
link->dpcd_caps.lttpr_caps.max_ext_timeout =
lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
link->dpcd_caps.lttpr_caps.main_link_channel_coding.raw =
lttpr_dpcd_data[DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.raw =
lttpr_dpcd_data[DP_PHY_REPEATER_128B132B_RATES -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
/* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */
is_lttpr_present = (link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
link->dpcd_caps.lttpr_caps.max_lane_count <= 4 &&
link->dpcd_caps.lttpr_caps.revision.raw >= 0x14);
if (is_lttpr_present) {
CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: ");
configure_lttpr_mode_transparent(link);
} else
link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
}
}
bool dp_parse_lttpr_mode(struct dc_link *link)
{
bool dpcd_allow_lttpr_non_transparent_mode = false;
bool is_lttpr_present = false;
bool vbios_lttpr_enable = link->dc->caps.vbios_lttpr_enable;
if ((link->dc->config.allow_lttpr_non_transparent_mode.bits.DP2_0 &&
link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED)) {
dpcd_allow_lttpr_non_transparent_mode = true;
} else if (link->dc->config.allow_lttpr_non_transparent_mode.bits.DP1_4A &&
!link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) {
dpcd_allow_lttpr_non_transparent_mode = true;
}
/*
* Logic to determine LTTPR mode
*/
if (link->lttpr_support == LTTPR_SUPPORTED)
if (vbios_lttpr_enable)
link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
else if (dpcd_allow_lttpr_non_transparent_mode)
link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
else
link->lttpr_mode = LTTPR_MODE_TRANSPARENT;
else // lttpr_support == LTTPR_CHECK_EXT_SUPPORT
if (dpcd_allow_lttpr_non_transparent_mode) {
link->lttpr_support = LTTPR_SUPPORTED;
link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
} else {
link->lttpr_support = LTTPR_UNSUPPORTED;
}
if (link->lttpr_support == LTTPR_UNSUPPORTED)
return false;
link->dpcd_caps.lttpr_caps.revision.raw =
link->lttpr_dpcd_data[DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
link->dpcd_caps.lttpr_caps.max_link_rate =
link->lttpr_dpcd_data[DP_MAX_LINK_RATE_PHY_REPEATER -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
link->dpcd_caps.lttpr_caps.phy_repeater_cnt =
link->lttpr_dpcd_data[DP_PHY_REPEATER_CNT -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
link->dpcd_caps.lttpr_caps.max_lane_count =
link->lttpr_dpcd_data[DP_MAX_LANE_COUNT_PHY_REPEATER -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
link->dpcd_caps.lttpr_caps.mode =
link->lttpr_dpcd_data[DP_PHY_REPEATER_MODE -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
link->dpcd_caps.lttpr_caps.max_ext_timeout =
link->lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
link->dpcd_caps.lttpr_caps.main_link_channel_coding.raw =
link->lttpr_dpcd_data[DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.raw =
link->lttpr_dpcd_data[DP_PHY_REPEATER_128B132B_RATES -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
/* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */
is_lttpr_present = (link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
link->dpcd_caps.lttpr_caps.max_lane_count <= 4 &&
link->dpcd_caps.lttpr_caps.revision.raw >= 0x14);
if (is_lttpr_present) {
CONN_DATA_DETECT(link, link->lttpr_dpcd_data, sizeof(link->lttpr_dpcd_data), "LTTPR Caps: ");
configure_lttpr_mode_transparent(link);
} else
link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
return is_lttpr_present;
}
@ -5374,8 +5372,7 @@ static bool retrieve_link_cap(struct dc_link *link)
status = wa_try_to_wake_dprx(link, timeout_ms);
}
dp_retrieve_lttpr_cap(link);
is_lttpr_present = dp_retrieve_lttpr_cap(link);
/* Read DP tunneling information. */
status = dpcd_get_tunneling_device_data(link);
@ -5411,9 +5408,6 @@ static bool retrieve_link_cap(struct dc_link *link)
return false;
}
if (link->lttpr_support > LTTPR_UNSUPPORTED)
is_lttpr_present = dp_parse_lttpr_mode(link);
if (!is_lttpr_present)
dc_link_aux_try_to_configure_timeout(link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD);


@ -34,6 +34,7 @@
#include "dm_helpers.h"
#include "dmub/inc/dmub_cmd.h"
#include "inc/link_dpcd.h"
#include "dc_dmub_srv.h"
#define DC_LOGGER \
link->ctx->logger
@ -69,6 +70,24 @@ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link)
return status;
}
bool dc_link_dpia_query_hpd_status(struct dc_link *link)
{
union dmub_rb_cmd cmd = {0};
struct dc_dmub_srv *dmub_srv = link->ctx->dmub_srv;
bool is_hpd_high = false;
/* prepare QUERY_HPD command */
cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE;
cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1;
cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA;
/* Return HPD status reported by DMUB if query successfully executed. */
if (dc_dmub_srv_cmd_with_reply_data(dmub_srv, &cmd) && cmd.query_hpd.data.status == AUX_RET_SUCCESS)
is_hpd_high = cmd.query_hpd.data.result;
return is_hpd_high;
}
/* Configure link as prescribed in link_setting; set LTTPR mode; and
* Initialize link training settings.
* Abort link training if sink unplug detected.


@ -61,6 +61,8 @@ static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *pl
plane_state->blend_tf->type = TF_TYPE_BYPASS;
}
plane_state->pre_multiplied_alpha = true;
}
static void dc_plane_destruct(struct dc_plane_state *plane_state)


@ -47,7 +47,7 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;
#define DC_VER "3.2.185"
#define DC_VER "3.2.186"
#define MAX_SURFACES 3
#define MAX_PLANES 6
@ -329,7 +329,7 @@ struct dc_config {
bool disable_dmcu;
bool enable_4to1MPC;
bool enable_windowed_mpo_odm;
bool allow_edp_hotplug_detection;
uint32_t allow_edp_hotplug_detection;
bool clamp_min_dcfclk;
uint64_t vblank_alignment_dto_params;
uint8_t vblank_alignment_max_frame_time_diff;
@ -1011,6 +1011,7 @@ struct dc_plane_state {
bool is_tiling_rotated;
bool per_pixel_alpha;
bool pre_multiplied_alpha;
bool global_alpha;
int global_alpha_value;
bool visible;
@ -1045,6 +1046,7 @@ struct dc_plane_info {
bool horizontal_mirror;
bool visible;
bool per_pixel_alpha;
bool pre_multiplied_alpha;
bool global_alpha;
int global_alpha_value;
bool input_csc_enabled;


@ -129,8 +129,6 @@ struct dc_link {
bool link_state_valid;
bool aux_access_disabled;
bool sync_lt_in_progress;
uint8_t lttpr_dpcd_data[8];
enum lttpr_support lttpr_support;
enum lttpr_mode lttpr_mode;
bool is_internal_display;


@ -87,7 +87,8 @@ static void release_engine(
engine->ddc = NULL;
REG_UPDATE(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, 1);
REG_UPDATE_2(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, 1,
AUX_SW_USE_AUX_REG_REQ, 0);
}
#define SW_CAN_ACCESS_AUX 1


@ -1101,9 +1101,12 @@ static bool get_pixel_clk_frequency_100hz(
* not be programmed equal to DPREFCLK
*/
modulo_hz = REG_READ(MODULO[inst]);
*pixel_clk_khz = div_u64((uint64_t)clock_hz*
clock_source->ctx->dc->clk_mgr->dprefclk_khz*10,
modulo_hz);
if (modulo_hz)
*pixel_clk_khz = div_u64((uint64_t)clock_hz*
clock_source->ctx->dc->clk_mgr->dprefclk_khz*10,
modulo_hz);
else
*pixel_clk_khz = 0;
} else {
/* NOTE: There is agreement with VBIOS here that MODULO is
* programmed equal to DPREFCLK, in which case PHASE will be
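
The hunk above only adds a guard for MODULO reading back as zero (for example a DTO that was never programmed), which previously caused a divide-by-zero in the readback path. A minimal kernel-style sketch of the guarded calculation, using the same formula; a model only, not driver code:

#include <linux/math64.h>

static u32 dto_pixel_clk_khz(u64 clock_hz, u32 dprefclk_khz, u32 modulo_hz)
{
        if (!modulo_hz)
                return 0;       /* unprogrammed clock source: report 0 instead of faulting */

        return div_u64(clock_hz * dprefclk_khz * 10, modulo_hz);
}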


@ -2550,12 +2550,21 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
blnd_cfg.overlap_only = false;
blnd_cfg.global_gain = 0xff;
if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) {
blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
} else if (per_pixel_alpha) {
blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
if (per_pixel_alpha) {
/* DCN1.0 has output CM before MPC which seems to screw with
* pre-multiplied alpha.
*/
blnd_cfg.pre_multiplied_alpha = (is_rgb_cspace(
pipe_ctx->stream->output_color_space)
&& pipe_ctx->plane_state->pre_multiplied_alpha);
if (pipe_ctx->plane_state->global_alpha) {
blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
} else {
blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
}
} else {
blnd_cfg.pre_multiplied_alpha = false;
blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
}
@ -2564,14 +2573,6 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
else
blnd_cfg.global_alpha = 0xff;
/* DCN1.0 has output CM before MPC which seems to screw with
* pre-multiplied alpha.
*/
blnd_cfg.pre_multiplied_alpha = is_rgb_cspace(
pipe_ctx->stream->output_color_space)
&& per_pixel_alpha;
/*
* TODO: remove hack
* Note: currently there is a bug in init_hw such that


@ -1773,7 +1773,6 @@ void dcn20_post_unlock_program_front_end(
*/
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable) {
struct hubp *hubp = pipe->plane_res.hubp;
int j = 0;
@ -2346,12 +2345,16 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
blnd_cfg.overlap_only = false;
blnd_cfg.global_gain = 0xff;
if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) {
blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
} else if (per_pixel_alpha) {
blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
if (per_pixel_alpha) {
blnd_cfg.pre_multiplied_alpha = pipe_ctx->plane_state->pre_multiplied_alpha;
if (pipe_ctx->plane_state->global_alpha) {
blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
} else {
blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
}
} else {
blnd_cfg.pre_multiplied_alpha = false;
blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
}
@@ -2365,7 +2368,7 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
blnd_cfg.top_gain = 0x1f000;
blnd_cfg.bottom_inside_gain = 0x1f000;
blnd_cfg.bottom_outside_gain = 0x1f000;
blnd_cfg.pre_multiplied_alpha = per_pixel_alpha;
if (pipe_ctx->plane_state->format
== SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA)
blnd_cfg.pre_multiplied_alpha = false;


@@ -28,6 +28,8 @@
#include "dc.h"
#include "dcn_calc_math.h"
#include "dml/dcn30/dcn30_fpu.h"
#define REG(reg)\
optc1->tg_regs->reg
@@ -184,6 +186,14 @@ void optc3_set_dsc_config(struct timing_generator *optc,
}
void optc3_set_vrr_m_const(struct timing_generator *optc,
double vtotal_avg)
{
DC_FP_START();
optc3_fpu_set_vrr_m_const(optc, vtotal_avg);
DC_FP_END();
}
void optc3_set_odm_bypass(struct timing_generator *optc,
const struct dc_crtc_timing *dc_crtc_timing)
{

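optc3_set_vrr_m_const() is now just a DC_FP_START()/DC_FP_END()-bracketed call into the new dml/dcn30/dcn30_fpu.c, where the body asserts dc_assert_fp_enabled(); the same split is applied to the other DCN3.0 helpers below. A small user-space model of the pattern; DC_FP_START/END and dc_assert_fp_enabled() are the driver's real helpers, everything prefixed demo_ or fp_ is a placeholder:

/* User-space model of the split; in the driver DC_FP_START/END wrap
 * kernel_fpu_begin()/kernel_fpu_end(). */
#include <assert.h>
#include <stdbool.h>

static bool fp_protected;                      /* models the kernel FPU context */
#define DC_FP_START()  (fp_protected = true)
#define DC_FP_END()    (fp_protected = false)
#define dc_assert_fp_enabled() assert(fp_protected)

/* dml/dcn30/dcn30_fpu.c side: all double-precision math lives here */
static double demo_fpu_average(double a, double b)
{
	dc_assert_fp_enabled();                /* catches callers missing DC_FP_START */
	return (a + b) / 2.0;
}

/* resource/optc side: thin wrapper, no FP arithmetic of its own */
double demo_average(double a, double b)
{
	double ret;

	DC_FP_START();
	ret = demo_fpu_average(a, b);
	DC_FP_END();
	return ret;
}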

@@ -84,6 +84,7 @@
#include "dce/dce_aux.h"
#include "dce/dce_i2c.h"
#include "dml/dcn30/dcn30_fpu.h"
#include "dml/dcn30/display_mode_vba_30.h"
#include "vm_helper.h"
#include "dcn20/dcn20_vmid.h"
@@ -91,137 +92,6 @@
#define DC_LOGGER_INIT(logger)
struct _vcs_dpi_ip_params_st dcn3_0_ip = {
.use_min_dcfclk = 0,
.clamp_min_dcfclk = 0,
.odm_capable = 1,
.gpuvm_enable = 0,
.hostvm_enable = 0,
.gpuvm_max_page_table_levels = 4,
.hostvm_max_page_table_levels = 4,
.hostvm_cached_page_table_levels = 0,
.pte_group_size_bytes = 2048,
.num_dsc = 6,
.rob_buffer_size_kbytes = 184,
.det_buffer_size_kbytes = 184,
.dpte_buffer_size_in_pte_reqs_luma = 84,
.pde_proc_buffer_size_64k_reqs = 48,
.dpp_output_buffer_pixels = 2560,
.opp_output_buffer_lines = 1,
.pixel_chunk_size_kbytes = 8,
.pte_enable = 1,
.max_page_table_levels = 2,
.pte_chunk_size_kbytes = 2, // ?
.meta_chunk_size_kbytes = 2,
.writeback_chunk_size_kbytes = 8,
.line_buffer_size_bits = 789504,
.is_line_buffer_bpp_fixed = 0, // ?
.line_buffer_fixed_bpp = 0, // ?
.dcc_supported = true,
.writeback_interface_buffer_size_kbytes = 90,
.writeback_line_buffer_buffer_size = 0,
.max_line_buffer_lines = 12,
.writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640
.writeback_chroma_buffer_size_kbytes = 8,
.writeback_chroma_line_buffer_width_pixels = 4,
.writeback_max_hscl_ratio = 1,
.writeback_max_vscl_ratio = 1,
.writeback_min_hscl_ratio = 1,
.writeback_min_vscl_ratio = 1,
.writeback_max_hscl_taps = 1,
.writeback_max_vscl_taps = 1,
.writeback_line_buffer_luma_buffer_size = 0,
.writeback_line_buffer_chroma_buffer_size = 14643,
.cursor_buffer_size = 8,
.cursor_chunk_size = 2,
.max_num_otg = 6,
.max_num_dpp = 6,
.max_num_wb = 1,
.max_dchub_pscl_bw_pix_per_clk = 4,
.max_pscl_lb_bw_pix_per_clk = 2,
.max_lb_vscl_bw_pix_per_clk = 4,
.max_vscl_hscl_bw_pix_per_clk = 4,
.max_hscl_ratio = 6,
.max_vscl_ratio = 6,
.hscl_mults = 4,
.vscl_mults = 4,
.max_hscl_taps = 8,
.max_vscl_taps = 8,
.dispclk_ramp_margin_percent = 1,
.underscan_factor = 1.11,
.min_vblank_lines = 32,
.dppclk_delay_subtotal = 46,
.dynamic_metadata_vm_enabled = true,
.dppclk_delay_scl_lb_only = 16,
.dppclk_delay_scl = 50,
.dppclk_delay_cnvc_formatter = 27,
.dppclk_delay_cnvc_cursor = 6,
.dispclk_delay_subtotal = 119,
.dcfclk_cstate_latency = 5.2, // SRExitTime
.max_inter_dcn_tile_repeaters = 8,
.odm_combine_4to1_supported = true,
.xfc_supported = false,
.xfc_fill_bw_overhead_percent = 10.0,
.xfc_fill_constant_bytes = 0,
.gfx7_compat_tiling_supported = 0,
.number_of_cursors = 1,
};
struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = {
.clock_limits = {
{
.state = 0,
.dispclk_mhz = 562.0,
.dppclk_mhz = 300.0,
.phyclk_mhz = 300.0,
.phyclk_d18_mhz = 667.0,
.dscclk_mhz = 405.6,
},
},
.min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
.num_states = 1,
.sr_exit_time_us = 15.5,
.sr_enter_plus_exit_time_us = 20,
.urgent_latency_us = 4.0,
.urgent_latency_pixel_data_only_us = 4.0,
.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
.urgent_latency_vm_data_only_us = 4.0,
.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
.pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
.max_avg_sdp_bw_use_normal_percent = 60.0,
.max_avg_dram_bw_use_normal_percent = 40.0,
.writeback_latency_us = 12.0,
.max_request_size_bytes = 256,
.fabric_datapath_to_dcn_data_return_bytes = 64,
.dcn_downspread_percent = 0.5,
.downspread_percent = 0.38,
.dram_page_open_time_ns = 50.0,
.dram_rw_turnaround_time_ns = 17.5,
.dram_return_buffer_per_channel_bytes = 8192,
.round_trip_ping_latency_dcfclk_cycles = 191,
.urgent_out_of_order_return_per_channel_bytes = 4096,
.channel_interleave_bytes = 256,
.num_banks = 8,
.gpuvm_min_page_size_bytes = 4096,
.hostvm_min_page_size_bytes = 4096,
.dram_clock_change_latency_us = 404,
.dummy_pstate_latency_us = 5,
.writeback_dram_clock_change_latency_us = 23.0,
.return_bus_width_bytes = 64,
.dispclk_dppclk_vco_speed_mhz = 3650,
.xfc_bus_transport_time_us = 20, // ?
.xfc_xbuf_latency_tolerance_us = 4, // ?
.use_urgent_burst_bw = 1, // ?
.do_urgent_latency_adjustment = true,
.urgent_latency_adjustment_fabric_clock_component_us = 1.0,
.urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
};
enum dcn30_clk_src_array_id {
DCN30_CLK_SRC_PLL0,
DCN30_CLK_SRC_PLL1,
@@ -1480,90 +1350,9 @@ int dcn30_populate_dml_pipes_from_context(
void dcn30_populate_dml_writeback_from_context(
struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
{
int pipe_cnt, i, j;
double max_calc_writeback_dispclk;
double writeback_dispclk;
struct writeback_st dout_wb;
for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream;
if (!stream)
continue;
max_calc_writeback_dispclk = 0;
/* Set writeback information */
pipes[pipe_cnt].dout.wb_enable = 0;
pipes[pipe_cnt].dout.num_active_wb = 0;
for (j = 0; j < stream->num_wb_info; j++) {
struct dc_writeback_info *wb_info = &stream->writeback_info[j];
if (wb_info->wb_enabled && wb_info->writeback_source_plane &&
(wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) {
pipes[pipe_cnt].dout.wb_enable = 1;
pipes[pipe_cnt].dout.num_active_wb++;
dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ?
wb_info->dwb_params.cnv_params.crop_height :
wb_info->dwb_params.cnv_params.src_height;
dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ?
wb_info->dwb_params.cnv_params.crop_width :
wb_info->dwb_params.cnv_params.src_width;
dout_wb.wb_dst_width = wb_info->dwb_params.dest_width;
dout_wb.wb_dst_height = wb_info->dwb_params.dest_height;
/* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */
if (dc->dml.ip.writeback_max_hscl_taps > 1) {
dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps;
dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps;
} else {
dout_wb.wb_htaps_luma = 1;
dout_wb.wb_vtaps_luma = 1;
}
dout_wb.wb_htaps_chroma = 0;
dout_wb.wb_vtaps_chroma = 0;
dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ?
(double)wb_info->dwb_params.cnv_params.crop_width /
(double)wb_info->dwb_params.dest_width :
(double)wb_info->dwb_params.cnv_params.src_width /
(double)wb_info->dwb_params.dest_width;
dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ?
(double)wb_info->dwb_params.cnv_params.crop_height /
(double)wb_info->dwb_params.dest_height :
(double)wb_info->dwb_params.cnv_params.src_height /
(double)wb_info->dwb_params.dest_height;
if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB ||
wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA)
dout_wb.wb_pixel_format = dm_444_64;
else
dout_wb.wb_pixel_format = dm_444_32;
/* Workaround for cases where multiple writebacks are connected to the same plane,
* in which case we need to compute the worst case and set the associated writeback parameters.
* This workaround is necessary due to DML computation assuming only 1 set of writeback
* parameters per pipe
*/
writeback_dispclk = dml30_CalculateWriteBackDISPCLK(
dout_wb.wb_pixel_format,
pipes[pipe_cnt].pipe.dest.pixel_rate_mhz,
dout_wb.wb_hratio,
dout_wb.wb_vratio,
dout_wb.wb_htaps_luma,
dout_wb.wb_vtaps_luma,
dout_wb.wb_src_width,
dout_wb.wb_dst_width,
pipes[pipe_cnt].pipe.dest.htotal,
dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size);
if (writeback_dispclk > max_calc_writeback_dispclk) {
max_calc_writeback_dispclk = writeback_dispclk;
pipes[pipe_cnt].dout.wb = dout_wb;
}
}
}
pipe_cnt++;
}
DC_FP_START();
dcn30_fpu_populate_dml_writeback_from_context(dc, res_ctx, pipes);
DC_FP_END();
}
unsigned int dcn30_calc_max_scaled_time(
@@ -1598,7 +1387,7 @@ void dcn30_set_mcif_arb_params(
enum mmhubbub_wbif_mode wbif_mode;
struct display_mode_lib *dml = &context->bw_ctx.dml;
struct mcif_arb_params *wb_arb_params;
int i, j, k, dwb_pipe;
int i, j, dwb_pipe;
/* Writeback MCIF_WB arbitration parameters */
dwb_pipe = 0;
@@ -1622,17 +1411,15 @@ void dcn30_set_mcif_arb_params(
else
wbif_mode = PACKED_444;
for (k = 0; k < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); k++) {
wb_arb_params->cli_watermark[k] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000;
wb_arb_params->pstate_watermark[k] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
}
DC_FP_START();
dcn30_fpu_set_mcif_arb_params(wb_arb_params, dml, pipes, pipe_cnt, j);
DC_FP_END();
wb_arb_params->time_per_pixel = (1000000 << 6) / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk; /* time_per_pixel should be in u6.6 format */
wb_arb_params->slice_lines = 32;
wb_arb_params->arbitration_slice = 2; /* irrelevant since there is no YUV output */
wb_arb_params->max_scaled_time = dcn30_calc_max_scaled_time(wb_arb_params->time_per_pixel,
wbif_mode,
wb_arb_params->cli_watermark[0]); /* assume 4 watermark sets have the same value */
wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[j] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */
dwb_pipe++;
@@ -2111,178 +1898,11 @@ validate_out:
return out;
}
/*
* This must be noinline to ensure anything that deals with FP registers
* is contained within this call; previously our compiling with hard-float
* would result in fp instructions being emitted outside of the boundaries
* of the DC_FP_START/END macros, which makes sense as the compiler has no
* idea about what is wrapped and what is not
*
* This is largely just a workaround to avoid breakage introduced with 5.6,
* ideally all fp-using code should be moved into its own file, only that
* should be compiled with hard-float, and all code exported from there
* should be strictly wrapped with DC_FP_START/END
*/
static noinline void dcn30_calculate_wm_and_dlg_fp(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
int vlevel)
{
int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
int i, pipe_idx;
double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb];
bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;
if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
pipes[0].clks_cfg.voltage = vlevel;
pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
/* Set B:
* DCFCLK: 1GHz or min required above 1GHz
* FCLK/UCLK: Max
*/
if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
if (vlevel == 0) {
pipes[0].clks_cfg.voltage = 1;
pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz;
}
context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
}
context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
pipes[0].clks_cfg.voltage = vlevel;
pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
/* Set D:
* DCFCLK: Min Required
* FCLK(proportional to UCLK): 1GHz or Max
* MALL stutter, sr_enter_exit = 4, sr_exit = 2us
*/
/*
if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us;
context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us;
context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us;
}
context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
*/
/* Set C:
* DCFCLK: Min Required
* FCLK(proportional to UCLK): 1GHz or Max
* pstate latency overridden to 5us
*/
if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
unsigned int min_dram_speed_mts_margin = 160;
if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported)
min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16;
/* find the largest table entry that is lower than the DRAM speed; anything lower than DPM0 still uses DPM0 */
for (i = 3; i > 0; i--)
if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts)
break;
context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us;
context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
}
context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
if (!pstate_en) {
/* The only difference between A and C is p-state latency, if p-state is not supported we want to
* calculate DLG based on dummy p-state latency, and max out the set A p-state watermark
*/
context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c;
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0;
} else {
/* Set A:
* DCFCLK: Min Required
* FCLK(proportional to UCLK): 1GHz or Max
*
* Set A calculated last so that following calculations are based on Set A
*/
dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
}
context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;
/* Make set D = set A until set D is enabled */
context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
if (!context->res_ctx.pipe_ctx[i].stream)
continue;
pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
if (dc->config.forced_clocks) {
pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
}
if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
pipe_idx++;
}
DC_FP_START();
dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
DC_FP_END();
if (!pstate_en)
/* Restore full p-state latency */
context->bw_ctx.dml.soc.dram_clock_change_latency_us =
dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
}
void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
{
if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) {
context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;
context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;
}
DC_FP_START();
dcn30_fpu_update_soc_for_wm_a(dc, context);
DC_FP_END();
}
void dcn30_calculate_wm_and_dlg(
@@ -2292,7 +1912,7 @@ void dcn30_calculate_wm_and_dlg(
int vlevel)
{
DC_FP_START();
dcn30_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel);
dcn30_fpu_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel);
DC_FP_END();
}
@@ -2351,40 +1971,6 @@ validate_out:
return out;
}
/*
* This must be noinline to ensure anything that deals with FP registers
* is contained within this call; previously our compiling with hard-float
* would result in fp instructions being emitted outside of the boundaries
* of the DC_FP_START/END macros, which makes sense as the compiler has no
* idea about what is wrapped and what is not
*
* This is largely just a workaround to avoid breakage introduced with 5.6,
* ideally all fp-using code should be moved into its own file, only that
* should be compiled with hard-float, and all code exported from there
* should be strictly wrapped with DC_FP_START/END
*/
static noinline void dcn30_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
unsigned int *optimal_dcfclk,
unsigned int *optimal_fclk)
{
double bw_from_dram, bw_from_dram1, bw_from_dram2;
bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans *
dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100);
bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans *
dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100);
bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
if (optimal_fclk)
*optimal_fclk = bw_from_dram /
(dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
if (optimal_dcfclk)
*optimal_dcfclk = bw_from_dram /
(dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
}
void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
{
unsigned int i, j;
@@ -2399,47 +1985,43 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
unsigned int num_dcfclk_sta_targets = 4;
unsigned int num_uclk_states;
struct dc_bounding_box_max_clk dcn30_bb_max_clk;
memset(&dcn30_bb_max_clk, 0, sizeof(dcn30_bb_max_clk));
if (dc->ctx->dc_bios->vram_info.num_chans)
dcn3_0_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
DC_FP_START();
dcn30_fpu_update_dram_channel_width_bytes(dc);
DC_FP_END();
if (bw_params->clk_table.entries[0].memclk_mhz) {
int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
if (bw_params->clk_table.entries[i].dcfclk_mhz > dcn30_bb_max_clk.max_dcfclk_mhz)
dcn30_bb_max_clk.max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
if (bw_params->clk_table.entries[i].dispclk_mhz > dcn30_bb_max_clk.max_dispclk_mhz)
dcn30_bb_max_clk.max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
if (bw_params->clk_table.entries[i].dppclk_mhz > dcn30_bb_max_clk.max_dppclk_mhz)
dcn30_bb_max_clk.max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
if (bw_params->clk_table.entries[i].phyclk_mhz > dcn30_bb_max_clk.max_phyclk_mhz)
dcn30_bb_max_clk.max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
}
if (!max_dcfclk_mhz)
max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz;
if (!max_dispclk_mhz)
max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz;
if (!max_dppclk_mhz)
max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz;
if (!max_phyclk_mhz)
max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz;
DC_FP_START();
dcn30_fpu_update_max_clk(&dcn30_bb_max_clk);
DC_FP_END();
if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
if (dcn30_bb_max_clk.max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
// If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array
dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
dcfclk_sta_targets[num_dcfclk_sta_targets] = dcn30_bb_max_clk.max_dcfclk_mhz;
num_dcfclk_sta_targets++;
} else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
} else if (dcn30_bb_max_clk.max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
// If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates
for (i = 0; i < num_dcfclk_sta_targets; i++) {
if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
dcfclk_sta_targets[i] = max_dcfclk_mhz;
if (dcfclk_sta_targets[i] > dcn30_bb_max_clk.max_dcfclk_mhz) {
dcfclk_sta_targets[i] = dcn30_bb_max_clk.max_dcfclk_mhz;
break;
}
}
@@ -2452,7 +2034,7 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
// Calculate optimal dcfclk for each uclk
for (i = 0; i < num_uclk_states; i++) {
DC_FP_START();
dcn30_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
&optimal_dcfclk_for_uclk[i], NULL);
DC_FP_END();
if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) {
@@ -2479,7 +2061,7 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
} else {
if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) {
dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
} else {
@@ -2494,33 +2076,15 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
}
while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) {
dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
}
dcn3_0_soc.num_states = num_states;
for (i = 0; i < dcn3_0_soc.num_states; i++) {
dcn3_0_soc.clock_limits[i].state = i;
dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
/* Fill all states with max values of all other clocks */
dcn3_0_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
dcn3_0_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
dcn3_0_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz;
/* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */
/* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz;
dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz;
dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz;
}
/* re-init DML with updated bb */
dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
if (dc->current_state)
dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
DC_FP_START();
dcn30_fpu_update_bw_bounding_box(dc, bw_params, &dcn30_bb_max_clk, dcfclk_mhz, dram_speed_mts);
DC_FP_END();
}
}
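The arithmetic that moved behind dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk() takes the smaller of the DRAM- and SDP-derated memory bandwidths and divides it by the (SDP-derated) return-bus or fabric width to get the DCFCLK/FCLK needed to keep up with a given UCLK. A stand-alone example using the dcn3_0_soc percentages and bus widths; the channel count, channel width and memory clock are assumptions picked purely for illustration:

#include <stdio.h>

int main(void)
{
	/* soc constants as set in dcn3_0_soc by this patch */
	const double dram_bw_pct = 40.0, sdp_bw_pct = 60.0;
	const double return_bus_bytes = 64.0, fabric_bytes = 64.0;
	/* example board values (assumptions): 16 channels, 2 bytes wide,
	 * memclk 100 MHz -> uclk 1600 MT/s */
	const double uclk_mts = 1600.0;
	const double num_chans = 16.0, chan_width_bytes = 2.0;

	double bw1 = uclk_mts * num_chans * chan_width_bytes * dram_bw_pct / 100.0;
	double bw2 = uclk_mts * num_chans * chan_width_bytes * sdp_bw_pct / 100.0;
	double bw  = bw1 < bw2 ? bw1 : bw2;              /* usable bandwidth, MB/s */

	/* dividing MB/s by bytes-per-clock yields MHz */
	printf("optimal fclk   ~ %.0f MHz\n", bw / (fabric_bytes * sdp_bw_pct / 100.0));
	printf("optimal dcfclk ~ %.0f MHz\n", bw / (return_bus_bytes * sdp_bw_pct / 100.0));
	return 0;
}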


@@ -35,6 +35,9 @@ struct dc;
struct resource_pool;
struct _vcs_dpi_display_pipe_params_st;
extern struct _vcs_dpi_ip_params_st dcn3_0_ip;
extern struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc;
struct dcn30_resource_pool {
struct resource_pool base;
};
@@ -96,4 +99,6 @@ enum dc_status dcn30_add_stream_to_ctx(
void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);
#endif /* _DCN30_RESOURCE_H_ */


@@ -81,6 +81,8 @@
#include "dce/dce_aux.h"
#include "dce/dce_i2c.h"
#include "dml/dcn30/dcn30_fpu.h"
#include "dml/dcn30/display_mode_vba_30.h"
#include "dml/dcn301/dcn301_fpu.h"
#include "vm_helper.h"


@@ -43,6 +43,8 @@
#include "dcn20/dcn20_dsc.h"
#include "dcn20/dcn20_resource.h"
#include "dml/dcn30/dcn30_fpu.h"
#include "dcn10/dcn10_resource.h"
#include "dce/dce_abm.h"


@@ -25,6 +25,8 @@
#include "dcn20/dcn20_dsc.h"
#include "dcn20/dcn20_resource.h"
#include "dml/dcn30/dcn30_fpu.h"
#include "dcn10/dcn10_resource.h"
#include "dc_link_ddc.h"


@@ -36,6 +36,8 @@
#include "dcn20/dcn20_resource.h"
#include "dcn30/dcn30_resource.h"
#include "dml/dcn30/dcn30_fpu.h"
#include "dcn10/dcn10_ipp.h"
#include "dcn30/dcn30_hubbub.h"
#include "dcn31/dcn31_hubbub.h"


@@ -71,6 +71,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags)
@@ -113,7 +114,7 @@ DML += dcn20/dcn20_fpu.o
DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o
DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o
DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o
DML += dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o
DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o
DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o
DML += dcn31/dcn31_fpu.o
DML += dcn301/dcn301_fpu.o


@@ -0,0 +1,617 @@
/*
* Copyright 2020-2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#include "resource.h"
#include "clk_mgr.h"
#include "reg_helper.h"
#include "dcn_calc_math.h"
#include "dcn20/dcn20_resource.h"
#include "dcn30/dcn30_resource.h"
#include "display_mode_vba_30.h"
#include "dcn30_fpu.h"
#define REG(reg)\
optc1->tg_regs->reg
#define CTX \
optc1->base.ctx
#undef FN
#define FN(reg_name, field_name) \
optc1->tg_shift->field_name, optc1->tg_mask->field_name
struct _vcs_dpi_ip_params_st dcn3_0_ip = {
.use_min_dcfclk = 0,
.clamp_min_dcfclk = 0,
.odm_capable = 1,
.gpuvm_enable = 0,
.hostvm_enable = 0,
.gpuvm_max_page_table_levels = 4,
.hostvm_max_page_table_levels = 4,
.hostvm_cached_page_table_levels = 0,
.pte_group_size_bytes = 2048,
.num_dsc = 6,
.rob_buffer_size_kbytes = 184,
.det_buffer_size_kbytes = 184,
.dpte_buffer_size_in_pte_reqs_luma = 84,
.pde_proc_buffer_size_64k_reqs = 48,
.dpp_output_buffer_pixels = 2560,
.opp_output_buffer_lines = 1,
.pixel_chunk_size_kbytes = 8,
.pte_enable = 1,
.max_page_table_levels = 2,
.pte_chunk_size_kbytes = 2, // ?
.meta_chunk_size_kbytes = 2,
.writeback_chunk_size_kbytes = 8,
.line_buffer_size_bits = 789504,
.is_line_buffer_bpp_fixed = 0, // ?
.line_buffer_fixed_bpp = 0, // ?
.dcc_supported = true,
.writeback_interface_buffer_size_kbytes = 90,
.writeback_line_buffer_buffer_size = 0,
.max_line_buffer_lines = 12,
.writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640
.writeback_chroma_buffer_size_kbytes = 8,
.writeback_chroma_line_buffer_width_pixels = 4,
.writeback_max_hscl_ratio = 1,
.writeback_max_vscl_ratio = 1,
.writeback_min_hscl_ratio = 1,
.writeback_min_vscl_ratio = 1,
.writeback_max_hscl_taps = 1,
.writeback_max_vscl_taps = 1,
.writeback_line_buffer_luma_buffer_size = 0,
.writeback_line_buffer_chroma_buffer_size = 14643,
.cursor_buffer_size = 8,
.cursor_chunk_size = 2,
.max_num_otg = 6,
.max_num_dpp = 6,
.max_num_wb = 1,
.max_dchub_pscl_bw_pix_per_clk = 4,
.max_pscl_lb_bw_pix_per_clk = 2,
.max_lb_vscl_bw_pix_per_clk = 4,
.max_vscl_hscl_bw_pix_per_clk = 4,
.max_hscl_ratio = 6,
.max_vscl_ratio = 6,
.hscl_mults = 4,
.vscl_mults = 4,
.max_hscl_taps = 8,
.max_vscl_taps = 8,
.dispclk_ramp_margin_percent = 1,
.underscan_factor = 1.11,
.min_vblank_lines = 32,
.dppclk_delay_subtotal = 46,
.dynamic_metadata_vm_enabled = true,
.dppclk_delay_scl_lb_only = 16,
.dppclk_delay_scl = 50,
.dppclk_delay_cnvc_formatter = 27,
.dppclk_delay_cnvc_cursor = 6,
.dispclk_delay_subtotal = 119,
.dcfclk_cstate_latency = 5.2, // SRExitTime
.max_inter_dcn_tile_repeaters = 8,
.max_num_hdmi_frl_outputs = 1,
.odm_combine_4to1_supported = true,
.xfc_supported = false,
.xfc_fill_bw_overhead_percent = 10.0,
.xfc_fill_constant_bytes = 0,
.gfx7_compat_tiling_supported = 0,
.number_of_cursors = 1,
};
struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = {
.clock_limits = {
{
.state = 0,
.dispclk_mhz = 562.0,
.dppclk_mhz = 300.0,
.phyclk_mhz = 300.0,
.phyclk_d18_mhz = 667.0,
.dscclk_mhz = 405.6,
},
},
.min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
.num_states = 1,
.sr_exit_time_us = 15.5,
.sr_enter_plus_exit_time_us = 20,
.urgent_latency_us = 4.0,
.urgent_latency_pixel_data_only_us = 4.0,
.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
.urgent_latency_vm_data_only_us = 4.0,
.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
.pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
.max_avg_sdp_bw_use_normal_percent = 60.0,
.max_avg_dram_bw_use_normal_percent = 40.0,
.writeback_latency_us = 12.0,
.max_request_size_bytes = 256,
.fabric_datapath_to_dcn_data_return_bytes = 64,
.dcn_downspread_percent = 0.5,
.downspread_percent = 0.38,
.dram_page_open_time_ns = 50.0,
.dram_rw_turnaround_time_ns = 17.5,
.dram_return_buffer_per_channel_bytes = 8192,
.round_trip_ping_latency_dcfclk_cycles = 191,
.urgent_out_of_order_return_per_channel_bytes = 4096,
.channel_interleave_bytes = 256,
.num_banks = 8,
.gpuvm_min_page_size_bytes = 4096,
.hostvm_min_page_size_bytes = 4096,
.dram_clock_change_latency_us = 404,
.dummy_pstate_latency_us = 5,
.writeback_dram_clock_change_latency_us = 23.0,
.return_bus_width_bytes = 64,
.dispclk_dppclk_vco_speed_mhz = 3650,
.xfc_bus_transport_time_us = 20, // ?
.xfc_xbuf_latency_tolerance_us = 4, // ?
.use_urgent_burst_bw = 1, // ?
.do_urgent_latency_adjustment = true,
.urgent_latency_adjustment_fabric_clock_component_us = 1.0,
.urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
};
void optc3_fpu_set_vrr_m_const(struct timing_generator *optc,
double vtotal_avg)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
double vtotal_min, vtotal_max;
double ratio, modulo, phase;
uint32_t vblank_start;
uint32_t v_total_mask_value = 0;
dc_assert_fp_enabled();
/* Compute VTOTAL_MIN and VTOTAL_MAX, so that
* VTOTAL_MAX - VTOTAL_MIN = 1
*/
v_total_mask_value = 16;
vtotal_min = dcn_bw_floor(vtotal_avg);
vtotal_max = dcn_bw_ceil(vtotal_avg);
/* Check that bottom VBLANK is at least 2 lines tall when running with
* VTOTAL_MIN. Note that VTOTAL registers are defined as 'total number
* of lines in a frame - 1'.
*/
REG_GET(OTG_V_BLANK_START_END, OTG_V_BLANK_START,
&vblank_start);
ASSERT(vtotal_min >= vblank_start + 1);
/* Special case where the average frame rate can be achieved
* without using the DTO
*/
if (vtotal_min == vtotal_max) {
REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min);
optc->funcs->set_vtotal_min_max(optc, 0, 0);
REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0);
REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0);
REG_UPDATE_3(OTG_V_TOTAL_CONTROL,
OTG_V_TOTAL_MIN_SEL, 0,
OTG_V_TOTAL_MAX_SEL, 0,
OTG_SET_V_TOTAL_MIN_MASK_EN, 0);
return;
}
ratio = vtotal_max - vtotal_avg;
modulo = 65536.0 * 65536.0 - 1.0; /* 2^32 - 1 */
phase = ratio * modulo;
/* Special cases where the DTO phase gets rounded to 0 or
* to DTO modulo
*/
if (phase <= 0 || phase >= modulo) {
REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL,
phase <= 0 ?
(uint32_t)vtotal_max : (uint32_t)vtotal_min);
REG_SET(OTG_V_TOTAL_MIN, 0, OTG_V_TOTAL_MIN, 0);
REG_SET(OTG_V_TOTAL_MAX, 0, OTG_V_TOTAL_MAX, 0);
REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0);
REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0);
REG_UPDATE_3(OTG_V_TOTAL_CONTROL,
OTG_V_TOTAL_MIN_SEL, 0,
OTG_V_TOTAL_MAX_SEL, 0,
OTG_SET_V_TOTAL_MIN_MASK_EN, 0);
return;
}
REG_UPDATE_6(OTG_V_TOTAL_CONTROL,
OTG_V_TOTAL_MIN_SEL, 1,
OTG_V_TOTAL_MAX_SEL, 1,
OTG_SET_V_TOTAL_MIN_MASK_EN, 1,
OTG_SET_V_TOTAL_MIN_MASK, v_total_mask_value,
OTG_VTOTAL_MID_REPLACING_MIN_EN, 0,
OTG_VTOTAL_MID_REPLACING_MAX_EN, 0);
REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min);
optc->funcs->set_vtotal_min_max(optc, vtotal_min, vtotal_max);
REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, (uint32_t)phase);
REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, (uint32_t)modulo);
}
void dcn30_fpu_populate_dml_writeback_from_context(
struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
{
int pipe_cnt, i, j;
double max_calc_writeback_dispclk;
double writeback_dispclk;
struct writeback_st dout_wb;
dc_assert_fp_enabled();
for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream;
if (!stream)
continue;
max_calc_writeback_dispclk = 0;
/* Set writeback information */
pipes[pipe_cnt].dout.wb_enable = 0;
pipes[pipe_cnt].dout.num_active_wb = 0;
for (j = 0; j < stream->num_wb_info; j++) {
struct dc_writeback_info *wb_info = &stream->writeback_info[j];
if (wb_info->wb_enabled && wb_info->writeback_source_plane &&
(wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) {
pipes[pipe_cnt].dout.wb_enable = 1;
pipes[pipe_cnt].dout.num_active_wb++;
dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ?
wb_info->dwb_params.cnv_params.crop_height :
wb_info->dwb_params.cnv_params.src_height;
dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ?
wb_info->dwb_params.cnv_params.crop_width :
wb_info->dwb_params.cnv_params.src_width;
dout_wb.wb_dst_width = wb_info->dwb_params.dest_width;
dout_wb.wb_dst_height = wb_info->dwb_params.dest_height;
/* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */
if (dc->dml.ip.writeback_max_hscl_taps > 1) {
dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps;
dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps;
} else {
dout_wb.wb_htaps_luma = 1;
dout_wb.wb_vtaps_luma = 1;
}
dout_wb.wb_htaps_chroma = 0;
dout_wb.wb_vtaps_chroma = 0;
dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ?
(double)wb_info->dwb_params.cnv_params.crop_width /
(double)wb_info->dwb_params.dest_width :
(double)wb_info->dwb_params.cnv_params.src_width /
(double)wb_info->dwb_params.dest_width;
dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ?
(double)wb_info->dwb_params.cnv_params.crop_height /
(double)wb_info->dwb_params.dest_height :
(double)wb_info->dwb_params.cnv_params.src_height /
(double)wb_info->dwb_params.dest_height;
if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB ||
wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA)
dout_wb.wb_pixel_format = dm_444_64;
else
dout_wb.wb_pixel_format = dm_444_32;
/* Workaround for cases where multiple writebacks are connected to the same plane,
* in which case we need to compute the worst case and set the associated writeback parameters.
* This workaround is necessary due to DML computation assuming only 1 set of writeback
* parameters per pipe
*/
writeback_dispclk = dml30_CalculateWriteBackDISPCLK(
dout_wb.wb_pixel_format,
pipes[pipe_cnt].pipe.dest.pixel_rate_mhz,
dout_wb.wb_hratio,
dout_wb.wb_vratio,
dout_wb.wb_htaps_luma,
dout_wb.wb_vtaps_luma,
dout_wb.wb_src_width,
dout_wb.wb_dst_width,
pipes[pipe_cnt].pipe.dest.htotal,
dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size);
if (writeback_dispclk > max_calc_writeback_dispclk) {
max_calc_writeback_dispclk = writeback_dispclk;
pipes[pipe_cnt].dout.wb = dout_wb;
}
}
}
pipe_cnt++;
}
}
void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params,
struct display_mode_lib *dml,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
int cur_pipe)
{
int i;
dc_assert_fp_enabled();
for (i = 0; i < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); i++) {
wb_arb_params->cli_watermark[i] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000;
wb_arb_params->pstate_watermark[i] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
}
wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[cur_pipe] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */
}
void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
{
dc_assert_fp_enabled();
if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) {
context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;
context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;
}
}
void dcn30_fpu_calculate_wm_and_dlg(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
int vlevel)
{
int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
int i, pipe_idx;
double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb];
bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;
dc_assert_fp_enabled();
if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
pipes[0].clks_cfg.voltage = vlevel;
pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
/* Set B:
* DCFCLK: 1GHz or min required above 1GHz
* FCLK/UCLK: Max
*/
if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
if (vlevel == 0) {
pipes[0].clks_cfg.voltage = 1;
pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz;
}
context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
}
context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
pipes[0].clks_cfg.voltage = vlevel;
pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
/* Set D:
* DCFCLK: Min Required
* FCLK(proportional to UCLK): 1GHz or Max
* MALL stutter, sr_enter_exit = 4, sr_exit = 2us
*/
/*
if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us;
context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us;
context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us;
}
context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
*/
/* Set C:
* DCFCLK: Min Required
* FCLK(proportional to UCLK): 1GHz or Max
* pstate latency overridden to 5us
*/
if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
unsigned int min_dram_speed_mts_margin = 160;
if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported)
min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16;
/* find the largest table entry that is lower than the DRAM speed; anything lower than DPM0 still uses DPM0 */
for (i = 3; i > 0; i--)
if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts)
break;
context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us;
context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
}
context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
if (!pstate_en) {
/* The only difference between A and C is p-state latency, if p-state is not supported we want to
* calculate DLG based on dummy p-state latency, and max out the set A p-state watermark
*/
context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c;
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0;
} else {
/* Set A:
* DCFCLK: Min Required
* FCLK(proportional to UCLK): 1GHz or Max
*
* Set A calculated last so that following calculations are based on Set A
*/
dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
}
context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;
/* Make set D = set A until set D is enabled */
context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
if (!context->res_ctx.pipe_ctx[i].stream)
continue;
pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
if (dc->config.forced_clocks) {
pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
}
if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
pipe_idx++;
}
DC_FP_START();
dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
DC_FP_END();
if (!pstate_en)
/* Restore full p-state latency */
context->bw_ctx.dml.soc.dram_clock_change_latency_us =
dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
}
void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc)
{
dc_assert_fp_enabled();
if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
}
void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk)
{
dc_assert_fp_enabled();
if (!dcn30_bb_max_clk->max_dcfclk_mhz)
dcn30_bb_max_clk->max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz;
if (!dcn30_bb_max_clk->max_dispclk_mhz)
dcn30_bb_max_clk->max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz;
if (!dcn30_bb_max_clk->max_dppclk_mhz)
dcn30_bb_max_clk->max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz;
if (!dcn30_bb_max_clk->max_phyclk_mhz)
dcn30_bb_max_clk->max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz;
}
void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
unsigned int *optimal_dcfclk,
unsigned int *optimal_fclk)
{
double bw_from_dram, bw_from_dram1, bw_from_dram2;
dc_assert_fp_enabled();
bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans *
dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100);
bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans *
dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100);
bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
if (optimal_fclk)
*optimal_fclk = bw_from_dram /
(dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
if (optimal_dcfclk)
*optimal_dcfclk = bw_from_dram /
(dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
}
void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
struct clk_bw_params *bw_params,
struct dc_bounding_box_max_clk *dcn30_bb_max_clk,
unsigned int *dcfclk_mhz,
unsigned int *dram_speed_mts)
{
unsigned int i;
dc_assert_fp_enabled();
dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
for (i = 0; i < dcn3_0_soc.num_states; i++) {
dcn3_0_soc.clock_limits[i].state = i;
dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
/* Fill all states with max values of all other clocks */
dcn3_0_soc.clock_limits[i].dispclk_mhz = dcn30_bb_max_clk->max_dispclk_mhz;
dcn3_0_soc.clock_limits[i].dppclk_mhz = dcn30_bb_max_clk->max_dppclk_mhz;
dcn3_0_soc.clock_limits[i].phyclk_mhz = dcn30_bb_max_clk->max_phyclk_mhz;
dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz;
/* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */
/* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz;
dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz;
dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz;
}
/* re-init DML with updated bb */
dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
if (dc->current_state)
dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
}
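For reference, the DTO programming in optc3_fpu_set_vrr_m_const() boils down to phase = (vtotal_max - vtotal_avg) * (2^32 - 1): since VTOTAL_MAX - VTOTAL_MIN is 1, that ratio is the fraction of frames which must run at VTOTAL_MIN for the average to come out right. A stand-alone worked example; the target vtotal_avg below is an assumption chosen for illustration:

#include <math.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	double vtotal_avg = 1125.25;              /* example target, not from the patch */
	double vtotal_min = floor(vtotal_avg);    /* 1125 */
	double vtotal_max = ceil(vtotal_avg);     /* 1126 */
	double modulo = 65536.0 * 65536.0 - 1.0;  /* 2^32 - 1, as in the driver */
	double phase = (vtotal_max - vtotal_avg) * modulo;

	/* 75% of frames at VTOTAL_MIN, 25% at VTOTAL_MAX -> average 1125.25 */
	printf("phase = %u (%.0f%% of modulo)\n",
	       (uint32_t)phase, 100.0 * phase / modulo);
	return 0;
}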


@@ -0,0 +1,67 @@
/*
* Copyright 2020-2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#ifndef __DCN30_FPU_H__
#define __DCN30_FPU_H__
#include "core_types.h"
#include "dcn20/dcn20_optc.h"
void optc3_fpu_set_vrr_m_const(struct timing_generator *optc,
double vtotal_avg);
void dcn30_fpu_populate_dml_writeback_from_context(
struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes);
void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params,
struct display_mode_lib *dml,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
int cur_pipe);
void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
void dcn30_fpu_calculate_wm_and_dlg(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
int vlevel);
void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc);
void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk);
void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
unsigned int *optimal_dcfclk,
unsigned int *optimal_fclk);
void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
struct clk_bw_params *bw_params,
struct dc_bounding_box_max_clk *dcn30_bb_max_clk,
unsigned int *dcfclk_mhz,
unsigned int *dram_speed_mts);
#endif /* __DCN30_FPU_H__*/

View File

@ -486,4 +486,11 @@ struct dc_state {
} perf_params;
};
struct dc_bounding_box_max_clk {
        int max_dcfclk_mhz;
        int max_dispclk_mhz;
        int max_dppclk_mhz;
        int max_phyclk_mhz;
};
#endif /* _CORE_TYPES_H_ */

View File

@ -217,8 +217,7 @@ void disable_dp_hpo_output(struct dc_link *link,
void setup_dp_hpo_stream(struct pipe_ctx *pipe_ctx, bool enable);
bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx);
void dp_retrieve_lttpr_cap(struct dc_link *link);
bool dp_apply_lttpr_mode(struct dc_link *link);
bool dp_retrieve_lttpr_cap(struct dc_link *link);
void edp_panel_backlight_power_on(struct dc_link *link);
void dp_receiver_power_ctrl(struct dc_link *link, bool on);
void dp_source_sequence_trace(struct dc_link *link, uint8_t dp_test_mode);

View File

@ -87,6 +87,11 @@ union dpia_set_config_data {
*/
enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link);
/* Query hot plug status of USB4 DP tunnel.
* Returns true if HPD high.
*/
bool dc_link_dpia_query_hpd_status(struct dc_link *link);
/* Train DP tunneling link for USB4 DPIA display endpoint.
* DPIA equivalent of dc_link_dp_perform_link_training.
* Aborts link training upon detection of sink unplug.

View File

@ -80,12 +80,6 @@ enum link_training_result {
DP_128b_132b_CDS_DONE_TIMEOUT,
};
enum lttpr_support {
LTTPR_UNSUPPORTED,
LTTPR_CHECK_EXT_SUPPORT,
LTTPR_SUPPORTED,
};
enum lttpr_mode {
LTTPR_MODE_NON_LTTPR,
LTTPR_MODE_TRANSPARENT,

View File

@ -6070,6 +6070,8 @@
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1

View File

@ -6058,6 +6058,8 @@
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1

View File

@ -7142,6 +7142,8 @@
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1

View File

@ -37285,12 +37285,14 @@
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L
//DIG0_HDMI_INFOFRAME_CONTROL0
#define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND__SHIFT 0x0

View File

@ -5584,6 +5584,8 @@
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000
#define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1

View File

@ -30357,12 +30357,14 @@
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L
//DIG0_HDMI_INFOFRAME_CONTROL0
#define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT 0x4

View File

@ -39439,12 +39439,14 @@
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L
//DIG0_HDMI_INFOFRAME_CONTROL0
#define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT 0x4

View File

@ -16956,7 +16956,7 @@
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L
@ -16964,7 +16964,7 @@
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L
//DIG0_HDMI_INFOFRAME_CONTROL0

View File

@ -35487,12 +35487,14 @@
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L
#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L
//DIG0_HDMI_INFOFRAME_CONTROL0
#define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT 0x4

View File

@ -770,6 +770,9 @@ enum amd_dpm_forced_level amdgpu_dpm_get_performance_level(struct amdgpu_device
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
enum amd_dpm_forced_level level;
if (!pp_funcs)
return AMD_DPM_FORCED_LEVEL_AUTO;
mutex_lock(&adev->pm.mutex);
if (pp_funcs->get_performance_level)
level = pp_funcs->get_performance_level(adev->powerplay.pp_handle);

View File

@ -1436,6 +1436,7 @@ static int smu_disable_dpms(struct smu_context *smu)
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 5):
case IP_VERSION(11, 0, 9):
case IP_VERSION(13, 0, 0):
return 0;
default:
break;

View File

@ -671,8 +671,8 @@ typedef struct {
uint16_t reserved[2];
//Frequency changes
uint16_t GfxclkFmin; // MHz
uint16_t GfxclkFmax; // MHz
int16_t GfxclkFmin; // MHz
int16_t GfxclkFmax; // MHz
uint16_t UclkFmin; // MHz
uint16_t UclkFmax; // MHz
@ -683,15 +683,14 @@ typedef struct {
//Fan control
uint8_t FanLinearPwmPoints[NUM_OD_FAN_MAX_POINTS];
uint8_t FanLinearTempPoints[NUM_OD_FAN_MAX_POINTS];
uint16_t FanMaximumRpm;
uint16_t FanMinimumPwm;
uint16_t FanAcousticLimitRpm;
uint16_t AcousticTargetRpmThreshold;
uint16_t AcousticLimitRpmThreshold;
uint16_t FanTargetTemperature; // Degrees Celsius
uint8_t FanZeroRpmEnable;
uint8_t FanZeroRpmStopTemp;
uint8_t FanMode;
uint8_t Padding[1];
uint8_t MaxOpTemp;
uint32_t Spare[13];
uint32_t MmHubPadding[8]; // SMU internal use. Adding here instead of external as a workaround
@ -719,15 +718,14 @@ typedef struct {
uint8_t FanLinearPwmPoints;
uint8_t FanLinearTempPoints;
uint16_t FanMaximumRpm;
uint16_t FanMinimumPwm;
uint16_t FanAcousticLimitRpm;
uint16_t AcousticTargetRpmThreshold;
uint16_t AcousticLimitRpmThreshold;
uint16_t FanTargetTemperature; // Degrees Celsius
uint8_t FanZeroRpmEnable;
uint8_t FanZeroRpmStopTemp;
uint8_t FanMode;
uint8_t Padding[1];
uint8_t MaxOpTemp;
uint32_t Spare[13];
@ -997,7 +995,8 @@ typedef struct {
uint16_t SocketPowerLimitAcTau[PPT_THROTTLER_COUNT]; // Time constant of LPF in ms
uint16_t SocketPowerLimitDcTau[PPT_THROTTLER_COUNT]; // Time constant of LPF in ms
uint32_t SpareVmin[12];
QuadraticInt_t Vmin_droop;
uint32_t SpareVmin[9];
//SECTION: DPM Configuration 1
@ -1286,7 +1285,6 @@ typedef struct {
uint32_t PostVoltageSetBacoDelay; // in microseconds. Amount of time FW will wait after power good is established or PSI0 command is issued
uint32_t BacoEntryDelay; // in milliseconds. Amount of time FW will wait to trigger BACO entry after receiving entry notification from OS
// SECTION: Board Reserved
uint32_t BoardSpare[64];

View File

@ -30,7 +30,7 @@
#define SMU13_DRIVER_IF_VERSION_ALDE 0x08
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x27
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x28
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x28
#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms

View File

@ -697,12 +697,28 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu,
uint32_t apu_percent = 0;
uint32_t dgpu_percent = 0;
if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
(smu->smc_fw_version >= 0x3A4900))
use_metrics_v3 = true;
else if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
(smu->smc_fw_version >= 0x3A4300))
use_metrics_v2 = true;
switch (smu->adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(11, 0, 7):
if (smu->smc_fw_version >= 0x3A4900)
use_metrics_v3 = true;
else if (smu->smc_fw_version >= 0x3A4300)
use_metrics_v2 = true;
break;
case IP_VERSION(11, 0, 11):
if (smu->smc_fw_version >= 0x412D00)
use_metrics_v2 = true;
break;
case IP_VERSION(11, 0, 12):
if (smu->smc_fw_version >= 0x3B2300)
use_metrics_v2 = true;
break;
case IP_VERSION(11, 0, 13):
if (smu->smc_fw_version >= 0x491100)
use_metrics_v2 = true;
break;
default:
break;
}
ret = smu_cmn_get_metrics_table(smu,
NULL,
@ -3833,13 +3849,28 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
uint16_t average_gfx_activity;
int ret = 0;
if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
(smu->smc_fw_version >= 0x3A4900))
use_metrics_v3 = true;
else if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
(smu->smc_fw_version >= 0x3A4300))
use_metrics_v2 = true;
switch (smu->adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(11, 0, 7):
if (smu->smc_fw_version >= 0x3A4900)
use_metrics_v3 = true;
else if (smu->smc_fw_version >= 0x3A4300)
use_metrics_v2 = true;
break;
case IP_VERSION(11, 0, 11):
if (smu->smc_fw_version >= 0x412D00)
use_metrics_v2 = true;
break;
case IP_VERSION(11, 0, 12):
if (smu->smc_fw_version >= 0x3B2300)
use_metrics_v2 = true;
break;
case IP_VERSION(11, 0, 13):
if (smu->smc_fw_version >= 0x491100)
use_metrics_v2 = true;
break;
default:
break;
}
ret = smu_cmn_get_metrics_table(smu,
&metrics_external,

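The firmware-version thresholds above are now duplicated between sienna_cichlid_get_smu_metrics_data() and sienna_cichlid_get_gpu_metrics(); a small helper could keep the two call sites in sync. The sketch below is hypothetical (the helper name and the stand-in types are not part of the patch) and only restates the thresholds visible in the two hunks:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the kernel macro and smu context, for illustration only. */
#define IP_VERSION(maj, min, rev) (((maj) << 16) | ((min) << 8) | (rev))

struct fake_smu {
        uint32_t mp1_ip_version;
        uint32_t smc_fw_version;
};

/* Hypothetical helper mirroring the switch duplicated in the two functions above. */
static void pick_metrics_version(const struct fake_smu *smu, bool *use_v3, bool *use_v2)
{
        *use_v3 = false;
        *use_v2 = false;

        switch (smu->mp1_ip_version) {
        case IP_VERSION(11, 0, 7):
                if (smu->smc_fw_version >= 0x3A4900)
                        *use_v3 = true;
                else if (smu->smc_fw_version >= 0x3A4300)
                        *use_v2 = true;
                break;
        case IP_VERSION(11, 0, 11):
                if (smu->smc_fw_version >= 0x412D00)
                        *use_v2 = true;
                break;
        case IP_VERSION(11, 0, 12):
                if (smu->smc_fw_version >= 0x3B2300)
                        *use_v2 = true;
                break;
        case IP_VERSION(11, 0, 13):
                if (smu->smc_fw_version >= 0x491100)
                        *use_v2 = true;
                break;
        default:
                break;
        }
}

int main(void)
{
        struct fake_smu smu = { IP_VERSION(11, 0, 7), 0x3A4900 };
        bool v3, v2;

        pick_metrics_version(&smu, &v3, &v2);
        printf("use_metrics_v3=%d use_metrics_v2=%d\n", v3, v2);
        return 0;
}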
View File

@ -1666,6 +1666,7 @@ static const struct throttling_logging_label {
uint32_t feature_mask;
const char *label;
} logging_label[] = {
{(1U << THROTTLER_TEMP_GPU_BIT), "GPU"},
{(1U << THROTTLER_TEMP_MEM_BIT), "HBM"},
{(1U << THROTTLER_TEMP_VR_GFX_BIT), "VR of GFX rail"},
{(1U << THROTTLER_TEMP_VR_MEM_BIT), "VR of HBM rail"},

View File

@ -218,13 +218,25 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu)
pptable_id == 3688)
pptable_id = 36881;
/*
* Temporary solution for SMU V13.0.0:
* - use 99991 signed pptable when SCPM enabled
* TODO: drop this when the pptable carried in vbios
* is ready.
* Temporary solution for SMU V13.0.0 with SCPM enabled:
* - use 36831 signed pptable when pp_table_id is 3683
* - use 36641 signed pptable when pp_table_id is 3664 or 0
* TODO: drop these when the pptable carried in vbios is ready.
*/
if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0))
pptable_id = 99991;
if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) {
switch (pptable_id) {
case 0:
case 3664:
pptable_id = 36641;
break;
case 3683:
pptable_id = 36831;
break;
default:
dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id);
return -EINVAL;
}
}
}
/* "pptable_id == 0" means vbios carries the pptable. */
@ -448,13 +460,24 @@ int smu_v13_0_setup_pptable(struct smu_context *smu)
pptable_id = smu->smu_table.boot_values.pp_table_id;
/*
* Temporary solution for SMU V13.0.0:
* - use 9999 unsigned pptable when SCPM disabled
* TODO: drop this when the pptable carried in vbios
* is ready.
* Temporary solution for SMU V13.0.0 with SCPM disabled:
* - use 3664 or 3683 on request
* - use 3664 when pptable_id is 0
* TODO: drop these when the pptable carried in vbios is ready.
*/
if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0))
pptable_id = 9999;
if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) {
switch (pptable_id) {
case 0:
pptable_id = 3664;
break;
case 3664:
case 3683:
break;
default:
dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id);
return -EINVAL;
}
}
}
/* force using vbios pptable in sriov mode */
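Read together, the two temporary SMU v13.0.0 workarounds above map the requested pptable id as follows: with SCPM enabled a signed table is loaded (0 or 3664 becomes 36641, 3683 becomes 36831), while with SCPM disabled the unsigned table is used (0 becomes 3664, explicit 3664/3683 are honoured) and anything else is rejected. A standalone sketch of that mapping, with a hypothetical helper name and purely for illustration:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical consolidation of the two SMU v13.0.0 pptable workarounds above. */
static int smu_v13_0_0_pick_pptable_id(bool scpm_enabled, int requested_id)
{
        if (scpm_enabled) {
                switch (requested_id) {
                case 0:
                case 3664:
                        return 36641;   /* signed 3664 pptable */
                case 3683:
                        return 36831;   /* signed 3683 pptable */
                default:
                        return -1;      /* unsupported */
                }
        }

        switch (requested_id) {
        case 0:
                return 3664;            /* default unsigned pptable */
        case 3664:
        case 3683:
                return requested_id;    /* honour the explicit request */
        default:
                return -1;              /* unsupported */
        }
}

int main(void)
{
        printf("SCPM on,  id 3683 -> %d\n", smu_v13_0_0_pick_pptable_id(true, 3683));
        printf("SCPM off, id 0    -> %d\n", smu_v13_0_0_pick_pptable_id(false, 0));
        return 0;
}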

View File

@ -275,9 +275,7 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu,
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_VDDIO_MEM_SCALING_BIT);
}
#if 0
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_MEM_TEMP_READ_BIT);
#endif
if (adev->pm.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_GFXCLK_BIT);
@ -296,6 +294,12 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu,
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_BIT);
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_MP0CLK_BIT);
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FW_DSTATE_BIT);
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT);
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_SOC_CG_BIT);
return 0;
}

View File

@ -644,42 +644,40 @@ static int smu_v13_0_4_set_watermarks_table(struct smu_context *smu,
if (!table || !clock_ranges)
return -EINVAL;
if (clock_ranges) {
if (clock_ranges->num_reader_wm_sets > NUM_WM_RANGES ||
clock_ranges->num_writer_wm_sets > NUM_WM_RANGES)
return -EINVAL;
if (clock_ranges->num_reader_wm_sets > NUM_WM_RANGES ||
clock_ranges->num_writer_wm_sets > NUM_WM_RANGES)
return -EINVAL;
for (i = 0; i < clock_ranges->num_reader_wm_sets; i++) {
table->WatermarkRow[WM_DCFCLK][i].MinClock =
clock_ranges->reader_wm_sets[i].min_drain_clk_mhz;
table->WatermarkRow[WM_DCFCLK][i].MaxClock =
clock_ranges->reader_wm_sets[i].max_drain_clk_mhz;
table->WatermarkRow[WM_DCFCLK][i].MinMclk =
clock_ranges->reader_wm_sets[i].min_fill_clk_mhz;
table->WatermarkRow[WM_DCFCLK][i].MaxMclk =
clock_ranges->reader_wm_sets[i].max_fill_clk_mhz;
for (i = 0; i < clock_ranges->num_reader_wm_sets; i++) {
table->WatermarkRow[WM_DCFCLK][i].MinClock =
clock_ranges->reader_wm_sets[i].min_drain_clk_mhz;
table->WatermarkRow[WM_DCFCLK][i].MaxClock =
clock_ranges->reader_wm_sets[i].max_drain_clk_mhz;
table->WatermarkRow[WM_DCFCLK][i].MinMclk =
clock_ranges->reader_wm_sets[i].min_fill_clk_mhz;
table->WatermarkRow[WM_DCFCLK][i].MaxMclk =
clock_ranges->reader_wm_sets[i].max_fill_clk_mhz;
table->WatermarkRow[WM_DCFCLK][i].WmSetting =
clock_ranges->reader_wm_sets[i].wm_inst;
}
for (i = 0; i < clock_ranges->num_writer_wm_sets; i++) {
table->WatermarkRow[WM_SOCCLK][i].MinClock =
clock_ranges->writer_wm_sets[i].min_fill_clk_mhz;
table->WatermarkRow[WM_SOCCLK][i].MaxClock =
clock_ranges->writer_wm_sets[i].max_fill_clk_mhz;
table->WatermarkRow[WM_SOCCLK][i].MinMclk =
clock_ranges->writer_wm_sets[i].min_drain_clk_mhz;
table->WatermarkRow[WM_SOCCLK][i].MaxMclk =
clock_ranges->writer_wm_sets[i].max_drain_clk_mhz;
table->WatermarkRow[WM_SOCCLK][i].WmSetting =
clock_ranges->writer_wm_sets[i].wm_inst;
}
smu->watermarks_bitmap |= WATERMARKS_EXIST;
table->WatermarkRow[WM_DCFCLK][i].WmSetting =
clock_ranges->reader_wm_sets[i].wm_inst;
}
for (i = 0; i < clock_ranges->num_writer_wm_sets; i++) {
table->WatermarkRow[WM_SOCCLK][i].MinClock =
clock_ranges->writer_wm_sets[i].min_fill_clk_mhz;
table->WatermarkRow[WM_SOCCLK][i].MaxClock =
clock_ranges->writer_wm_sets[i].max_fill_clk_mhz;
table->WatermarkRow[WM_SOCCLK][i].MinMclk =
clock_ranges->writer_wm_sets[i].min_drain_clk_mhz;
table->WatermarkRow[WM_SOCCLK][i].MaxMclk =
clock_ranges->writer_wm_sets[i].max_drain_clk_mhz;
table->WatermarkRow[WM_SOCCLK][i].WmSetting =
clock_ranges->writer_wm_sets[i].wm_inst;
}
smu->watermarks_bitmap |= WATERMARKS_EXIST;
/* pass data to smu controller */
if ((smu->watermarks_bitmap & WATERMARKS_EXIST) &&
!(smu->watermarks_bitmap & WATERMARKS_LOADED)) {

View File

@ -190,6 +190,9 @@ static int yellow_carp_fini_smc_tables(struct smu_context *smu)
kfree(smu_table->watermarks_table);
smu_table->watermarks_table = NULL;
kfree(smu_table->gpu_metrics_table);
smu_table->gpu_metrics_table = NULL;
return 0;
}

View File

@ -541,7 +541,6 @@ static int dpu_encoder_virt_atomic_check(
struct dpu_encoder_virt *dpu_enc;
struct msm_drm_private *priv;
struct dpu_kms *dpu_kms;
const struct drm_display_mode *mode;
struct drm_display_mode *adj_mode;
struct msm_display_topology topology;
struct dpu_global_state *global_state;
@ -559,7 +558,6 @@ static int dpu_encoder_virt_atomic_check(
priv = drm_enc->dev->dev_private;
dpu_kms = to_dpu_kms(priv->kms);
mode = &crtc_state->mode;
adj_mode = &crtc_state->adjusted_mode;
global_state = dpu_kms_get_global_state(crtc_state->state);
if (IS_ERR(global_state))
@ -1814,7 +1812,6 @@ static void dpu_encoder_prep_dsc(struct dpu_encoder_virt *dpu_enc,
}
}
dsc_common_mode = 0;
pic_width = dsc->drm->pic_width;
dsc_common_mode = DSC_MODE_MULTIPLEX | DSC_MODE_SPLIT_PANEL;

View File

@ -574,11 +574,11 @@ static void dpu_encoder_phys_wb_disable(struct dpu_encoder_phys *phys_enc)
*/
static void dpu_encoder_phys_wb_destroy(struct dpu_encoder_phys *phys_enc)
{
DPU_DEBUG("[wb:%d]\n", phys_enc->wb_idx - WB_0);
if (!phys_enc)
return;
DPU_DEBUG("[wb:%d]\n", phys_enc->wb_idx - WB_0);
kfree(phys_enc);
}

View File

@ -49,8 +49,6 @@
#define DPU_DEBUGFS_DIR "msm_dpu"
#define DPU_DEBUGFS_HWMASKNAME "hw_log_mask"
#define MIN_IB_BW 400000000ULL /* Min ib vote 400MB */
static int dpu_kms_hw_init(struct msm_kms *kms);
static void _dpu_kms_mmu_destroy(struct dpu_kms *dpu_kms);
@ -1305,15 +1303,9 @@ static int __maybe_unused dpu_runtime_resume(struct device *dev)
struct dpu_kms *dpu_kms = to_dpu_kms(priv->kms);
struct drm_encoder *encoder;
struct drm_device *ddev;
int i;
ddev = dpu_kms->dev;
WARN_ON(!(dpu_kms->num_paths));
/* Min vote of BW is required before turning on AXI clk */
for (i = 0; i < dpu_kms->num_paths; i++)
icc_set_bw(dpu_kms->path[i], 0, Bps_to_icc(MIN_IB_BW));
rc = clk_bulk_prepare_enable(dpu_kms->num_clocks, dpu_kms->clocks);
if (rc) {
DPU_ERROR("clock enable failed rc:%d\n", rc);

View File

@ -1390,8 +1390,13 @@ void dp_ctrl_reset_irq_ctrl(struct dp_ctrl *dp_ctrl, bool enable)
dp_catalog_ctrl_reset(ctrl->catalog);
if (enable)
dp_catalog_ctrl_enable_irq(ctrl->catalog, enable);
/*
* The DP controller's programmable registers are not reset to their
* default values by DP_SW_RESET, therefore the interrupt mask bits
* have to be re-programmed here to enable/disable interrupts
*/
dp_catalog_ctrl_enable_irq(ctrl->catalog, enable);
}
void dp_ctrl_phy_init(struct dp_ctrl *dp_ctrl)

View File

@ -5,6 +5,7 @@
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/interconnect.h>
#include <linux/irq.h>
#include <linux/irqchip.h>
#include <linux/irqdesc.h>
@ -25,6 +26,8 @@
#define UBWC_CTRL_2 0x150
#define UBWC_PREDICTION_MODE 0x154
#define MIN_IB_BW 400000000UL /* Min ib vote 400MB */
struct msm_mdss {
struct device *dev;
@ -36,8 +39,47 @@ struct msm_mdss {
unsigned long enabled_mask;
struct irq_domain *domain;
} irq_controller;
struct icc_path *path[2];
u32 num_paths;
};
static int msm_mdss_parse_data_bus_icc_path(struct device *dev,
                                            struct msm_mdss *msm_mdss)
{
        struct icc_path *path0 = of_icc_get(dev, "mdp0-mem");
        struct icc_path *path1 = of_icc_get(dev, "mdp1-mem");

        if (IS_ERR_OR_NULL(path0))
                return PTR_ERR_OR_ZERO(path0);

        msm_mdss->path[0] = path0;
        msm_mdss->num_paths = 1;

        if (!IS_ERR_OR_NULL(path1)) {
                msm_mdss->path[1] = path1;
                msm_mdss->num_paths++;
        }

        return 0;
}

static void msm_mdss_put_icc_path(void *data)
{
        struct msm_mdss *msm_mdss = data;
        int i;

        for (i = 0; i < msm_mdss->num_paths; i++)
                icc_put(msm_mdss->path[i]);
}

static void msm_mdss_icc_request_bw(struct msm_mdss *msm_mdss, unsigned long bw)
{
        int i;

        for (i = 0; i < msm_mdss->num_paths; i++)
                icc_set_bw(msm_mdss->path[i], 0, Bps_to_icc(bw));
}
static void msm_mdss_irq(struct irq_desc *desc)
{
struct msm_mdss *msm_mdss = irq_desc_get_handler_data(desc);
@ -136,6 +178,13 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss)
{
int ret;
/*
* Several components have AXI clocks that can only be turned on if
* the interconnect is enabled (non-zero bandwidth). Make sure the
* interconnects are voted for at least a minimum bandwidth first.
*/
msm_mdss_icc_request_bw(msm_mdss, MIN_IB_BW);
ret = clk_bulk_prepare_enable(msm_mdss->num_clocks, msm_mdss->clocks);
if (ret) {
dev_err(msm_mdss->dev, "clock enable failed, ret:%d\n", ret);
@ -178,6 +227,7 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss)
static int msm_mdss_disable(struct msm_mdss *msm_mdss)
{
clk_bulk_disable_unprepare(msm_mdss->num_clocks, msm_mdss->clocks);
msm_mdss_icc_request_bw(msm_mdss, 0);
return 0;
}
@ -271,6 +321,13 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5
dev_dbg(&pdev->dev, "mapped mdss address space @%pK\n", msm_mdss->mmio);
ret = msm_mdss_parse_data_bus_icc_path(&pdev->dev, msm_mdss);
if (ret)
return ERR_PTR(ret);
ret = devm_add_action_or_reset(&pdev->dev, msm_mdss_put_icc_path, msm_mdss);
if (ret)
return ERR_PTR(ret);
if (is_mdp5)
ret = mdp5_mdss_parse_clock(pdev, &msm_mdss->clocks);
else

View File

@ -473,6 +473,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encode
native_mode->vdisplay != 0 &&
native_mode->clock != 0) {
mode = drm_mode_duplicate(dev, native_mode);
if (!mode)
return NULL;
mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
drm_mode_set_name(mode);
@ -487,6 +489,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encode
* simpler.
*/
mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false);
if (!mode)
return NULL;
mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name);
}

View File

@ -1,8 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
config TEGRA_HOST1X_CONTEXT_BUS
bool
config TEGRA_HOST1X
tristate "NVIDIA Tegra host1x driver"
depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
select DMA_SHARED_BUFFER
select TEGRA_HOST1X_CONTEXT_BUS
select IOMMU_IOVA
help
Driver for the NVIDIA Tegra host1x hardware.

View File

@ -18,3 +18,4 @@ host1x-y = \
hw/host1x07.o
obj-$(CONFIG_TEGRA_HOST1X) += host1x.o
obj-$(CONFIG_TEGRA_HOST1X_CONTEXT_BUS) += context_bus.o

View File

@ -0,0 +1,31 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, NVIDIA Corporation.
*/
#include <linux/device.h>
#include <linux/of.h>
struct bus_type host1x_context_device_bus_type = {
        .name = "host1x-context",
};
EXPORT_SYMBOL_GPL(host1x_context_device_bus_type);

static int __init host1x_context_device_bus_init(void)
{
        int err;

        if (!of_machine_is_compatible("nvidia,tegra186") &&
            !of_machine_is_compatible("nvidia,tegra194") &&
            !of_machine_is_compatible("nvidia,tegra234"))
                return 0;

        err = bus_register(&host1x_context_device_bus_type);
        if (err < 0) {
                pr_err("bus type registration failed: %d\n", err);
                return err;
        }

        return 0;
}
postcore_initcall(host1x_context_device_bus_init);
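A device is placed on this bus by pointing its struct device at the exported bus type before registration. The minimal module below is a hypothetical sketch, not part of this series: the device name, release handler and the assumption that CONFIG_TEGRA_HOST1X_CONTEXT_BUS is enabled are all made up for illustration.

// SPDX-License-Identifier: GPL-2.0-only
/* Hypothetical example: registering a device on the host1x context bus. */
#include <linux/device.h>
#include <linux/module.h>
#include <linux/host1x_context_bus.h>

static struct device example_ctx_dev;

static void example_ctx_release(struct device *dev)
{
        /* nothing dynamically allocated in this sketch */
}

static int __init example_ctx_init(void)
{
        example_ctx_dev.bus = &host1x_context_device_bus_type;
        example_ctx_dev.release = example_ctx_release;
        dev_set_name(&example_ctx_dev, "example-host1x-ctx");

        return device_register(&example_ctx_dev);
}

static void __exit example_ctx_exit(void)
{
        device_unregister(&example_ctx_dev);
}

module_init(example_ctx_init);
module_exit(example_ctx_exit);
MODULE_LICENSE("GPL");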

View File

@ -0,0 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
*/
#ifndef __LINUX_HOST1X_CONTEXT_BUS_H
#define __LINUX_HOST1X_CONTEXT_BUS_H
#include <linux/device.h>
#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
extern struct bus_type host1x_context_device_bus_type;
#endif
#endif

View File

@ -140,6 +140,10 @@ extern "C" {
* not require GTT memory accounting
*/
#define AMDGPU_GEM_CREATE_PREEMPTIBLE (1 << 11)
/* Flag that BO can be discarded under memory pressure without keeping the
* content.
*/
#define AMDGPU_GEM_CREATE_DISCARDABLE (1 << 12)
struct drm_amdgpu_gem_create_in {
/** the requested memory size */
@ -529,6 +533,8 @@ struct drm_amdgpu_gem_op {
#define AMDGPU_VM_MTYPE_UC (4 << 5)
/* Use Read Write MTYPE instead of default MTYPE */
#define AMDGPU_VM_MTYPE_RW (5 << 5)
/* don't allocate MALL */
#define AMDGPU_VM_PAGE_NOALLOC (1 << 9)
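Together with AMDGPU_GEM_CREATE_DISCARDABLE above, this gives userspace two knobs: mark a BO as discardable at allocation time and keep its GPUVM mapping out of the MALL. A minimal, hypothetical userspace sketch (assuming the uAPI header and an already-open render node; error handling trimmed, helper name made up) might look like:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/amdgpu_drm.h>

/* Hypothetical helper: allocate a discardable VRAM BO and map it without MALL allocation. */
static int create_discardable_bo(int drm_fd, uint64_t size, uint64_t va, uint32_t *handle)
{
        union drm_amdgpu_gem_create create;
        struct drm_amdgpu_gem_va va_req;
        int ret;

        memset(&create, 0, sizeof(create));
        create.in.bo_size = size;
        create.in.alignment = 4096;
        create.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
        create.in.domain_flags = AMDGPU_GEM_CREATE_DISCARDABLE;

        ret = ioctl(drm_fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &create);
        if (ret)
                return ret;
        *handle = create.out.handle;

        memset(&va_req, 0, sizeof(va_req));
        va_req.handle = *handle;
        va_req.operation = AMDGPU_VA_OP_MAP;
        va_req.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
                       AMDGPU_VM_PAGE_NOALLOC;   /* keep this mapping out of the MALL */
        va_req.va_address = va;
        va_req.offset_in_bo = 0;
        va_req.map_size = size;

        return ioctl(drm_fd, DRM_IOCTL_AMDGPU_GEM_VA, &va_req);
}

In practice this would normally go through libdrm's drmIoctl() wrapper and the amdgpu_bo_* helpers; the raw ioctl form is shown only to make the new GEM flag and VM bit explicit.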
struct drm_amdgpu_gem_va {
/** GEM object handle */
@ -988,6 +994,8 @@ struct drm_amdgpu_info_vbios {
#define AMDGPU_VRAM_TYPE_DDR4 8
#define AMDGPU_VRAM_TYPE_GDDR6 9
#define AMDGPU_VRAM_TYPE_DDR5 10
#define AMDGPU_VRAM_TYPE_LPDDR4 11
#define AMDGPU_VRAM_TYPE_LPDDR5 12
struct drm_amdgpu_info_device {
/** PCI Device ID */