diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4baeb6519fda..7fe41a3c2541 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -118,6 +118,8 @@ #define MAX_GPU_INSTANCE 64 +#define GFX_SLICE_PERIOD msecs_to_jiffies(250) + struct amdgpu_gpu_instance { struct amdgpu_device *adev; int mgpu_fan_enabled; @@ -347,9 +349,9 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; -#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ -#define MAX_KIQ_REG_WAIT (amdgpu_sriov_vf(adev) ? 50000 : 5000) /* in usecs, extend for VF */ -#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ +#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 1000 int amdgpu_device_ip_set_clockgating_state(void *dev, @@ -823,17 +825,6 @@ struct amdgpu_mqd { struct amdgpu_reset_domain; struct amdgpu_fru_info; -struct amdgpu_reset_info { - /* reset dump register */ - u32 *reset_dump_reg_list; - u32 *reset_dump_reg_value; - int num_regs; - -#ifdef CONFIG_DEV_COREDUMP - struct amdgpu_coredump_info *coredump_info; -#endif -}; - /* * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. */ @@ -1157,8 +1148,6 @@ struct amdgpu_device { struct mutex benchmark_mutex; - struct amdgpu_reset_info reset_info; - bool scpm_enabled; uint32_t scpm_status; @@ -1175,6 +1164,10 @@ struct amdgpu_device { bool debug_disable_soft_recovery; bool debug_use_vram_fw_buf; bool debug_enable_ras_aca; + + bool enforce_isolation[MAX_XCP]; + /* Added this mutex for cleaner shader isolation between GFX and compute processes */ + struct mutex enforce_isolation_mutex; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, @@ -1587,13 +1580,6 @@ static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif -#if defined(CONFIG_DRM_AMD_DC) -int amdgpu_dm_display_resume(struct amdgpu_device *adev ); -#else -static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; } -#endif - - void amdgpu_register_gpu_instance(struct amdgpu_device *adev); void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 19158cc30f31..57bda66e85ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -80,6 +80,9 @@ static void aca_banks_release(struct aca_banks *banks) { struct aca_bank_node *node, *tmp; + if (list_empty(&banks->list)) + return; + list_for_each_entry_safe(node, tmp, &banks->list, node) { list_del(&node->node); kvfree(node); @@ -453,13 +456,13 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er switch (type) { case ACA_ERROR_TYPE_UE: - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, count); break; case ACA_ERROR_TYPE_CE: - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, count); break; case ACA_ERROR_TYPE_DEFERRED: - amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, count); break; default: break; @@ -562,9 +565,13 @@ static void aca_error_fini(struct aca_error *aerr) struct aca_bank_error *bank_error, *tmp; mutex_lock(&aerr->lock); + if (list_empty(&aerr->list)) + goto out_unlock; + list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) aca_bank_error_remove(aerr, bank_error); +out_unlock: mutex_destroy(&aerr->lock); } @@ -680,6 +687,9 @@ static void aca_manager_fini(struct aca_handle_manager *mgr) { struct aca_handle *handle, *tmp; + if (list_empty(&mgr->list)) + return; + list_for_each_entry_safe(handle, tmp, &mgr->list, node) amdgpu_aca_remove_handle(handle); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 03205e3c3746..4f08b153cb66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -364,15 +364,15 @@ allocate_mem_reserve_bo_failed: return r; } -void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj) +void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj) { - struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; + struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj; - amdgpu_bo_reserve(bo, true); - amdgpu_bo_kunmap(bo); - amdgpu_bo_unpin(bo); - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&(bo)); + amdgpu_bo_reserve(*bo, true); + amdgpu_bo_kunmap(*bo); + amdgpu_bo_unpin(*bo); + amdgpu_bo_unreserve(*bo); + amdgpu_bo_unref(bo); } int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, @@ -783,22 +783,6 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, return 0; } -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst, int hub_type) -{ - if (!hub_type) { - if (adev->gfxhub.funcs->query_utcl2_poison_status) - return adev->gfxhub.funcs->query_utcl2_poison_status(adev, hub_inst); - else - return false; - } else { - if (adev->mmhub.funcs->query_utcl2_poison_status) - return adev->mmhub.funcs->query_utcl2_poison_status(adev, hub_inst); - else - return false; - } -} - int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev) { return kgd2kfd_check_and_lock_kfd(); @@ -887,3 +871,21 @@ free_ring_funcs: return r; } + +/* Stop scheduling on KFD */ +int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id) +{ + if (!adev->kfd.init_complete) + return 0; + + return kgd2kfd_stop_sched(adev->kfd.dev, node_id); +} + +/* Start scheduling on KFD */ +int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id) +{ + if (!adev->kfd.init_complete) + return 0; + + return kgd2kfd_start_sched(adev->kfd.dev, node_id); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index e7bb1ca35801..f9d119448442 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -235,7 +235,7 @@ int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool mqd_gfx9); -void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj); +void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj); int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, void **mem_obj); void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj); @@ -264,6 +264,8 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, uint32_t *payload); int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off, u32 inst); +int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id); +int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id); /* Read user wptr from a specified user address space with page fault * disabled. The memory must be pinned and mapped to the hardware when @@ -322,7 +324,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, void **kptr, uint64_t *size); void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem); -int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo); +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence __rcu **ef); @@ -345,11 +347,9 @@ void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *ad pasid_notify pasid_fn, void *data, uint32_t reset); bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev); -bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); +bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst, int hub_type); int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, @@ -426,6 +426,8 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); int kgd2kfd_check_and_lock_kfd(void); void kgd2kfd_unlock_kfd(void); +int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); +int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); #else static inline int kgd2kfd_init(void) { @@ -496,5 +498,15 @@ static inline int kgd2kfd_check_and_lock_kfd(void) static inline void kgd2kfd_unlock_kfd(void) { } + +static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + return 0; +} + +static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + return 0; +} #endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index aff08321e976..8dfdb18197c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -191,4 +191,6 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 2320df51c914..73b2b401b450 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -418,5 +418,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, - .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings + .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index a5c7259cf2a3..e2ae714a700f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -541,5 +541,7 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { kgd_gfx_v9_4_3_set_wave_launch_trap_override, .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode, .set_address_watch = kgd_gfx_v9_4_3_set_address_watch, - .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch + .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 3ab6c3aa0ad1..62176d607bef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -1070,6 +1070,20 @@ static void program_trap_handler_settings(struct amdgpu_device *adev, unlock_srbm(adev); } +uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + return 0; +} + +uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout) +{ + return 0; +} + const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -1097,4 +1111,6 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, .program_trap_handler_settings = program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v10_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h index 67bcaa3d4226..9efd2dd4fdd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h @@ -56,3 +56,12 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t grace_period, uint32_t *reg_offset, uint32_t *reg_data); +uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst); +uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst, + unsigned int utimeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 8c8437a4383f..c718bedda0ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -680,5 +680,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override, .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode, .set_address_watch = kgd_gfx_v10_set_address_watch, - .clear_address_watch = kgd_gfx_v10_clear_address_watch + .clear_address_watch = kgd_gfx_v10_clear_address_watch, + .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v10_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c index b61a32d6af4b..a4ba49cb22db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c @@ -786,6 +786,20 @@ static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev, return 0; } +static uint64_t kgd_gfx_v11_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + return 0; +} + +static uint64_t kgd_gfx_v11_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout) +{ + return 0; +} + const struct kfd2kgd_calls gfx_v11_kfd2kgd = { .program_sh_mem_settings = program_sh_mem_settings_v11, .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11, @@ -808,5 +822,7 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = { .set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override, .set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode, .set_address_watch = kgd_gfx_v11_set_address_watch, - .clear_address_watch = kgd_gfx_v11_clear_address_watch + .clear_address_watch = kgd_gfx_v11_clear_address_watch, + .hqd_get_pq_addr = kgd_gfx_v11_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v11_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 5a35a8ca8922..c63528a4e894 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -1144,6 +1144,105 @@ void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, kgd_gfx_v9_unlock_srbm(adev, inst); } +uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + uint32_t low, high; + uint64_t queue_addr = 0; + + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); + amdgpu_gfx_rlc_enter_safe_mode(adev, inst); + + if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE)) + goto unlock_out; + + low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE); + high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI); + + /* only concerned with user queues. */ + if (!high) + goto unlock_out; + + queue_addr = (((queue_addr | high) << 32) | low) << 8; + +unlock_out: + amdgpu_gfx_rlc_exit_safe_mode(adev, inst); + kgd_gfx_v9_release_queue(adev, inst); + + return queue_addr; +} + +/* assume queue acquired */ +static int kgd_gfx_v9_hqd_dequeue_wait(struct amdgpu_device *adev, uint32_t inst, + unsigned int utimeout) +{ + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + while (true) { + uint32_t temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE); + + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + return 0; + + if (time_after(jiffies, end_jiffies)) + return -ETIME; + + usleep_range(500, 1000); + } +} + +uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout) +{ + uint32_t low, high, pipe_reset_data = 0; + uint64_t queue_addr = 0; + + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); + amdgpu_gfx_rlc_enter_safe_mode(adev, inst); + + if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE)) + goto unlock_out; + + low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE); + high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI); + + /* only concerned with user queues. */ + if (!high) + goto unlock_out; + + queue_addr = (((queue_addr | high) << 32) | low) << 8; + + pr_debug("Attempting queue reset on XCC %i pipe id %i queue id %i\n", + inst, pipe_id, queue_id); + + /* assume previous dequeue request issued will take affect after reset */ + WREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_COMPUTE_QUEUE_RESET, 0x1); + + if (!kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout)) + goto unlock_out; + + pr_debug("Attempting pipe reset on XCC %i pipe id %i\n", inst, pipe_id); + + pipe_reset_data = REG_SET_FIELD(pipe_reset_data, CP_MEC_CNTL, MEC_ME1_PIPE0_RESET, 1); + pipe_reset_data = pipe_reset_data << pipe_id; + + WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, pipe_reset_data); + WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, 0); + + if (kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout)) + queue_addr = 0; + +unlock_out: + pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n", + inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : "failed!"); + amdgpu_gfx_rlc_exit_safe_mode(adev, inst); + kgd_gfx_v9_release_queue(adev, inst); + + return queue_addr; +} + const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, @@ -1172,4 +1271,6 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index ce424615f59b..988c50ac3be0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -101,3 +101,12 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t grace_period, uint32_t *reg_offset, uint32_t *reg_data); +uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst); +uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst, + unsigned int utimeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 11672bfe4fad..6d5fd371d5ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1252,7 +1252,7 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, return ret; } -static void unmap_bo_from_gpuvm(struct kgd_mem *mem, +static int unmap_bo_from_gpuvm(struct kgd_mem *mem, struct kfd_mem_attachment *entry, struct amdgpu_sync *sync) { @@ -1260,11 +1260,18 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, struct amdgpu_device *adev = entry->adev; struct amdgpu_vm *vm = bo_va->base.vm; + if (bo_va->queue_refcount) { + pr_debug("bo_va->queue_refcount %d\n", bo_va->queue_refcount); + return -EBUSY; + } + amdgpu_vm_bo_unmap(adev, bo_va, entry->va); amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); amdgpu_sync_fence(sync, bo_va->last_pt_update); + + return 0; } static int update_gpuvm_pte(struct kgd_mem *mem, @@ -2191,7 +2198,10 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", entry->va, entry->va + bo_size, entry); - unmap_bo_from_gpuvm(mem, entry, ctx.sync); + ret = unmap_bo_from_gpuvm(mem, entry, ctx.sync); + if (ret) + goto unreserve_out; + entry->is_mapped = false; mem->mapped_to_gpu_memory--; @@ -2226,11 +2236,12 @@ int amdgpu_amdkfd_gpuvm_sync_memory( /** * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count * @bo: Buffer object to be mapped + * @bo_gart: Return bo reference * * Before return, bo reference count is incremented. To release the reference and unpin/ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem. */ -int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo) +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart) { int ret; @@ -2257,7 +2268,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo) amdgpu_bo_unreserve(bo); - bo = amdgpu_bo_ref(bo); + *bo_gart = amdgpu_bo_ref(bo); return 0; @@ -3200,12 +3211,13 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, return 0; } -bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem) +bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem) { + struct amdgpu_vm *vm = drm_priv_to_vm(drm_priv); struct kfd_mem_attachment *entry; list_for_each_entry(entry, &mem->attachments, list) { - if (entry->is_mapped && entry->adev == adev) + if (entry->is_mapped && entry->bo_va->base.vm == vm) return true; } return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 7dc102f0bc1d..0c8975ac5af9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1018,8 +1018,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, if (clock_type == COMPUTE_ENGINE_PLL_PARAM) { args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->post_div = args.v3.ucPostDiv; dividers->enable_post_div = (args.v3.ucCntlFlag & @@ -1039,8 +1040,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, if (strobe_mode) args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->post_div = args.v5.ucPostDiv; dividers->enable_post_div = (args.v5.ucCntlFlag & @@ -1058,8 +1060,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, /* fusion */ args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */ - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->post_divider = dividers->post_div = args.v4.ucPostDiv; dividers->real_clock = le32_to_cpu(args.v4.ulClock); @@ -1070,8 +1073,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, args.v6_in.ulClock.ulComputeClockFlag = clock_type; args.v6_in.ulClock.ulClockFreq = cpu_to_le32(clock); /* 10 khz */ - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->whole_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDiv); dividers->frac_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDivFrac); @@ -1113,8 +1117,9 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev, if (strobe_mode) args.ucInputFlag |= MPLL_INPUT_FLAG_STROBE_MODE_EN; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; mpll_param->clkfrac = le16_to_cpu(args.ulFbDiv.usFbDivFrac); mpll_param->clkf = le16_to_cpu(args.ulFbDiv.usFbDiv); @@ -1211,8 +1216,9 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, args.v2.ucVoltageMode = 0; args.v2.usVoltageLevel = 0; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; *voltage = le16_to_cpu(args.v2.usVoltageLevel); break; @@ -1221,8 +1227,9 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL; args.v3.usVoltageLevel = cpu_to_le16(voltage_id); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; *voltage = le16_to_cpu(args.v3.usVoltageLevel); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 618e469e3622..42e64bce661e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -431,6 +431,11 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) goto success; } + if (amdgpu_read_platform_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from platform\n"); + goto success; + } + if (amdgpu_read_bios(adev)) { dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); goto success; @@ -446,11 +451,6 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) goto success; } - if (amdgpu_read_platform_bios(adev)) { - dev_info(adev->dev, "Fetched VBIOS from platform\n"); - goto success; - } - dev_err(adev->dev, "Unable to locate a BIOS ROM\n"); return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index c3d89088123d..16153d275d7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -414,7 +414,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->pm.fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->pm.fw, "%s", fw_name); if (err) { DRM_ERROR("Failed to load firmware \"%s\"", fw_name); amdgpu_ucode_release(&adev->pm.fw); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index cae7479c3ecf..344e0a9ee08a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -249,11 +249,7 @@ amdgpu_connector_find_encoder(struct drm_connector *connector, static struct edid * amdgpu_connector_get_hardcoded_edid(struct amdgpu_device *adev) { - if (adev->mode_info.bios_hardcoded_edid) { - return kmemdup((unsigned char *)adev->mode_info.bios_hardcoded_edid, - adev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL); - } - return NULL; + return drm_edid_duplicate(drm_edid_raw(adev->mode_info.bios_hardcoded_edid)); } static void amdgpu_connector_get_edid(struct drm_connector *connector) @@ -442,6 +438,9 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, continue; mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); + if (!mode) + return; + drm_mode_probed_add(connector, mode); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 6dfdff58bffd..1e475eb01417 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -263,6 +263,10 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, if (size < sizeof(struct drm_amdgpu_bo_list_in)) goto free_partial_kdata; + /* Only a single BO list is allowed to simplify handling. */ + if (p->bo_list) + ret = -EINVAL; + ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata); if (ret) goto free_partial_kdata; @@ -292,6 +296,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, num_ibs[i], &p->jobs[i]); if (ret) goto free_all_kdata; + p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id]; } p->gang_leader = p->jobs[p->gang_leader_idx]; @@ -1106,7 +1111,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) struct drm_gpu_scheduler *sched = entity->rq->sched; struct amdgpu_ring *ring = to_amdgpu_ring(sched); - if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) + if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub)) return -EINVAL; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 0e1a11b6b989..cbef720de779 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2026,100 +2026,6 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL, DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL, amdgpu_debugfs_sclk_set, "%llu\n"); -static ssize_t amdgpu_reset_dump_register_list_read(struct file *f, - char __user *buf, size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; - char reg_offset[12]; - int i, ret, len = 0; - - if (*pos) - return 0; - - memset(reg_offset, 0, 12); - ret = down_read_killable(&adev->reset_domain->sem); - if (ret) - return ret; - - for (i = 0; i < adev->reset_info.num_regs; i++) { - sprintf(reg_offset, "0x%x\n", adev->reset_info.reset_dump_reg_list[i]); - up_read(&adev->reset_domain->sem); - if (copy_to_user(buf + len, reg_offset, strlen(reg_offset))) - return -EFAULT; - - len += strlen(reg_offset); - ret = down_read_killable(&adev->reset_domain->sem); - if (ret) - return ret; - } - - up_read(&adev->reset_domain->sem); - *pos += len; - - return len; -} - -static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, - const char __user *buf, size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; - char reg_offset[11]; - uint32_t *new = NULL, *tmp = NULL; - unsigned int len = 0; - int ret, i = 0; - - do { - memset(reg_offset, 0, 11); - if (copy_from_user(reg_offset, buf + len, - min(10, (size-len)))) { - ret = -EFAULT; - goto error_free; - } - - new = krealloc_array(tmp, i + 1, sizeof(uint32_t), GFP_KERNEL); - if (!new) { - ret = -ENOMEM; - goto error_free; - } - tmp = new; - if (sscanf(reg_offset, "%X %n", &tmp[i], &ret) != 1) { - ret = -EINVAL; - goto error_free; - } - - len += ret; - i++; - } while (len < size); - - new = kmalloc_array(i, sizeof(uint32_t), GFP_KERNEL); - if (!new) { - ret = -ENOMEM; - goto error_free; - } - ret = down_write_killable(&adev->reset_domain->sem); - if (ret) - goto error_free; - - swap(adev->reset_info.reset_dump_reg_list, tmp); - swap(adev->reset_info.reset_dump_reg_value, new); - adev->reset_info.num_regs = i; - up_write(&adev->reset_domain->sem); - ret = size; - -error_free: - if (tmp != new) - kfree(tmp); - kfree(new); - return ret; -} - -static const struct file_operations amdgpu_reset_dump_register_list = { - .owner = THIS_MODULE, - .read = amdgpu_reset_dump_register_list_read, - .write = amdgpu_reset_dump_register_list_write, - .llseek = default_llseek -}; - int amdgpu_debugfs_init(struct amdgpu_device *adev) { struct dentry *root = adev_to_drm(adev)->primary->debugfs_root; @@ -2204,8 +2110,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) &amdgpu_debugfs_vm_info_fops); debugfs_create_file("amdgpu_benchmark", 0200, root, adev, &amdgpu_benchmark_fops); - debugfs_create_file("amdgpu_reset_dump_register_list", 0644, root, adev, - &amdgpu_reset_dump_register_list); adev->debugfs_vbios_blob.data = adev->bios; adev->debugfs_vbios_blob.size = adev->bios_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index f0a44d0dec27..cf2b4dd4d865 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -203,7 +203,7 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, struct amdgpu_coredump_info *coredump = data; struct drm_print_iterator iter; struct amdgpu_vm_fault_info *fault_info; - int i, ver; + int ver; iter.data = buffer; iter.offset = 0; @@ -236,7 +236,7 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, drm_printf(&p, "\nSOC Memory Information\n"); drm_printf(&p, "real vram size: %llu\n", coredump->adev->gmc.real_vram_size); drm_printf(&p, "visible vram size: %llu\n", coredump->adev->gmc.visible_vram_size); - drm_printf(&p, "visible vram size: %llu\n", coredump->adev->mman.gtt_mgr.manager.size); + drm_printf(&p, "gtt size: %llu\n", coredump->adev->mman.gtt_mgr.manager.size); /* GDS Config */ drm_printf(&p, "\nGDS Config\n"); @@ -317,14 +317,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, if (coredump->reset_vram_lost) drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (coredump->adev->reset_info.num_regs) { - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - - for (i = 0; i < coredump->adev->reset_info.num_regs; i++) - drm_printf(&p, "0x%08x: 0x%08x\n", - coredump->adev->reset_info.reset_dump_reg_list[i], - coredump->adev->reset_info.reset_dump_reg_value[i]); - } return count - iter.remain; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1cd7d355689c..49ef22dcf7fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1916,6 +1916,8 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) */ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) { + int i; + if (amdgpu_sched_jobs < 4) { dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); @@ -1970,6 +1972,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + for (i = 0; i < MAX_XCP; i++) + adev->enforce_isolation[i] = !!enforce_isolation; + return 0; } @@ -2471,6 +2476,7 @@ out: */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { + struct amdgpu_ip_block *ip_block; struct pci_dev *parent; int i, r; bool total; @@ -2608,7 +2614,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (!total) return -ENODEV; - amdgpu_amdkfd_device_probe(adev); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + if (ip_block->status.valid != false) + amdgpu_amdkfd_device_probe(adev); + adev->cg_flags &= amdgpu_cg_mask; adev->pg_flags &= amdgpu_pg_mask; @@ -3948,6 +3957,27 @@ static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev) adev->ram_is_direct_mapped = true; } +#if defined(CONFIG_HSA_AMD_P2P) +/** + * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled. + * + * @adev: amdgpu_device pointer + * + * return if IOMMU remapping bar address + */ +static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev) +{ + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev(adev->dev); + if (domain && (domain->type == IOMMU_DOMAIN_DMA || + domain->type == IOMMU_DOMAIN_DMA_FQ)) + return true; + + return false; +} +#endif + static const struct attribute *amdgpu_dev_attributes[] = { &dev_attr_pcie_replay_count.attr, NULL @@ -4055,6 +4085,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->notifier_lock); mutex_init(&adev->pm.stable_pstate_ctx_lock); mutex_init(&adev->benchmark_mutex); + mutex_init(&adev->gfx.reset_sem_mutex); + /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */ + mutex_init(&adev->enforce_isolation_mutex); + mutex_init(&adev->gfx.kfd_sch_mutex); amdgpu_device_init_apu_flags(adev); @@ -4086,6 +4120,21 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, amdgpu_device_delay_enable_gfx_off); + /* + * Initialize the enforce_isolation work structures for each XCP + * partition. This work handler is responsible for enforcing shader + * isolation on AMD GPUs. It counts the number of emitted fences for + * each GFX and compute ring. If there are any fences, it schedules + * the `enforce_isolation_work` to be run after a delay. If there are + * no fences, it signals the Kernel Fusion Driver (KFD) to resume the + * runqueue. + */ + for (i = 0; i < MAX_XCP; i++) { + INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work, + amdgpu_gfx_enforce_isolation_handler); + adev->gfx.enforce_isolation[i].adev = adev; + adev->gfx.enforce_isolation[i].xcp_id = i; + } INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); @@ -5278,16 +5327,15 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, { int i, r = 0; struct amdgpu_job *job = NULL; + struct amdgpu_device *tmp_adev = reset_context->reset_req_dev; bool need_full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); if (reset_context->reset_req_dev == adev) job = reset_context->job; - if (amdgpu_sriov_vf(adev)) { - /* stop the data exchange thread */ - amdgpu_virt_fini_data_exchange(adev); - } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_pre_reset(adev); amdgpu_fence_driver_isr_toggle(adev, true); @@ -5336,6 +5384,16 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, } } + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { + dev_info(tmp_adev->dev, "Dumping IP State\n"); + /* Trigger ip dump before we reset the asic */ + for (i = 0; i < tmp_adev->num_ip_blocks; i++) + if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) + tmp_adev->ip_blocks[i].version->funcs + ->dump_ip_state((void *)tmp_adev); + dev_info(tmp_adev->dev, "Dumping IP State Completed\n"); + } + if (need_full_reset) r = amdgpu_device_ip_suspend(adev); if (need_full_reset) @@ -5348,47 +5406,17 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, return r; } -static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) -{ - int i; - - lockdep_assert_held(&adev->reset_domain->sem); - - for (i = 0; i < adev->reset_info.num_regs; i++) { - adev->reset_info.reset_dump_reg_value[i] = - RREG32(adev->reset_info.reset_dump_reg_list[i]); - - trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i], - adev->reset_info.reset_dump_reg_value[i]); - } - - return 0; -} - int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context) { struct amdgpu_device *tmp_adev = NULL; bool need_full_reset, skip_hw_reset, vram_lost = false; int r = 0; - uint32_t i; /* Try reset handler method first */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); - if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { - amdgpu_reset_reg_dumps(tmp_adev); - - dev_info(tmp_adev->dev, "Dumping IP State\n"); - /* Trigger ip dump before we reset the asic */ - for (i = 0; i < tmp_adev->num_ip_blocks; i++) - if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) - tmp_adev->ip_blocks[i].version->funcs - ->dump_ip_state((void *)tmp_adev); - dev_info(tmp_adev->dev, "Dumping IP State Completed\n"); - } - reset_context->reset_device_list = device_list_handle; r = amdgpu_reset_perform_reset(tmp_adev, reset_context); /* If reset handler not implemented, continue; otherwise return */ @@ -5513,7 +5541,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, * bad_page_threshold value to fix this once * probing driver again. */ - if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) { + if (!amdgpu_ras_is_rma(tmp_adev)) { /* must succeed. */ amdgpu_ras_resume(tmp_adev); } else { @@ -5891,8 +5919,14 @@ skip_hw_reset: tmp_adev->asic_reset_res = 0; if (r) { - /* bad news, how to tell it to userspace ? */ - dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter)); + /* bad news, how to tell it to userspace ? + * for ras error, we should report GPU bad status instead of + * reset failure + */ + if (reset_context->src != AMDGPU_RESET_SRC_RAS || + !amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) + dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", + atomic_read(&tmp_adev->gpu_reset_counter)); amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); } else { dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); @@ -6138,18 +6172,24 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev) { #ifdef CONFIG_HSA_AMD_P2P - uint64_t address_mask = peer_adev->dev->dma_mask ? - ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); - resource_size_t aper_limit = - adev->gmc.aper_base + adev->gmc.aper_size - 1; bool p2p_access = !adev->gmc.xgmi.connected_to_cpu && !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); - return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && - adev->gmc.real_vram_size == adev->gmc.visible_vram_size && - !(adev->gmc.aper_base & address_mask || - aper_limit & address_mask)); + bool is_large_bar = adev->gmc.visible_vram_size && + adev->gmc.real_vram_size == adev->gmc.visible_vram_size; + bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev); + + if (!p2p_addressable) { + uint64_t address_mask = peer_adev->dev->dma_mask ? + ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); + resource_size_t aper_limit = + adev->gmc.aper_base + adev->gmc.aper_size - 1; + + p2p_addressable = !(adev->gmc.aper_base & address_mask || + aper_limit & address_mask); + } + return is_large_bar && p2p_access && p2p_addressable; #else return false; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 7b561e8e3caf..4bd61c169ca8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1500,6 +1500,7 @@ union gc_info { struct gc_info_v1_0 v1; struct gc_info_v1_1 v1_1; struct gc_info_v1_2 v1_2; + struct gc_info_v1_3 v1_3; struct gc_info_v2_0 v2; struct gc_info_v2_1 v2_1; }; @@ -1558,6 +1559,16 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance); adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu); } + if (le16_to_cpu(gc_info->v1.header.version_minor) >= 3) { + adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v1_3.gc_tcp_size_per_cu); + adev->gfx.config.gc_tcp_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcp_cache_line_size); + adev->gfx.config.gc_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_size_per_sqc); + adev->gfx.config.gc_instruction_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_line_size); + adev->gfx.config.gc_scalar_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_size_per_sqc); + adev->gfx.config.gc_scalar_data_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_line_size); + adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v1_3.gc_tcc_size); + adev->gfx.config.gc_tcc_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcc_cache_line_size); + } break; case 2: adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index 8283d682f543..7cc980bf4725 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -55,8 +55,6 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); void amdgpu_gart_table_vram_free(struct amdgpu_device *adev); -int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); -void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); int amdgpu_gart_init(struct amdgpu_device *adev); void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev); void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c770cb201e64..b4efeef848de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -24,10 +24,13 @@ */ #include +#include + #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_rlc.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #include "amdgpu_xcp.h" #include "amdgpu_xgmi.h" @@ -882,8 +885,11 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r int r; if (amdgpu_ras_is_supported(adev, ras_block->block)) { - if (!amdgpu_persistent_edc_harvesting_supported(adev)) - amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); + if (!amdgpu_persistent_edc_harvesting_supported(adev)) { + r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); + if (r) + return r; + } r = amdgpu_ras_block_late_init(adev, ras_block); if (r) @@ -1027,7 +1033,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ pr_err("critical bug! too many kiq readers\n"); goto failed_unlock; } - amdgpu_ring_alloc(ring, 32); + r = amdgpu_ring_alloc(ring, 32); + if (r) + goto failed_unlock; + amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) @@ -1093,7 +1102,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 } spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); + r = amdgpu_ring_alloc(ring, 32); + if (r) + goto failed_unlock; + amdgpu_ring_emit_wreg(ring, reg, v); r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) @@ -1129,6 +1141,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 failed_undo: amdgpu_ring_undo(ring); +failed_unlock: spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq_write: dev_err(adev->dev, "failed to write reg:%x\n", reg); @@ -1381,6 +1394,214 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, return sysfs_emit(buf, "%s\n", supported_partition); } +static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + long timeout = msecs_to_jiffies(1000); + struct dma_fence *f = NULL; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + int i, r; + + r = amdgpu_job_alloc_with_ib(adev, NULL, NULL, + 64, AMDGPU_IB_POOL_DIRECT, + &job); + if (r) + goto err; + + job->enforce_isolation = true; + + ib = &job->ibs[0]; + for (i = 0; i <= ring->funcs->align_mask; ++i) + ib->ptr[i] = ring->funcs->nop; + ib->length_dw = ring->funcs->align_mask + 1; + + r = amdgpu_job_submit_direct(job, ring, &f); + if (r) + goto err_free; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) + r = -ETIMEDOUT; + else if (r > 0) + r = 0; + + amdgpu_ib_free(adev, ib, f); + dma_fence_put(f); + + return 0; + +err_free: + amdgpu_job_free(job); + amdgpu_ib_free(adev, ib, f); +err: + return r; +} + +static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id) +{ + int num_xcc = NUM_XCC(adev->gfx.xcc_mask); + struct amdgpu_ring *ring; + int num_xcc_to_clear; + int i, r, xcc_id; + + if (adev->gfx.num_xcc_per_xcp) + num_xcc_to_clear = adev->gfx.num_xcc_per_xcp; + else + num_xcc_to_clear = 1; + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; + if ((ring->xcp_id == xcp_id) && ring->sched.ready) { + r = amdgpu_gfx_run_cleaner_shader_job(ring); + if (r) + return r; + num_xcc_to_clear--; + break; + } + } + } + + if (num_xcc_to_clear) + return -ENOENT; + + return 0; +} + +static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int ret; + long value; + + if (amdgpu_in_reset(adev)) + return -EPERM; + if (adev->in_suspend && !adev->in_runpm) + return -EPERM; + + ret = kstrtol(buf, 0, &value); + + if (ret) + return -EINVAL; + + if (value < 0) + return -EINVAL; + + if (adev->xcp_mgr) { + if (value >= adev->xcp_mgr->num_xcps) + return -EINVAL; + } else { + if (value > 1) + return -EINVAL; + } + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); + return ret; + } + + ret = amdgpu_gfx_run_cleaner_shader(adev, value); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + if (ret) + return ret; + + return count; +} + +static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int i; + ssize_t size = 0; + + if (adev->xcp_mgr) { + for (i = 0; i < adev->xcp_mgr->num_xcps; i++) { + size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]); + if (i < (adev->xcp_mgr->num_xcps - 1)) + size += sysfs_emit_at(buf, size, " "); + } + buf[size++] = '\n'; + } else { + size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]); + } + + return size; +} + +static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + long partition_values[MAX_XCP] = {0}; + int ret, i, num_partitions; + const char *input_buf = buf; + + for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { + ret = sscanf(input_buf, "%ld", &partition_values[i]); + if (ret <= 0) + break; + + /* Move the pointer to the next value in the string */ + input_buf = strchr(input_buf, ' '); + if (input_buf) { + input_buf++; + } else { + i++; + break; + } + } + num_partitions = i; + + if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps) + return -EINVAL; + + if (!adev->xcp_mgr && num_partitions != 1) + return -EINVAL; + + for (i = 0; i < num_partitions; i++) { + if (partition_values[i] != 0 && partition_values[i] != 1) + return -EINVAL; + } + + mutex_lock(&adev->enforce_isolation_mutex); + + for (i = 0; i < num_partitions; i++) { + if (adev->enforce_isolation[i] && !partition_values[i]) { + /* Going from enabled to disabled */ + amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); + } else if (!adev->enforce_isolation[i] && partition_values[i]) { + /* Going from disabled to enabled */ + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + } + adev->enforce_isolation[i] = partition_values[i]; + } + + mutex_unlock(&adev->enforce_isolation_mutex); + + return count; +} + +static DEVICE_ATTR(run_cleaner_shader, 0200, + NULL, amdgpu_gfx_set_run_cleaner_shader); + +static DEVICE_ATTR(enforce_isolation, 0644, + amdgpu_gfx_get_enforce_isolation, + amdgpu_gfx_set_enforce_isolation); + static DEVICE_ATTR(current_compute_partition, 0644, amdgpu_gfx_get_current_compute_partition, amdgpu_gfx_set_compute_partition); @@ -1406,3 +1627,229 @@ void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_current_compute_partition); device_remove_file(adev->dev, &dev_attr_available_compute_partition); } + +int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) +{ + int r; + + if (!amdgpu_sriov_vf(adev)) { + r = device_create_file(adev->dev, &dev_attr_enforce_isolation); + if (r) + return r; + } + + r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader); + if (r) + return r; + + return 0; +} + +void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev)) + device_remove_file(adev->dev, &dev_attr_enforce_isolation); + device_remove_file(adev->dev, &dev_attr_run_cleaner_shader); +} + +int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size) +{ + if (!adev->gfx.enable_cleaner_shader) + return -EOPNOTSUPP; + + return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.cleaner_shader_obj, + &adev->gfx.cleaner_shader_gpu_addr, + (void **)&adev->gfx.cleaner_shader_cpu_ptr); +} + +void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev) +{ + if (!adev->gfx.enable_cleaner_shader) + return; + + amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj, + &adev->gfx.cleaner_shader_gpu_addr, + (void **)&adev->gfx.cleaner_shader_cpu_ptr); +} + +void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size, + const void *cleaner_shader_ptr) +{ + if (!adev->gfx.enable_cleaner_shader) + return; + + if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr) + memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr, + cleaner_shader_size); +} + +/** + * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver) + * @adev: amdgpu_device pointer + * @idx: Index of the scheduler to control + * @enable: Whether to enable or disable the KFD scheduler + * + * This function is used to control the KFD (Kernel Fusion Driver) scheduler + * from the KGD. It is part of the cleaner shader feature. This function plays + * a key role in enforcing process isolation on the GPU. + * + * The function uses a reference count mechanism (kfd_sch_req_count) to keep + * track of the number of requests to enable the KFD scheduler. When a request + * to enable the KFD scheduler is made, the reference count is decremented. + * When the reference count reaches zero, a delayed work is scheduled to + * enforce isolation after a delay of GFX_SLICE_PERIOD. + * + * When a request to disable the KFD scheduler is made, the function first + * checks if the reference count is zero. If it is, it cancels the delayed work + * for enforcing isolation and checks if the KFD scheduler is active. If the + * KFD scheduler is active, it sends a request to stop the KFD scheduler and + * sets the KFD scheduler state to inactive. Then, it increments the reference + * count. + * + * The function is synchronized using the kfd_sch_mutex to ensure that the KFD + * scheduler state and reference count are updated atomically. + * + * Note: If the reference count is already zero when a request to enable the + * KFD scheduler is made, it means there's an imbalance bug somewhere. The + * function triggers a warning in this case. + */ +static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, + bool enable) +{ + mutex_lock(&adev->gfx.kfd_sch_mutex); + + if (enable) { + /* If the count is already 0, it means there's an imbalance bug somewhere. + * Note that the bug may be in a different caller than the one which triggers the + * WARN_ON_ONCE. + */ + if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) { + dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n"); + goto unlock; + } + + adev->gfx.kfd_sch_req_count[idx]--; + + if (adev->gfx.kfd_sch_req_count[idx] == 0 && + adev->gfx.kfd_sch_inactive[idx]) { + schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, + GFX_SLICE_PERIOD); + } + } else { + if (adev->gfx.kfd_sch_req_count[idx] == 0) { + cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work); + if (!adev->gfx.kfd_sch_inactive[idx]) { + amdgpu_amdkfd_stop_sched(adev, idx); + adev->gfx.kfd_sch_inactive[idx] = true; + } + } + + adev->gfx.kfd_sch_req_count[idx]++; + } + +unlock: + mutex_unlock(&adev->gfx.kfd_sch_mutex); +} + +/** + * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation + * + * @work: work_struct. + * + * This function is the work handler for enforcing shader isolation on AMD GPUs. + * It counts the number of emitted fences for each GFX and compute ring. If there + * are any fences, it schedules the `enforce_isolation_work` to be run after a + * delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion + * Driver (KFD) to resume the runqueue. The function is synchronized using the + * `enforce_isolation_mutex`. + */ +void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) +{ + struct amdgpu_isolation_work *isolation_work = + container_of(work, struct amdgpu_isolation_work, work.work); + struct amdgpu_device *adev = isolation_work->adev; + u32 i, idx, fences = 0; + + if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = isolation_work->xcp_id; + + if (idx >= MAX_XCP) + return; + + mutex_lock(&adev->enforce_isolation_mutex); + for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) { + if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id) + fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]); + } + for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) { + if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id) + fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]); + } + if (fences) { + schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, + GFX_SLICE_PERIOD); + } else { + /* Tell KFD to resume the runqueue */ + if (adev->kfd.init_complete) { + WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]); + WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]); + amdgpu_amdkfd_start_sched(adev, idx); + adev->gfx.kfd_sch_inactive[idx] = false; + } + } + mutex_unlock(&adev->enforce_isolation_mutex); +} + +void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 idx; + + if (!adev->gfx.enable_cleaner_shader) + return; + + if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = ring->xcp_id; + + if (idx >= MAX_XCP) + return; + + mutex_lock(&adev->enforce_isolation_mutex); + if (adev->enforce_isolation[idx]) { + if (adev->kfd.init_complete) + amdgpu_gfx_kfd_sch_ctrl(adev, idx, false); + } + mutex_unlock(&adev->enforce_isolation_mutex); +} + +void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 idx; + + if (!adev->gfx.enable_cleaner_shader) + return; + + if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = ring->xcp_id; + + if (idx >= MAX_XCP) + return; + + mutex_lock(&adev->enforce_isolation_mutex); + if (adev->enforce_isolation[idx]) { + if (adev->kfd.init_complete) + amdgpu_gfx_kfd_sch_ctrl(adev, idx, true); + } + mutex_unlock(&adev->enforce_isolation_mutex); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ddda94e49db4..5644e10a86a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -34,6 +34,7 @@ #include "soc15.h" #include "amdgpu_ras.h" #include "amdgpu_ring_mux.h" +#include "amdgpu_xcp.h" /* GFX current status */ #define AMDGPU_GFX_NORMAL_MODE 0x00000000L @@ -138,6 +139,10 @@ struct kiq_pm4_funcs { void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, uint16_t pasid, uint32_t flush_type, bool all_hub); + void (*kiq_reset_hw_queue)(struct amdgpu_ring *kiq_ring, + uint32_t queue_type, uint32_t me_id, + uint32_t pipe_id, uint32_t queue_id, + uint32_t xcc_id, uint32_t vmid); /* Packet sizes */ int set_resources_size; int map_queues_size; @@ -240,6 +245,12 @@ struct amdgpu_gfx_config { uint32_t gc_tcp_size_per_cu; uint32_t gc_num_cu_per_sqc; uint32_t gc_tcc_size; + uint32_t gc_tcp_cache_line_size; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_instruction_cache_line_size; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_line_size; + uint32_t gc_tcc_cache_line_size; }; struct amdgpu_cu_info { @@ -339,6 +350,12 @@ struct amdgpu_me { DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES); }; +struct amdgpu_isolation_work { + struct amdgpu_device *adev; + u32 xcp_id; + struct delayed_work work; +}; + struct amdgpu_gfx { struct mutex gpu_clock_mutex; struct amdgpu_gfx_config config; @@ -391,6 +408,7 @@ struct amdgpu_gfx { struct amdgpu_irq_src eop_irq; struct amdgpu_irq_src priv_reg_irq; struct amdgpu_irq_src priv_inst_irq; + struct amdgpu_irq_src bad_op_irq; struct amdgpu_irq_src cp_ecc_error_irq; struct amdgpu_irq_src sq_irq; struct amdgpu_irq_src rlc_gc_fed_irq; @@ -439,6 +457,21 @@ struct amdgpu_gfx { uint32_t *ip_dump_core; uint32_t *ip_dump_compute_queues; uint32_t *ip_dump_gfx_queues; + + struct mutex reset_sem_mutex; + + /* cleaner shader */ + struct amdgpu_bo *cleaner_shader_obj; + unsigned int cleaner_shader_size; + u64 cleaner_shader_gpu_addr; + void *cleaner_shader_cpu_ptr; + const void *cleaner_shader_ptr; + bool enable_cleaner_shader; + struct amdgpu_isolation_work enforce_isolation[MAX_XCP]; + /* Mutex for synchronizing KFD scheduler operations */ + struct mutex kfd_sch_mutex; + u64 kfd_sch_req_count[MAX_XCP]; + bool kfd_sch_inactive[MAX_XCP]; }; struct amdgpu_gfx_ras_reg_entry { @@ -540,6 +573,17 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, void *ras_error_status, void (*func)(struct amdgpu_device *adev, void *ras_error_status, int xcc_id)); +int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size); +void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev); +void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size, + const void *cleaner_shader_ptr); +int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev); +void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev); +void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work); +void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring); +void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); static inline const char *amdgpu_gfx_compute_mode_desc(int mode) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h index 103a837ccc71..c7b44aeb671b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -38,8 +38,6 @@ struct amdgpu_gfxhub_funcs { void (*mode2_save_regs)(struct amdgpu_device *adev); void (*mode2_restore_regs)(struct amdgpu_device *adev); void (*halt)(struct amdgpu_device *adev); - bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, - int xcc_id); }; struct amdgpu_gfxhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index b49b3650fd62..17a19d49d30a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -786,7 +786,8 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, goto failed_kiq; might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY && + !amdgpu_reset_pending(adev->reset_domain)) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index b6a8bddada4c..92d27d32de41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -424,7 +424,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r || !idle) goto error; - if (amdgpu_vmid_uses_reserved(vm, vmhub)) { + if (amdgpu_vmid_uses_reserved(adev, vm, vmhub)) { r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r || !id) goto error; @@ -476,15 +476,19 @@ error: /* * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID + * @adev: amdgpu_device pointer * @vm: the VM to check * @vmhub: the VMHUB which will be used * * Returns: True if the VM will use a reserved VMID. */ -bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) +bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, unsigned int vmhub) { return vm->reserved_vmid[vmhub] || - (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0))); + (adev->enforce_isolation[(vm->root.bo->xcp_id != AMDGPU_XCP_NO_PARTITION) ? + vm->root.bo->xcp_id : 0] && + AMDGPU_IS_GFXHUB(vmhub)); } int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, @@ -600,9 +604,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) } } /* alloc a default reserved vmid to enforce isolation */ - if (enforce_isolation) - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); - + for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { + if (adev->enforce_isolation[i]) + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + } } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 240fa6751260..4012fb2dd08a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -78,7 +78,8 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); -bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); +bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, unsigned int vmhub); int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub); void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h index 44e2ea8c9728..b03664c66dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h @@ -49,6 +49,7 @@ struct amdgpu_isp { const struct isp_funcs *funcs; struct mfd_cell *isp_cell; struct resource *isp_res; + struct resource *isp_i2c_res; struct isp_platform_data *isp_pdata; unsigned int harvest_config; const struct firmware *fw; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 908e13455152..597489dea114 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -72,6 +72,26 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) dma_fence_set_error(&s_job->s_fence->finished, -ETIME); + /* attempt a per ring reset */ + if (amdgpu_gpu_recovery && + ring->funcs->reset) { + /* stop the scheduler, but don't mess with the + * bad job yet because if ring reset fails + * we'll fall back to full GPU reset. + */ + drm_sched_wqueue_stop(&ring->sched); + r = amdgpu_ring_reset(ring, job->vmid); + if (!r) { + if (amdgpu_ring_sched_ready(ring)) + drm_sched_stop(&ring->sched, s_job); + atomic_inc(&ring->adev->gpu_reset_counter); + amdgpu_fence_driver_force_completion(ring); + if (amdgpu_ring_sched_ready(ring)) + drm_sched_start(&ring->sched); + goto exit; + } + } + if (amdgpu_device_should_recover_gpu(ring->adev)) { struct amdgpu_reset_context reset_context; memset(&reset_context, 0, sizeof(reset_context)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index a963a25ddd62..ce6b9ba967ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -76,6 +76,9 @@ struct amdgpu_job { /* job_run_counter >= 1 means a resubmit job */ uint32_t job_run_counter; + /* enforce isolation */ + bool enforce_isolation; + uint32_t num_ibs; struct amdgpu_ib ibs[]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 0a799942343d..016a6f6c4267 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -43,6 +43,7 @@ #include "amdgpu_gem.h" #include "amdgpu_display.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #include "amd_pcie.h" void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) @@ -778,6 +779,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ? -EFAULT : 0; } case AMDGPU_INFO_READ_MMR_REG: { + int ret = 0; unsigned int n, alloc_size; uint32_t *regs; unsigned int se_num = (info->read_mmr_reg.instance >> @@ -787,24 +789,37 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) AMDGPU_INFO_MMR_SH_INDEX_SHIFT) & AMDGPU_INFO_MMR_SH_INDEX_MASK; + if (!down_read_trylock(&adev->reset_domain->sem)) + return -ENOENT; + /* set full masks if the userspace set all bits * in the bitfields */ - if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) + if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) { se_num = 0xffffffff; - else if (se_num >= AMDGPU_GFX_MAX_SE) - return -EINVAL; - if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) - sh_num = 0xffffffff; - else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) - return -EINVAL; + } else if (se_num >= AMDGPU_GFX_MAX_SE) { + ret = -EINVAL; + goto out; + } - if (info->read_mmr_reg.count > 128) - return -EINVAL; + if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) { + sh_num = 0xffffffff; + } else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) { + ret = -EINVAL; + goto out; + } + + if (info->read_mmr_reg.count > 128) { + ret = -EINVAL; + goto out; + } regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); - if (!regs) - return -ENOMEM; + if (!regs) { + ret = -ENOMEM; + goto out; + } + alloc_size = info->read_mmr_reg.count * sizeof(*regs); amdgpu_gfx_off_ctrl(adev, false); @@ -816,13 +831,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) info->read_mmr_reg.dword_offset + i); kfree(regs); amdgpu_gfx_off_ctrl(adev, true); - return -EFAULT; + ret = -EFAULT; + goto out; } } amdgpu_gfx_off_ctrl(adev, true); n = copy_to_user(out, regs, min(size, alloc_size)); kfree(regs); - return n ? -EFAULT : 0; + ret = (n ? -EFAULT : 0); +out: + up_read(&adev->reset_domain->sem); + return ret; } case AMDGPU_INFO_DEV_INFO: { struct drm_amdgpu_info_device *dev_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 2542bd7aa7c7..18ee60378727 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -396,7 +396,6 @@ static int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set, struct ras_err_data *err_data) { - struct ras_err_addr err_addr; struct amdgpu_smuio_mcm_config_info mcm_info; struct mca_bank_node *node, *tmp; struct mca_bank_entry *entry; @@ -421,27 +420,20 @@ static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device *adev, enum amdgpu_r continue; memset(&mcm_info, 0, sizeof(mcm_info)); - memset(&err_addr, 0, sizeof(err_addr)); mcm_info.socket_id = entry->info.socket_id; mcm_info.die_id = entry->info.aid; - if (blk == AMDGPU_RAS_BLOCK__UMC) { - err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS]; - err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID]; - err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR]; - } - if (type == AMDGPU_MCA_ERROR_TYPE_UE) { amdgpu_ras_error_statistic_ue_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); + &mcm_info, (uint64_t)count); } else { if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS])) amdgpu_ras_error_statistic_de_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); + &mcm_info, (uint64_t)count); else amdgpu_ras_error_statistic_ce_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); + &mcm_info, (uint64_t)count); } amdgpu_mca_bank_set_remove_node(mca_set, node); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 1cb1ec7beefe..44c74a08987d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -501,60 +501,50 @@ int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id) int amdgpu_mes_suspend(struct amdgpu_device *adev) { - struct idr *idp; - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; struct mes_suspend_gang_input input; - int r, pasid; + int r; + + if (!amdgpu_mes_suspend_resume_all_supported(adev)) + return 0; + + memset(&input, 0x0, sizeof(struct mes_suspend_gang_input)); + input.suspend_all_gangs = 1; /* * Avoid taking any other locks under MES lock to avoid circular * lock dependencies. */ amdgpu_mes_lock(&adev->mes); - - idp = &adev->mes.pasid_idr; - - idr_for_each_entry(idp, process, pasid) { - list_for_each_entry(gang, &process->gang_list, list) { - r = adev->mes.funcs->suspend_gang(&adev->mes, &input); - if (r) - DRM_ERROR("failed to suspend pasid %d gangid %d", - pasid, gang->gang_id); - } - } - + r = adev->mes.funcs->suspend_gang(&adev->mes, &input); amdgpu_mes_unlock(&adev->mes); - return 0; + if (r) + DRM_ERROR("failed to suspend all gangs"); + + return r; } int amdgpu_mes_resume(struct amdgpu_device *adev) { - struct idr *idp; - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; struct mes_resume_gang_input input; - int r, pasid; + int r; + + if (!amdgpu_mes_suspend_resume_all_supported(adev)) + return 0; + + memset(&input, 0x0, sizeof(struct mes_resume_gang_input)); + input.resume_all_gangs = 1; /* * Avoid taking any other locks under MES lock to avoid circular * lock dependencies. */ amdgpu_mes_lock(&adev->mes); - - idp = &adev->mes.pasid_idr; - - idr_for_each_entry(idp, process, pasid) { - list_for_each_entry(gang, &process->gang_list, list) { - r = adev->mes.funcs->resume_gang(&adev->mes, &input); - if (r) - DRM_ERROR("failed to resume pasid %d gangid %d", - pasid, gang->gang_id); - } - } - + r = adev->mes.funcs->resume_gang(&adev->mes, &input); amdgpu_mes_unlock(&adev->mes); - return 0; + if (r) + DRM_ERROR("failed to resume all gangs"); + + return r; } static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, @@ -793,6 +783,49 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) return 0; } +int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id) +{ + unsigned long flags; + struct amdgpu_mes_queue *queue; + struct amdgpu_mes_gang *gang; + struct mes_reset_queue_input queue_input; + int r; + + /* + * Avoid taking any other locks under MES lock to avoid circular + * lock dependencies. + */ + amdgpu_mes_lock(&adev->mes); + + /* remove the mes gang from idr list */ + spin_lock_irqsave(&adev->mes.queue_id_lock, flags); + + queue = idr_find(&adev->mes.queue_id_idr, queue_id); + if (!queue) { + spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); + amdgpu_mes_unlock(&adev->mes); + DRM_ERROR("queue id %d doesn't exist\n", queue_id); + return -EINVAL; + } + spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); + + DRM_DEBUG("try to reset queue, doorbell off = 0x%llx\n", + queue->doorbell_off); + + gang = queue->gang; + queue_input.doorbell_offset = queue->doorbell_off; + queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; + + r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to reset hardware queue, queue id = %d\n", + queue_id); + + amdgpu_mes_unlock(&adev->mes); + + return 0; +} + int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -838,6 +871,30 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, return r; } +int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct mes_reset_legacy_queue_input queue_input; + int r; + + memset(&queue_input, 0, sizeof(queue_input)); + + queue_input.queue_type = ring->funcs->type; + queue_input.doorbell_offset = ring->doorbell_index; + queue_input.pipe_id = ring->pipe; + queue_input.queue_id = ring->queue; + queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + queue_input.wptr_addr = ring->wptr_gpu_addr; + queue_input.vmid = vmid; + + r = adev->mes.funcs->reset_legacy_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to reset legacy queue\n"); + + return r; +} + uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) { struct mes_misc_op_input op_input; @@ -1533,7 +1590,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1"); } - r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name); + r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], "%s", fw_name); if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix); r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], @@ -1584,6 +1641,19 @@ out: return r; } +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) +{ + uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; + bool is_supported = false; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && + mes_rev >= 0x63) + is_supported = true; + + return is_supported; +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 0bc837dab578..a5b1ea60cac8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -248,6 +248,11 @@ struct mes_remove_queue_input { uint64_t gang_context_addr; }; +struct mes_reset_queue_input { + uint32_t doorbell_offset; + uint64_t gang_context_addr; +}; + struct mes_map_legacy_queue_input { uint32_t queue_type; uint32_t doorbell_offset; @@ -279,6 +284,16 @@ struct mes_resume_gang_input { uint64_t gang_context_addr; }; +struct mes_reset_legacy_queue_input { + uint32_t queue_type; + uint32_t doorbell_offset; + uint32_t pipe_id; + uint32_t queue_id; + uint64_t mqd_addr; + uint64_t wptr_addr; + uint32_t vmid; +}; + enum mes_misc_opcode { MES_MISC_OP_WRITE_REG, MES_MISC_OP_READ_REG, @@ -347,6 +362,12 @@ struct amdgpu_mes_funcs { int (*misc_op)(struct amdgpu_mes *mes, struct mes_misc_op_input *input); + + int (*reset_legacy_queue)(struct amdgpu_mes *mes, + struct mes_reset_legacy_queue_input *input); + + int (*reset_hw_queue)(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input); }; #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) @@ -374,6 +395,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, struct amdgpu_mes_queue_properties *qprops, int *queue_id); int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); +int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id); int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); @@ -381,6 +403,9 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, enum amdgpu_unmap_queues_action action, u64 gpu_addr, u64 seq); +int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + unsigned int vmid); uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg); int amdgpu_mes_wreg(struct amdgpu_device *adev, @@ -478,4 +503,6 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) memalloc_noreclaim_restore(mes->saved_flags); mutex_unlock(&mes->mutex_hidden); } + +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); #endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 95d676ee207f..1ca9d4ed8063 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -63,8 +63,6 @@ struct amdgpu_mmhub_funcs { uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, bool enable); - bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, - int hub_inst); }; struct amdgpu_mmhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index d002b845d8ac..5e3faefc5510 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -51,6 +51,7 @@ struct amdgpu_encoder; struct amdgpu_router; struct amdgpu_hpd; struct edid; +struct drm_edid; #define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base) #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base) @@ -326,8 +327,7 @@ struct amdgpu_mode_info { /* FMT dithering */ struct drm_property *dither_property; /* hardcoded DFP edid from BIOS */ - struct edid *bios_hardcoded_edid; - int bios_hardcoded_edid_size; + const struct drm_edid *bios_hardcoded_edid; /* firmware flags */ u32 firmware_flags; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index bc42ccbde659..d7e27957013f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -90,6 +90,12 @@ struct amdgpu_bo_va { bool cleared; bool is_xgmi; + + /* + * protected by vm reservation lock + * if non-zero, cannot unmap from GPU because user queues may still access it + */ + unsigned int queue_refcount; }; struct amdgpu_bo { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index d0307c55da50..61a2f386d9fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1223,11 +1223,11 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; amdgpu_ras_error_statistic_de_count(&obj->err_data, - &err_info->mcm_info, NULL, err_info->de_count); + &err_info->mcm_info, err_info->de_count); amdgpu_ras_error_statistic_ce_count(&obj->err_data, - &err_info->mcm_info, NULL, err_info->ce_count); + &err_info->mcm_info, err_info->ce_count); amdgpu_ras_error_statistic_ue_count(&obj->err_data, - &err_info->mcm_info, NULL, err_info->ue_count); + &err_info->mcm_info, err_info->ue_count); } } else { /* for legacy asic path which doesn't has error source info */ @@ -2153,7 +2153,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * /* gpu reset is fallback for failed and default cases. * For RMA case, amdgpu_umc_poison_handler will handle gpu reset. */ - if (poison_stat && !con->is_rma) { + if (poison_stat && !amdgpu_ras_is_rma(adev)) { event_id = amdgpu_ras_acquire_event_id(adev, type); RAS_EVENT_LOG(adev, event_id, "GPU reset for %s RAS poison consumption is issued!\n", @@ -2881,9 +2881,6 @@ static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log) { mutex_init(&ecc_log->lock); - /* Set any value as siphash key */ - memset(&ecc_log->ecc_key, 0xad, sizeof(ecc_log->ecc_key)); - INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL); ecc_log->de_queried_count = 0; ecc_log->prev_de_queried_count = 0; @@ -2948,7 +2945,7 @@ static void amdgpu_ras_do_page_retirement(struct work_struct *work) amdgpu_ras_error_data_fini(&err_data); - if (err_cnt && con->is_rma) + if (err_cnt && amdgpu_ras_is_rma(adev)) amdgpu_ras_reset_gpu(adev); amdgpu_ras_schedule_retirement_dwork(con, @@ -3049,7 +3046,7 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev, } /* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */ - if (reset_flags && !con->is_rma) { + if (reset_flags && !amdgpu_ras_is_rma(adev)) { if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; else if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) @@ -3195,7 +3192,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) * This calling fails when is_rma is true or * ret != 0. */ - if (con->is_rma || ret) + if (amdgpu_ras_is_rma(adev) || ret) goto free; if (con->eeprom_control.ras_num_recs) { @@ -3244,7 +3241,7 @@ out: * Except error threshold exceeding case, other failure cases in this * function would not fail amdgpu driver init. */ - if (!con->is_rma) + if (!amdgpu_ras_is_rma(adev)) ret = 0; else ret = -EINVAL; @@ -4287,7 +4284,7 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); /* mode1 is the only selection for RMA status */ - if (ras->is_rma) { + if (amdgpu_ras_is_rma(adev)) { ras->gpu_reset_flags = 0; ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET; } @@ -4611,8 +4608,6 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d if (!err_node) return NULL; - INIT_LIST_HEAD(&err_node->err_info.err_addr_list); - memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info)); err_data->err_list_count++; @@ -4622,21 +4617,9 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d return &err_node->err_info; } -void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *err_addr) -{ - /* This function will be retired. */ - return; -} - -void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *mca_err_addr) -{ - list_del(&mca_err_addr->node); - kfree(mca_err_addr); -} - int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count) { struct ras_err_info *err_info; @@ -4650,9 +4633,6 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, if (!err_info) return -EINVAL; - if (err_addr && err_addr->err_status) - amdgpu_ras_add_mca_err_addr(err_info, err_addr); - err_info->ue_count += count; err_data->ue_count += count; @@ -4660,8 +4640,8 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, } int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count) { struct ras_err_info *err_info; @@ -4682,8 +4662,8 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, } int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count) { struct ras_err_info *err_info; @@ -4697,9 +4677,6 @@ int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, if (!err_info) return -EINVAL; - if (err_addr && err_addr->err_status) - amdgpu_ras_add_mca_err_addr(err_info, err_addr); - err_info->de_count += count; err_data->de_count += count; @@ -4771,6 +4748,16 @@ static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev, dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm bist test failed\n", socket_id, aid_id, hbm_id, fw_status); + + if (AMDGPU_RAS_GPU_ERR_DATA_ABORT(boot_error)) + dev_info(adev->dev, + "socket: %d, aid: %d, fw_status: 0x%x, data abort exception\n", + socket_id, aid_id, fw_status); + + if (AMDGPU_RAS_GPU_ERR_UNKNOWN(boot_error)) + dev_info(adev->dev, + "socket: %d, aid: %d, fw_status: 0x%x, unknown boot time errors\n", + socket_id, aid_id, fw_status); } static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev, @@ -4837,3 +4824,13 @@ void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id, va_end(args); } + +bool amdgpu_ras_is_rma(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return false; + + return con->is_rma; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index dcf1f3dbb5c4..669720a9c60a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -28,7 +28,6 @@ #include #include #include -#include #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" #include "amdgpu_smuio.h" @@ -47,6 +46,8 @@ struct amdgpu_iv_entry; #define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8) #define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11) #define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 14, 13) +#define AMDGPU_RAS_GPU_ERR_DATA_ABORT(x) AMDGPU_GET_REG_FIELD(x, 29, 29) +#define AMDGPU_RAS_GPU_ERR_UNKNOWN(x) AMDGPU_GET_REG_FIELD(x, 30, 30) #define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 100 #define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA @@ -476,16 +477,15 @@ struct ras_err_pages { }; struct ras_ecc_err { - u64 hash_index; uint64_t status; uint64_t ipid; uint64_t addr; + uint64_t pa_pfn; struct ras_err_pages err_pages; }; struct ras_ecc_log_info { struct mutex lock; - siphash_key_t ecc_key; struct radix_tree_root de_page_tree; uint64_t de_queried_count; uint64_t prev_de_queried_count; @@ -572,19 +572,11 @@ struct ras_fs_data { char debugfs_name[32]; }; -struct ras_err_addr { - struct list_head node; - uint64_t err_status; - uint64_t err_ipid; - uint64_t err_addr; -}; - struct ras_err_info { struct amdgpu_smuio_mcm_config_info mcm_info; u64 ce_count; u64 ue_count; u64 de_count; - struct list_head err_addr_list; }; struct ras_err_node { @@ -941,14 +933,14 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev, int amdgpu_ras_error_data_init(struct ras_err_data *err_data); void amdgpu_ras_error_data_fini(struct ras_err_data *err_data); int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count); int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count); int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count); void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances); int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk, const struct aca_info *aca_info, void *data); @@ -957,12 +949,6 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk) ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr, struct aca_handle *handle, char *buf, void *data); -void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, - struct ras_err_addr *err_addr); - -void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, - struct ras_err_addr *mca_err_addr); - void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); @@ -982,4 +968,5 @@ __printf(3, 4) void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id, const char *fmt, ...); +bool amdgpu_ras_is_rma(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 4ae581f3fcb5..1cb920abc2fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -136,6 +136,12 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma return queue_work(domain->wq, work); } +static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain) +{ + lockdep_assert_held(&domain->sem); + return rwsem_is_contended(&domain->sem); +} + void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index e6344a6b0a9f..690976665cf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -144,8 +144,10 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) /* We pad to match fetch size */ count = ring->funcs->align_mask + 1 - (ring->wptr & ring->funcs->align_mask); - count %= ring->funcs->align_mask + 1; - ring->funcs->insert_nop(ring, count); + count &= ring->funcs->align_mask; + + if (count != 0) + ring->funcs->insert_nop(ring, count); mb(); amdgpu_ring_set_wptr(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 582053f1cd56..f93f51002201 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -235,6 +235,8 @@ struct amdgpu_ring_funcs { void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset); void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset); void (*patch_de)(struct amdgpu_ring *ring, unsigned offset); + int (*reset)(struct amdgpu_ring *ring, unsigned int vmid); + void (*emit_cleaner_shader)(struct amdgpu_ring *ring); }; struct amdgpu_ring { @@ -334,6 +336,7 @@ struct amdgpu_ring { #define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o))) #define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o))) #define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o))) +#define amdgpu_ring_reset(r, v) (r)->funcs->reset((r), (v)) unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type); int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c index d234b7ccfaaf..1c66da1c3fb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -410,7 +410,7 @@ void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring) struct amdgpu_ring_mux *mux = &adev->gfx.muxer; WARN_ON(!ring->is_sw_ring); - if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) + if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) return; amdgpu_ring_mux_end_ib(mux, ring); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index d3706a484870..087ce0f6fa07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -115,6 +115,7 @@ struct amdgpu_sdma { bool has_page_queue; struct ras_common_if *ras_if; struct amdgpu_sdma_ras *ras; + uint32_t *ip_dump; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 2f84bdb8c594..bb7b9b2eaac1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -196,7 +196,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, amdgpu_umc_handle_bad_pages(adev, ras_error_status); if ((err_data->ue_count || err_data->de_count) && - (reset || (con && con->is_rma))) { + (reset || amdgpu_ras_is_rma(adev))) { con->gpu_reset_flags |= reset; amdgpu_ras_reset_gpu(adev); } @@ -204,55 +204,6 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, return AMDGPU_RAS_SUCCESS; } -int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, - uint32_t reset, uint32_t timeout_ms) -{ - struct ras_err_data err_data; - struct ras_common_if head = { - .block = AMDGPU_RAS_BLOCK__UMC, - }; - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - uint32_t timeout = timeout_ms; - - memset(&err_data, 0, sizeof(err_data)); - amdgpu_ras_error_data_init(&err_data); - - do { - - amdgpu_umc_handle_bad_pages(adev, &err_data); - - if (timeout && !err_data.de_count) { - msleep(1); - timeout--; - } - - } while (timeout && !err_data.de_count); - - if (!timeout) - dev_warn(adev->dev, "Can't find bad pages\n"); - - if (err_data.de_count) - dev_info(adev->dev, "%ld new deferred hardware errors detected\n", err_data.de_count); - - if (obj) { - obj->err_data.ue_count += err_data.ue_count; - obj->err_data.ce_count += err_data.ce_count; - obj->err_data.de_count += err_data.de_count; - } - - amdgpu_ras_error_data_fini(&err_data); - - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - - if (reset || (err_data.err_addr_cnt && con && con->is_rma)) { - con->gpu_reset_flags |= reset; - amdgpu_ras_reset_gpu(adev); - } - - return 0; -} - int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, enum amdgpu_ras_block block, uint16_t pasid, pasid_notify pasid_fn, void *data, uint32_t reset) @@ -472,43 +423,6 @@ int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, return 0; } -static int amdgpu_umc_uint64_cmp(const void *a, const void *b) -{ - uint64_t *addr_a = (uint64_t *)a; - uint64_t *addr_b = (uint64_t *)b; - - if (*addr_a > *addr_b) - return 1; - else if (*addr_a < *addr_b) - return -1; - else - return 0; -} - -/* Use string hash to avoid logging the same bad pages repeatedly */ -int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, - uint64_t *pfns, int len, uint64_t *val) -{ - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - char buf[MAX_UMC_HASH_STRING_SIZE] = {0}; - int offset = 0, i = 0; - uint64_t hash_val; - - if (!pfns || !len) - return -EINVAL; - - sort(pfns, len, sizeof(uint64_t), amdgpu_umc_uint64_cmp, NULL); - - for (i = 0; i < len; i++) - offset += snprintf(&buf[offset], sizeof(buf) - offset, "%llx", pfns[i]); - - hash_val = siphash(buf, offset, &con->umc_ecc_log.ecc_key); - - *val = hash_val; - - return 0; -} - int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err) { @@ -519,18 +433,10 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, ecc_log = &con->umc_ecc_log; mutex_lock(&ecc_log->lock); - ret = radix_tree_insert(ecc_tree, ecc_err->hash_index, ecc_err); - if (!ret) { - struct ras_err_pages *err_pages = &ecc_err->err_pages; - int i; - - /* Reserve memory */ - for (i = 0; i < err_pages->count; i++) - amdgpu_ras_reserve_page(adev, err_pages->pfn[i]); - + ret = radix_tree_insert(ecc_tree, ecc_err->pa_pfn, ecc_err); + if (!ret) radix_tree_tag_set(ecc_tree, - ecc_err->hash_index, UMC_ECC_NEW_DETECTED_TAG); - } + ecc_err->pa_pfn, UMC_ECC_NEW_DETECTED_TAG); mutex_unlock(&ecc_log->lock); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 5f50c69c3cec..ce4179db2a6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -127,13 +127,8 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, int amdgpu_umc_loop_channels(struct amdgpu_device *adev, umc_func func, void *data); -int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, - uint32_t reset, uint32_t timeout_ms); - int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, uint64_t status, uint64_t ipid, uint64_t addr); -int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, - uint64_t *pfns, int len, uint64_t *val); int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index fbc2852278e1..6162582d0aa2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -587,7 +587,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) break; } - r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, fw_name); + r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, "%s", fw_name); if (r) { release_firmware(adev->umsch_mm.fw); adev->umsch_mm.fw = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 07d930339b07..31fd30dcd593 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -260,7 +260,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->uvd.fw, fw_name); + r = amdgpu_ucode_request(adev, &adev->uvd.fw, "%s", fw_name); if (r) { dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n", fw_name); @@ -1088,7 +1088,6 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, int r; job->vm = NULL; - ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); if (ib->length_dw % 16) { DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 968ca2c84ef7..74fdbf71d95b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -158,7 +158,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->vce.fw, fw_name); + r = amdgpu_ucode_request(adev, &adev->vce.fw, "%s", fw_name); if (r) { dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n", fw_name); @@ -749,7 +749,6 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, int i, r = 0; job->vm = NULL; - ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); for (idx = 0; idx < ib->length_dw;) { uint32_t len = amdgpu_ib_get_value(ib, idx); @@ -1044,7 +1043,6 @@ out: if (!r) { /* No error, free all destroyed handle slots */ tmp = destroyed; - amdgpu_ib_free(p->adev, ib, NULL); } else { /* Error during parsing, free all allocated handle slots */ tmp = allocated; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index c87d68d4be53..2a1f3dbb14d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -330,6 +330,9 @@ struct amdgpu_vcn { uint16_t inst_mask; uint8_t num_inst_per_aid; bool using_unified_queue; + + /* IP reg dump */ + uint32_t *ip_dump; }; struct amdgpu_fw_shared_rb_ptrs_struct { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index b287a82e6177..b6397d3229e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -33,6 +33,7 @@ #include "amdgpu.h" #include "amdgpu_ras.h" #include "amdgpu_reset.h" +#include "amdgpu_dpm.h" #include "vi.h" #include "soc15.h" #include "nv.h" @@ -849,6 +850,13 @@ enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad return mode; } +void amdgpu_virt_pre_reset(struct amdgpu_device *adev) +{ + /* stop the data exchange thread */ + amdgpu_virt_fini_data_exchange(adev); + amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR); +} + void amdgpu_virt_post_reset(struct amdgpu_device *adev) { if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b42a8854dca0..b650a2032c42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -376,6 +376,7 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id); bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id); +void amdgpu_virt_pre_reset(struct amdgpu_device *adev); void amdgpu_virt_post_reset(struct amdgpu_device *adev); bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev); bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 6415d0d039e1..e5f508d34ed8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -549,7 +549,7 @@ static int amdgpu_vkms_sw_fini(void *handle) adev->mode_info.mode_config_initialized = false; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); kfree(adev->amdgpu_vkms_output); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 52e6a0b3f0c8..e20d19ae01b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -681,6 +681,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; + if (adev->gfx.enable_cleaner_shader && + ring->funcs->emit_cleaner_shader && + job->enforce_isolation) + ring->funcs->emit_cleaner_shader(ring); + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) return 0; @@ -742,6 +747,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_ring_emit_switch_buffer(ring); amdgpu_ring_emit_switch_buffer(ring); } + amdgpu_ring_ib_end(ring); return 0; } @@ -2397,6 +2403,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) { + struct amdgpu_ip_block *ip_block; struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; int r, i; @@ -2428,6 +2435,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); + /* use CPU for page table update if SDMA is unavailable */ + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA); + if (!ip_block || ip_block->status.valid == false) + vm->use_cpu_for_update = true; + DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && @@ -2768,7 +2780,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) * shouldn't be reported any more. */ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault) { bool is_compute_context = false; @@ -2794,7 +2806,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, addr /= AMDGPU_GPU_PAGE_SIZE; if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr, write_fault)) { + node_id, addr, ts, write_fault)) { amdgpu_bo_unref(&root); return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 046949c4b695..d12d66dca8e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -558,7 +558,7 @@ amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm); void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info); bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 821ba2309dec..7de449fae1e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1389,10 +1389,10 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) { case ACA_ERROR_TYPE_UE: - amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, NULL, 1ULL); + amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL); break; case ACA_ERROR_TYPE_CE: - amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, NULL, 1ULL); + amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index fb2b394bb9c5..6e9eeaeb3de1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -213,7 +213,7 @@ struct amd_sriov_msg_pf2vf_info { uint32_t gpu_capacity; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE]; -}; +} __packed; struct amd_sriov_msg_vf2pf_info_header { /* the total structure size in byte */ @@ -273,7 +273,7 @@ struct amd_sriov_msg_vf2pf_info { uint32_t mes_info_size; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE]; -}; +} __packed; /* mailbox message send from guest to host */ enum amd_sriov_mailbox_request_message { diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 228fd4dd32f1..26e2188101e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -75,6 +75,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, uint32_t inst_mask; ring->xcp_id = AMDGPU_XCP_NO_PARTITION; + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id; if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) return; @@ -103,6 +105,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) { if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) { ring->xcp_id = xcp_id; + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id; break; } } diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 650ec95bb40a..a51f3414b65d 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -2064,27 +2064,25 @@ amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder) case LCD_FAKE_EDID_PATCH_RECORD_TYPE: fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; if (fake_edid_record->ucFakeEDIDLength) { - struct edid *edid; - int edid_size = - max((int)EDID_LENGTH, (int)fake_edid_record->ucFakeEDIDLength); - edid = kmalloc(edid_size, GFP_KERNEL); - if (edid) { - memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0], - fake_edid_record->ucFakeEDIDLength); + const struct drm_edid *edid; + int edid_size; - if (drm_edid_is_valid(edid)) { - adev->mode_info.bios_hardcoded_edid = edid; - adev->mode_info.bios_hardcoded_edid_size = edid_size; - } else - kfree(edid); - } + if (fake_edid_record->ucFakeEDIDLength == 128) + edid_size = fake_edid_record->ucFakeEDIDLength; + else + edid_size = fake_edid_record->ucFakeEDIDLength * 128; + edid = drm_edid_alloc(fake_edid_record->ucFakeEDIDString, edid_size); + if (drm_edid_valid(edid)) + adev->mode_info.bios_hardcoded_edid = edid; + else + drm_edid_free(edid); + record += struct_size(fake_edid_record, + ucFakeEDIDString, + edid_size); + } else { + /* empty fake edid record must be 3 bytes long */ + record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; } - record += fake_edid_record->ucFakeEDIDLength ? - struct_size(fake_edid_record, - ucFakeEDIDString, - fake_edid_record->ucFakeEDIDLength) : - /* empty fake edid record must be 3 bytes long */ - sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; break; case LCD_PANEL_RESOLUTION_RECORD_TYPE: panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 55982c0064b5..06088d52d81c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -364,6 +364,7 @@ * 1 - Stream * 2 - Bypass */ +#define EOP_EXEC (1 << 28) /* For Trailing Fence */ #define DATA_SEL(x) ((x) << 29) /* 0 - discard * 1 - send low 32bit data diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index dddb5fe16f2c..742adbc460c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2846,7 +2846,7 @@ static int dce_v10_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 11780e4d7e9f..8d46ebadfa46 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2973,7 +2973,7 @@ static int dce_v11_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 05c0df97f01d..f08dc6a3886f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2745,7 +2745,7 @@ static int dce_v6_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index dc73e301d937..a6a3adf2ae13 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2766,7 +2766,7 @@ static int dce_v8_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index e444e621ddaa..ca983a014ba0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4649,7 +4649,7 @@ static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -4662,7 +4662,7 @@ static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { @@ -4675,7 +4675,7 @@ static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.me.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); adev->gfx.ip_dump_gfx_queues = NULL; } else { @@ -4741,6 +4741,13 @@ static int gfx_v10_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_10_1__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); @@ -5213,26 +5220,74 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) } +static u32 gfx_v10_0_get_cpg_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + if (me != 0) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0); + case 1: + return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1); + default: + return 0; + } +} + +static u32 gfx_v10_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, bool enable) { - u32 tmp; + u32 tmp, cp_int_cntl_reg; + int i, j; if (amdgpu_sriov_vf(adev)) return; - tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, - enable ? 1 : 0); - - WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); + if (cp_int_cntl_reg) { + tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); + } + } + } } static int gfx_v10_0_init_csb(struct amdgpu_device *adev) @@ -7369,6 +7424,7 @@ static int gfx_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); /* WA added for Vangogh asic fixing the SMU suspend failure * It needs to set power gating again during gfxoff control @@ -7679,6 +7735,10 @@ static int gfx_v10_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + return 0; } @@ -9074,12 +9134,39 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev, unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -9088,17 +9175,75 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v10_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + return 0; +} + static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, - PRIV_INSTR_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -9122,8 +9267,8 @@ static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev, case 0: for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; - /* we only enabled 1 gfx queue per pipe for now */ - if (ring->me == me_id && ring->pipe == pipe_id) + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) drm_sched_fault(&ring->sched); } break; @@ -9150,6 +9295,15 @@ static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v10_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream \n"); + gfx_v10_0_handle_priv_fault(adev, entry); + return 0; +} + static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -9244,6 +9398,24 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static void gfx_v10_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -9435,7 +9607,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, .test_ring = gfx_v10_0_ring_test_ring, .test_ib = gfx_v10_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v10_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v10_0_ring_emit_sb, .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl, @@ -9476,11 +9648,12 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, .test_ring = gfx_v10_0_ring_test_ring, .test_ib = gfx_v10_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v10_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v10_0_ring_soft_recovery, .emit_mem_sync = gfx_v10_0_emit_mem_sync, }; @@ -9536,6 +9709,11 @@ static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = { .process = gfx_v10_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v10_0_bad_op_irq_funcs = { + .set = gfx_v10_0_set_bad_op_fault_state, + .process = gfx_v10_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = { .set = gfx_v10_0_set_priv_inst_fault_state, .process = gfx_v10_0_priv_inst_irq, @@ -9557,6 +9735,9 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v10_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index dcef39907449..ee8604722467 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -481,6 +481,24 @@ static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, amdgpu_ring_write(ring, inv); /* poll interval */ } +static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -1484,7 +1502,7 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -1497,7 +1515,7 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { @@ -1510,7 +1528,7 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.me.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); adev->gfx.ip_dump_gfx_queues = NULL; } else { @@ -1569,6 +1587,13 @@ static int gfx_v11_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, + GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, @@ -1953,26 +1978,74 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev) gfx_v11_0_init_gds_vmid(adev); } +static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + if (me != 0) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); + case 1: + return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); + default: + return 0; + } +} + +static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, bool enable) { - u32 tmp; + u32 tmp, cp_int_cntl_reg; + int i, j; if (amdgpu_sriov_vf(adev)) return; - tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, - enable ? 1 : 0); - - WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); + if (cp_int_cntl_reg) { + tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); + } + } + } } static int gfx_v11_0_init_csb(struct amdgpu_device *adev) @@ -4598,6 +4671,7 @@ static int gfx_v11_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -4668,8 +4742,8 @@ static int gfx_v11_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, - int req) +int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req) { u32 i, tmp, val; @@ -4707,6 +4781,8 @@ static int gfx_v11_0_soft_reset(void *handle) int r, i, j, k; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + gfx_v11_0_set_safe_mode(adev, 0); + tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); @@ -4714,8 +4790,6 @@ static int gfx_v11_0_soft_reset(void *handle) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - gfx_v11_0_set_safe_mode(adev, 0); - mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { @@ -4740,8 +4814,10 @@ static int gfx_v11_0_soft_reset(void *handle) mutex_unlock(&adev->srbm_mutex); /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ - r = gfx_v11_0_request_gfx_index_mutex(adev, 1); + mutex_lock(&adev->gfx.reset_sem_mutex); + r = gfx_v11_0_request_gfx_index_mutex(adev, true); if (r) { + mutex_unlock(&adev->gfx.reset_sem_mutex); DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); return r; } @@ -4755,7 +4831,8 @@ static int gfx_v11_0_soft_reset(void *handle) RREG32_SOC15(GC, 0, regCP_VMID_RESET); /* release the gfx mutex */ - r = gfx_v11_0_request_gfx_index_mutex(adev, 0); + r = gfx_v11_0_request_gfx_index_mutex(adev, false); + mutex_unlock(&adev->gfx.reset_sem_mutex); if (r) { DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); return r; @@ -4954,6 +5031,9 @@ static int gfx_v11_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; return 0; } @@ -6201,15 +6281,42 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -6218,17 +6325,75 @@ static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } -static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned type, - enum amdgpu_interrupt_state state) +static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_INSTR_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + return 0; +} + +static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -6252,8 +6417,8 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, case 0: for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; - /* we only enabled 1 gfx queue per pipe for now */ - if (ring->me == me_id && ring->pipe == pipe_id) + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) drm_sched_fault(&ring->sched); } break; @@ -6281,6 +6446,15 @@ static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream \n"); + gfx_v11_0_handle_priv_fault(adev, entry); + return 0; +} + static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -6556,7 +6730,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, .test_ring = gfx_v11_0_ring_test_ring, .test_ib = gfx_v11_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v11_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, @@ -6598,11 +6772,12 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, .test_ring = gfx_v11_0_ring_test_ring, .test_ib = gfx_v11_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v11_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, }; @@ -6658,6 +6833,11 @@ static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = { .process = gfx_v11_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = { + .set = gfx_v11_0_set_bad_op_fault_state, + .process = gfx_v11_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { .set = gfx_v11_0_set_priv_inst_fault_state, .process = gfx_v11_0_priv_inst_irq, @@ -6675,6 +6855,9 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h index 10cfc29c27c9..157a5c812259 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h @@ -26,4 +26,7 @@ extern const struct amdgpu_ip_block_version gfx_v11_0_ip_block; +int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c index 9cd221ed240c..999bb3cc88b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c @@ -97,7 +97,7 @@ static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev, ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; } - if (con && !con->is_rma) + if (con && !amdgpu_ras_is_rma(adev)) amdgpu_ras_reset_gpu(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 2c611b8577a7..54059cbcfc08 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1281,7 +1281,7 @@ static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -1294,7 +1294,7 @@ static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { @@ -1307,7 +1307,7 @@ static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.me.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); adev->gfx.ip_dump_gfx_queues = NULL; } else { @@ -1355,6 +1355,13 @@ static int gfx_v12_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, + GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, @@ -1686,26 +1693,68 @@ static void gfx_v12_0_constants_init(struct amdgpu_device *adev) gfx_v12_0_init_compute_vmid(adev); } -static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, - bool enable) +static u32 gfx_v12_0_get_cpg_int_cntl(struct amdgpu_device *adev, + int me, int pipe) { - u32 tmp; + if (me != 0) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); + default: + return 0; + } +} + +static u32 gfx_v12_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); + default: + return 0; + } +} + +static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp, cp_int_cntl_reg; + int i, j; if (amdgpu_sriov_vf(adev)) return; - tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, - enable ? 1 : 0); - - WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); + if (cp_int_cntl_reg) { + tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); + } + } + } } static int gfx_v12_0_init_csb(struct amdgpu_device *adev) @@ -3005,7 +3054,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); @@ -3553,6 +3602,7 @@ static int gfx_v12_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -3672,6 +3722,10 @@ static int gfx_v12_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + return 0; } @@ -4747,15 +4801,42 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -4764,17 +4845,75 @@ static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } -static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned type, - enum amdgpu_interrupt_state state) +static int gfx_v12_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_INSTR_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + return 0; +} + +static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -4798,8 +4937,8 @@ static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev, case 0: for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; - /* we only enabled 1 gfx queue per pipe for now */ - if (ring->me == me_id && ring->pipe == pipe_id) + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) drm_sched_fault(&ring->sched); } break; @@ -4827,6 +4966,15 @@ static int gfx_v12_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v12_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream \n"); + gfx_v12_0_handle_priv_fault(adev, entry); + return 0; +} + static int gfx_v12_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -4859,6 +5007,24 @@ static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void gfx_v12_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static void gfx_v12_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -5040,7 +5206,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = { .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush, .test_ring = gfx_v12_0_ring_test_ring, .test_ib = gfx_v12_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v12_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v12_0_ring_emit_cntxcntl, .init_cond_exec = gfx_v12_0_ring_emit_init_cond_exec, @@ -5078,11 +5244,12 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush, .test_ring = gfx_v12_0_ring_test_ring, .test_ib = gfx_v12_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v12_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v12_0_ring_emit_wreg, .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v12_0_ring_soft_recovery, .emit_mem_sync = gfx_v12_0_emit_mem_sync, }; @@ -5138,6 +5305,11 @@ static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_reg_irq_funcs = { .process = gfx_v12_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v12_0_bad_op_irq_funcs = { + .set = gfx_v12_0_set_bad_op_fault_state, + .process = gfx_v12_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_inst_irq_funcs = { .set = gfx_v12_0_set_priv_inst_fault_state, .process = gfx_v12_0_priv_inst_irq, @@ -5151,6 +5323,9 @@ static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v12_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v12_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v12_0_priv_inst_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index d84589137df9..f146806c4633 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2114,6 +2114,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; + /* Workaround for cache flush problems. First send a dummy EOP * event down the pipe with seq one below. */ @@ -2133,7 +2135,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | - EVENT_INDEX(5))); + EVENT_INDEX(5) | + (exec ? EOP_EXEC : 0))); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); @@ -4921,6 +4924,76 @@ static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ } +static void gfx_v7_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static void gfx_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v7_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + +static int gfx_v7_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v7_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 12 + 5)) + return -ENOMEM; + gfx_v7_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v7_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff); + gfx_v7_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0); + + return amdgpu_ring_test_ring(ring); +} + static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .name = "gfx_v7_0", .early_init = gfx_v7_0_early_init, @@ -4972,6 +5045,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .emit_wreg = gfx_v7_0_ring_emit_wreg, .soft_recovery = gfx_v7_0_ring_soft_recovery, .emit_mem_sync = gfx_v7_0_emit_mem_sync, + .reset = gfx_v7_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { @@ -5002,6 +5076,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v7_0_ring_emit_wreg, + .soft_recovery = gfx_v7_0_ring_soft_recovery, .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b4658c7db0e1..bc8295812cc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6149,6 +6149,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; /* Workaround for cache flush problems. First send a dummy EOP * event down the pipe with seq one below. @@ -6172,7 +6173,8 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, EOP_TC_ACTION_EN | EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | - EVENT_INDEX(5))); + EVENT_INDEX(5) | + (exec ? EOP_EXEC : 0))); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); @@ -6380,6 +6382,34 @@ static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, amdgpu_ring_write(ring, val); } +static void gfx_v8_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static void gfx_v8_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v8_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) { struct amdgpu_device *adev = ring->adev; @@ -6856,6 +6886,48 @@ static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } +static int gfx_v8_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v8_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 12 + 5)) + return -ENOMEM; + gfx_v8_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v8_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff); + gfx_v8_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0); + + return amdgpu_ring_test_ring(ring); +} + static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .name = "gfx_v8_0", .early_init = gfx_v8_0_early_init, @@ -6923,6 +6995,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .emit_wreg = gfx_v8_0_ring_emit_wreg, .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync, + .reset = gfx_v8_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { @@ -6955,6 +7028,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v8_0_ring_emit_wreg, + .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute, .emit_wave_limit = gfx_v8_0_emit_wave_limit, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 2929c8972ea7..21089aadbb7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -50,6 +50,7 @@ #include "amdgpu_ring_mux.h" #include "gfx_v9_4.h" #include "gfx_v9_0.h" +#include "gfx_v9_0_cleaner_shader.h" #include "gfx_v9_4_2.h" #include "asic_reg/pwr/pwr_10_0_offset.h" @@ -893,10 +894,18 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev); static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, unsigned int vmid); +static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { + struct amdgpu_device *adev = kiq_ring->adev; + u64 shader_mc_addr; + + /* Cleaner shader MC address */ + shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | @@ -906,8 +915,8 @@ static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */ amdgpu_ring_write(kiq_ring, 0); /* oac mask */ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ } @@ -1004,12 +1013,47 @@ static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); } + +static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, uint32_t queue_id, + uint32_t xcc_id, uint32_t vmid) +{ + struct amdgpu_device *adev = kiq_ring->adev; + unsigned i; + + /* enter save mode */ + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0); + + if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1); + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + } else { + dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type); + } + + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + /* exit safe mode */ + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); +} + static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { .kiq_set_resources = gfx_v9_0_kiq_set_resources, .kiq_map_queues = gfx_v9_0_kiq_map_queues, .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, .kiq_query_status = gfx_v9_0_kiq_query_status, .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, + .kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, @@ -2129,7 +2173,7 @@ static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -2142,7 +2186,7 @@ static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { @@ -2174,6 +2218,12 @@ static int gfx_v9_0_sw_init(void *handle) break; } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + default: + adev->gfx.enable_cleaner_shader = false; + break; + } + adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; @@ -2182,6 +2232,13 @@ static int gfx_v9_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); @@ -2329,6 +2386,10 @@ static int gfx_v9_0_sw_init(void *handle) gfx_v9_0_alloc_ip_dump(adev); + r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + if (r) + return r; + return 0; } @@ -2364,6 +2425,8 @@ static int gfx_v9_0_sw_fini(void *handle) } gfx_v9_0_free_microcode(adev); + amdgpu_gfx_sysfs_isolation_shader_fini(adev); + kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); @@ -2634,7 +2697,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); - if(adev->gfx.num_gfx_rings) + if (adev->gfx.num_gfx_rings) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); @@ -3735,7 +3798,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) return 0; } -static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) +static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -3747,8 +3810,8 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) */ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; - if (!tmp_mqd->cp_hqd_pq_control || - (!amdgpu_in_reset(adev) && !adev->in_suspend)) { + if (!restore && (!tmp_mqd->cp_hqd_pq_control || + (!amdgpu_in_reset(adev) && !adev->in_suspend))) { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -3812,7 +3875,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v9_0_kcq_init_queue(ring); + r = gfx_v9_0_kcq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -3908,6 +3971,9 @@ static int gfx_v9_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, + adev->gfx.cleaner_shader_ptr); + if (!amdgpu_sriov_vf(adev)) gfx_v9_0_init_golden_registers(adev); @@ -3937,6 +4003,7 @@ static int gfx_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); /* DF freeze and kcq disable will fail */ if (!amdgpu_ras_intr_triggered()) @@ -4747,6 +4814,10 @@ static int gfx_v9_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + r = gfx_v9_0_ecc_late_init(handle); if (r) return r; @@ -5858,7 +5929,9 @@ static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); WREG32_SOC15(GC, 0, mmSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, @@ -5929,17 +6002,95 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, } } +static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + + return 0; +} + +static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -6121,6 +6272,15 @@ static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream\n"); + gfx_v9_0_fault(adev, entry); + return 0; +} + static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -7001,6 +7161,153 @@ static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } } +static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + +static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v9_0_ring_emit_wreg(kiq_ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 7 + 5)) + return -ENOMEM; + gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v9_0_ring_emit_reg_wait(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff); + gfx_v9_0_ring_emit_wreg(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0); + + return amdgpu_ring_test_ring(ring); +} + +static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + int i, r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, + 0, 0); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + /* make sure dequeue is complete*/ + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + if (r) { + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + return r; + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)){ + dev_err(adev->dev, "fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v9_0_kcq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); + return r; + } + spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + r = amdgpu_ring_test_ring(kiq_ring); + if (r) { + DRM_ERROR("fail to remap queue\n"); + return r; + } + return amdgpu_ring_test_ring(ring); +} + static void gfx_v9_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -7083,6 +7390,13 @@ static void gfx_v9_ip_dump(void *handle) } +static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) +{ + /* Emit the cleaner shader */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ +} + static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -7132,7 +7446,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 5 + /* HDP_INVL */ 8 + 8 + /* FENCE x2 */ 2 + /* SWITCH_BUFFER */ - 7, /* gfx_v9_0_emit_mem_sync */ + 7 + /* gfx_v9_0_emit_mem_sync */ + 2, /* gfx_v9_0_ring_emit_cleaner_shader */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ .emit_ib = gfx_v9_0_ring_emit_ib_gfx, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -7141,7 +7456,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v9_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, @@ -7153,6 +7468,10 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, + .reset = gfx_v9_0_reset_kgq, + .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { @@ -7185,7 +7504,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { 5 + /* HDP_INVL */ 8 + 8 + /* FENCE x2 */ 2 + /* SWITCH_BUFFER */ - 7, /* gfx_v9_0_emit_mem_sync */ + 7 + /* gfx_v9_0_emit_mem_sync */ + 2, /* gfx_v9_0_ring_emit_cleaner_shader */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ .emit_ib = gfx_v9_0_ring_emit_ib_gfx, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -7195,7 +7515,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, .test_ib = gfx_v9_0_ring_test_ib, - .insert_nop = amdgpu_sw_ring_insert_nop, + .insert_nop = gfx_v9_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, @@ -7209,6 +7529,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .patch_cntl = gfx_v9_0_ring_patch_cntl, .patch_de = gfx_v9_0_ring_patch_de_meta, .patch_ce = gfx_v9_0_ring_patch_ce_meta, + .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -7229,7 +7552,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_0_emit_mem_sync */ 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ - 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ + 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ + 2, /* gfx_v9_0_ring_emit_cleaner_shader */ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -7239,13 +7563,18 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, .test_ib = gfx_v9_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v9_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, .emit_wave_limit = gfx_v9_0_emit_wave_limit, + .reset = gfx_v9_0_reset_kcq, + .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { @@ -7303,6 +7632,11 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { .process = gfx_v9_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = { + .set = gfx_v9_0_set_bad_op_fault_state, + .process = gfx_v9_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { .set = gfx_v9_0_set_priv_inst_fault_state, .process = gfx_v9_0_priv_inst_irq, @@ -7322,6 +7656,9 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h new file mode 100644 index 000000000000..36c0292b5110 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +/* Define the cleaner shader gfx_9_0 */ +static const u32 __maybe_unused gfx_9_0_cleaner_shader_hex[] = { + /* Add the cleaner shader code here */ +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 20ea6cb01edf..2067f26d3a9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -37,6 +37,7 @@ #include "gc/gc_9_4_3_sh_mask.h" #include "gfx_v9_4_3.h" +#include "gfx_v9_4_3_cleaner_shader.h" #include "amdgpu_xcp.h" #include "amdgpu_aca.h" @@ -63,6 +64,98 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); #define NORMALIZE_XCC_REG_OFFSET(offset) \ (offset & 0xFFFF) +static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQC_DCACHE_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQC_ICACHE_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQ_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regTCP_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC2_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_COMMAND), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_MESSAGE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_ARGUMENT_1), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_ARGUMENT_2), + SOC15_REG_ENTRY_STR(GC, 0, regSMU_RLC_RESPONSE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME2_HEADER_DUMP), + /* SE status registers */ + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3) +}; + +static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9_4_3[] = { + /* compute queue registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ACTIVE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GFX_STATUS), +}; + struct amdgpu_gfx_ras gfx_v9_4_3_ras; static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); @@ -71,10 +164,18 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev); static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); +static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { + struct amdgpu_device *adev = kiq_ring->adev; + u64 shader_mc_addr; + + /* Cleaner shader MC address */ + shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | @@ -84,8 +185,8 @@ static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */ amdgpu_ring_write(kiq_ring, 0); /* oac mask */ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ } @@ -182,12 +283,46 @@ static void gfx_v9_4_3_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); } +static void gfx_v9_4_3_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, uint32_t queue_id, + uint32_t xcc_id, uint32_t vmid) +{ + struct amdgpu_device *adev = kiq_ring->adev; + unsigned i; + + /* enter save mode */ + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, xcc_id); + + if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_COMPUTE_QUEUE_RESET, 0x1); + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + } else { + dev_err(adev->dev, "reset queue_type(%d) not supported\n\n", queue_type); + } + + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + /* exit safe mode */ + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); +} + static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = { .kiq_set_resources = gfx_v9_4_3_kiq_set_resources, .kiq_map_queues = gfx_v9_4_3_kiq_map_queues, .kiq_unmap_queues = gfx_v9_4_3_kiq_unmap_queues, .kiq_query_status = gfx_v9_4_3_kiq_query_status, .kiq_invalidate_tlbs = gfx_v9_4_3_kiq_invalidate_tlbs, + .kiq_reset_hw_queue = gfx_v9_4_3_kiq_reset_hw_queue, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, @@ -885,11 +1020,59 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, hw_prio, NULL); } +static void gfx_v9_4_3_alloc_ip_dump(struct amdgpu_device *adev) +{ + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); + uint32_t *ptr, num_xcc, inst; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + ptr = kcalloc(reg_count * num_xcc, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); + adev->gfx.ip_dump_core = NULL; + } else { + adev->gfx.ip_dump_core = ptr; + } + + /* Allocate memory for compute queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); + inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst * num_xcc, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); + adev->gfx.ip_dump_compute_queues = NULL; + } else { + adev->gfx.ip_dump_compute_queues = ptr; + } +} + static int gfx_v9_4_3_sw_init(void *handle) { int i, j, k, r, ring_id, xcc_id, num_xcc; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): + adev->gfx.cleaner_shader_ptr = gfx_9_4_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_3_cleaner_shader_hex); + if (adev->gfx.mec_fw_version >= 153) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + default: + adev->gfx.enable_cleaner_shader = false; + break; + } + adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; @@ -901,6 +1084,13 @@ static int gfx_v9_4_3_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); @@ -976,10 +1166,19 @@ static int gfx_v9_4_3_sw_init(void *handle) return r; - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev)) { r = amdgpu_gfx_sysfs_init(adev); + if (r) + return r; + } - return r; + gfx_v9_4_3_alloc_ip_dump(adev); + + r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + if (r) + return r; + + return 0; } static int gfx_v9_4_3_sw_fini(void *handle) @@ -997,11 +1196,17 @@ static int gfx_v9_4_3_sw_fini(void *handle) amdgpu_gfx_kiq_fini(adev, i); } + amdgpu_gfx_cleaner_shader_sw_fini(adev); + gfx_v9_4_3_mec_fini(adev); amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); gfx_v9_4_3_free_microcode(adev); if (!amdgpu_sriov_vf(adev)) amdgpu_gfx_sysfs_fini(adev); + amdgpu_gfx_sysfs_isolation_shader_fini(adev); + + kfree(adev->gfx.ip_dump_core); + kfree(adev->gfx.ip_dump_compute_queues); return 0; } @@ -1910,7 +2115,7 @@ static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) return 0; } -static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) +static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id, bool restore) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -1922,8 +2127,8 @@ static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) */ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; - if (!tmp_mqd->cp_hqd_pq_control || - (!amdgpu_in_reset(adev) && !adev->in_suspend)) { + if (!restore && (!tmp_mqd->cp_hqd_pq_control || + (!amdgpu_in_reset(adev) && !adev->in_suspend))) { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -2008,7 +2213,7 @@ static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id); + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -2139,6 +2344,9 @@ static int gfx_v9_4_3_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, + adev->gfx.cleaner_shader_ptr); + if (!amdgpu_sriov_vf(adev)) gfx_v9_4_3_init_golden_registers(adev); @@ -2162,6 +2370,7 @@ static int gfx_v9_4_3_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { @@ -2327,6 +2536,10 @@ static int gfx_v9_4_3_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + if (adev->gfx.ras && adev->gfx.ras->enable_watchdog_timer) adev->gfx.ras->enable_watchdog_timer(adev); @@ -2833,6 +3046,21 @@ static void gfx_v9_4_3_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask); } +static void gfx_v9_4_3_ring_soft_recovery(struct amdgpu_ring *ring, + unsigned vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t value = 0; + + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); + value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); + value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); + value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id); + WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id); +} + static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( struct amdgpu_device *adev, int me, int pipe, enum amdgpu_interrupt_state state, int xcc_id) @@ -2886,21 +3114,103 @@ static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( } } +static u32 gfx_v9_4_3_get_cpc_int_cntl(struct amdgpu_device *adev, + int xcc_id, int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, enum amdgpu_interrupt_state state) { - int i, num_xcc; + u32 mec_int_cntl_reg, mec_int_cntl; + int i, j, k, num_xcc; num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - for (i = 0; i < num_xcc; i++) + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (j = 0; j < adev->gfx.mec.num_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + /* MECs start at 1 */ + mec_int_cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, i, j + 1, k); + + if (mec_int_cntl_reg) { + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, i); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? + 1 : 0); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, i); + } + } + } + } + break; + default: + break; + } + + return 0; +} + +static int gfx_v9_4_3_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 mec_int_cntl_reg, mec_int_cntl; + int i, j, k, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < num_xcc; i++) { + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (j = 0; j < adev->gfx.mec.num_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + /* MECs start at 1 */ + mec_int_cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, i, j + 1, k); + + if (mec_int_cntl_reg) { + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, i); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? + 1 : 0); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, i); + } + } + } + } break; default: break; @@ -3061,6 +3371,15 @@ static int gfx_v9_4_3_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v9_4_3_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream\n"); + gfx_v9_4_3_fault(adev, entry); + return 0; +} + static int gfx_v9_4_3_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -3147,6 +3466,91 @@ static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } } +static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + int r, i; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, + 0, 0); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + /* make sure dequeue is complete*/ + amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, ring->xcc_id)); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, ring->xcc_id)); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id); + if (r) { + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + return r; + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)){ + dev_err(adev->dev, "fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); + return r; + } + spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) { + dev_err(adev->dev, "fail to remap queue\n"); + return r; + } + return amdgpu_ring_test_ring(ring); +} + enum amdgpu_gfx_cp_ras_mem_id { AMDGPU_GFX_CP_MEM1 = 1, AMDGPU_GFX_CP_MEM2, @@ -3959,8 +4363,8 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, /* the caller should make sure initialize value of * err_data->ue_count and err_data->ce_count */ - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); } static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, @@ -4062,6 +4466,151 @@ static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev) amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer); } +static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + +static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i, j, k; + uint32_t xcc_id, xcc_offset, inst_offset; + uint32_t num_xcc, reg, num_inst; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); + + if (!adev->gfx.ip_dump_core) + return; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + drm_printf(p, "Number of Instances:%d\n", num_xcc); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count; + drm_printf(p, "\nInstance id:%d\n", xcc_id); + for (i = 0; i < reg_count; i++) + drm_printf(p, "%-50s \t 0x%08x\n", + gc_reg_list_9_4_3[i].reg_name, + adev->gfx.ip_dump_core[xcc_offset + i]); + } + + /* print compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); + drm_printf(p, "\nnum_xcc: %d num_mec: %d num_pipe: %d num_queue: %d\n", + num_xcc, + adev->gfx.mec.num_mec, + adev->gfx.mec.num_pipe_per_mec, + adev->gfx.mec.num_queue_per_pipe); + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count * num_inst; + inst_offset = 0; + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + drm_printf(p, + "\nxcc:%d mec:%d, pipe:%d, queue:%d\n", + xcc_id, i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, + "%-50s \t 0x%08x\n", + gc_cp_reg_list_9_4_3[reg].reg_name, + adev->gfx.ip_dump_compute_queues + [xcc_offset + inst_offset + + reg]); + } + inst_offset += reg_count; + } + } + } + } +} + +static void gfx_v9_4_3_ip_dump(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i, j, k; + uint32_t num_xcc, reg, num_inst; + uint32_t xcc_id, xcc_offset, inst_offset; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); + + if (!adev->gfx.ip_dump_core) + return; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + amdgpu_gfx_off_ctrl(adev, false); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count; + for (i = 0; i < reg_count; i++) + adev->gfx.ip_dump_core[xcc_offset + i] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(gc_reg_list_9_4_3[i], + GET_INST(GC, xcc_id))); + } + amdgpu_gfx_off_ctrl(adev, true); + + /* dump compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count * num_inst; + inst_offset = 0; + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + /* ME0 is for GFX so start from 1 for CP */ + soc15_grbm_select(adev, 1 + i, j, k, 0, + GET_INST(GC, xcc_id)); + + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_compute_queues + [xcc_offset + + inst_offset + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST( + gc_cp_reg_list_9_4_3[reg], + GET_INST(GC, xcc_id))); + } + inst_offset += reg_count; + } + } + } + } + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); +} + +static void gfx_v9_4_3_ring_emit_cleaner_shader(struct amdgpu_ring *ring) +{ + /* Emit the cleaner shader */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ +} + static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .name = "gfx_v9_4_3", .early_init = gfx_v9_4_3_early_init, @@ -4078,8 +4627,8 @@ static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .set_clockgating_state = gfx_v9_4_3_set_clockgating_state, .set_powergating_state = gfx_v9_4_3_set_powergating_state, .get_clockgating_state = gfx_v9_4_3_get_clockgating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = gfx_v9_4_3_ip_dump, + .print_ip_state = gfx_v9_4_3_ip_print, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { @@ -4101,7 +4650,8 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { 8 + 8 + 8 + /* gfx_v9_4_3_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_4_3_emit_mem_sync */ 5 + /* gfx_v9_4_3_emit_wave_limit for updating regSPI_WCL_PIPE_PERCENT_GFX register */ - 15, /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */ + 15 + /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */ + 2, /* gfx_v9_4_3_ring_emit_cleaner_shader */ .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */ .emit_ib = gfx_v9_4_3_ring_emit_ib_compute, .emit_fence = gfx_v9_4_3_ring_emit_fence, @@ -4111,13 +4661,18 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush, .test_ring = gfx_v9_4_3_ring_test_ring, .test_ib = gfx_v9_4_3_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v9_4_3_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v9_4_3_ring_emit_wreg, .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v9_4_3_ring_soft_recovery, .emit_mem_sync = gfx_v9_4_3_emit_mem_sync, .emit_wave_limit = gfx_v9_4_3_emit_wave_limit, + .reset = gfx_v9_4_3_reset_kcq, + .emit_cleaner_shader = gfx_v9_4_3_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { @@ -4172,6 +4727,11 @@ static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_reg_irq_funcs = { .process = gfx_v9_4_3_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v9_4_3_bad_op_irq_funcs = { + .set = gfx_v9_4_3_set_bad_op_fault_state, + .process = gfx_v9_4_3_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_inst_irq_funcs = { .set = gfx_v9_4_3_set_priv_inst_fault_state, .process = gfx_v9_4_3_priv_inst_irq, @@ -4185,6 +4745,9 @@ static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v9_4_3_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v9_4_3_priv_inst_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm new file mode 100644 index 000000000000..d5325ef80ab0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader. +//To turn this shader program on for complitaion change this to main and lower shader main to main_1 + +// MI300 : Clear SGPRs, VGPRs and LDS +// Uses two kernels launched separately: +// 1. Clean VGPRs, LDS, and lower SGPRs +// Launches one workgroup per CU, each workgroup with 4x wave64 per SIMD in the CU +// Waves are "wave64" and have 128 VGPRs each, which uses all 512 VGPRs per SIMD +// Waves in the workgroup share the 64KB of LDS +// Each wave clears SGPRs 0 - 95. Because there are 4 waves/SIMD, this is physical SGPRs 0-383 +// Each wave clears 128 VGPRs, so all 512 in the SIMD +// The first wave of the workgroup clears its 64KB of LDS +// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup +// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared. +// 2. Clean remaining SGPRs +// Launches a workgroup with 24 waves per workgroup, yielding 6 waves per SIMD in each CU +// Waves are allocating 96 SGPRs +// CP sets up SPI_RESOURCE_RESERVE_* registers to prevent these waves from allocating SGPRs 0-223. +// As such, these 6 waves per SIMD are allocated physical SGPRs 224-799 +// Barriers do not work for >16 waves per workgroup, so we cannot start with S_BARRIER +// Instead, the shader starts with an S_SETHALT 1. Once all waves are launched CP will send unhalt command +// The shader then clears all SGPRs allocated to it, cleaning out physical SGPRs 224-799 + +shader main + asic(MI300) + type(CS) + wave_size(64) +// Note: original source code from SQ team + +// (theorhetical fastest = ~512clks vgpr + 1536 lds + ~128 sgpr = 2176 clks) + + s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_3 + s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s3 == 1) + S_BARRIER + + s_movk_i32 m0, 0x0000 + s_mov_b32 s2, 0x00000078 // Loop 128/8=16 times (loop unrolled for performance) + // + // CLEAR VGPRs + // + s_set_gpr_idx_on s2, 0x8 // enable Dest VGPR indexing +label_0005: + v_mov_b32 v0, 0 + v_mov_b32 v1, 0 + v_mov_b32 v2, 0 + v_mov_b32 v3, 0 + v_mov_b32 v4, 0 + v_mov_b32 v5, 0 + v_mov_b32 v6, 0 + v_mov_b32 v7, 0 + s_sub_u32 s2, s2, 8 + s_set_gpr_idx_idx s2 + s_cbranch_scc0 label_0005 + s_set_gpr_idx_off + + // + // + + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_cbranch_scc0 label_clean_sgpr_1 // Clean LDS if its first wave of ThreadGroup/WorkGroup + // CLEAR LDS + // + s_mov_b32 exec_lo, 0xffffffff + s_mov_b32 exec_hi, 0xffffffff + v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63) + v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63) + v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte) + s_mov_b32 s2, 0x00000003f // 64 loop iteraions + s_mov_b32 m0, 0xffffffff + // Clear all of LDS space + // Each FirstWave of WorkGroup clears 64kbyte block + +label_001F: + ds_write2_b64 v1, v[2:3], v[2:3] offset1:32 + ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96 + v_add_co_u32 v1, vcc, 0x00000400, v1 + s_sub_u32 s2, s2, 1 + s_cbranch_scc0 label_001F + // + // CLEAR SGPRs + // +label_clean_sgpr_1: + s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance) + s_nop 0 +label_sgpr_loop: + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop + + //clear vcc, flat scratch + s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR + s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 vcc, 0 //clear vcc + s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1 + s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3 + s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5 + s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7 + s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9 + s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11 + s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13 + s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15 +s_endpgm + +label_0023: + + s_sethalt 1 + + s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance) + s_nop 0 +label_sgpr_loop1: + + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop1 + + //clear vcc, flat scratch + s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR + s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 vcc, 0xee //clear vcc + +s_endpgm +end + diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h new file mode 100644 index 000000000000..69aa567c6c1d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* Define the cleaner shader gfx_9_4_3 */ +static const u32 gfx_9_4_3_cleaner_shader_hex[] = { + 0xbf068100, 0xbf84003b, + 0xbf8a0000, 0xb07c0000, + 0xbe8200ff, 0x00000078, + 0xbf110802, 0x7e000280, + 0x7e020280, 0x7e040280, + 0x7e060280, 0x7e080280, + 0x7e0a0280, 0x7e0c0280, + 0x7e0e0280, 0x80828802, + 0xbe803202, 0xbf84fff5, + 0xbf9c0000, 0xbe8200ff, + 0x80000000, 0x86020102, + 0xbf840011, 0xbefe00c1, + 0xbeff00c1, 0xd28c0001, + 0x0001007f, 0xd28d0001, + 0x0002027e, 0x10020288, + 0xbe8200bf, 0xbefc00c1, + 0xd89c2000, 0x00020201, + 0xd89c6040, 0x00040401, + 0x320202ff, 0x00000400, + 0x80828102, 0xbf84fff8, + 0xbefc00ff, 0x0000005c, + 0xbf800000, 0xbe802c80, + 0xbe812c80, 0xbe822c80, + 0xbe832c80, 0x80fc847c, + 0xbf84fffa, 0xbee60080, + 0xbee70080, 0xbeea0180, + 0xbeec0180, 0xbeee0180, + 0xbef00180, 0xbef20180, + 0xbef40180, 0xbef60180, + 0xbef80180, 0xbefa0180, + 0xbf810000, 0xbf8d0001, + 0xbefc00ff, 0x0000005c, + 0xbf800000, 0xbe802c80, + 0xbe812c80, 0xbe822c80, + 0xbe832c80, 0x80fc847c, + 0xbf84fffa, 0xbee60080, + 0xbee70080, 0xbeea01ff, + 0x000000ee, 0xbf810000, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index d200310d1731..0e3ddea7b8e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -443,23 +443,6 @@ static void gfxhub_v1_0_init(struct amdgpu_device *adev) mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } -static bool gfxhub_v1_0_query_utcl2_poison_status(struct amdgpu_device *adev, - int xcc_id) -{ - u32 status = 0; - struct amdgpu_vmhub *hub; - - if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) - return false; - - hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - status = RREG32(hub->vm_l2_pro_fault_status); - /* reset page fault status */ - WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - - return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); -} - const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset, .setup_vm_pt_regs = gfxhub_v1_0_setup_vm_pt_regs, @@ -468,5 +451,4 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default, .init = gfxhub_v1_0_init, .get_xgmi_info = gfxhub_v1_1_get_xgmi_info, - .query_utcl2_poison_status = gfxhub_v1_0_query_utcl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 72109abe7c86..ed8e130c7d19 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -622,22 +622,6 @@ static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev) return 0; } -static bool gfxhub_v1_2_query_utcl2_poison_status(struct amdgpu_device *adev, - int xcc_id) -{ - u32 fed, status; - - status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVM_L2_PROTECTION_FAULT_STATUS); - fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); - if (!amdgpu_sriov_vf(adev)) { - /* clear page fault status and address */ - WREG32_P(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), - regVM_L2_PROTECTION_FAULT_CNTL), 1, ~1); - } - - return fed; -} - const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset, .setup_vm_pt_regs = gfxhub_v1_2_setup_vm_pt_regs, @@ -646,7 +630,6 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default, .init = gfxhub_v1_2_init, .get_xgmi_info = gfxhub_v1_2_get_xgmi_info, - .query_utcl2_poison_status = gfxhub_v1_2_query_utcl2_poison_status, }; static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index f0ceab3ce5bf..9784a2892185 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -132,7 +132,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, + entry->timestamp, write_fault)) return 1; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b73136d390cc..c76ac0dfe572 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -595,7 +595,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, cam_index = entry->src_data[2] & 0x3ff; ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, write_fault); + addr, entry->timestamp, write_fault); WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); if (ret) return 1; @@ -618,7 +618,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, * tables */ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, write_fault)) + addr, entry->timestamp, write_fault)) return 1; } } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 077c6d920e27..e019249883fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -41,7 +41,7 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index a9ea23fa0def..ed7facacf2fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -32,7 +32,7 @@ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index ab06c2b4b20b..33736d361dd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -35,7 +35,7 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 8d7d0813e331..1c99bb09e2a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -32,7 +32,7 @@ static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c index aac107898bae..964c29ef25dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c @@ -42,23 +42,23 @@ static const unsigned int isp_4_1_0_int_srcid[MAX_ISP410_INT_SRC] = { static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp) { struct amdgpu_device *adev = isp->adev; + int idx, int_idx, num_res, r; u64 isp_base; - int int_idx; - int r; if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) return -EINVAL; isp_base = adev->rmmio_base; - isp->isp_cell = kcalloc(1, sizeof(struct mfd_cell), GFP_KERNEL); + isp->isp_cell = kcalloc(2, sizeof(struct mfd_cell), GFP_KERNEL); if (!isp->isp_cell) { r = -ENOMEM; DRM_ERROR("%s: isp mfd cell alloc failed\n", __func__); goto failure; } - isp->isp_res = kcalloc(MAX_ISP410_INT_SRC + 1, sizeof(struct resource), + num_res = MAX_ISP410_MEM_RES + MAX_ISP410_SENSOR_RES + MAX_ISP410_INT_SRC; + isp->isp_res = kcalloc(num_res, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_res) { r = -ENOMEM; @@ -83,22 +83,53 @@ static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp) isp->isp_res[0].start = isp_base; isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END; - for (int_idx = 0; int_idx < MAX_ISP410_INT_SRC; int_idx++) { - isp->isp_res[int_idx + 1].name = "isp_4_1_0_irq"; - isp->isp_res[int_idx + 1].flags = IORESOURCE_IRQ; - isp->isp_res[int_idx + 1].start = + isp->isp_res[1].name = "isp_4_1_phy0_reg"; + isp->isp_res[1].flags = IORESOURCE_MEM; + isp->isp_res[1].start = isp_base + ISP410_PHY0_OFFSET; + isp->isp_res[1].end = isp_base + ISP410_PHY0_OFFSET + ISP410_PHY0_SIZE; + + isp->isp_res[2].name = "isp_gpio_sensor0_reg"; + isp->isp_res[2].flags = IORESOURCE_MEM; + isp->isp_res[2].start = isp_base + ISP410_GPIO_SENSOR0_OFFSET; + isp->isp_res[2].end = isp_base + ISP410_GPIO_SENSOR0_OFFSET + + ISP410_GPIO_SENSOR0_SIZE; + + for (idx = MAX_ISP410_MEM_RES + MAX_ISP410_SENSOR_RES, int_idx = 0; + idx < num_res; idx++, int_idx++) { + isp->isp_res[idx].name = "isp_4_1_0_irq"; + isp->isp_res[idx].flags = IORESOURCE_IRQ; + isp->isp_res[idx].start = amdgpu_irq_create_mapping(adev, isp_4_1_0_int_srcid[int_idx]); - isp->isp_res[int_idx + 1].end = - isp->isp_res[int_idx + 1].start; + isp->isp_res[idx].end = + isp->isp_res[idx].start; } isp->isp_cell[0].name = "amd_isp_capture"; - isp->isp_cell[0].num_resources = MAX_ISP410_INT_SRC + 1; + isp->isp_cell[0].num_resources = num_res; isp->isp_cell[0].resources = &isp->isp_res[0]; isp->isp_cell[0].platform_data = isp->isp_pdata; isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); - r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 1); + isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), + GFP_KERNEL); + if (!isp->isp_i2c_res) { + r = -ENOMEM; + DRM_ERROR("%s: isp mfd res alloc failed\n", __func__); + goto failure; + } + + isp->isp_i2c_res[0].name = "isp_i2c0_reg"; + isp->isp_i2c_res[0].flags = IORESOURCE_MEM; + isp->isp_i2c_res[0].start = isp_base + ISP410_I2C0_OFFSET; + isp->isp_i2c_res[0].end = isp_base + ISP410_I2C0_OFFSET + ISP410_I2C0_SIZE; + + isp->isp_cell[1].name = "amd_isp_i2c_designware"; + isp->isp_cell[1].num_resources = 1; + isp->isp_cell[1].resources = &isp->isp_i2c_res[0]; + isp->isp_cell[1].platform_data = isp->isp_pdata; + isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data); + + r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2); if (r) { DRM_ERROR("%s: add mfd hotplug device failed\n", __func__); goto failure; @@ -111,6 +142,7 @@ failure: kfree(isp->isp_pdata); kfree(isp->isp_res); kfree(isp->isp_cell); + kfree(isp->isp_i2c_res); return r; } @@ -122,6 +154,7 @@ static int isp_v4_1_0_hw_fini(struct amdgpu_isp *isp) kfree(isp->isp_res); kfree(isp->isp_cell); kfree(isp->isp_pdata); + kfree(isp->isp_i2c_res); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h index 315f2822410c..7db24c0f1080 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h @@ -32,8 +32,19 @@ #include "ivsrcid/isp/irqsrcs_isp_4_1.h" +#define MAX_ISP410_MEM_RES 2 +#define MAX_ISP410_SENSOR_RES 1 #define MAX_ISP410_INT_SRC 8 +#define ISP410_PHY0_OFFSET 0x66700 +#define ISP410_PHY0_SIZE 0xD30 + +#define ISP410_I2C0_OFFSET 0x66400 +#define ISP410_I2C0_SIZE 0x100 + +#define ISP410_GPIO_SENSOR0_OFFSET 0x6613C +#define ISP410_GPIO_SENSOR0_SIZE 0x4 + void isp_v4_1_0_set_isp_funcs(struct amdgpu_isp *isp); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index 4e17fa03f7b5..b56f27295468 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -42,23 +42,24 @@ static const unsigned int isp_4_1_1_int_srcid[MAX_ISP411_INT_SRC] = { static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) { struct amdgpu_device *adev = isp->adev; + int idx, int_idx, num_res, r; u64 isp_base; - int int_idx; - int r; if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) return -EINVAL; isp_base = adev->rmmio_base; - isp->isp_cell = kcalloc(1, sizeof(struct mfd_cell), GFP_KERNEL); + isp->isp_cell = kcalloc(2, sizeof(struct mfd_cell), GFP_KERNEL); if (!isp->isp_cell) { r = -ENOMEM; DRM_ERROR("%s: isp mfd cell alloc failed\n", __func__); goto failure; } - isp->isp_res = kcalloc(MAX_ISP411_INT_SRC + 1, sizeof(struct resource), + num_res = MAX_ISP411_MEM_RES + MAX_ISP411_SENSOR_RES + MAX_ISP411_INT_SRC; + + isp->isp_res = kcalloc(num_res, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_res) { r = -ENOMEM; @@ -83,22 +84,52 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) isp->isp_res[0].start = isp_base; isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END; - for (int_idx = 0; int_idx < MAX_ISP411_INT_SRC; int_idx++) { - isp->isp_res[int_idx + 1].name = "isp_4_1_1_irq"; - isp->isp_res[int_idx + 1].flags = IORESOURCE_IRQ; - isp->isp_res[int_idx + 1].start = + isp->isp_res[1].name = "isp_4_1_1_phy0_reg"; + isp->isp_res[1].flags = IORESOURCE_MEM; + isp->isp_res[1].start = isp_base + ISP411_PHY0_OFFSET; + isp->isp_res[1].end = isp_base + ISP411_PHY0_OFFSET + ISP411_PHY0_SIZE; + + isp->isp_res[2].name = "isp_4_1_1_sensor0_reg"; + isp->isp_res[2].flags = IORESOURCE_MEM; + isp->isp_res[2].start = isp_base + ISP411_GPIO_SENSOR0_OFFSET; + isp->isp_res[2].end = isp_base + ISP411_GPIO_SENSOR0_OFFSET + + ISP411_GPIO_SENSOR0_SIZE; + + for (idx = MAX_ISP411_MEM_RES + MAX_ISP411_SENSOR_RES, int_idx = 0; + idx < num_res; idx++, int_idx++) { + isp->isp_res[idx].name = "isp_4_1_1_irq"; + isp->isp_res[idx].flags = IORESOURCE_IRQ; + isp->isp_res[idx].start = amdgpu_irq_create_mapping(adev, isp_4_1_1_int_srcid[int_idx]); - isp->isp_res[int_idx + 1].end = - isp->isp_res[int_idx + 1].start; + isp->isp_res[idx].end = + isp->isp_res[idx].start; } isp->isp_cell[0].name = "amd_isp_capture"; - isp->isp_cell[0].num_resources = MAX_ISP411_INT_SRC + 1; + isp->isp_cell[0].num_resources = num_res; isp->isp_cell[0].resources = &isp->isp_res[0]; isp->isp_cell[0].platform_data = isp->isp_pdata; isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); - r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 1); + isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL); + if (!isp->isp_i2c_res) { + r = -ENOMEM; + DRM_ERROR("%s: isp mfd res alloc failed\n", __func__); + goto failure; + } + + isp->isp_i2c_res[0].name = "isp_i2c0_reg"; + isp->isp_i2c_res[0].flags = IORESOURCE_MEM; + isp->isp_i2c_res[0].start = isp_base + ISP411_I2C0_OFFSET; + isp->isp_i2c_res[0].end = isp_base + ISP411_I2C0_OFFSET + ISP411_I2C0_SIZE; + + isp->isp_cell[1].name = "amd_isp_i2c_designware"; + isp->isp_cell[1].num_resources = 1; + isp->isp_cell[1].resources = &isp->isp_i2c_res[0]; + isp->isp_cell[1].platform_data = isp->isp_pdata; + isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data); + + r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2); if (r) { DRM_ERROR("%s: add mfd hotplug device failed\n", __func__); goto failure; @@ -111,6 +142,7 @@ failure: kfree(isp->isp_pdata); kfree(isp->isp_res); kfree(isp->isp_cell); + kfree(isp->isp_i2c_res); return r; } @@ -122,6 +154,7 @@ static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp) kfree(isp->isp_res); kfree(isp->isp_cell); kfree(isp->isp_pdata); + kfree(isp->isp_i2c_res); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h index dfb9522c9d6a..40887ddeb08c 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h @@ -32,8 +32,19 @@ #include "ivsrcid/isp/irqsrcs_isp_4_1.h" +#define MAX_ISP411_MEM_RES 2 +#define MAX_ISP411_SENSOR_RES 1 #define MAX_ISP411_INT_SRC 8 +#define ISP411_PHY0_OFFSET 0x66700 +#define ISP411_PHY0_SIZE 0xD30 + +#define ISP411_I2C0_OFFSET 0x66400 +#define ISP411_I2C0_SIZE 0x100 + +#define ISP411_GPIO_SENSOR0_OFFSET 0x6613C +#define ISP411_GPIO_SENSOR0_SIZE 0x4 + void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 2ea8223eb969..8edcd85a1261 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -360,6 +360,26 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; + /*mes_reset_queue_pkt.reset_queue_only = 1;*/ + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -421,13 +441,41 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes, struct mes_suspend_gang_input *input) { - return 0; + union MESAPI__SUSPEND mes_suspend_gang_pkt; + + memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt)); + + mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND; + mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs; + mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr; + mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr; + mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt), + offsetof(union MESAPI__SUSPEND, api_status)); } static int mes_v11_0_resume_gang(struct amdgpu_mes *mes, struct mes_resume_gang_input *input) { - return 0; + union MESAPI__RESUME mes_resume_gang_pkt; + + memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt)); + + mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME; + mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs; + mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt), + offsetof(union MESAPI__RESUME, api_status)); } static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes) @@ -595,6 +643,38 @@ static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } +static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes, + struct mes_reset_legacy_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + + if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + mes_reset_queue_pkt.reset_legacy_gfx = 1; + mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; + mes_reset_queue_pkt.queue_id_lp = input->queue_id; + mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; + mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; + mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; + mes_reset_queue_pkt.vmid_id_lp = input->vmid; + } else { + mes_reset_queue_pkt.reset_queue_only = 1; + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + } + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} + static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .add_hw_queue = mes_v11_0_add_hw_queue, .remove_hw_queue = mes_v11_0_remove_hw_queue, @@ -603,6 +683,8 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .suspend_gang = mes_v11_0_suspend_gang, .resume_gang = mes_v11_0_resume_gang, .misc_op = mes_v11_0_misc_op, + .reset_legacy_queue = mes_v11_0_reset_legacy_queue, + .reset_hw_queue = mes_v11_0_reset_hw_queue, }; static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index e39a58d262c9..47a73f6ae4da 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -350,6 +350,32 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + int pipe; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; + /*mes_reset_queue_pkt.reset_queue_only = 1;*/ + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -676,6 +702,44 @@ static void mes_v12_0_enable_unmapped_doorbell_handling( WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data); } +static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, + struct mes_reset_legacy_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + int pipe; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + + if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + mes_reset_queue_pkt.reset_legacy_gfx = 1; + mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; + mes_reset_queue_pkt.queue_id_lp = input->queue_id; + mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; + mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; + mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; + mes_reset_queue_pkt.vmid_id_lp = input->vmid; + } else { + mes_reset_queue_pkt.reset_queue_only = 1; + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + } + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} + static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .add_hw_queue = mes_v12_0_add_hw_queue, .remove_hw_queue = mes_v12_0_remove_hw_queue, @@ -684,6 +748,8 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .suspend_gang = mes_v12_0_suspend_gang, .resume_gang = mes_v12_0_resume_gang, .misc_op = mes_v12_0_misc_op, + .reset_legacy_queue = mes_v12_0_reset_legacy_queue, + .reset_hw_queue = mes_v12_0_reset_hw_queue, }; static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 621761a17ac7..b01bb759d0f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -559,22 +559,6 @@ static void mmhub_v1_8_get_clockgating(struct amdgpu_device *adev, u64 *flags) } -static bool mmhub_v1_8_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst) -{ - u32 fed, status; - - status = RREG32_SOC15(MMHUB, hub_inst, regVM_L2_PROTECTION_FAULT_STATUS); - fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); - if (!amdgpu_sriov_vf(adev)) { - /* clear page fault status and address */ - WREG32_P(SOC15_REG_OFFSET(MMHUB, hub_inst, - regVM_L2_PROTECTION_FAULT_CNTL), 1, ~1); - } - - return fed; -} - const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .get_fb_location = mmhub_v1_8_get_fb_location, .init = mmhub_v1_8_init, @@ -584,7 +568,6 @@ const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .setup_vm_pt_regs = mmhub_v1_8_setup_vm_pt_regs, .set_clockgating = mmhub_v1_8_set_clockgating, .get_clockgating = mmhub_v1_8_get_clockgating, - .query_utcl2_poison_status = mmhub_v1_8_query_utcl2_poison_status, }; static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = { @@ -670,8 +653,8 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, &ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); } static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index caf616a2c8a6..1d099ffb3a5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -25,7 +25,7 @@ #define __MXGPU_NV_H__ #define NV_MAILBOX_POLL_ACK_TIMEDOUT 500 -#define NV_MAILBOX_POLL_MSG_TIMEDOUT 6000 +#define NV_MAILBOX_POLL_MSG_TIMEDOUT 15000 #define NV_MAILBOX_POLL_FLR_TIMEDOUT 10000 #define NV_MAILBOX_POLL_MSG_REP_MAX 11 diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 772604feb6ac..23ef4eb36b40 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -72,6 +72,53 @@ MODULE_FIRMWARE("amdgpu/renoir_sdma.bin"); MODULE_FIRMWARE("amdgpu/green_sardine_sdma.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_sdma.bin"); +static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL) +}; + #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L @@ -1750,6 +1797,8 @@ static int sdma_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + uint32_t *ptr; /* SDMA trap event */ for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1870,6 +1919,13 @@ static int sdma_v4_0_sw_init(void *handle) return -EINVAL; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1890,6 +1946,8 @@ static int sdma_v4_0_sw_fini(void *handle) else amdgpu_sdma_destroy_inst_ctx(adev, false); + kfree(adev->sdma.ip_dump); + return 0; } @@ -2292,6 +2350,48 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v4_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_4_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + +static void sdma_v4_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v4_0_get_reg_offset(adev, i, + sdma_reg_list_4_0[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v4_0_ip_funcs = { .name = "sdma_v4_0", .early_init = sdma_v4_0_early_init, @@ -2308,6 +2408,8 @@ const struct amd_ip_funcs sdma_v4_0_ip_funcs = { .set_clockgating_state = sdma_v4_0_set_clockgating_state, .set_powergating_state = sdma_v4_0_set_powergating_state, .get_clockgating_state = sdma_v4_0_get_clockgating_state, + .dump_ip_state = sdma_v4_0_dump_ip_state, + .print_ip_state = sdma_v4_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 2c55bfd935bb..c77889040760 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -46,6 +46,53 @@ MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin"); MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin"); +static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = { + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_VM_CNTL) +}; + #define mmSMNAID_AID0_MCA_SMU 0x03b30400 #define WREG32_SDMA(instance, offset, value) \ @@ -1291,6 +1338,8 @@ static int sdma_v4_4_2_sw_init(void *handle) int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 aid_id; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); + uint32_t *ptr; /* SDMA trap event */ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) { @@ -1386,6 +1435,13 @@ static int sdma_v4_4_2_sw_init(void *handle) return -EINVAL; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1406,6 +1462,8 @@ static int sdma_v4_4_2_sw_fini(void *handle) else amdgpu_sdma_destroy_inst_ctx(adev, false); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1799,6 +1857,48 @@ static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v4_4_2_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_4_4_2[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + +static void sdma_v4_4_2_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v4_4_2_get_reg_offset(adev, i, + sdma_reg_list_4_4_2[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { .name = "sdma_v4_4_2", .early_init = sdma_v4_4_2_early_init, @@ -1815,6 +1915,8 @@ const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { .set_clockgating_state = sdma_v4_4_2_set_clockgating_state, .set_powergating_state = sdma_v4_4_2_set_powergating_state, .get_clockgating_state = sdma_v4_4_2_get_clockgating_state, + .dump_ip_state = sdma_v4_4_2_dump_ip_state, + .print_ip_state = sdma_v4_4_2_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { @@ -2141,7 +2243,7 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, &ue_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); } static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index b7d33d78bce0..3e48ea38385d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -59,6 +59,55 @@ MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin"); #define SDMA0_HYP_DEC_REG_END 0x5893 #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2) +}; + static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -1341,6 +1390,8 @@ static int sdma_v5_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); + uint32_t *ptr; /* SDMA trap event */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, @@ -1378,6 +1429,13 @@ static int sdma_v5_0_sw_init(void *handle) return r; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1391,6 +1449,8 @@ static int sdma_v5_0_sw_fini(void *handle) amdgpu_sdma_destroy_inst_ctx(adev, false); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1718,7 +1778,49 @@ static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } -const struct amd_ip_funcs sdma_v5_0_ip_funcs = { +static void sdma_v5_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_5_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + +static void sdma_v5_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v5_0_get_reg_offset(adev, i, + sdma_reg_list_5_0[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + +static const struct amd_ip_funcs sdma_v5_0_ip_funcs = { .name = "sdma_v5_0", .early_init = sdma_v5_0_early_init, .late_init = NULL, @@ -1734,6 +1836,8 @@ const struct amd_ip_funcs sdma_v5_0_ip_funcs = { .set_clockgating_state = sdma_v5_0_set_clockgating_state, .set_powergating_state = sdma_v5_0_set_powergating_state, .get_clockgating_state = sdma_v5_0_get_clockgating_state, + .dump_ip_state = sdma_v5_0_dump_ip_state, + .print_ip_state = sdma_v5_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h index d4e3c2e696f6..2ab71f21755a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h @@ -24,7 +24,6 @@ #ifndef __SDMA_V5_0_H__ #define __SDMA_V5_0_H__ -extern const struct amd_ip_funcs sdma_v5_0_ip_funcs; extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block; #endif /* __SDMA_V5_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 2e72d445415f..bc9b240a3488 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -60,6 +60,55 @@ MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin"); #define SDMA0_HYP_DEC_REG_END 0x5893 #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_2[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2) +}; + static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -1224,6 +1273,8 @@ static int sdma_v5_2_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2); + uint32_t *ptr; /* SDMA trap event */ for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1255,6 +1306,13 @@ static int sdma_v5_2_sw_init(void *handle) return r; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1268,6 +1326,8 @@ static int sdma_v5_2_sw_fini(void *handle) amdgpu_sdma_destroy_inst_ctx(adev, true); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1676,7 +1736,49 @@ static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring) amdgpu_gfx_off_ctrl(adev, true); } -const struct amd_ip_funcs sdma_v5_2_ip_funcs = { +static void sdma_v5_2_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_5_2[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + +static void sdma_v5_2_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v5_2_get_reg_offset(adev, i, + sdma_reg_list_5_2[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + +static const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .name = "sdma_v5_2", .early_init = sdma_v5_2_early_init, .late_init = NULL, @@ -1692,6 +1794,8 @@ const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .set_clockgating_state = sdma_v5_2_set_clockgating_state, .set_powergating_state = sdma_v5_2_set_powergating_state, .get_clockgating_state = sdma_v5_2_get_clockgating_state, + .dump_ip_state = sdma_v5_2_dump_ip_state, + .print_ip_state = sdma_v5_2_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h index b70414fef2a1..863145b3a77e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h @@ -24,7 +24,6 @@ #ifndef __SDMA_V5_2_H__ #define __SDMA_V5_2_H__ -extern const struct amd_ip_funcs sdma_v5_2_ip_funcs; extern const struct amdgpu_ip_block_version sdma_v5_2_ip_block; #endif /* __SDMA_V5_2_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index dab4c2db8c9d..208a1fa9d4e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -57,6 +57,63 @@ MODULE_FIRMWARE("amdgpu/sdma_6_1_2.bin"); #define SDMA0_HYP_DEC_REG_END 0x589a #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_6_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS4_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS5_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS6_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_CHICKEN_BITS), +}; + static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -1239,6 +1296,8 @@ static int sdma_v6_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0); + uint32_t *ptr; /* SDMA trap event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, @@ -1274,6 +1333,13 @@ static int sdma_v6_0_sw_init(void *handle) return -EINVAL; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1287,6 +1353,8 @@ static int sdma_v6_0_sw_fini(void *handle) amdgpu_sdma_destroy_inst_ctx(adev, true); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1488,6 +1556,48 @@ static void sdma_v6_0_get_clockgating_state(void *handle, u64 *flags) { } +static void sdma_v6_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_6_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + +static void sdma_v6_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v6_0_get_reg_offset(adev, i, + sdma_reg_list_6_0[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v6_0_ip_funcs = { .name = "sdma_v6_0", .early_init = sdma_v6_0_early_init, @@ -1505,6 +1615,8 @@ const struct amd_ip_funcs sdma_v6_0_ip_funcs = { .set_clockgating_state = sdma_v6_0_set_clockgating_state, .set_powergating_state = sdma_v6_0_set_powergating_state, .get_clockgating_state = sdma_v6_0_get_clockgating_state, + .dump_ip_state = sdma_v6_0_dump_ip_state, + .print_ip_state = sdma_v6_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index ecee9e7d7e4c..cfd8e183ad50 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -51,6 +51,64 @@ MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin"); #define SDMA0_HYP_DEC_REG_END 0x589a #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS4_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS5_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS6_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UCODE_REV), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_VM_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_CHICKEN_BITS), +}; + static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -1217,6 +1275,8 @@ static int sdma_v7_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); + uint32_t *ptr; /* SDMA trap event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, @@ -1247,6 +1307,13 @@ static int sdma_v7_0_sw_init(void *handle) return r; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1263,6 +1330,8 @@ static int sdma_v7_0_sw_fini(void *handle) if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) sdma_v12_0_free_ucode_buffer(adev); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1466,6 +1535,48 @@ static void sdma_v7_0_get_clockgating_state(void *handle, u64 *flags) { } +static void sdma_v7_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_7_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + +static void sdma_v7_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v7_0_get_reg_offset(adev, i, + sdma_reg_list_7_0[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v7_0_ip_funcs = { .name = "sdma_v7_0", .early_init = sdma_v7_0_early_init, @@ -1483,6 +1594,8 @@ const struct amd_ip_funcs sdma_v7_0_ip_funcs = { .set_clockgating_state = sdma_v7_0_set_clockgating_state, .set_powergating_state = sdma_v7_0_set_powergating_state, .get_clockgating_state = sdma_v7_0_get_clockgating_state, + .dump_ip_state = sdma_v7_0_dump_ip_state, + .print_ip_state = sdma_v7_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 282584a48be0..ef7c603b50ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -93,6 +93,10 @@ struct soc15_ras_field_entry { #define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) +/* Over ride the instance id */ +#define SOC15_REG_ENTRY_OFFSET_INST(entry, inst) \ + (adev->reg_offset[entry.hwip][inst][entry.seg] + entry.reg_offset) + #define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \ { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index e74e1983da53..b9cbeb389edc 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -413,6 +413,10 @@ # define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) +#define PACKET3_RUN_CLEANER_SHADER 0xD2 +/* 1. header + * 2. RESERVED [31:0] + */ #define VCE_CMD_NO_OP 0x00000000 #define VCE_CMD_END 0x00000001 diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 9dbb13adb661..1a8ea834efa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -157,9 +157,9 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev, umc_v12_0_query_error_count_per_type(adev, umc_reg_offset, &de_count, umc_v12_0_is_deferred_error); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); - amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, de_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, de_count); return 0; } @@ -225,26 +225,16 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, } } -static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, - struct ta_ras_query_address_input *addr_in, - uint64_t *pfns, int len) +static void umc_v12_0_dump_addr_info(struct amdgpu_device *adev, + struct ta_ras_query_address_output *addr_out, + uint64_t err_addr) { uint32_t col, row, row_xor, bank, channel_index; - uint64_t soc_pa, retired_page, column, err_addr; - struct ta_ras_query_address_output addr_out; - uint32_t pos = 0; + uint64_t soc_pa, retired_page, column; - err_addr = addr_in->ma.err_addr; - addr_in->addr_type = TA_RAS_MCA_TO_PA; - if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) { - dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", - err_addr); - return 0; - } - - soc_pa = addr_out.pa.pa; - bank = addr_out.pa.bank; - channel_index = addr_out.pa.channel_idx; + soc_pa = addr_out->pa.pa; + bank = addr_out->pa.bank; + channel_index = addr_out->pa.channel_idx; col = (err_addr >> 1) & 0x1fULL; row = (err_addr >> 10) & 0x3fffULL; @@ -254,6 +244,37 @@ static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, /* clear [C4] in soc physical address */ soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); + /* loop for all possibilities of [C4 C3 C2] */ + for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { + retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); + retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); + /* include column bit 0 and 1 */ + col &= 0x3; + col |= (column << 2); + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row, col, bank, channel_index); + + /* shift R13 bit */ + retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row_xor, col, bank, channel_index); + } +} + +static int umc_v12_0_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, + uint64_t pa_addr, uint64_t *pfns, int len) +{ + uint64_t soc_pa, retired_page, column; + uint32_t pos = 0; + + soc_pa = pa_addr; + /* clear [C3 C2] in soc physical address */ + soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); + /* clear [C4] in soc physical address */ + soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); + /* loop for all possibilities of [C4 C3 C2] */ for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); @@ -263,13 +284,6 @@ static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, return 0; pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - /* include column bit 0 and 1 */ - col &= 0x3; - col |= (column << 2); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row, col, bank, channel_index); - /* shift R13 bit */ retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); @@ -277,14 +291,40 @@ static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, return 0; pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row_xor, col, bank, channel_index); } return pos; } +static int umc_v12_0_convert_mca_to_addr(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch, uint32_t umc, + uint32_t node, uint32_t socket, + uint64_t *addr, bool dump_addr) +{ + struct ta_ras_query_address_input addr_in; + struct ta_ras_query_address_output addr_out; + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = ch; + addr_in.ma.umc_inst = umc; + addr_in.ma.node_inst = node; + addr_in.ma.socket_id = socket; + addr_in.addr_type = TA_RAS_MCA_TO_PA; + if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out)) { + dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", + err_addr); + return -EINVAL; + } + + if (dump_addr) + umc_v12_0_dump_addr_info(adev, &addr_out, err_addr); + + *addr = addr_out.pa.pa; + + return 0; +} + static int umc_v12_0_query_error_address(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data) @@ -483,12 +523,10 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); uint16_t hwid, mcatype; - struct ta_ras_query_address_input addr_in; uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; - uint64_t err_addr, hash_val = 0; + uint64_t err_addr, pa_addr = 0; struct ras_ecc_err *ecc_err; - int count; - int ret; + int count, ret, i; hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); @@ -514,46 +552,25 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, MCA_IPID_2_UMC_CH(ipid), err_addr); - memset(page_pfn, 0, sizeof(page_pfn)); - - memset(&addr_in, 0, sizeof(addr_in)); - addr_in.ma.err_addr = err_addr; - addr_in.ma.ch_inst = MCA_IPID_2_UMC_CH(ipid); - addr_in.ma.umc_inst = MCA_IPID_2_UMC_INST(ipid); - addr_in.ma.node_inst = MCA_IPID_2_DIE_ID(ipid); - addr_in.ma.socket_id = MCA_IPID_2_SOCKET_ID(ipid); - - count = umc_v12_0_convert_err_addr(adev, - &addr_in, page_pfn, ARRAY_SIZE(page_pfn)); - if (count <= 0) { - dev_warn(adev->dev, "Fail to convert error address! count:%d\n", count); - return 0; - } - - ret = amdgpu_umc_build_pages_hash(adev, - page_pfn, count, &hash_val); - if (ret) { - dev_err(adev->dev, "Fail to build error pages hash\n"); + ret = umc_v12_0_convert_mca_to_addr(adev, + err_addr, MCA_IPID_2_UMC_CH(ipid), + MCA_IPID_2_UMC_INST(ipid), MCA_IPID_2_DIE_ID(ipid), + MCA_IPID_2_SOCKET_ID(ipid), &pa_addr, true); + if (ret) return ret; - } ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL); if (!ecc_err) return -ENOMEM; - ecc_err->err_pages.pfn = kcalloc(count, sizeof(*ecc_err->err_pages.pfn), GFP_KERNEL); - if (!ecc_err->err_pages.pfn) { - kfree(ecc_err); - return -ENOMEM; - } - - memcpy(ecc_err->err_pages.pfn, page_pfn, count * sizeof(*ecc_err->err_pages.pfn)); - ecc_err->err_pages.count = count; - - ecc_err->hash_index = hash_val; ecc_err->status = status; ecc_err->ipid = ipid; ecc_err->addr = addr; + ecc_err->pa_pfn = UMC_V12_ADDR_MASK_BAD_COLS(pa_addr) >> AMDGPU_GPU_PAGE_SHIFT; + + /* If converted pa_pfn is 0, use pa C4 pfn. */ + if (!ecc_err->pa_pfn) + ecc_err->pa_pfn = BIT_ULL(UMC_V12_0_PA_C4_BIT) >> AMDGPU_GPU_PAGE_SHIFT; ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err); if (ret) { @@ -562,13 +579,25 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, else dev_err(adev->dev, "Fail to log ecc error! ret:%d\n", ret); - kfree(ecc_err->err_pages.pfn); kfree(ecc_err); return ret; } con->umc_ecc_log.de_queried_count++; + memset(page_pfn, 0, sizeof(page_pfn)); + count = umc_v12_0_lookup_bad_pages_in_a_row(adev, + pa_addr, + page_pfn, ARRAY_SIZE(page_pfn)); + if (count <= 0) { + dev_warn(adev->dev, "Fail to convert error address! count:%d\n", count); + return 0; + } + + /* Reserve memory */ + for (i = 0; i < count; i++) + amdgpu_ras_reserve_page(adev, page_pfn[i]); + /* The problem case is as follows: * 1. GPU A triggers a gpu ras reset, and GPU A drives * GPU B to also perform a gpu ras reset. @@ -593,16 +622,21 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev, struct ras_ecc_err *ecc_err, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - uint32_t i = 0; - int ret = 0; + uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; + int ret, i, count; if (!err_data || !ecc_err) return -EINVAL; - for (i = 0; i < ecc_err->err_pages.count; i++) { + memset(page_pfn, 0, sizeof(page_pfn)); + count = umc_v12_0_lookup_bad_pages_in_a_row(adev, + ecc_err->pa_pfn << AMDGPU_GPU_PAGE_SHIFT, + page_pfn, ARRAY_SIZE(page_pfn)); + + for (i = 0; i < count; i++) { ret = amdgpu_umc_fill_error_record(err_data, ecc_err->addr, - ecc_err->err_pages.pfn[i] << AMDGPU_GPU_PAGE_SHIFT, + page_pfn[i] << AMDGPU_GPU_PAGE_SHIFT, MCA_IPID_2_UMC_CH(ecc_err->ipid), MCA_IPID_2_UMC_INST(ecc_err->ipid)); if (ret) @@ -636,7 +670,8 @@ static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev, dev_err(adev->dev, "Fail to fill umc error record, ret:%d\n", ret); break; } - radix_tree_tag_clear(ecc_tree, entries[i]->hash_index, UMC_ECC_NEW_DETECTED_TAG); + radix_tree_tag_clear(ecc_tree, + entries[i]->pa_pfn, UMC_ECC_NEW_DETECTED_TAG); } mutex_unlock(&con->umc_ecc_log.lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index b4974793850b..be5598d76c1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -81,6 +81,11 @@ (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \ (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03)) +#define UMC_V12_ADDR_MASK_BAD_COLS(addr) \ + ((addr) & ~((0x3ULL << UMC_V12_0_PA_C2_BIT) | \ + (0x1ULL << UMC_V12_0_PA_C4_BIT) | \ + (0x1ULL << UMC_V12_0_PA_R13_BIT))) + bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 32517c364cf7..4bfba2931b08 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -950,7 +950,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = { .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, - .parse_cs = amdgpu_vce_ring_parse_cs_vm, + .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm, .emit_frame_size = 6 + /* vce_v3_0_emit_vm_flush */ 4 + /* vce_v3_0_emit_pipeline_sync */ diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 06d787385ad4..0748bf44c880 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -1102,7 +1102,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, .set_wptr = vce_v4_0_ring_set_wptr, - .parse_cs = amdgpu_vce_ring_parse_cs_vm, + .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index a280b9fecb77..ecdfbfefd66a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -45,6 +45,42 @@ #define mmUVD_REG_XX_MASK_1_0 0x05ac #define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_1_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); @@ -90,6 +126,8 @@ static int vcn_v1_0_sw_init(void *handle) { struct amdgpu_ring *ring; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* VCN DEC TRAP */ @@ -161,6 +199,14 @@ static int vcn_v1_0_sw_init(void *handle) r = jpeg_v1_0_sw_init(handle); + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return r; } @@ -184,6 +230,8 @@ static int vcn_v1_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1877,6 +1925,66 @@ void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring) mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround); } +static void vcn_v1_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_1_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v1_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_1_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .name = "vcn_v1_0", .early_init = vcn_v1_0_early_init, @@ -1895,8 +2003,8 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */, .set_clockgating_state = vcn_v1_0_set_clockgating_state, .set_powergating_state = vcn_v1_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v1_0_dump_ip_state, + .print_ip_state = vcn_v1_0_print_ip_state, }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index d3d096909a7f..bfd067e2d2f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -53,6 +53,42 @@ #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); @@ -96,6 +132,8 @@ static int vcn_v2_0_sw_init(void *handle) { struct amdgpu_ring *ring; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; volatile struct amdgpu_fw_shared *fw_shared; @@ -184,6 +222,15 @@ static int vcn_v2_0_sw_init(void *handle) if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(adev->vcn.inst); + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -213,6 +260,8 @@ static int vcn_v2_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1985,6 +2034,66 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) return vcn_v2_0_start_mmsch(adev, &adev->virt.mm_table); } +static void vcn_v2_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v2_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .name = "vcn_v2_0", .early_init = vcn_v2_0_early_init, @@ -2003,8 +2112,8 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_0_set_clockgating_state, .set_powergating_state = vcn_v2_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v2_0_dump_ip_state, + .print_ip_state = vcn_v2_0_print_ip_state, }; static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 96f60c303161..04e9e806e318 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -55,6 +55,43 @@ #define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_5[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); @@ -122,6 +159,8 @@ static int vcn_v2_5_sw_init(void *handle) { struct amdgpu_ring *ring; int i, j, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (j = 0; j < adev->vcn.num_vcn_inst; j++) { @@ -241,6 +280,15 @@ static int vcn_v2_5_sw_init(void *handle) if (r) return r; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -277,6 +325,8 @@ static int vcn_v2_5_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1876,6 +1926,66 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v2_5_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_5[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v2_5_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_5[j], i)); + } +} + static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .name = "vcn_v2_5", .early_init = vcn_v2_5_early_init, @@ -1894,8 +2004,8 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v2_5_dump_ip_state, + .print_ip_state = vcn_v2_5_print_ip_state, }; static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { @@ -1916,8 +2026,8 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v2_5_dump_ip_state, + .print_ip_state = vcn_v2_5_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v2_5_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 24f947751c46..65dd68b32280 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -60,6 +60,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_3_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -126,6 +162,8 @@ static int vcn_v3_0_sw_init(void *handle) struct amdgpu_ring *ring; int i, j, r; int vcn_doorbell_index = 0; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; r = amdgpu_vcn_sw_init(adev); @@ -246,6 +284,15 @@ static int vcn_v3_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -284,6 +331,7 @@ static int vcn_v3_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); return r; } @@ -2203,6 +2251,67 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v3_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + uint32_t inst_off; + bool is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v3_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_3_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .name = "vcn_v3_0", .early_init = vcn_v3_0_early_init, @@ -2221,8 +2330,8 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v3_0_dump_ip_state, + .print_ip_state = vcn_v3_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 776c539bfdda..26c6f10a8c8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -52,6 +52,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -137,6 +173,8 @@ static int vcn_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -200,6 +238,15 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -239,6 +286,8 @@ static int vcn_v4_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -2109,6 +2158,67 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v4_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v4_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0[j], + i)); + } +} + static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .name = "vcn_v4_0", .early_init = vcn_v4_0_early_init, @@ -2127,8 +2237,8 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_set_clockgating_state, .set_powergating_state = vcn_v4_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v4_0_dump_ip_state, + .print_ip_state = vcn_v4_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 9bae95538b62..0fda70336300 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,42 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + #define NORMALIZE_VCN_REG_OFFSET(offset) \ (offset & 0x1FFFF) @@ -92,6 +128,8 @@ static int vcn_v4_0_3_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; int i, r, vcn_inst; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -159,6 +197,15 @@ static int vcn_v4_0_3_sw_init(void *handle) } } + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -194,6 +241,8 @@ static int vcn_v4_0_3_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1684,6 +1733,68 @@ static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs; } +static void vcn_v4_0_3_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_3[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v4_0_3_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off, inst_id; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_id = GET_INST(VCN, i); + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, inst_id, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_3[j], + inst_id)); + } +} + static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .name = "vcn_v4_0_3", .early_init = vcn_v4_0_3_early_init, @@ -1702,8 +1813,8 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, .set_powergating_state = vcn_v4_0_3_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v4_0_3_dump_ip_state, + .print_ip_state = vcn_v4_0_3_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 8d75061f9f38..b1fd226b7efb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -52,6 +52,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_5[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -97,6 +133,8 @@ static int vcn_v4_0_5_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -168,6 +206,14 @@ static int vcn_v4_0_5_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return 0; } @@ -207,6 +253,8 @@ static int vcn_v4_0_5_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1733,6 +1781,67 @@ static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v4_0_5_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_5[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v4_0_5_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_5[j], + i)); + } +} + static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .name = "vcn_v4_0_5", .early_init = vcn_v4_0_5_early_init, @@ -1751,8 +1860,8 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, .set_powergating_state = vcn_v4_0_5_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v4_0_5_dump_ip_state, + .print_ip_state = vcn_v4_0_5_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 68c97fcd539b..c305386358b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -37,6 +37,40 @@ #include +static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -83,6 +117,8 @@ static int vcn_v5_0_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -137,6 +173,14 @@ static int vcn_v5_0_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return 0; } @@ -173,6 +217,8 @@ static int vcn_v5_0_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1297,6 +1343,66 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v5_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_5_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v5_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_5_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .name = "vcn_v5_0_0", .early_init = vcn_v5_0_0_early_init, @@ -1315,8 +1421,8 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v5_0_dump_ip_state, + .print_ip_state = vcn_v5_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index 80ce42aacc0c..b61f6b838ec2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -246,6 +246,7 @@ * 1 - Stream * 2 - Bypass */ +#define EOP_EXEC (1 << 28) /* For Trailing Fence */ #define DATA_SEL(x) ((x) << 29) /* 0 - discard * 1 - send low 32bit data diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 32e5db509560..00350eccd571 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -247,14 +247,15 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->priority = args->queue_priority; q_properties->queue_address = args->ring_base_address; q_properties->queue_size = args->ring_size; - q_properties->read_ptr = (uint32_t *) args->read_pointer_address; - q_properties->write_ptr = (uint32_t *) args->write_pointer_address; + q_properties->read_ptr = (void __user *)args->read_pointer_address; + q_properties->write_ptr = (void __user *)args->write_pointer_address; q_properties->eop_ring_buffer_address = args->eop_buffer_address; q_properties->eop_ring_buffer_size = args->eop_buffer_size; q_properties->ctx_save_restore_area_address = args->ctx_save_restore_address; q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size; q_properties->ctl_stack_size = args->ctl_stack_size; + q_properties->sdma_engine_id = args->sdma_engine_id; if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) q_properties->type = KFD_QUEUE_TYPE_COMPUTE; @@ -262,6 +263,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->type = KFD_QUEUE_TYPE_SDMA; else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI) q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI; + else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID) + q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID; else return -ENOTSUPP; @@ -306,7 +309,6 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, struct kfd_process_device *pdd; struct queue_properties q_properties; uint32_t doorbell_offset_in_process = 0; - struct amdgpu_bo *wptr_bo = NULL; memset(&q_properties, 0, sizeof(struct queue_properties)); @@ -334,6 +336,18 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, goto err_bind_process; } + if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { + int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) + + kfd_get_num_xgmi_sdma_engines(dev) - 1; + + if (q_properties.sdma_engine_id > max_sdma_eng_id) { + err = -EINVAL; + pr_err("sdma_engine_id %i exceeds maximum id of %i\n", + q_properties.sdma_engine_id, max_sdma_eng_id); + goto err_sdma_engine_id; + } + } + if (!pdd->qpd.proc_doorbells) { err = kfd_alloc_process_doorbells(dev->kfd, pdd); if (err) { @@ -342,53 +356,17 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, } } - /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work - * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) - */ - if (dev->kfd->shared_resources.enable_mes && - ((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) - >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) { - struct amdgpu_bo_va_mapping *wptr_mapping; - struct amdgpu_vm *wptr_vm; - - wptr_vm = drm_priv_to_vm(pdd->drm_priv); - err = amdgpu_bo_reserve(wptr_vm->root.bo, false); - if (err) - goto err_wptr_map_gart; - - wptr_mapping = amdgpu_vm_bo_lookup_mapping( - wptr_vm, args->write_pointer_address >> PAGE_SHIFT); - amdgpu_bo_unreserve(wptr_vm->root.bo); - if (!wptr_mapping) { - pr_err("Failed to lookup wptr bo\n"); - err = -EINVAL; - goto err_wptr_map_gart; - } - - wptr_bo = wptr_mapping->bo_va->base.bo; - if (wptr_bo->tbo.base.size > PAGE_SIZE) { - pr_err("Requested GART mapping for wptr bo larger than one page\n"); - err = -EINVAL; - goto err_wptr_map_gart; - } - if (dev->adev != amdgpu_ttm_adev(wptr_bo->tbo.bdev)) { - pr_err("Queue memory allocated to wrong device\n"); - err = -EINVAL; - goto err_wptr_map_gart; - } - - err = amdgpu_amdkfd_map_gtt_bo_to_gart(wptr_bo); - if (err) { - pr_err("Failed to map wptr bo to GART\n"); - goto err_wptr_map_gart; - } + err = kfd_queue_acquire_buffers(pdd, &q_properties); + if (err) { + pr_debug("failed to acquire user queue buffers\n"); + goto err_acquire_queue_buf; } pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n", p->pasid, dev->id); - err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo, + err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, NULL, &doorbell_offset_in_process); if (err != 0) goto err_create_queue; @@ -422,9 +400,10 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, return 0; err_create_queue: - if (wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo); -err_wptr_map_gart: + kfd_queue_unref_bo_vas(pdd, &q_properties); + kfd_queue_release_buffers(pdd, &q_properties); +err_acquire_queue_buf: +err_sdma_engine_id: err_bind_process: err_pdd: mutex_unlock(&p->mutex); @@ -1422,8 +1401,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv); if (err) { - pr_err("Failed to unmap from gpu %d/%d\n", - i, args->n_devices); + pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices); goto unmap_memory_from_gpu_failed; } args->n_success = i+1; @@ -1963,7 +1941,7 @@ static int criu_checkpoint_bos(struct kfd_process *p, bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo); for (i = 0; i < p->n_pdds; i++) { - if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem)) + if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->drm_priv, kgd_mem)) bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 34a282540c7e..312dfa84f29f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -365,47 +365,47 @@ static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_i *watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID; - spin_lock(&pdd->dev->kfd->watch_points_lock); + spin_lock(&pdd->dev->watch_points_lock); for (i = 0; i < MAX_WATCH_ADDRESSES; i++) { /* device watchpoint in use so skip */ - if ((pdd->dev->kfd->alloc_watch_ids >> i) & 0x1) + if ((pdd->dev->alloc_watch_ids >> i) & 0x1) continue; pdd->alloc_watch_ids |= 0x1 << i; - pdd->dev->kfd->alloc_watch_ids |= 0x1 << i; + pdd->dev->alloc_watch_ids |= 0x1 << i; *watch_id = i; - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); return 0; } - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); return -ENOMEM; } static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id) { - spin_lock(&pdd->dev->kfd->watch_points_lock); + spin_lock(&pdd->dev->watch_points_lock); /* process owns device watch point so safe to clear */ if ((pdd->alloc_watch_ids >> watch_id) & 0x1) { pdd->alloc_watch_ids &= ~(0x1 << watch_id); - pdd->dev->kfd->alloc_watch_ids &= ~(0x1 << watch_id); + pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id); } - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); } static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id) { bool owns_watch_id = false; - spin_lock(&pdd->dev->kfd->watch_points_lock); + spin_lock(&pdd->dev->watch_points_lock); owns_watch_id = watch_id < MAX_WATCH_ADDRESSES && ((pdd->alloc_watch_ids >> watch_id) & 0x1); - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); return owns_watch_id; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index f4d20adaa068..fad1c8f2bc83 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -884,13 +884,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, dev_err(kfd_device, "Error initializing KFD node\n"); goto node_init_error; } + + spin_lock_init(&node->watch_points_lock); + kfd->nodes[i] = node; } svm_range_set_max_pages(kfd->adev); - spin_lock_init(&kfd->watch_points_lock); - kfd->init_complete = true; dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor, kfd->adev->pdev->device); @@ -907,7 +908,7 @@ node_alloc_error: kfd_doorbell_error: kfd_gtt_sa_fini(kfd); kfd_gtt_sa_init_error: - amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem); alloc_gtt_mem_failure: dev_err(kfd_device, "device %x:%x NOT added due to errors\n", @@ -925,7 +926,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfd_doorbell_fini(kfd); ida_destroy(&kfd->doorbell_ida); kfd_gtt_sa_fini(kfd); - amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem); } kfree(kfd); @@ -1445,6 +1446,45 @@ void kgd2kfd_unlock_kfd(void) mutex_unlock(&kfd_processes_mutex); } +int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + struct kfd_node *node; + int ret; + + if (!kfd->init_complete) + return 0; + + if (node_id >= kfd->num_nodes) { + dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", + node_id, kfd->num_nodes - 1); + return -EINVAL; + } + node = kfd->nodes[node_id]; + + ret = node->dqm->ops.unhalt(node->dqm); + if (ret) + dev_err(kfd_device, "Error in starting scheduler\n"); + + return ret; +} + +int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + struct kfd_node *node; + + if (!kfd->init_complete) + return 0; + + if (node_id >= kfd->num_nodes) { + dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", + node_id, kfd->num_nodes - 1); + return -EINVAL; + } + + node = kfd->nodes[node_id]; + return node->dqm->ops.halt(node->dqm); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 4f48507418d2..577d121cc6d1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -153,6 +153,20 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, static void kfd_hws_hang(struct device_queue_manager *dqm) { + struct device_process_node *cur; + struct qcm_process_device *qpd; + struct queue *q; + + /* Mark all device queues as reset. */ + list_for_each_entry(cur, &dqm->queues, list) { + qpd = cur->qpd; + list_for_each_entry(q, &qpd->queues_list, list) { + struct kfd_process_device *pdd = qpd_to_pdd(qpd); + + pdd->has_reset_queue = true; + } + } + /* * Issue a GPU reset if HWS is unresponsive */ @@ -208,10 +222,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, queue_input.mqd_addr = q->gart_mqd_addr; queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; - if (q->wptr_bo) { - wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); - queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->wptr_bo) + wptr_addr_off; - } + wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); + queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off; queue_input.is_kfd_process = 1; queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL); @@ -307,6 +319,46 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm) return retval; } +static int suspend_all_queues_mes(struct device_queue_manager *dqm) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; + int r = 0; + + if (!down_read_trylock(&adev->reset_domain->sem)) + return -EIO; + + r = amdgpu_mes_suspend(adev); + up_read(&adev->reset_domain->sem); + + if (r) { + dev_err(adev->dev, "failed to suspend gangs from MES\n"); + dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); + kfd_hws_hang(dqm); + } + + return r; +} + +static int resume_all_queues_mes(struct device_queue_manager *dqm) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; + int r = 0; + + if (!down_read_trylock(&adev->reset_domain->sem)) + return -EIO; + + r = amdgpu_mes_resume(adev); + up_read(&adev->reset_domain->sem); + + if (r) { + dev_err(adev->dev, "failed to resume gangs from MES\n"); + dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); + kfd_hws_hang(dqm); + } + + return r; +} + static void increment_queue_count(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) @@ -880,6 +932,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, else if (prev_active) retval = remove_queue_mes(dqm, q, &pdd->qpd); + /* queue is reset so inaccessable */ + if (pdd->has_reset_queue) { + retval = -EACCES; + goto out_unlock; + } + if (retval) { dev_err(dev, "unmap queue failed\n"); goto out_unlock; @@ -1534,6 +1592,41 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); q->properties.sdma_queue_id = q->sdma_id / kfd_get_num_xgmi_sdma_engines(dqm->dev); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { + int i, num_queues, num_engines, eng_offset = 0, start_engine; + bool free_bit_found = false, is_xgmi = false; + + if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) { + num_queues = get_num_sdma_queues(dqm); + num_engines = kfd_get_num_sdma_engines(dqm->dev); + q->properties.type = KFD_QUEUE_TYPE_SDMA; + } else { + num_queues = get_num_xgmi_sdma_queues(dqm); + num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev); + eng_offset = kfd_get_num_sdma_engines(dqm->dev); + q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI; + is_xgmi = true; + } + + /* Scan available bit based on target engine ID. */ + start_engine = q->properties.sdma_engine_id - eng_offset; + for (i = start_engine; i < num_queues; i += num_engines) { + + if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap)) + continue; + + clear_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap); + q->sdma_id = i; + q->properties.sdma_queue_id = q->sdma_id / num_engines; + free_bit_found = true; + break; + } + + if (!free_bit_found) { + dev_err(dev, "No more SDMA queue to allocate for target ID %i\n", + q->properties.sdma_engine_id); + return -ENOMEM; + } } pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); @@ -1626,10 +1719,64 @@ static int initialize_cpsch(struct device_queue_manager *dqm) return 0; } +/* halt_cpsch: + * Unmap queues so the schedule doesn't continue remaining jobs in the queue. + * Then set dqm->sched_halt so queues don't map to runlist until unhalt_cpsch + * is called. + */ +static int halt_cpsch(struct device_queue_manager *dqm) +{ + int ret = 0; + + dqm_lock(dqm); + if (!dqm->sched_running) { + dqm_unlock(dqm); + return 0; + } + + WARN_ONCE(dqm->sched_halt, "Scheduling is already on halt\n"); + + if (!dqm->is_hws_hang) { + if (!dqm->dev->kfd->shared_resources.enable_mes) + ret = unmap_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD, false); + else + ret = remove_all_queues_mes(dqm); + } + dqm->sched_halt = true; + dqm_unlock(dqm); + + return ret; +} + +/* unhalt_cpsch + * Unset dqm->sched_halt and map queues back to runlist + */ +static int unhalt_cpsch(struct device_queue_manager *dqm) +{ + int ret = 0; + + dqm_lock(dqm); + if (!dqm->sched_running || !dqm->sched_halt) { + WARN_ONCE(!dqm->sched_halt, "Scheduling is not on halt.\n"); + dqm_unlock(dqm); + return 0; + } + dqm->sched_halt = false; + if (!dqm->dev->kfd->shared_resources.enable_mes) + ret = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, + 0, USE_DEFAULT_GRACE_PERIOD); + dqm_unlock(dqm); + + return ret; +} + static int start_cpsch(struct device_queue_manager *dqm) { struct device *dev = dqm->dev->adev->dev; - int retval; + int retval, num_hw_queue_slots; retval = 0; @@ -1682,9 +1829,24 @@ static int start_cpsch(struct device_queue_manager *dqm) &dqm->wait_times); } + /* setup per-queue reset detection buffer */ + num_hw_queue_slots = dqm->dev->kfd->shared_resources.num_queue_per_pipe * + dqm->dev->kfd->shared_resources.num_pipe_per_mec * + NUM_XCC(dqm->dev->xcc_mask); + + dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct dqm_detect_hang_info); + dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size, GFP_KERNEL); + + if (!dqm->detect_hang_info) { + retval = -ENOMEM; + goto fail_detect_hang_buffer; + } + dqm_unlock(dqm); return 0; +fail_detect_hang_buffer: + kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); fail_allocate_vidmem: fail_set_sched_resources: if (!dqm->dev->kfd->shared_resources.enable_mes) @@ -1715,6 +1877,8 @@ static int stop_cpsch(struct device_queue_manager *dqm) kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); if (!dqm->dev->kfd->shared_resources.enable_mes) pm_uninit(&dqm->packet_mgr); + kfree(dqm->detect_hang_info); + dqm->detect_hang_info = NULL; dqm_unlock(dqm); return 0; @@ -1786,7 +1950,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, } if (q->properties.type == KFD_QUEUE_TYPE_SDMA || - q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI || + q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { dqm_lock(dqm); retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); dqm_unlock(dqm); @@ -1913,7 +2078,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) struct device *dev = dqm->dev->adev->dev; int retval; - if (!dqm->sched_running) + if (!dqm->sched_running || dqm->sched_halt) return 0; if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) return 0; @@ -1931,6 +2096,135 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) return retval; } +static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd = qpd_to_pdd(qpd); + + dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid 0x%0x is reset\n", + q->properties.queue_id, q->process->pasid); + + pdd->has_reset_queue = true; + if (q->properties.is_active) { + q->properties.is_active = false; + decrement_queue_count(dqm, qpd, q); + } +} + +static int detect_queue_hang(struct device_queue_manager *dqm) +{ + int i; + + /* detect should be used only in dqm locked queue reset */ + if (WARN_ON(dqm->detect_hang_count > 0)) + return 0; + + memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size); + + for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { + uint32_t mec, pipe, queue; + int xcc_id; + + mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) + / dqm->dev->kfd->shared_resources.num_pipe_per_mec; + + if (mec || !test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) + continue; + + amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev, i, &mec, &pipe, &queue); + + for_each_inst(xcc_id, dqm->dev->xcc_mask) { + uint64_t queue_addr = dqm->dev->kfd2kgd->hqd_get_pq_addr( + dqm->dev->adev, pipe, queue, xcc_id); + struct dqm_detect_hang_info hang_info; + + if (!queue_addr) + continue; + + hang_info.pipe_id = pipe; + hang_info.queue_id = queue; + hang_info.xcc_id = xcc_id; + hang_info.queue_address = queue_addr; + + dqm->detect_hang_info[dqm->detect_hang_count] = hang_info; + dqm->detect_hang_count++; + } + } + + return dqm->detect_hang_count; +} + +static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uint64_t queue_address) +{ + struct device_process_node *cur; + struct qcm_process_device *qpd; + struct queue *q; + + list_for_each_entry(cur, &dqm->queues, list) { + qpd = cur->qpd; + list_for_each_entry(q, &qpd->queues_list, list) { + if (queue_address == q->properties.queue_address) + return q; + } + } + + return NULL; +} + +/* only for compute queue */ +static int reset_queues_on_hws_hang(struct device_queue_manager *dqm) +{ + int r = 0, reset_count = 0, i; + + if (!dqm->detect_hang_info || dqm->is_hws_hang) + return -EIO; + + /* assume dqm locked. */ + if (!detect_queue_hang(dqm)) + return -ENOTRECOVERABLE; + + for (i = 0; i < dqm->detect_hang_count; i++) { + struct dqm_detect_hang_info hang_info = dqm->detect_hang_info[i]; + struct queue *q = find_queue_by_address(dqm, hang_info.queue_address); + struct kfd_process_device *pdd; + uint64_t queue_addr = 0; + + if (!q) { + r = -ENOTRECOVERABLE; + goto reset_fail; + } + + pdd = kfd_get_process_device_data(dqm->dev, q->process); + if (!pdd) { + r = -ENOTRECOVERABLE; + goto reset_fail; + } + + queue_addr = dqm->dev->kfd2kgd->hqd_reset(dqm->dev->adev, + hang_info.pipe_id, hang_info.queue_id, hang_info.xcc_id, + KFD_UNMAP_LATENCY_MS); + + /* either reset failed or we reset an unexpected queue. */ + if (queue_addr != q->properties.queue_address) { + r = -ENOTRECOVERABLE; + goto reset_fail; + } + + set_queue_as_reset(dqm, q, &pdd->qpd); + reset_count++; + } + + if (reset_count == dqm->detect_hang_count) + kfd_signal_reset_event(dqm->dev); + else + r = -ENOTRECOVERABLE; + +reset_fail: + dqm->detect_hang_count = 0; + + return r; +} + /* dqm->lock mutex has to be locked before calling this function */ static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, @@ -1981,11 +2275,14 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, */ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) { - while (halt_if_hws_hang) - schedule(); - kfd_hws_hang(dqm); - retval = -ETIME; - goto out; + if (reset_queues_on_hws_hang(dqm)) { + while (halt_if_hws_hang) + schedule(); + dqm->is_hws_hang = true; + kfd_hws_hang(dqm); + retval = -ETIME; + goto out; + } } /* We need to reset the grace period value for this device */ @@ -2004,8 +2301,7 @@ out: } /* only for compute queue */ -static int reset_queues_cpsch(struct device_queue_manager *dqm, - uint16_t pasid) +static int reset_queues_cpsch(struct device_queue_manager *dqm, uint16_t pasid) { int retval; @@ -2525,6 +2821,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) dqm->ops.initialize = initialize_cpsch; dqm->ops.start = start_cpsch; dqm->ops.stop = stop_cpsch; + dqm->ops.halt = halt_cpsch; + dqm->ops.unhalt = unhalt_cpsch; dqm->ops.destroy_queue = destroy_queue_cpsch; dqm->ops.update_queue = update_queue; dqm->ops.register_process = register_process; @@ -2621,7 +2919,7 @@ static void deallocate_hiq_sdma_mqd(struct kfd_node *dev, { WARN(!mqd, "No hiq sdma mqd trunk to free"); - amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem); } void device_queue_manager_uninit(struct device_queue_manager *dqm) @@ -2633,6 +2931,95 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) kfree(dqm); } +int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id) +{ + struct kfd_process_device *pdd; + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + struct device_queue_manager *dqm = knode->dqm; + struct device *dev = dqm->dev->adev->dev; + struct qcm_process_device *qpd; + struct queue *q = NULL; + int ret = 0; + + if (!p) + return -EINVAL; + + dqm_lock(dqm); + + pdd = kfd_get_process_device_data(dqm->dev, p); + if (pdd) { + qpd = &pdd->qpd; + + list_for_each_entry(q, &qpd->queues_list, list) { + if (q->doorbell_id == doorbell_id && q->properties.is_active) { + ret = suspend_all_queues_mes(dqm); + if (ret) { + dev_err(dev, "Suspending all queues failed"); + goto out; + } + + q->properties.is_evicted = true; + q->properties.is_active = false; + decrement_queue_count(dqm, qpd, q); + + ret = remove_queue_mes(dqm, q, qpd); + if (ret) { + dev_err(dev, "Removing bad queue failed"); + goto out; + } + + ret = resume_all_queues_mes(dqm); + if (ret) + dev_err(dev, "Resuming all queues failed"); + + break; + } + } + } + +out: + dqm_unlock(dqm); + return ret; +} + +static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct device *dev = dqm->dev->adev->dev; + int ret = 0; + + /* Check if process is already evicted */ + dqm_lock(dqm); + if (qpd->evicted) { + /* Increment the evicted count to make sure the + * process stays evicted before its terminated. + */ + qpd->evicted++; + dqm_unlock(dqm); + goto out; + } + dqm_unlock(dqm); + + ret = suspend_all_queues_mes(dqm); + if (ret) { + dev_err(dev, "Suspending all queues failed"); + goto out; + } + + ret = dqm->ops.evict_process_queues(dqm, qpd); + if (ret) { + dev_err(dev, "Evicting process queues failed"); + goto out; + } + + ret = resume_all_queues_mes(dqm); + if (ret) + dev_err(dev, "Resuming all queues failed"); + +out: + return ret; +} + int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) { struct kfd_process_device *pdd; @@ -2643,8 +3030,13 @@ int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) return -EINVAL; WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); pdd = kfd_get_process_device_data(dqm->dev, p); - if (pdd) - ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); + if (pdd) { + if (dqm->dev->kfd->shared_resources.enable_mes) + ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd); + else + ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); + } + kfd_unref_process(p); return ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 3b9b8eabaacc..08b40826ad1e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -106,6 +106,12 @@ union GRBM_GFX_INDEX_BITS { * @uninitialize: Destroys all the device queue manager resources allocated in * initialize routine. * + * @halt: This routine unmaps queues from runlist and set halt status to true + * so no more queues will be mapped to runlist until unhalt. + * + * @unhalt: This routine unset halt status to flase and maps queues back to + * runlist. + * * @create_kernel_queue: Creates kernel queue. Used for debug queue. * * @destroy_kernel_queue: Destroys kernel queue. Used for debug queue. @@ -153,6 +159,8 @@ struct device_queue_manager_ops { int (*start)(struct device_queue_manager *dqm); int (*stop)(struct device_queue_manager *dqm); void (*uninitialize)(struct device_queue_manager *dqm); + int (*halt)(struct device_queue_manager *dqm); + int (*unhalt)(struct device_queue_manager *dqm); int (*create_kernel_queue)(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd); @@ -210,6 +218,13 @@ struct device_queue_manager_asic_ops { struct kfd_node *dev); }; +struct dqm_detect_hang_info { + int pipe_id; + int queue_id; + int xcc_id; + uint64_t queue_address; +}; + /** * struct device_queue_manager * @@ -257,6 +272,7 @@ struct device_queue_manager { struct work_struct hw_exception_work; struct kfd_mem_obj hiq_sdma_mqd; bool sched_running; + bool sched_halt; /* used for GFX 9.4.3 only */ uint32_t current_logical_xcc_start; @@ -264,6 +280,11 @@ struct device_queue_manager { uint32_t wait_times; wait_queue_head_t destroy_wait; + + /* for per-queue reset support */ + struct dqm_detect_hang_info *detect_hang_info; + size_t detect_hang_info_size; + int detect_hang_count; }; void device_queue_manager_init_cik( diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 9b33d9d2c9ad..ea3792249209 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -31,6 +31,7 @@ #include #include "kfd_priv.h" #include "kfd_events.h" +#include "kfd_device_queue_manager.h" #include /* @@ -1244,12 +1245,33 @@ void kfd_signal_reset_event(struct kfd_node *dev) idx = srcu_read_lock(&kfd_processes_srcu); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { int user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id); + struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p); if (unlikely(user_gpu_id == -EINVAL)) { WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id); continue; } + if (unlikely(!pdd)) { + WARN_ONCE(1, "Could not get device data from pasid:0x%x\n", p->pasid); + continue; + } + + if (dev->dqm->detect_hang_count && !pdd->has_reset_queue) + continue; + + if (dev->dqm->detect_hang_count) { + struct amdgpu_task_info *ti; + + ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid); + if (ti) { + dev_err(dev->adev->dev, + "Queues reset on process %s tid %d thread %s pid %d\n", + ti->process_name, ti->tgid, ti->task_name, ti->pid); + amdgpu_vm_put_task_info(ti); + } + } + rcu_read_lock(); id = KFD_FIRST_NONSIGNAL_EVENT_ID; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 8e0d0356e810..bb8cbfc39b90 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -129,63 +129,6 @@ enum SQ_INTERRUPT_ERROR_TYPE { KFD_DEBUG_CP_BAD_OP_ECODE_MASK) \ >> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT) -static void event_interrupt_poison_consumption(struct kfd_node *dev, - uint16_t pasid, uint16_t client_id) -{ - enum amdgpu_ras_block block = 0; - int old_poison, ret = -EINVAL; - uint32_t reset = 0; - struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); - - if (!p) - return; - - /* all queues of a process will be unmapped in one time */ - old_poison = atomic_cmpxchg(&p->poison, 0, 1); - kfd_unref_process(p); - if (old_poison) - return; - - switch (client_id) { - case SOC15_IH_CLIENTID_SE0SH: - case SOC15_IH_CLIENTID_SE1SH: - case SOC15_IH_CLIENTID_SE2SH: - case SOC15_IH_CLIENTID_SE3SH: - case SOC15_IH_CLIENTID_UTCL2: - ret = kfd_dqm_evict_pasid(dev->dqm, pasid); - block = AMDGPU_RAS_BLOCK__GFX; - if (ret) - reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; - break; - case SOC15_IH_CLIENTID_SDMA0: - case SOC15_IH_CLIENTID_SDMA1: - case SOC15_IH_CLIENTID_SDMA2: - case SOC15_IH_CLIENTID_SDMA3: - case SOC15_IH_CLIENTID_SDMA4: - block = AMDGPU_RAS_BLOCK__SDMA; - reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; - break; - default: - break; - } - - kfd_signal_poison_consumed_event(dev, pasid); - - /* resetting queue passes, do page retirement without gpu reset - * resetting queue fails, fallback to gpu reset solution - */ - if (!ret) - dev_warn(dev->adev->dev, - "RAS poison consumption, unmap queue flow succeeded: client id %d\n", - client_id); - else - dev_warn(dev->adev->dev, - "RAS poison consumption, fall back to gpu reset flow: client id %d\n", - client_id); - - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset); -} - static bool event_interrupt_isr_v10(struct kfd_node *dev, const uint32_t *ih_ring_entry, uint32_t *patched_ihre, @@ -332,11 +275,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID), sq_intr_err_type); - if (sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && - sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { - event_interrupt_poison_consumption(dev, pasid, source_id); - return; - } break; default: break; @@ -362,9 +300,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_SDMA7) { if (source_id == SOC15_INTSRC_SDMA_TRAP) { kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); - } else if (source_id == SOC15_INTSRC_SDMA_ECC) { - event_interrupt_poison_consumption(dev, pasid, source_id); - return; } } else if (client_id == SOC15_IH_CLIENTID_VMC || client_id == SOC15_IH_CLIENTID_VMC1 || @@ -388,12 +323,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) hub_inst = node_id / 4; - if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, - hub_inst, vmid_type)) { - event_interrupt_poison_consumption(dev, pasid, client_id); - return; - } - info.vmid = vmid; info.mc_id = client_id; info.page_addr = ih_ring_entry[4] | diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index f524a55eee11..b3f988b275a8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -330,11 +330,14 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) kfd_signal_event_interrupt(pasid, context_id0, 32); else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && - KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) - kfd_set_dbg_ev_from_interrupt(dev, pasid, - KFD_CTXID0_DOORBELL_ID(context_id0), + KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) { + u32 doorbell_id = KFD_CTXID0_DOORBELL_ID(context_id0); + + kfd_set_dbg_ev_from_interrupt(dev, pasid, doorbell_id, KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)), NULL, 0); + kfd_dqm_suspend_bad_queue_mes(dev, pasid, doorbell_id); + } /* SDMA */ else if (source_id == SOC21_INTSRC_SDMA_TRAP) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index a9c3580be8c9..fecdbbab9894 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -431,25 +431,9 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); - int hub_inst = 0; struct kfd_hsa_memory_exception_data exception_data; - /* gfxhub */ - if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { - hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, - node_id); - if (hub_inst < 0) - hub_inst = 0; - } - - /* mmhub */ - if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) - hub_inst = node_id / 4; - - if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, - hub_inst, vmid_type)) { + if (source_id == SOC15_INTSRC_VMC_UTCL2_POISON) { event_interrupt_poison_consumption_v9(dev, pasid, client_id); return; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 50a81da43ce1..d9ae854b6908 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -225,7 +225,7 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { if (mqd_mem_obj->gtt_mem) { - amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, &mqd_mem_obj->gtt_mem); kfree(mqd_mem_obj); } else { kfd_gtt_sa_free(mm->dev, mqd_mem_obj); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c index b7a08e7a4423..d163d92a692f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c @@ -187,6 +187,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 66c73825c0a0..84e8ea3a8a0c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -321,8 +321,11 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, static bool check_preemption_failed(struct mqd_manager *mm, void *mqd) { struct v9_mqd *m = (struct v9_mqd *)mqd; + uint32_t doorbell_id = m->queue_doorbell_id0; - return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0); + m->queue_doorbell_id0 = 0; + + return kfd_check_hiq_mqd_doorbell_id(mm->dev, doorbell_id, 0); } static int get_wave_state(struct mqd_manager *mm, void *mqd, @@ -624,6 +627,7 @@ static bool check_preemption_failed_v9_4_3(struct mqd_manager *mm, void *mqd) m = get_mqd(mqd + hiq_mqd_size * inst); ret |= kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, inst); + m->queue_doorbell_id0 = 0; ++inst; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 00776f08351c..1f9f5bfeaf86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -37,11 +37,14 @@ static int pm_map_process_v9(struct packet_manager *pm, struct kfd_node *kfd = pm->dqm->dev; struct kfd_process_device *pdd = container_of(qpd, struct kfd_process_device, qpd); + struct amdgpu_device *adev = kfd->adev; packet = (struct pm4_mes_map_process *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process)); + if (adev->enforce_isolation[kfd->node_id]) + packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; packet->bitfields2.pasid = qpd->pqm->process->pasid; @@ -89,14 +92,18 @@ static int pm_map_process_aldebaran(struct packet_manager *pm, struct pm4_mes_map_process_aldebaran *packet; uint64_t vm_page_table_base_addr = qpd->page_table_base; struct kfd_dev *kfd = pm->dqm->dev->kfd; + struct kfd_node *knode = pm->dqm->dev; struct kfd_process_device *pdd = container_of(qpd, struct kfd_process_device, qpd); int i; + struct amdgpu_device *adev = kfd->adev; packet = (struct pm4_mes_map_process_aldebaran *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process_aldebaran)); + if (adev->enforce_isolation[knode->node_id]) + packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; packet->bitfields2.pasid = qpd->pqm->process->pasid; @@ -144,17 +151,22 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, int concurrent_proc_cnt = 0; struct kfd_node *kfd = pm->dqm->dev; + struct amdgpu_device *adev = kfd->adev; /* Determine the number of processes to map together to HW: * it can not exceed the number of VMIDs available to the * scheduler, and it is determined by the smaller of the number * of processes in the runlist and kfd module parameter * hws_max_conc_proc. + * However, if enforce_isolation is set (toggle LDS/VGPRs/SGPRs + * cleaner between process switch), enable single-process mode + * in HWS. * Note: the arbitration between the number of VMIDs and * hws_max_conc_proc has been done in * kgd2kfd_device_init(). */ - concurrent_proc_cnt = min(pm->dqm->processes_count, + concurrent_proc_cnt = adev->enforce_isolation[kfd->node_id] ? + 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum); packet = (struct pm4_mes_runlist *)buffer; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h index 8b6b2bd5c148..cd8611401a66 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -145,8 +145,9 @@ struct pm4_mes_map_process { union { struct { - uint32_t pasid:16; - uint32_t reserved1:2; + uint32_t pasid:16; /* 0 - 15 */ + uint32_t reserved1:1; /* 16 */ + uint32_t exec_cleaner_shader:1; /* 17 */ uint32_t debug_vmid:4; uint32_t new_debug:1; uint32_t reserved2:1; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h index 38f5cb6a222a..e0ed62c4ade0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h @@ -37,7 +37,7 @@ struct pm4_mes_map_process_aldebaran { struct { uint32_t pasid:16; /* 0 - 15 */ uint32_t single_memops:1; /* 16 */ - uint32_t reserved1:1; /* 17 */ + uint32_t exec_cleaner_shader:1; /* 17 */ uint32_t debug_vmid:4; /* 18 - 21 */ uint32_t new_debug:1; /* 22 */ uint32_t tmz:1; /* 23 */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 2b3ec92981e8..9ae9abc6eb43 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -310,6 +310,10 @@ struct kfd_node { struct kfd_local_mem_info local_mem_info; struct kfd_dev *kfd; + + /* Track per device allocated watch points */ + uint32_t alloc_watch_ids; + spinlock_t watch_points_lock; }; struct kfd_dev { @@ -362,10 +366,6 @@ struct kfd_dev { struct kfd_node *nodes[MAX_KFD_NODES]; unsigned int num_nodes; - /* Track per device allocated watch points */ - uint32_t alloc_watch_ids; - spinlock_t watch_points_lock; - /* Kernel doorbells for KFD device */ struct amdgpu_bo *doorbells; @@ -414,13 +414,16 @@ enum kfd_unmap_queues_filter { * @KFD_QUEUE_TYPE_DIQ: DIQ queue type. * * @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface. + * + * @KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: SDMA user mode queue with target SDMA engine ID. */ enum kfd_queue_type { KFD_QUEUE_TYPE_COMPUTE, KFD_QUEUE_TYPE_SDMA, KFD_QUEUE_TYPE_HIQ, KFD_QUEUE_TYPE_DIQ, - KFD_QUEUE_TYPE_SDMA_XGMI + KFD_QUEUE_TYPE_SDMA_XGMI, + KFD_QUEUE_TYPE_SDMA_BY_ENG_ID }; enum kfd_queue_format { @@ -494,8 +497,8 @@ struct queue_properties { uint64_t queue_size; uint32_t priority; uint32_t queue_percent; - uint32_t *read_ptr; - uint32_t *write_ptr; + void __user *read_ptr; + void __user *write_ptr; void __iomem *doorbell_ptr; uint32_t doorbell_off; bool is_interop; @@ -522,6 +525,12 @@ struct queue_properties { uint64_t tba_addr; uint64_t tma_addr; uint64_t exception_status; + + struct amdgpu_bo *wptr_bo; + struct amdgpu_bo *rptr_bo; + struct amdgpu_bo *ring_bo; + struct amdgpu_bo *eop_buf_bo; + struct amdgpu_bo *cwsr_bo; }; #define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 && \ @@ -604,7 +613,7 @@ struct queue { uint64_t gang_ctx_gpu_addr; void *gang_ctx_cpu_ptr; - struct amdgpu_bo *wptr_bo; + struct amdgpu_bo *wptr_bo_gart; }; enum KFD_MQD_TYPE { @@ -837,6 +846,9 @@ struct kfd_process_device { void *proc_ctx_bo; uint64_t proc_ctx_gpu_addr; void *proc_ctx_cpu_ptr; + + /* Tracks queue reset status */ + bool has_reset_queue; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) @@ -854,6 +866,8 @@ struct svm_range_list { struct delayed_work restore_work; DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE); struct task_struct *faulting_task; + /* check point ts decides if page fault recovery need be dropped */ + uint64_t checkpoint_ts[MAX_GPU_INSTANCE]; }; /* Process data */ @@ -1284,6 +1298,15 @@ int init_queue(struct queue **q, const struct queue_properties *properties); void uninit_queue(struct queue *q); void print_queue_properties(struct queue_properties *q); void print_queue(struct queue *q); +int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, + u64 expected_size); +void kfd_queue_buffer_put(struct amdgpu_bo **bo); +int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); +int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); +void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo); +int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd, + struct queue_properties *properties); +void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev); struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, struct kfd_node *dev); @@ -1303,6 +1326,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_node *dev, enum kfd_queue_type type); void kernel_queue_uninit(struct kernel_queue *kq); int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid); +int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id); /* Process Queue Manager */ struct process_queue_node { @@ -1320,7 +1344,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct file *f, struct queue_properties *properties, unsigned int *qid, - struct amdgpu_bo *wptr_bo, const struct kfd_criu_queue_priv_data *q_data, const void *restore_mqd, const void *restore_ctl_stack, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 17e42161b015..a902950cc060 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1048,7 +1048,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) if (pdd->dev->kfd->shared_resources.enable_mes) amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, - pdd->proc_ctx_bo); + &pdd->proc_ctx_bo); /* * before destroying pdd, make sure to report availability * for auto suspend @@ -1851,6 +1851,8 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger) goto fail; } n_evicted++; + + pdd->dev->dqm->is_hws_hang = false; } return r; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 21f5a1fb3bf8..20ea745729ee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -204,19 +204,23 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm, } if (dev->kfd->shared_resources.enable_mes) { - amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo); - if (pqn->q->wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart); } } void pqm_uninit(struct process_queue_manager *pqm) { struct process_queue_node *pqn, *next; + struct kfd_process_device *pdd; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { - if (pqn->q) + if (pqn->q) { + pdd = kfd_get_process_device_data(pqn->q->device, pqm->process); + kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); + kfd_queue_release_buffers(pdd, &pqn->q->properties); pqm_clean_queue_resource(pqm, pqn); + } kfd_procfs_del_queue(pqn->q); uninit_queue(pqn->q); @@ -231,8 +235,7 @@ void pqm_uninit(struct process_queue_manager *pqm) static int init_user_queue(struct process_queue_manager *pqm, struct kfd_node *dev, struct queue **q, struct queue_properties *q_properties, - struct file *f, struct amdgpu_bo *wptr_bo, - unsigned int qid) + struct file *f, unsigned int qid) { int retval; @@ -263,12 +266,32 @@ static int init_user_queue(struct process_queue_manager *pqm, goto cleanup; } memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); - (*q)->wptr_bo = wptr_bo; + + /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work + * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) + */ + if (((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) + >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) { + if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) { + pr_err("Queue memory allocated to wrong device\n"); + retval = -EINVAL; + goto free_gang_ctx_bo; + } + + retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo, + &(*q)->wptr_bo_gart); + if (retval) { + pr_err("Failed to map wptr bo to GART\n"); + goto free_gang_ctx_bo; + } + } } pr_debug("PQM After init queue"); return 0; +free_gang_ctx_bo: + amdgpu_amdkfd_free_gtt_mem(dev->adev, (*q)->gang_ctx_bo); cleanup: uninit_queue(*q); *q = NULL; @@ -280,7 +303,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct file *f, struct queue_properties *properties, unsigned int *qid, - struct amdgpu_bo *wptr_bo, const struct kfd_criu_queue_priv_data *q_data, const void *restore_mqd, const void *restore_ctl_stack, @@ -345,13 +367,14 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA_XGMI: + case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: /* SDMA queues are always allocated statically no matter * which scheduler mode is used. We also do not need to * check whether a SDMA queue can be allocated here, because * allocate_sdma_queue() in create_queue() has the * corresponding check logic. */ - retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); + retval = init_user_queue(pqm, dev, &q, properties, f, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -372,7 +395,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; } - retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); + retval = init_user_queue(pqm, dev, &q, properties, f, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -490,6 +513,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) } if (pqn->q) { + retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); + if (retval) + goto err_destroy_queue; + kfd_procfs_del_queue(pqn->q); dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); @@ -500,7 +527,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval != -ETIME) goto err_destroy_queue; } - + kfd_queue_release_buffers(pdd, &pqn->q->properties); pqm_clean_queue_resource(pqm, pqn); uninit_queue(pqn->q); } @@ -524,11 +551,42 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm, struct process_queue_node *pqn; pqn = get_queue_by_qid(pqm, qid); - if (!pqn) { + if (!pqn || !pqn->q) { pr_debug("No queue %d exists for update operation\n", qid); return -EFAULT; } + /* + * Update with NULL ring address is used to disable the queue + */ + if (p->queue_address && p->queue_size) { + struct kfd_process_device *pdd; + struct amdgpu_vm *vm; + struct queue *q = pqn->q; + int err; + + pdd = kfd_get_process_device_data(q->device, q->process); + if (!pdd) + return -ENODEV; + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo, + p->queue_size)) { + pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n", + p->queue_address, p->queue_size); + return -EFAULT; + } + + kfd_queue_unref_bo_va(vm, &pqn->q->properties.ring_bo); + kfd_queue_buffer_put(&pqn->q->properties.ring_bo); + amdgpu_bo_unreserve(vm->root.bo); + + pqn->q->properties.ring_bo = p->ring_bo; + } + pqn->q->properties.queue_address = p->queue_address; pqn->q->properties.queue_size = p->queue_size; pqn->q->properties.queue_percent = p->queue_percent; @@ -971,7 +1029,7 @@ int kfd_criu_restore_queue(struct kfd_process *p, print_queue_properties(&qp); - ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack, + ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack, NULL); if (ret) { pr_err("Failed to create new queue err:%d\n", ret); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 0f6992b1895c..ad29634f8b44 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -24,6 +24,8 @@ #include #include "kfd_priv.h" +#include "kfd_topology.h" +#include "kfd_svm.h" void print_queue_properties(struct queue_properties *q) { @@ -82,3 +84,374 @@ void uninit_queue(struct queue *q) { kfree(q); } + +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) + +static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size) +{ + struct kfd_process *p = pdd->process; + struct list_head update_list; + struct svm_range *prange; + int ret = -EINVAL; + + INIT_LIST_HEAD(&update_list); + addr >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + + mutex_lock(&p->svms.lock); + + /* + * range may split to multiple svm pranges aligned to granularity boundaery. + */ + while (size) { + uint32_t gpuid, gpuidx; + int r; + + prange = svm_range_from_addr(&p->svms, addr, NULL); + if (!prange) + break; + + if (!prange->mapped_to_gpu) + break; + + r = kfd_process_gpuid_from_node(p, pdd->dev, &gpuid, &gpuidx); + if (r < 0) + break; + if (!test_bit(gpuidx, prange->bitmap_access) && + !test_bit(gpuidx, prange->bitmap_aip)) + break; + + if (!(prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) + break; + + list_add(&prange->update_list, &update_list); + + if (prange->last - prange->start + 1 >= size) { + size = 0; + break; + } + + size -= prange->last - prange->start + 1; + addr += prange->last - prange->start + 1; + } + if (size) { + pr_debug("[0x%llx 0x%llx] not registered\n", addr, addr + size - 1); + goto out_unlock; + } + + list_for_each_entry(prange, &update_list, update_list) + atomic_inc(&prange->queue_refcount); + ret = 0; + +out_unlock: + mutex_unlock(&p->svms.lock); + return ret; +} + +static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size) +{ + struct kfd_process *p = pdd->process; + struct svm_range *prange, *pchild; + struct interval_tree_node *node; + unsigned long last; + + addr >>= PAGE_SHIFT; + last = addr + (size >> PAGE_SHIFT) - 1; + + mutex_lock(&p->svms.lock); + + node = interval_tree_iter_first(&p->svms.objects, addr, last); + while (node) { + struct interval_tree_node *next_node; + unsigned long next_start; + + prange = container_of(node, struct svm_range, it_node); + next_node = interval_tree_iter_next(node, addr, last); + next_start = min(node->last, last) + 1; + + if (atomic_add_unless(&prange->queue_refcount, -1, 0)) { + list_for_each_entry(pchild, &prange->child_list, child_list) + atomic_add_unless(&pchild->queue_refcount, -1, 0); + } + + node = next_node; + addr = next_start; + } + + mutex_unlock(&p->svms.lock); +} +#else + +static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size) +{ + return -EINVAL; +} + +static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size) +{ +} + +#endif + +int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, + u64 expected_size) +{ + struct amdgpu_bo_va_mapping *mapping; + u64 user_addr; + u64 size; + + user_addr = (u64)addr >> AMDGPU_GPU_PAGE_SHIFT; + size = expected_size >> AMDGPU_GPU_PAGE_SHIFT; + + mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr); + if (!mapping) + goto out_err; + + if (user_addr != mapping->start || + (size != 0 && user_addr + size - 1 != mapping->last)) { + pr_debug("expected size 0x%llx not equal to mapping addr 0x%llx size 0x%llx\n", + expected_size, mapping->start << AMDGPU_GPU_PAGE_SHIFT, + (mapping->last - mapping->start + 1) << AMDGPU_GPU_PAGE_SHIFT); + goto out_err; + } + + *pbo = amdgpu_bo_ref(mapping->bo_va->base.bo); + mapping->bo_va->queue_refcount++; + return 0; + +out_err: + *pbo = NULL; + return -EINVAL; +} + +/* FIXME: remove this function, just call amdgpu_bo_unref directly */ +void kfd_queue_buffer_put(struct amdgpu_bo **bo) +{ + amdgpu_bo_unref(bo); +} + +int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) +{ + struct kfd_topology_device *topo_dev; + struct amdgpu_vm *vm; + u32 total_cwsr_size; + int err; + + topo_dev = kfd_topology_device_by_id(pdd->dev->id); + if (!topo_dev) + return -EINVAL; + + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + err = kfd_queue_buffer_get(vm, properties->write_ptr, &properties->wptr_bo, PAGE_SIZE); + if (err) + goto out_err_unreserve; + + err = kfd_queue_buffer_get(vm, properties->read_ptr, &properties->rptr_bo, PAGE_SIZE); + if (err) + goto out_err_unreserve; + + err = kfd_queue_buffer_get(vm, (void *)properties->queue_address, + &properties->ring_bo, properties->queue_size); + if (err) + goto out_err_unreserve; + + /* only compute queue requires EOP buffer and CWSR area */ + if (properties->type != KFD_QUEUE_TYPE_COMPUTE) + goto out_unreserve; + + /* EOP buffer is not required for all ASICs */ + if (properties->eop_ring_buffer_address) { + if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) { + pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", + properties->eop_buf_bo->tbo.base.size, + topo_dev->node_props.eop_buffer_size); + err = -EINVAL; + goto out_err_unreserve; + } + err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address, + &properties->eop_buf_bo, + properties->eop_ring_buffer_size); + if (err) + goto out_err_unreserve; + } + + if (properties->ctl_stack_size != topo_dev->node_props.ctl_stack_size) { + pr_debug("queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n", + properties->ctl_stack_size, + topo_dev->node_props.ctl_stack_size); + err = -EINVAL; + goto out_err_unreserve; + } + + if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) { + pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n", + properties->ctx_save_restore_area_size, + topo_dev->node_props.cwsr_size); + err = -EINVAL; + goto out_err_unreserve; + } + + total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) + * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); + + err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, + &properties->cwsr_bo, total_cwsr_size); + if (!err) + goto out_unreserve; + + amdgpu_bo_unreserve(vm->root.bo); + + err = kfd_queue_buffer_svm_get(pdd, properties->ctx_save_restore_area_address, + total_cwsr_size); + if (err) + goto out_err_release; + + return 0; + +out_unreserve: + amdgpu_bo_unreserve(vm->root.bo); + return 0; + +out_err_unreserve: + amdgpu_bo_unreserve(vm->root.bo); +out_err_release: + /* FIXME: make a _locked version of this that can be called before + * dropping the VM reservation. + */ + kfd_queue_unref_bo_vas(pdd, properties); + kfd_queue_release_buffers(pdd, properties); + return err; +} + +int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) +{ + struct kfd_topology_device *topo_dev; + u32 total_cwsr_size; + + kfd_queue_buffer_put(&properties->wptr_bo); + kfd_queue_buffer_put(&properties->rptr_bo); + kfd_queue_buffer_put(&properties->ring_bo); + kfd_queue_buffer_put(&properties->eop_buf_bo); + kfd_queue_buffer_put(&properties->cwsr_bo); + + topo_dev = kfd_topology_device_by_id(pdd->dev->id); + if (!topo_dev) + return -EINVAL; + total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) + * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); + + kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, total_cwsr_size); + return 0; +} + +void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo) +{ + if (*bo) { + struct amdgpu_bo_va *bo_va; + + bo_va = amdgpu_vm_bo_find(vm, *bo); + if (bo_va && bo_va->queue_refcount) + bo_va->queue_refcount--; + } +} + +int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd, + struct queue_properties *properties) +{ + struct amdgpu_vm *vm; + int err; + + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + kfd_queue_unref_bo_va(vm, &properties->wptr_bo); + kfd_queue_unref_bo_va(vm, &properties->rptr_bo); + kfd_queue_unref_bo_va(vm, &properties->ring_bo); + kfd_queue_unref_bo_va(vm, &properties->eop_buf_bo); + kfd_queue_unref_bo_va(vm, &properties->cwsr_bo); + + amdgpu_bo_unreserve(vm->root.bo); + return 0; +} + +#define SGPR_SIZE_PER_CU 0x4000 +#define LDS_SIZE_PER_CU 0x10000 +#define HWREG_SIZE_PER_CU 0x1000 +#define DEBUGGER_BYTES_ALIGN 64 +#define DEBUGGER_BYTES_PER_WAVE 32 + +static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) +{ + u32 vgpr_size = 0x40000; + + if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */ + gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */ + gfxv == 90008) /* GFX_VERSION_ARCTURUS */ + vgpr_size = 0x80000; + else if (gfxv == 110000 || /* GFX_VERSION_PLUM_BONITO */ + gfxv == 110001 || /* GFX_VERSION_WHEAT_NAS */ + gfxv == 120000 || /* GFX_VERSION_GFX1200 */ + gfxv == 120001) /* GFX_VERSION_GFX1201 */ + vgpr_size = 0x60000; + + return vgpr_size; +} + +#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv) \ + (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\ + LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU) + +#define CNTL_STACK_BYTES_PER_WAVE(gfxv) \ + ((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/ + +#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40 + +void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) +{ + struct kfd_node_properties *props = &dev->node_props; + u32 gfxv = props->gfx_target_version; + u32 ctl_stack_size; + u32 wg_data_size; + u32 wave_num; + u32 cu_num; + + if (gfxv < 80001) /* GFX_VERSION_CARRIZO */ + return; + + cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask); + wave_num = (gfxv < 100100) ? /* GFX_VERSION_NAVI10 */ + min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512) + : cu_num * 32; + + wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv), PAGE_SIZE); + ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8; + ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size, + PAGE_SIZE); + + if ((gfxv / 10000 * 10000) == 100000) { + /* HW design limits control stack size to 0x7000. + * This is insufficient for theoretical PM4 cases + * but sufficient for AQL, limited by SPI events. + */ + ctl_stack_size = min(ctl_stack_size, 0x7000); + } + + props->ctl_stack_size = ctl_stack_size; + props->debug_memory_size = ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN); + props->cwsr_size = ctl_stack_size + wg_data_size; + + if (gfxv == 80002) /* GFX_VERSION_TONGA */ + props->eop_buffer_size = 0x8000; + else if ((gfxv / 100 * 100) == 90400) /* GFX_VERSION_AQUA_VANJARAM */ + props->eop_buffer_size = 4096; + else if (gfxv >= 80000) + props->eop_buffer_size = 4096; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index bd9c2921e0dc..ce2a5d9f90d3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1051,6 +1051,7 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old, new->mapped_to_gpu = old->mapped_to_gpu; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); + atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount)); return 0; } @@ -1992,6 +1993,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old) new->vram_pages = old->vram_pages; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); + atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount)); return new; } @@ -2260,16 +2262,10 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms) { struct kfd_process_device *pdd; struct kfd_process *p; - int drain; uint32_t i; p = container_of(svms, struct kfd_process, svms); -restart: - drain = atomic_read(&svms->drain_pagefaults); - if (!drain) - return; - for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) { pdd = p->pdds[i]; if (!pdd) @@ -2289,8 +2285,6 @@ restart: pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); } - if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain) - goto restart; } static void svm_range_deferred_list_work(struct work_struct *work) @@ -2312,17 +2306,8 @@ static void svm_range_deferred_list_work(struct work_struct *work) prange->start, prange->last, prange->work_item.op); mm = prange->work_item.mm; -retry: - mmap_write_lock(mm); - /* Checking for the need to drain retry faults must be inside - * mmap write lock to serialize with munmap notifiers. - */ - if (unlikely(atomic_read(&svms->drain_pagefaults))) { - mmap_write_unlock(mm); - svm_range_drain_retry_fault(svms); - goto retry; - } + mmap_write_lock(mm); /* Remove from deferred_list must be inside mmap write lock, for * two race cases: @@ -2443,6 +2428,17 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, struct kfd_process *p; unsigned long s, l; bool unmap_parent; + uint32_t i; + + if (atomic_read(&prange->queue_refcount)) { + int r; + + pr_warn("Freeing queue vital buffer 0x%lx, queue evicted\n", + prange->start << PAGE_SHIFT); + r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM); + if (r) + pr_debug("failed %d to quiesce KFD queues\n", r); + } p = kfd_lookup_process_by_mm(mm); if (!p) @@ -2452,11 +2448,35 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last, start, last); - /* Make sure pending page faults are drained in the deferred worker - * before the range is freed to avoid straggler interrupts on - * unmapped memory causing "phantom faults". + /* calculate time stamps that are used to decide which page faults need be + * dropped or handled before unmap pages from gpu vm */ - atomic_inc(&svms->drain_pagefaults); + for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) { + struct kfd_process_device *pdd; + struct amdgpu_device *adev; + struct amdgpu_ih_ring *ih; + uint32_t checkpoint_wptr; + + pdd = p->pdds[i]; + if (!pdd) + continue; + + adev = pdd->dev->adev; + + /* Check and drain ih1 ring if cam not available */ + ih = &adev->irq.ih1; + checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); + if (ih->rptr != checkpoint_wptr) { + svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1); + continue; + } + + /* check if dev->irq.ih_soft is not empty */ + ih = &adev->irq.ih_soft; + checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); + if (ih->rptr != checkpoint_wptr) + svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1); + } unmap_parent = start <= prange->start && last >= prange->last; @@ -2897,7 +2917,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault) int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t vmid, uint32_t node_id, - uint64_t addr, bool write_fault) + uint64_t addr, uint64_t ts, bool write_fault) { unsigned long start, last, size; struct mm_struct *mm = NULL; @@ -2907,7 +2927,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, ktime_t timestamp = ktime_get_boottime(); struct kfd_node *node; int32_t best_loc; - int32_t gpuidx = MAX_GPU_INSTANCE; + int32_t gpuid, gpuidx = MAX_GPU_INSTANCE; bool write_locked = false; struct vm_area_struct *vma; bool migration = false; @@ -2928,11 +2948,38 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr); if (atomic_read(&svms->drain_pagefaults)) { - pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + pr_debug("page fault handling disabled, drop fault 0x%llx\n", addr); r = 0; goto out; } + node = kfd_node_by_irq_ids(adev, node_id, vmid); + if (!node) { + pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id, + vmid); + r = -EFAULT; + goto out; + } + + if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) { + pr_debug("failed to get gpuid/gpuidex for node_id: %d\n", node_id); + r = -EFAULT; + goto out; + } + + /* check if this page fault time stamp is before svms->checkpoint_ts */ + if (svms->checkpoint_ts[gpuidx] != 0) { + if (amdgpu_ih_ts_after(ts, svms->checkpoint_ts[gpuidx])) { + pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + r = 0; + goto out; + } else + /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts + * to zero to avoid following ts wrap around give wrong comparing + */ + svms->checkpoint_ts[gpuidx] = 0; + } + if (!p->xnack_enabled) { pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); r = -EFAULT; @@ -2949,13 +2996,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } - node = kfd_node_by_irq_ids(adev, node_id, vmid); - if (!node) { - pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id, - vmid); - r = -EFAULT; - goto out; - } mmap_read_lock(mm); retry_write_locked: mutex_lock(&svms->lock); @@ -3170,8 +3210,9 @@ void svm_range_list_fini(struct kfd_process *p) /* * Ensure no retry fault comes in afterwards, as page fault handler will * not find kfd process and take mm lock to recover fault. + * stop kfd page fault handing, then wait pending page faults got drained */ - atomic_inc(&p->svms.drain_pagefaults); + atomic_set(&p->svms.drain_pagefaults, 1); svm_range_drain_retry_fault(&p->svms); list_for_each_entry_safe(prange, next, &p->svms.list, list) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 70c1776611c4..bddd24f04669 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -137,6 +137,7 @@ struct svm_range { DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); bool mapped_to_gpu; + atomic_t queue_refcount; }; static inline void svm_range_lock(struct svm_range *prange) @@ -173,7 +174,7 @@ int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bool clear); void svm_range_vram_node_free(struct svm_range *prange); int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, - uint32_t vmid, uint32_t node_id, uint64_t addr, + uint32_t vmid, uint32_t node_id, uint64_t addr, uint64_t ts, bool write_fault); int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence); void svm_range_add_list_work(struct svm_range_list *svms, @@ -224,7 +225,7 @@ static inline void svm_range_list_fini(struct kfd_process *p) static inline int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t client_id, uint32_t node_id, - uint64_t addr, bool write_fault) + uint64_t addr, uint64_t ts, bool write_fault) { return -EFAULT; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 6f89b06f89d3..3871591c9aec 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -292,6 +292,8 @@ static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr, iolink->max_bandwidth); sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size", iolink->rec_transfer_size); + sysfs_show_32bit_prop(buffer, offs, "recommended_sdma_engine_id_mask", + iolink->rec_sdma_eng_id_mask); sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags); return offs; @@ -1265,6 +1267,54 @@ static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev, } } +#define REC_SDMA_NUM_GPU 8 +static const int rec_sdma_eng_map[REC_SDMA_NUM_GPU][REC_SDMA_NUM_GPU] = { + { -1, 14, 12, 2, 4, 8, 10, 6 }, + { 14, -1, 2, 10, 8, 4, 6, 12 }, + { 10, 2, -1, 12, 14, 6, 4, 8 }, + { 2, 12, 10, -1, 6, 14, 8, 4 }, + { 4, 8, 14, 6, -1, 10, 12, 2 }, + { 8, 4, 6, 14, 12, -1, 2, 10 }, + { 10, 6, 4, 8, 12, 2, -1, 14 }, + { 6, 12, 8, 4, 2, 10, 14, -1 }}; + +static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev, + struct kfd_iolink_properties *outbound_link, + struct kfd_iolink_properties *inbound_link) +{ + struct kfd_node *gpu = outbound_link->gpu; + struct amdgpu_device *adev = gpu->adev; + int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && + adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 && + kfd_get_num_xgmi_sdma_engines(gpu) >= 14 && + (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8); + + if (support_rec_eng) { + int src_socket_id = adev->gmc.xgmi.physical_node_id; + int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id; + + outbound_link->rec_sdma_eng_id_mask = + 1 << rec_sdma_eng_map[src_socket_id][dst_socket_id]; + inbound_link->rec_sdma_eng_id_mask = + 1 << rec_sdma_eng_map[dst_socket_id][src_socket_id]; + } else { + int num_sdma_eng = kfd_get_num_sdma_engines(gpu); + int i, eng_offset = 0; + + if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && + kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) { + eng_offset = num_sdma_eng; + num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu); + } + + for (i = 0; i < num_sdma_eng; i++) { + outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); + inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); + } + } +} + static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) { struct kfd_iolink_properties *link, *inbound_link; @@ -1303,6 +1353,7 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link); kfd_set_iolink_non_coherent(peer_dev, link, inbound_link); + kfd_set_recommended_sdma_engines(peer_dev, link, inbound_link); } } @@ -2027,7 +2078,7 @@ int kfd_topology_add_device(struct kfd_node *gpu) HSA_CAP_ASIC_REVISION_MASK); dev->node_props.location_id = pci_dev_id(gpu->adev->pdev); - if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3)) + if (gpu->kfd->num_nodes > 1) dev->node_props.location_id |= dev->gpu->node_id; dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus); @@ -2120,6 +2171,8 @@ int kfd_topology_add_device(struct kfd_node *gpu) dev->gpu->adev->gmc.xgmi.connected_to_cpu) dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS; + kfd_queue_ctx_save_restore_size(dev); + kfd_debug_print_topology(); kfd_notify_gpu_change(gpu_id, 1); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 2d1c9d771bef..155b5c410af1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -74,6 +74,10 @@ struct kfd_node_properties { uint32_t num_sdma_xgmi_engines; uint32_t num_sdma_queues_per_engine; uint32_t num_cp_queues; + uint32_t cwsr_size; + uint32_t ctl_stack_size; + uint32_t eop_buffer_size; + uint32_t debug_memory_size; char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; }; @@ -121,6 +125,7 @@ struct kfd_iolink_properties { uint32_t min_bandwidth; uint32_t max_bandwidth; uint32_t rec_transfer_size; + uint32_t rec_sdma_eng_id_mask; uint32_t flags; struct kfd_node *gpu; struct kobject *kobj; diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h index 10138676f27f..e5c0205f2618 100644 --- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h +++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h @@ -29,6 +29,7 @@ #define SOC15_INTSRC_CP_BAD_OPCODE 183 #define SOC15_INTSRC_SQ_INTERRUPT_MSG 239 #define SOC15_INTSRC_VMC_FAULT 0 +#define SOC15_INTSRC_VMC_UTCL2_POISON 1 #define SOC15_INTSRC_SDMA_TRAP 224 #define SOC15_INTSRC_SDMA_ECC 220 #define SOC21_INTSRC_SDMA_TRAP 49 diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 983a977632ff..7d999e352df3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -176,6 +176,7 @@ MODULE_FIRMWARE(FIRMWARE_DCN_401_DMUB); static int amdgpu_dm_init(struct amdgpu_device *adev); static void amdgpu_dm_fini(struct amdgpu_device *adev); static bool is_freesync_video_mode(const struct drm_display_mode *mode, struct amdgpu_dm_connector *aconnector); +static void reset_freesync_config_for_crtc(struct dm_crtc_state *new_crtc_state); static enum drm_mode_subconnector get_subconnector_type(struct dc_link *link) { @@ -877,6 +878,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) "HPD_IRQ", "SET_CONFIGC_REPLY", "DPIA_NOTIFICATION", + "HPD_SENSE_NOTIFY", }; do { @@ -1740,7 +1742,7 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device * /* Send the chunk */ ret = dm_dmub_send_vbios_gpint_command(adev, send_addrs[i], chunk, 30000); if (ret != DMUB_STATUS_OK) - /* No need to free bb here since it shall be done unconditionally */ + /* No need to free bb here since it shall be done in dm_sw_fini() */ return NULL; } @@ -2218,7 +2220,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev) return 0; } - r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, fw_name_dmcu); + r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, "%s", fw_name_dmcu); if (r == -ENODEV) { /* DMCU firmware is not necessary, so don't raise a fuss if it's missing */ DRM_DEBUG_KMS("dm: DMCU firmware not found\n"); @@ -2465,8 +2467,17 @@ static int dm_sw_init(void *handle) static int dm_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct dal_allocation *da; + + list_for_each_entry(da, &adev->dm.da_list, list) { + if (adev->dm.bb_from_dmub == (void *) da->cpu_ptr) { + amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr); + list_del(&da->list); + kfree(da); + break; + } + } - kfree(adev->dm.bb_from_dmub); adev->dm.bb_from_dmub = NULL; kfree(adev->dm.dmub_fb_info); @@ -3206,8 +3217,11 @@ static int dm_resume(void *handle) drm_connector_list_iter_end(&iter); /* Force mode set in atomic commit */ - for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) + for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) { new_crtc_state->active_changed = true; + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + reset_freesync_config_for_crtc(dm_new_crtc_state); + } /* * atomic_check is expected to create the dc states. We need to release @@ -4839,18 +4853,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) /* Determine whether to enable Replay support by default. */ if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) { switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { -/* - * Disabled by default due to https://gitlab.freedesktop.org/drm/amd/-/issues/3344 - * case IP_VERSION(3, 1, 4): - * case IP_VERSION(3, 1, 5): - * case IP_VERSION(3, 1, 6): - * case IP_VERSION(3, 2, 0): - * case IP_VERSION(3, 2, 1): - * case IP_VERSION(3, 5, 0): - * case IP_VERSION(3, 5, 1): - * replay_feature_enabled = true; - * break; - */ + case IP_VERSION(3, 1, 4): + case IP_VERSION(3, 2, 0): + case IP_VERSION(3, 2, 1): + case IP_VERSION(3, 5, 0): + case IP_VERSION(3, 5, 1): + replay_feature_enabled = true; + break; + default: replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK; break; @@ -5147,7 +5157,7 @@ static int dm_init_microcode(struct amdgpu_device *adev) /* ASIC doesn't support DMUB. */ return 0; } - r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, fw_name_dmub); + r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, "%s", fw_name_dmub); return r; } @@ -7198,6 +7208,9 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, int requested_bpc = drm_state ? drm_state->max_requested_bpc : 8; enum dc_status dc_result = DC_OK; + if (!dm_state) + return NULL; + do { stream = create_stream_for_sink(connector, drm_mode, dm_state, old_stream, @@ -8237,42 +8250,10 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, struct amdgpu_crtc *acrtc, bool enable) { - /* - * We have no guarantee that the frontend index maps to the same - * backend index - some even map to more than one. - * - * TODO: Use a different interrupt or check DC itself for the mapping. - */ - int irq_type = - amdgpu_display_crtc_idx_to_irq_type( - adev, - acrtc->crtc_id); - - if (enable) { + if (enable) drm_crtc_vblank_on(&acrtc->base); - amdgpu_irq_get( - adev, - &adev->pageflip_irq, - irq_type); -#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) - amdgpu_irq_get( - adev, - &adev->vline0_irq, - irq_type); -#endif - } else { -#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) - amdgpu_irq_put( - adev, - &adev->vline0_irq, - irq_type); -#endif - amdgpu_irq_put( - adev, - &adev->pageflip_irq, - irq_type); + else drm_crtc_vblank_off(&acrtc->base); - } } static void dm_update_pflip_irq_state(struct amdgpu_device *adev, @@ -8715,7 +8696,8 @@ static void amdgpu_dm_update_cursor(struct drm_plane *plane, adev->dm.dc->caps.color.dpp.gamma_corr) attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1; - attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; + if (afb) + attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; if (crtc_state->stream) { if (!dc_stream_set_cursor_attributes(crtc_state->stream, @@ -9305,7 +9287,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, if (acrtc) old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base); - if (!acrtc->wb_enabled) + if (!acrtc || !acrtc->wb_enabled) continue; dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); @@ -9709,9 +9691,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) DRM_INFO("[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption); - hdcp_update_display( - adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector, - new_con_state->hdcp_content_type, enable_encryption); + if (aconnector->dc_link) + hdcp_update_display( + adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector, + new_con_state->hdcp_content_type, enable_encryption); } } @@ -11720,25 +11703,6 @@ fail: return ret; } -static bool is_dp_capable_without_timing_msa(struct dc *dc, - struct amdgpu_dm_connector *amdgpu_dm_connector) -{ - u8 dpcd_data; - bool capable = false; - - if (amdgpu_dm_connector->dc_link && - dm_helpers_dp_read_dpcd( - NULL, - amdgpu_dm_connector->dc_link, - DP_DOWN_STREAM_PORT_COUNT, - &dpcd_data, - sizeof(dpcd_data))) { - capable = (dpcd_data & DP_MSA_TIMING_PAR_IGNORED) ? true:false; - } - - return capable; -} - static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, unsigned int offset, unsigned int total_length, @@ -12041,8 +12005,8 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, sink->sink_signal == SIGNAL_TYPE_EDP)) { bool edid_check_required = false; - if (is_dp_capable_without_timing_msa(adev->dm.dc, - amdgpu_dm_connector)) { + if (amdgpu_dm_connector->dc_link && + amdgpu_dm_connector->dc_link->dpcd_caps.allow_invalid_MSA_timing_param) { if (edid->features & DRM_EDID_FEATURE_CONTINUOUS_FREQ) { amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; @@ -12124,7 +12088,8 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, } } - as_type = dm_get_adaptive_sync_support_type(amdgpu_dm_connector->dc_link); + if (amdgpu_dm_connector->dc_link) + as_type = dm_get_adaptive_sync_support_type(amdgpu_dm_connector->dc_link); if (as_type == FREESYNC_TYPE_PCON_IN_WHITELIST) { i = parse_hdmi_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); @@ -12148,6 +12113,12 @@ update: if (dm_con_state) dm_con_state->freesync_capable = freesync_capable; + if (connector->state && amdgpu_dm_connector->dc_link && !freesync_capable && + amdgpu_dm_connector->dc_link->replay_settings.config.replay_supported) { + amdgpu_dm_connector->dc_link->replay_settings.config.replay_supported = false; + amdgpu_dm_connector->dc_link->replay_settings.replay_feature_enabled = false; + } + if (connector->vrr_capable_property) drm_connector_set_vrr_capable_property(connector, freesync_capable); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 99014339aaa3..a2cf2c066a76 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -251,9 +251,10 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) else if (dm->active_vblank_irq_count) dm->active_vblank_irq_count--; - dc_allow_idle_optimizations(dm->dc, dm->active_vblank_irq_count == 0); - - DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); + if (dm->active_vblank_irq_count > 0) { + DRM_DEBUG_KMS("Allow idle optimizations (MALL): false\n"); + dc_allow_idle_optimizations(dm->dc, false); + } /* * Control PSR based on vblank requirements from OS @@ -272,6 +273,11 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) vblank_work->stream->link->replay_settings.replay_feature_enabled); } + if (dm->active_vblank_irq_count == 0) { + DRM_DEBUG_KMS("Allow idle optimizations (MALL): true\n"); + dc_allow_idle_optimizations(dm->dc, true); + } + mutex_unlock(&dm->dc_lock); dc_stream_release(vblank_work->stream); @@ -286,11 +292,14 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); struct amdgpu_display_manager *dm = &adev->dm; struct vblank_control_work *work; + int irq_type; int rc = 0; if (acrtc->otg_inst == -1) goto skip; + irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id); + if (enable) { /* vblank irq on -> Only need vupdate irq in vrr mode */ if (amdgpu_dm_crtc_vrr_active(acrtc_state)) @@ -303,13 +312,52 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) if (rc) return rc; - rc = (enable) - ? amdgpu_irq_get(adev, &adev->crtc_irq, acrtc->crtc_id) - : amdgpu_irq_put(adev, &adev->crtc_irq, acrtc->crtc_id); + /* crtc vblank or vstartup interrupt */ + if (enable) { + rc = amdgpu_irq_get(adev, &adev->crtc_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Get crtc_irq ret=%d\n", rc); + } else { + rc = amdgpu_irq_put(adev, &adev->crtc_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Put crtc_irq ret=%d\n", rc); + } if (rc) return rc; + /* + * hubp surface flip interrupt + * + * We have no guarantee that the frontend index maps to the same + * backend index - some even map to more than one. + * + * TODO: Use a different interrupt or check DC itself for the mapping. + */ + if (enable) { + rc = amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Get pageflip_irq ret=%d\n", rc); + } else { + rc = amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Put pageflip_irq ret=%d\n", rc); + } + + if (rc) + return rc; + +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + /* crtc vline0 interrupt, only available on DCN+ */ + if (amdgpu_ip_version(adev, DCE_HWIP, 0) != 0) { + if (enable) { + rc = amdgpu_irq_get(adev, &adev->vline0_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Get vline0_irq ret=%d\n", rc); + } else { + rc = amdgpu_irq_put(adev, &adev->vline0_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Put vline0_irq ret=%d\n", rc); + } + + if (rc) + return rc; + } +#endif skip: if (amdgpu_in_reset(adev)) return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 62cb59f00929..db56b0aa5454 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -3804,9 +3804,12 @@ static int trigger_hpd_mst_set(void *data, u64 val) if (aconnector->dc_link->type == dc_connection_mst_branch && aconnector->mst_mgr.aux) { mutex_lock(&adev->dm.dc_lock); - dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD); + ret = dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD); mutex_unlock(&adev->dm.dc_lock); + if (!ret) + DRM_ERROR("DM_MST: Failed to detect dc link!"); + ret = drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true); if (ret < 0) DRM_ERROR("DM_MST: Failed to set the device into MST mode!"); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index b490ae67b6be..165e010fe69c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -1286,3 +1286,15 @@ enum adaptive_sync_type dm_get_adaptive_sync_support_type(struct dc_link *link) return as_type; } + +bool dm_helpers_is_fullscreen(struct dc_context *ctx, struct dc_stream_state *stream) +{ + // TODO + return false; +} + +bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream) +{ + // TODO + return false; +} \ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index a83bd0331c3b..a573a6639898 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "amdgpu.h" @@ -935,10 +936,14 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, } afb = to_amdgpu_framebuffer(new_state->fb); - obj = new_state->fb->obj[0]; + obj = drm_gem_fb_get_obj(new_state->fb, 0); + if (!obj) { + DRM_ERROR("Failed to get obj from framebuffer\n"); + return -EINVAL; + } + rbo = gem_to_amdgpu_bo(obj); adev = amdgpu_ttm_adev(rbo->tbo.bdev); - r = amdgpu_bo_reserve(rbo, true); if (r) { dev_err(adev->dev, "fail to reserve bo (%d)\n", r); @@ -1372,7 +1377,8 @@ void amdgpu_dm_plane_handle_cursor_update(struct drm_plane *plane, adev->dm.dc->caps.color.dpp.gamma_corr) attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1; - attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; + if (afb) + attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; if (crtc_state->stream) { mutex_lock(&adev->dm.dc_lock); diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 80069651def3..8992e697759f 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -35,7 +35,6 @@ DC_LIBS += dcn201 DC_LIBS += dcn30 DC_LIBS += dcn301 DC_LIBS += dcn31 -DC_LIBS += dcn314 DC_LIBS += dml DC_LIBS += dml2 endif diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index 506f82cd5cc6..88d3f9d7dd55 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -486,3 +486,30 @@ int dc_fixpt_s4d19(struct fixed31_32 arg) else return ux_dy(arg.value, 4, 19); } + +struct fixed31_32 dc_fixpt_from_ux_dy(unsigned int value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct fixed31_32 fixpt_value = dc_fixpt_zero; + struct fixed31_32 fixpt_int_value = dc_fixpt_zero; + long long frac_mask = ((long long)1 << (long long)integer_bits) - 1; + + fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + frac_mask = frac_mask << fractional_bits; + fixpt_int_value.value = value & frac_mask; + fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + fixpt_value.value |= fixpt_int_value.value; + return fixpt_value; +} + +struct fixed31_32 dc_fixpt_from_int_dy(unsigned int int_value, + unsigned int frac_value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct fixed31_32 fixpt_value = dc_fixpt_from_int(int_value); + + fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + return fixpt_value; +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index 78df96882d6e..f8409453434c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -195,7 +195,7 @@ void dce11_pplib_apply_display_requirements( * , then change minimum memory clock based on real-time bandwidth * limitation. */ - if ((dc->ctx->asic_id.chip_family == FAMILY_AI) && + if (dc->bw_vbios && (dc->ctx->asic_id.chip_family == FAMILY_AI) && ASICREV_IS_VEGA20_P(dc->ctx->asic_id.hw_internal_rev) && (context->stream_count >= 2)) { pp_display_cfg->min_memory_clock_khz = max(pp_display_cfg->min_memory_clock_khz, (uint32_t) div64_s64( diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 70ee0089a20d..0ce9b40dfc68 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -120,25 +120,40 @@ static int dcn35_get_active_display_cnt_wa( return display_count; } - static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower, bool disable) { struct dc *dc = clk_mgr_base->ctx->dc; int i; + if (dc->ctx->dce_environment == DCE_ENV_DIAG) + return; + for (i = 0; i < dc->res_pool->pipe_count; ++i) { + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; struct pipe_ctx *pipe = safe_to_lower ? &context->res_ctx.pipe_ctx[i] : &dc->current_state->res_ctx.pipe_ctx[i]; - + bool stream_changed_otg_dig_on = false; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; + stream_changed_otg_dig_on = old_pipe->stream && new_pipe->stream && + old_pipe->stream != new_pipe->stream && + old_pipe->stream_res.tg == new_pipe->stream_res.tg && + new_pipe->stream->link_enc && !new_pipe->stream->dpms_off && + new_pipe->stream->link_enc->funcs->is_dig_enabled && + new_pipe->stream->link_enc->funcs->is_dig_enabled( + new_pipe->stream->link_enc) && + new_pipe->stream_res.stream_enc && + new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled && + new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled(new_pipe->stream_res.stream_enc); if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || - !pipe->stream->link_enc)) { + !pipe->stream->link_enc) && !stream_changed_otg_dig_on) { + /* This w/a should not trigger when we have a dig active */ if (disable) { - if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->disable_crtc) - pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg); + if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) + pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); } else { @@ -290,6 +305,9 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz) new_clocks->ref_dtbclk_khz = 600000; + if (dc->debug.min_disp_clk_khz > 0 && new_clocks->dispclk_khz < dc->debug.min_disp_clk_khz) + new_clocks->dispclk_khz = dc->debug.min_disp_clk_khz; + /* * if it is safe to lower, but we are already in the lower state, we don't have to do anything * also if safe to lower is false, we just go in the higher state diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index 45fe17a46890..01ea3a31e54d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -14,6 +14,7 @@ #include "core_types.h" #include "dm_helpers.h" #include "link.h" +#include "dc_state_priv.h" #include "atomfirmware.h" #include "dcn401_smu14_driver_if.h" @@ -29,6 +30,7 @@ #define mmCLK01_CLK0_CLK2_DFS_CNTL 0x16E6F #define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E72 #define mmCLK01_CLK0_CLK4_DFS_CNTL 0x16E75 +#define mmCLK20_CLK2_CLK2_DFS_CNTL 0x1B051 #define CLK0_CLK_PLL_REQ__FbMult_int_MASK 0x000001ffUL #define CLK0_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000f000UL @@ -302,6 +304,197 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) dcn401_build_wm_range_table(clk_mgr_base); } +static void dcn401_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, + struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + uint32_t dprefclk_did = 0; + uint32_t dcfclk_did = 0; + uint32_t dtbclk_did = 0; + uint32_t dispclk_did = 0; + uint32_t dppclk_did = 0; + uint32_t fclk_did = 0; + uint32_t target_div = 0; + + /* DFS Slice 0 is used for DISPCLK */ + dispclk_did = REG_READ(CLK0_CLK0_DFS_CNTL); + /* DFS Slice 1 is used for DPPCLK */ + dppclk_did = REG_READ(CLK0_CLK1_DFS_CNTL); + /* DFS Slice 2 is used for DPREFCLK */ + dprefclk_did = REG_READ(CLK0_CLK2_DFS_CNTL); + /* DFS Slice 3 is used for DCFCLK */ + dcfclk_did = REG_READ(CLK0_CLK3_DFS_CNTL); + /* DFS Slice 4 is used for DTBCLK */ + dtbclk_did = REG_READ(CLK0_CLK4_DFS_CNTL); + /* DFS Slice _ is used for FCLK */ + fclk_did = REG_READ(CLK2_CLK2_DFS_CNTL); + + /* Convert DISPCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dispclk_did); + //Get dispclk in khz + regs_and_bypass->dispclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DISPCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dppclk_did); + //Get dppclk in khz + regs_and_bypass->dppclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DPREFCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dprefclk_did); + //Get dprefclk in khz + regs_and_bypass->dprefclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DCFCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dcfclk_did); + //Get dcfclk in khz + regs_and_bypass->dcfclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DTBCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dtbclk_did); + //Get dtbclk in khz + regs_and_bypass->dtbclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DTBCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(fclk_did); + //Get fclk in khz + regs_and_bypass->fclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; +} + +static bool dcn401_check_native_scaling(struct pipe_ctx *pipe) +{ + bool is_native_scaling = false; + int width = pipe->plane_state->src_rect.width; + int height = pipe->plane_state->src_rect.height; + + if (pipe->stream->timing.h_addressable == width && + pipe->stream->timing.v_addressable == height && + pipe->plane_state->dst_rect.width == width && + pipe->plane_state->dst_rect.height == height) + is_native_scaling = true; + + return is_native_scaling; +} + +static void dcn401_auto_dpm_test_log( + struct dc_clocks *new_clocks, + struct clk_mgr_internal *clk_mgr, + struct dc_state *context) +{ + unsigned int mall_ss_size_bytes; + int dramclk_khz_override, fclk_khz_override, num_fclk_levels; + + struct pipe_ctx *pipe_ctx_list[MAX_PIPES]; + int active_pipe_count = 0; + + for (int i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { + pipe_ctx_list[active_pipe_count] = pipe_ctx; + active_pipe_count++; + } + } + + msleep(5); + + mall_ss_size_bytes = context->bw_ctx.bw.dcn.mall_ss_size_bytes; + + struct clk_log_info log_info = {0}; + struct clk_state_registers_and_bypass clk_register_dump; + + dcn401_dump_clk_registers(&clk_register_dump, &clk_mgr->base, &log_info); + + // Overrides for these clocks in case there is no p_state change support + dramclk_khz_override = new_clocks->dramclk_khz; + fclk_khz_override = new_clocks->fclk_khz; + + num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1; + + if (!new_clocks->p_state_change_support) + dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz * 1000; + + if (!new_clocks->fclk_p_state_change_support) + fclk_khz_override = clk_mgr->base.bw_params->clk_table.entries[num_fclk_levels].fclk_mhz * 1000; + + + //////////////////////////////////////////////////////////////////////////// + // IMPORTANT: When adding more clocks to these logs, do NOT put a newline + // anywhere other than at the very end of the string. + // + // Formatting example (make sure to have " - " between each entry): + // + // AutoDPMTest: clk1:%d - clk2:%d - clk3:%d - clk4:%d\n" + //////////////////////////////////////////////////////////////////////////// + if (active_pipe_count > 0 && + new_clocks->dramclk_khz > 0 && + new_clocks->fclk_khz > 0 && + new_clocks->dcfclk_khz > 0 && + new_clocks->dppclk_khz > 0) { + + uint32_t pix_clk_list[MAX_PIPES] = {0}; + int p_state_list[MAX_PIPES] = {0}; + int disp_src_width_list[MAX_PIPES] = {0}; + int disp_src_height_list[MAX_PIPES] = {0}; + uint64_t disp_src_refresh_list[MAX_PIPES] = {0}; + bool is_scaled_list[MAX_PIPES] = {0}; + + for (int i = 0; i < active_pipe_count; i++) { + struct pipe_ctx *curr_pipe_ctx = pipe_ctx_list[i]; + uint64_t refresh_rate; + + pix_clk_list[i] = curr_pipe_ctx->stream->timing.pix_clk_100hz; + p_state_list[i] = curr_pipe_ctx->p_state_type; + + refresh_rate = (curr_pipe_ctx->stream->timing.pix_clk_100hz * (uint64_t)100 + + curr_pipe_ctx->stream->timing.v_total + * (uint64_t) curr_pipe_ctx->stream->timing.h_total - (uint64_t)1); + refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.v_total); + refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.h_total); + disp_src_refresh_list[i] = refresh_rate; + + if (curr_pipe_ctx->plane_state) { + is_scaled_list[i] = !(dcn401_check_native_scaling(curr_pipe_ctx)); + disp_src_width_list[i] = curr_pipe_ctx->plane_state->src_rect.width; + disp_src_height_list[i] = curr_pipe_ctx->plane_state->src_rect.height; + } + } + + DC_LOG_AUTO_DPM_TEST("AutoDPMTest: dramclk:%d - fclk:%d - " + "dcfclk:%d - dppclk:%d - dispclk_hw:%d - " + "dppclk_hw:%d - dprefclk_hw:%d - dcfclk_hw:%d - " + "dtbclk_hw:%d - fclk_hw:%d - pix_clk_0:%d - pix_clk_1:%d - " + "pix_clk_2:%d - pix_clk_3:%d - mall_ss_size:%d - p_state_type_0:%d - " + "p_state_type_1:%d - p_state_type_2:%d - p_state_type_3:%d - " + "pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - " + "pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - " + "pix_width_2:%d - pix_height_2:%d - refresh_rate_2:%lld - is_scaled_2:%d - " + "pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d - LOG_END\n", + dramclk_khz_override, + fclk_khz_override, + new_clocks->dcfclk_khz, + new_clocks->dppclk_khz, + clk_register_dump.dispclk, + clk_register_dump.dppclk, + clk_register_dump.dprefclk, + clk_register_dump.dcfclk, + clk_register_dump.dtbclk, + clk_register_dump.fclk, + pix_clk_list[0], pix_clk_list[1], pix_clk_list[3], pix_clk_list[2], + mall_ss_size_bytes, + p_state_list[0], p_state_list[1], p_state_list[2], p_state_list[3], + disp_src_width_list[0], disp_src_height_list[0], disp_src_refresh_list[0], is_scaled_list[0], + disp_src_width_list[1], disp_src_height_list[1], disp_src_refresh_list[1], is_scaled_list[1], + disp_src_width_list[2], disp_src_height_list[2], disp_src_refresh_list[2], is_scaled_list[2], + disp_src_width_list[3], disp_src_height_list[3], disp_src_refresh_list[3], is_scaled_list[3]); + } +} + static void dcn401_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr, struct dc_state *context, int ref_dtbclk_khz) @@ -324,10 +517,12 @@ static void dcn401_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr if (!use_hpo_encoder) continue; - otg_master->clock_source->funcs->program_pix_clk( + if (otg_master->stream_res.pix_clk_params.controller_id > CONTROLLER_ID_UNDEFINED) + otg_master->clock_source->funcs->program_pix_clk( otg_master->clock_source, &otg_master->stream_res.pix_clk_params, - dccg->ctx->dc->link_srv->dp_get_encoding_format(&otg_master->link_config.dp_link_settings), + dccg->ctx->dc->link_srv->dp_get_encoding_format( + &otg_master->link_config.dp_link_settings), &otg_master->pll_settings); } } @@ -738,12 +933,12 @@ static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned static unsigned int dcn401_build_update_bandwidth_clocks_sequence( struct clk_mgr *clk_mgr_base, struct dc_state *context, + struct dc_clocks *new_clocks, bool safe_to_lower) { struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dcn401_clk_mgr *clk_mgr401 = TO_DCN401_CLK_MGR(clk_mgr_internal); struct dc *dc = clk_mgr_base->ctx->dc; - struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dcn401_clk_mgr_block_sequence *block_sequence = clk_mgr401->block_sequence; bool enter_display_off = false; bool update_active_fclk = false; @@ -1025,13 +1220,13 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( static unsigned int dcn401_build_update_display_clocks_sequence( struct clk_mgr *clk_mgr_base, struct dc_state *context, + struct dc_clocks *new_clocks, bool safe_to_lower) { struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dcn401_clk_mgr *clk_mgr401 = TO_DCN401_CLK_MGR(clk_mgr_internal); struct dc *dc = clk_mgr_base->ctx->dc; struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; - struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dcn401_clk_mgr_block_sequence *block_sequence = clk_mgr401->block_sequence; bool force_reset = false; bool update_dispclk = false; @@ -1182,6 +1377,7 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base, /* build bandwidth related clocks update sequence */ num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base, context, + &context->bw_ctx.bw.dcn.clk, safe_to_lower); /* execute sequence */ @@ -1190,10 +1386,15 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base, /* build display related clocks update sequence */ num_steps = dcn401_build_update_display_clocks_sequence(clk_mgr_base, context, + &context->bw_ctx.bw.dcn.clk, safe_to_lower); /* execute sequence */ dcn401_execute_block_sequence(clk_mgr_base, num_steps); + + if (dc->config.enable_auto_dpm_test_logs) + dcn401_auto_dpm_test_log(&context->bw_ctx.bw.dcn.clk, TO_CLK_MGR_INTERNAL(clk_mgr_base), context); + } @@ -1218,59 +1419,6 @@ static uint32_t dcn401_get_vco_frequency_from_reg(struct clk_mgr_internal *clk_m return dc_fixpt_floor(pll_req); } -static void dcn401_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, - struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) -{ - struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - uint32_t dprefclk_did = 0; - uint32_t dcfclk_did = 0; - uint32_t dtbclk_did = 0; - uint32_t dispclk_did = 0; - uint32_t dppclk_did = 0; - uint32_t target_div = 0; - - /* DFS Slice 0 is used for DISPCLK */ - dispclk_did = REG_READ(CLK0_CLK0_DFS_CNTL); - /* DFS Slice 1 is used for DPPCLK */ - dppclk_did = REG_READ(CLK0_CLK1_DFS_CNTL); - /* DFS Slice 2 is used for DPREFCLK */ - dprefclk_did = REG_READ(CLK0_CLK2_DFS_CNTL); - /* DFS Slice 3 is used for DCFCLK */ - dcfclk_did = REG_READ(CLK0_CLK3_DFS_CNTL); - /* DFS Slice 4 is used for DTBCLK */ - dtbclk_did = REG_READ(CLK0_CLK4_DFS_CNTL); - - /* Convert DISPCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dispclk_did); - //Get dispclk in khz - regs_and_bypass->dispclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; - - /* Convert DISPCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dppclk_did); - //Get dppclk in khz - regs_and_bypass->dppclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; - - /* Convert DPREFCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dprefclk_did); - //Get dprefclk in khz - regs_and_bypass->dprefclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; - - /* Convert DCFCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dcfclk_did); - //Get dcfclk in khz - regs_and_bypass->dcfclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; - - /* Convert DTBCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dtbclk_did); - //Get dtbclk in khz - regs_and_bypass->dtbclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; -} - static void dcn401_clock_read_ss_info(struct clk_mgr_internal *clk_mgr) { struct dc_bios *bp = clk_mgr->base.ctx->dc_bios; @@ -1330,33 +1478,34 @@ static void dcn401_notify_wm_ranges(struct clk_mgr *clk_mgr_base) static void dcn401_set_hard_min_memclk(struct clk_mgr *clk_mgr_base, bool current_mode) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + const struct dc *dc = clk_mgr->base.ctx->dc; + struct dc_state *context = dc->current_state; + struct dc_clocks new_clocks; + int num_steps; if (!clk_mgr->smu_present || !dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) return; + /* build clock update */ + memcpy(&new_clocks, &clk_mgr_base->clks, sizeof(struct dc_clocks)); + if (current_mode) { - if (clk_mgr_base->clks.p_state_change_support) - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)); - else - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->max_memclk_mhz); + new_clocks.dramclk_khz = context->bw_ctx.bw.dcn.clk.dramclk_khz; + new_clocks.idle_dramclk_khz = context->bw_ctx.bw.dcn.clk.idle_dramclk_khz; + new_clocks.p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support; } else { - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz); + new_clocks.dramclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz * 1000; + new_clocks.idle_dramclk_khz = new_clocks.dramclk_khz; + new_clocks.p_state_change_support = true; } -} -/* Set max memclk to highest DPM value */ -static void dcn401_set_hard_max_memclk(struct clk_mgr *clk_mgr_base) -{ - struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base, + context, + &new_clocks, + true); - if (!clk_mgr->smu_present || !dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) - return; - - dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->max_memclk_mhz); + /* execute sequence */ + dcn401_execute_block_sequence(clk_mgr_base, num_steps); } /* Get current memclk states, update bounding box */ @@ -1487,7 +1636,6 @@ static struct clk_mgr_funcs dcn401_funcs = { .init_clocks = dcn401_init_clocks, .notify_wm_ranges = dcn401_notify_wm_ranges, .set_hard_min_memclk = dcn401_set_hard_min_memclk, - .set_hard_max_memclk = dcn401_set_hard_max_memclk, .get_memclk_states_from_smu = dcn401_get_memclk_states_from_smu, .are_clock_states_equal = dcn401_are_clock_states_equal, .enable_pme_wa = dcn401_enable_pme_wa, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 85a2ef82afa5..c8dabb081b3d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1254,7 +1254,8 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context); if (pipe->stream && pipe->plane_state) { - set_p_state_switch_method(dc, context, pipe); + if (!dc->debug.using_dml2) + set_p_state_switch_method(dc, context, pipe); dc_update_visual_confirm_color(dc, context, pipe); } @@ -1351,80 +1352,6 @@ static void disable_vbios_mode_if_required( } } -/** - * wait_for_blank_complete - wait for all active OPPs to finish pending blank - * pattern updates - * - * @dc: [in] dc reference - * @context: [in] hardware context in use - */ -static void wait_for_blank_complete(struct dc *dc, - struct dc_state *context) -{ - struct pipe_ctx *opp_head; - struct dce_hwseq *hws = dc->hwseq; - int i; - - if (!hws->funcs.wait_for_blank_complete) - return; - - for (i = 0; i < MAX_PIPES; i++) { - opp_head = &context->res_ctx.pipe_ctx[i]; - - if (!resource_is_pipe_type(opp_head, OPP_HEAD) || - dc_state_get_pipe_subvp_type(context, opp_head) == SUBVP_PHANTOM) - continue; - - hws->funcs.wait_for_blank_complete(opp_head->stream_res.opp); - } -} - -static void wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *context) -{ - struct pipe_ctx *otg_master; - struct timing_generator *tg; - int i; - - for (i = 0; i < MAX_PIPES; i++) { - otg_master = &context->res_ctx.pipe_ctx[i]; - if (!resource_is_pipe_type(otg_master, OTG_MASTER) || - dc_state_get_pipe_subvp_type(context, otg_master) == SUBVP_PHANTOM) - continue; - tg = otg_master->stream_res.tg; - if (tg->funcs->wait_odm_doublebuffer_pending_clear) - tg->funcs->wait_odm_doublebuffer_pending_clear(tg); - } - - /* ODM update may require to reprogram blank pattern for each OPP */ - wait_for_blank_complete(dc, context); -} - -static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) -{ - int i; - PERF_TRACE(); - for (i = 0; i < MAX_PIPES; i++) { - int count = 0; - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) - continue; - - /* Timeout 100 ms */ - while (count < 100000) { - /* Must set to false to start with, due to OR in update function */ - pipe->plane_state->status.is_flip_pending = false; - dc->hwss.update_pending_status(pipe); - if (!pipe->plane_state->status.is_flip_pending) - break; - udelay(1); - count++; - } - ASSERT(!pipe->plane_state->status.is_flip_pending); - } - PERF_TRACE(); -} - /* Public functions */ struct dc *dc_create(const struct dc_init_data *init_params) @@ -1822,10 +1749,18 @@ bool dc_validate_boot_timing(const struct dc *dc, tg->funcs->get_optc_source(tg, &numOdmPipes, &id_src[0], &id_src[1]); - if (numOdmPipes == 2) + if (numOdmPipes == 2) { pix_clk_100hz *= 2; - if (numOdmPipes == 4) + } else if (numOdmPipes == 4) { pix_clk_100hz *= 4; + } else if (se && se->funcs->get_pixels_per_cycle) { + uint32_t pixels_per_cycle = se->funcs->get_pixels_per_cycle(se); + + if (pixels_per_cycle != 1 && !dc->debug.enable_dp_dig_pixel_rate_div_policy) + return false; + + pix_clk_100hz *= pixels_per_cycle; + } // Note: In rare cases, HW pixclk may differ from crtc's pixclk // slightly due to rounding issues in 10 kHz units. @@ -2100,12 +2035,12 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c if (context->stream_count > get_seamless_boot_stream_count(context) || context->stream_count == 0) { /* Must wait for no flips to be pending before doing optimize bw */ - wait_for_no_pipes_pending(dc, context); + hwss_wait_for_no_pipes_pending(dc, context); /* * optimized dispclk depends on ODM setup. Need to wait for ODM * update pending complete before optimizing bandwidth. */ - wait_for_odm_update_pending_complete(dc, context); + hwss_wait_for_odm_update_pending_complete(dc, context); /* pplib is notified if disp_num changed */ dc->hwss.optimize_bandwidth(dc, context); /* Need to do otg sync again as otg could be out of sync due to otg @@ -2716,6 +2651,10 @@ static enum surface_update_type check_update_surfaces_for_stream( overall_type = UPDATE_TYPE_FULL; } + if (stream_update && stream_update->hw_cursor_req) { + overall_type = UPDATE_TYPE_FULL; + } + /* some stream updates require passive update */ if (stream_update) { union stream_update_flags *su_flags = &stream_update->stream->update_flags; @@ -3011,6 +2950,9 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->vrr_infopacket) stream->vrr_infopacket = *update->vrr_infopacket; + if (update->hw_cursor_req) + stream->hw_cursor_req = *update->hw_cursor_req; + if (update->allow_freesync) stream->allow_freesync = *update->allow_freesync; @@ -3704,7 +3646,8 @@ static void commit_planes_for_stream_fast(struct dc *dc, struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (pipe->stream && pipe->plane_state) { - set_p_state_switch_method(dc, context, pipe); + if (!dc->debug.using_dml2) + set_p_state_switch_method(dc, context, pipe); if (dc->debug.visual_confirm) dc_update_visual_confirm_color(dc, context, pipe); @@ -3739,7 +3682,7 @@ static void commit_planes_for_stream_fast(struct dc *dc, surface_count, stream, context); - } else { + } else if (stream_status) { build_dmub_cmd_list(dc, srf_updates, surface_count, @@ -3769,47 +3712,6 @@ static void commit_planes_for_stream_fast(struct dc *dc, top_pipe_to_program->stream->update_flags.raw = 0; } -static void wait_for_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) -{ -/* - * This function calls HWSS to wait for any potentially double buffered - * operations to complete. It should be invoked as a pre-amble prior - * to full update programming before asserting any HW locks. - */ - int pipe_idx; - int opp_inst; - int opp_count = dc->res_pool->res_cap->num_opp; - struct hubp *hubp; - int mpcc_inst; - const struct pipe_ctx *pipe_ctx; - - for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) { - pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx]; - - if (!pipe_ctx->stream) - continue; - - if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear) - pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg); - - hubp = pipe_ctx->plane_res.hubp; - if (!hubp) - continue; - - mpcc_inst = hubp->inst; - // MPCC inst is equal to pipe index in practice - for (opp_inst = 0; opp_inst < opp_count; opp_inst++) { - if ((dc->res_pool->opps[opp_inst] != NULL) && - (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst])) { - dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst); - dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false; - break; - } - } - } - wait_for_odm_update_pending_complete(dc, dc_context); -} - static void commit_planes_for_stream(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, @@ -3833,13 +3735,14 @@ static void commit_planes_for_stream(struct dc *dc, dc_z10_restore(dc); if (update_type == UPDATE_TYPE_FULL) - wait_for_outstanding_hw_updates(dc, context); + hwss_process_outstanding_hw_updates(dc, dc->current_state); for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (pipe->stream && pipe->plane_state) { - set_p_state_switch_method(dc, context, pipe); + if (!dc->debug.using_dml2) + set_p_state_switch_method(dc, context, pipe); if (dc->debug.visual_confirm) dc_update_visual_confirm_color(dc, context, pipe); @@ -4127,7 +4030,8 @@ static void commit_planes_for_stream(struct dc *dc, } if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) - if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { + if (top_pipe_to_program && + top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { top_pipe_to_program->stream_res.tg->funcs->wait_for_state( top_pipe_to_program->stream_res.tg, CRTC_STATE_VACTIVE); @@ -4335,7 +4239,8 @@ static void backup_and_set_minimal_pipe_split_policy(struct dc *dc, dc->debug.force_disable_subvp = true; for (i = 0; i < context->stream_count; i++) { policy->force_odm[i] = context->streams[i]->debug.force_odm_combine_segments; - context->streams[i]->debug.force_odm_combine_segments = 0; + if (context->streams[i]->debug.allow_transition_for_forced_odm) + context->streams[i]->debug.force_odm_combine_segments = 0; } } @@ -4686,7 +4591,7 @@ static bool commit_minimal_transition_state(struct dc *dc, return true; } -static void populate_fast_updates(struct dc_fast_update *fast_update, +void populate_fast_updates(struct dc_fast_update *fast_update, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_update *stream_update) @@ -4696,6 +4601,9 @@ static void populate_fast_updates(struct dc_fast_update *fast_update, if (stream_update) { fast_update[0].out_transfer_func = stream_update->out_transfer_func; fast_update[0].output_csc_transform = stream_update->output_csc_transform; + } else { + fast_update[0].out_transfer_func = NULL; + fast_update[0].output_csc_transform = NULL; } for (i = 0; i < surface_count; i++) { @@ -4729,6 +4637,26 @@ static bool fast_updates_exist(struct dc_fast_update *fast_update, int surface_c return false; } +bool fast_nonaddr_updates_exist(struct dc_fast_update *fast_update, int surface_count) +{ + int i; + + if (fast_update[0].out_transfer_func || + fast_update[0].output_csc_transform) + return true; + + for (i = 0; i < surface_count; i++) { + if (fast_update[i].input_csc_color_matrix || + fast_update[i].gamma || + fast_update[i].gamut_remap_matrix || + fast_update[i].coeff_reduction_factor || + fast_update[i].cursor_csc_color_matrix) + return true; + } + + return false; +} + static bool full_update_required(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, @@ -5382,7 +5310,8 @@ void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const if (allow == dc->idle_optimizations_allowed) return; - if (dc->hwss.apply_idle_power_optimizations && dc->hwss.apply_idle_power_optimizations(dc, allow)) + if (dc->hwss.apply_idle_power_optimizations && dc->clk_mgr != NULL && + dc->hwss.apply_idle_power_optimizations(dc, allow)) dc->idle_optimizations_allowed = allow; } @@ -5451,9 +5380,10 @@ static void blank_and_force_memclk(struct dc *dc, bool apply, unsigned int memcl hubp->funcs->set_blank_regs(hubp, true); } } - - dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, memclk_mhz); - dc->clk_mgr->funcs->set_min_memclk(dc->clk_mgr, memclk_mhz); + if (dc->clk_mgr->funcs->set_max_memclk) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, memclk_mhz); + if (dc->clk_mgr->funcs->set_min_memclk) + dc->clk_mgr->funcs->set_min_memclk(dc->clk_mgr, memclk_mhz); for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; @@ -5502,7 +5432,7 @@ void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable) if (enable && !dc->clk_mgr->dc_mode_softmax_enabled) { if (p_state_change_support) { - if (funcMin <= softMax) + if (funcMin <= softMax && dc->clk_mgr->funcs->set_max_memclk) dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, softMax); // else: No-Op } else { @@ -5512,7 +5442,7 @@ void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable) } } else if (!enable && dc->clk_mgr->dc_mode_softmax_enabled) { if (p_state_change_support) { - if (funcMin <= softMax) + if (funcMin <= softMax && dc->clk_mgr->funcs->set_max_memclk) dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, maxDPM); // else: No-Op } else { @@ -5563,6 +5493,9 @@ void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc) */ bool dc_is_dmub_outbox_supported(struct dc *dc) { + if (!dc->caps.dmcub_support) + return false; + switch (dc->ctx->asic_id.chip_family) { case FAMILY_YELLOW_CARP: diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 87e36d51c56d..7ee2be8f82c4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -636,57 +636,59 @@ void hwss_build_fast_sequence(struct dc *dc, while (current_pipe) { current_mpc_pipe = current_pipe; while (current_mpc_pipe) { - if (dc->hwss.set_flip_control_gsl && current_mpc_pipe->plane_state && current_mpc_pipe->plane_state->update_flags.raw) { - block_sequence[*num_steps].params.set_flip_control_gsl_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].params.set_flip_control_gsl_params.flip_immediate = current_mpc_pipe->plane_state->flip_immediate; - block_sequence[*num_steps].func = HUBP_SET_FLIP_CONTROL_GSL; - (*num_steps)++; - } - if (dc->hwss.program_triplebuffer && dc->debug.enable_tri_buf && current_mpc_pipe->plane_state->update_flags.raw) { - block_sequence[*num_steps].params.program_triplebuffer_params.dc = dc; - block_sequence[*num_steps].params.program_triplebuffer_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].params.program_triplebuffer_params.enableTripleBuffer = current_mpc_pipe->plane_state->triplebuffer_flips; - block_sequence[*num_steps].func = HUBP_PROGRAM_TRIPLEBUFFER; - (*num_steps)++; - } - if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) { - if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) && - stream_status->mall_stream_config.type == SUBVP_MAIN) { - block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv; - block_sequence[*num_steps].params.subvp_save_surf_addr.addr = ¤t_mpc_pipe->plane_state->address; - block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index; - block_sequence[*num_steps].func = DMUB_SUBVP_SAVE_SURF_ADDR; + if (current_mpc_pipe->plane_state) { + if (dc->hwss.set_flip_control_gsl && current_mpc_pipe->plane_state->update_flags.raw) { + block_sequence[*num_steps].params.set_flip_control_gsl_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].params.set_flip_control_gsl_params.flip_immediate = current_mpc_pipe->plane_state->flip_immediate; + block_sequence[*num_steps].func = HUBP_SET_FLIP_CONTROL_GSL; + (*num_steps)++; + } + if (dc->hwss.program_triplebuffer && dc->debug.enable_tri_buf && current_mpc_pipe->plane_state->update_flags.raw) { + block_sequence[*num_steps].params.program_triplebuffer_params.dc = dc; + block_sequence[*num_steps].params.program_triplebuffer_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].params.program_triplebuffer_params.enableTripleBuffer = current_mpc_pipe->plane_state->triplebuffer_flips; + block_sequence[*num_steps].func = HUBP_PROGRAM_TRIPLEBUFFER; + (*num_steps)++; + } + if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) { + if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) && + stream_status->mall_stream_config.type == SUBVP_MAIN) { + block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv; + block_sequence[*num_steps].params.subvp_save_surf_addr.addr = ¤t_mpc_pipe->plane_state->address; + block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index; + block_sequence[*num_steps].func = DMUB_SUBVP_SAVE_SURF_ADDR; + (*num_steps)++; + } + + block_sequence[*num_steps].params.update_plane_addr_params.dc = dc; + block_sequence[*num_steps].params.update_plane_addr_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = HUBP_UPDATE_PLANE_ADDR; (*num_steps)++; } - block_sequence[*num_steps].params.update_plane_addr_params.dc = dc; - block_sequence[*num_steps].params.update_plane_addr_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = HUBP_UPDATE_PLANE_ADDR; - (*num_steps)++; - } + if (hws->funcs.set_input_transfer_func && current_mpc_pipe->plane_state->update_flags.bits.gamma_change) { + block_sequence[*num_steps].params.set_input_transfer_func_params.dc = dc; + block_sequence[*num_steps].params.set_input_transfer_func_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].params.set_input_transfer_func_params.plane_state = current_mpc_pipe->plane_state; + block_sequence[*num_steps].func = DPP_SET_INPUT_TRANSFER_FUNC; + (*num_steps)++; + } - if (hws->funcs.set_input_transfer_func && current_mpc_pipe->plane_state->update_flags.bits.gamma_change) { - block_sequence[*num_steps].params.set_input_transfer_func_params.dc = dc; - block_sequence[*num_steps].params.set_input_transfer_func_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].params.set_input_transfer_func_params.plane_state = current_mpc_pipe->plane_state; - block_sequence[*num_steps].func = DPP_SET_INPUT_TRANSFER_FUNC; - (*num_steps)++; - } - - if (dc->hwss.program_gamut_remap && current_mpc_pipe->plane_state->update_flags.bits.gamut_remap_change) { - block_sequence[*num_steps].params.program_gamut_remap_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = DPP_PROGRAM_GAMUT_REMAP; - (*num_steps)++; - } - if (current_mpc_pipe->plane_state->update_flags.bits.input_csc_change) { - block_sequence[*num_steps].params.setup_dpp_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = DPP_SETUP_DPP; - (*num_steps)++; - } - if (current_mpc_pipe->plane_state->update_flags.bits.coeff_reduction_change) { - block_sequence[*num_steps].params.program_bias_and_scale_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = DPP_PROGRAM_BIAS_AND_SCALE; - (*num_steps)++; + if (dc->hwss.program_gamut_remap && current_mpc_pipe->plane_state->update_flags.bits.gamut_remap_change) { + block_sequence[*num_steps].params.program_gamut_remap_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = DPP_PROGRAM_GAMUT_REMAP; + (*num_steps)++; + } + if (current_mpc_pipe->plane_state->update_flags.bits.input_csc_change) { + block_sequence[*num_steps].params.setup_dpp_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = DPP_SETUP_DPP; + (*num_steps)++; + } + if (current_mpc_pipe->plane_state->update_flags.bits.coeff_reduction_change) { + block_sequence[*num_steps].params.program_bias_and_scale_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = DPP_PROGRAM_BIAS_AND_SCALE; + (*num_steps)++; + } } if (hws->funcs.set_output_transfer_func && current_mpc_pipe->stream->update_flags.bits.out_tf) { block_sequence[*num_steps].params.set_output_transfer_func_params.dc = dc; @@ -901,12 +903,12 @@ void hwss_program_bias_and_scale(union block_sequence_params *params) struct pipe_ctx *pipe_ctx = params->program_bias_and_scale_params.pipe_ctx; struct dpp *dpp = pipe_ctx->plane_res.dpp; struct dc_plane_state *plane_state = pipe_ctx->plane_state; - struct dc_bias_and_scale bns_params = {0}; + struct dc_bias_and_scale bns_params = plane_state->bias_and_scale; //TODO :for CNVC set scale and bias registers if necessary - build_prescale_params(&bns_params, plane_state); - if (dpp->funcs->dpp_program_bias_and_scale) + if (dpp->funcs->dpp_program_bias_and_scale) { dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); + } } void hwss_power_on_mpc_mem_pwr(union block_sequence_params *params) @@ -976,3 +978,126 @@ void get_surface_tile_visual_confirm_color( break; } } + +/** + * hwss_wait_for_all_blank_complete - wait for all active OPPs to finish pending blank + * pattern updates + * + * @dc: [in] dc reference + * @context: [in] hardware context in use + */ +void hwss_wait_for_all_blank_complete(struct dc *dc, + struct dc_state *context) +{ + struct pipe_ctx *opp_head; + struct dce_hwseq *hws = dc->hwseq; + int i; + + if (!hws->funcs.wait_for_blank_complete) + return; + + for (i = 0; i < MAX_PIPES; i++) { + opp_head = &context->res_ctx.pipe_ctx[i]; + + if (!resource_is_pipe_type(opp_head, OPP_HEAD) || + dc_state_get_pipe_subvp_type(context, opp_head) == SUBVP_PHANTOM) + continue; + + hws->funcs.wait_for_blank_complete(opp_head->stream_res.opp); + } +} + +void hwss_wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *context) +{ + struct pipe_ctx *otg_master; + struct timing_generator *tg; + int i; + + for (i = 0; i < MAX_PIPES; i++) { + otg_master = &context->res_ctx.pipe_ctx[i]; + if (!resource_is_pipe_type(otg_master, OTG_MASTER) || + dc_state_get_pipe_subvp_type(context, otg_master) == SUBVP_PHANTOM) + continue; + tg = otg_master->stream_res.tg; + if (tg->funcs->wait_odm_doublebuffer_pending_clear) + tg->funcs->wait_odm_doublebuffer_pending_clear(tg); + } + + /* ODM update may require to reprogram blank pattern for each OPP */ + hwss_wait_for_all_blank_complete(dc, context); +} + +void hwss_wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) +{ + int i; + for (i = 0; i < MAX_PIPES; i++) { + int count = 0; + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) + continue; + + /* Timeout 100 ms */ + while (count < 100000) { + /* Must set to false to start with, due to OR in update function */ + pipe->plane_state->status.is_flip_pending = false; + dc->hwss.update_pending_status(pipe); + if (!pipe->plane_state->status.is_flip_pending) + break; + udelay(1); + count++; + } + ASSERT(!pipe->plane_state->status.is_flip_pending); + } +} + +void hwss_wait_for_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) +{ +/* + * This function calls HWSS to wait for any potentially double buffered + * operations to complete. It should be invoked as a pre-amble prior + * to full update programming before asserting any HW locks. + */ + int pipe_idx; + int opp_inst; + int opp_count = dc->res_pool->res_cap->num_opp; + struct hubp *hubp; + int mpcc_inst; + const struct pipe_ctx *pipe_ctx; + + for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) { + pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx]; + + if (!pipe_ctx->stream) + continue; + + if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear) + pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg); + + hubp = pipe_ctx->plane_res.hubp; + if (!hubp) + continue; + + mpcc_inst = hubp->inst; + // MPCC inst is equal to pipe index in practice + for (opp_inst = 0; opp_inst < opp_count; opp_inst++) { + if ((dc->res_pool->opps[opp_inst] != NULL) && + (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst])) { + dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst); + dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false; + break; + } + } + } + hwss_wait_for_odm_update_pending_complete(dc, dc_context); +} + +void hwss_process_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) +{ + /* wait for outstanding updates */ + hwss_wait_for_outstanding_hw_updates(dc, dc_context); + + /* perform outstanding post update programming */ + if (dc->hwss.program_outstanding_updates) + dc->hwss.program_outstanding_updates(dc, dc_context); +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index bcb5267b5a6b..ef585a89847b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -342,11 +342,6 @@ struct resource_pool *dc_create_resource_pool(struct dc *dc, res_pool->ref_clocks.xtalin_clock_inKhz; res_pool->ref_clocks.dchub_ref_clock_inKhz = res_pool->ref_clocks.xtalin_clock_inKhz; - if (dc->debug.using_dml2) - if (res_pool->hubbub && res_pool->hubbub->funcs->get_dchub_ref_freq) - res_pool->hubbub->funcs->get_dchub_ref_freq(res_pool->hubbub, - res_pool->ref_clocks.dccg_ref_clock_inKhz, - &res_pool->ref_clocks.dchub_ref_clock_inKhz); } else ASSERT_CRITICAL(false); } @@ -3241,6 +3236,8 @@ static bool are_stream_backends_same( bool dc_is_stream_unchanged( struct dc_stream_state *old_stream, struct dc_stream_state *stream) { + if (!old_stream || !stream) + return false; if (!are_stream_backends_same(old_stream, stream)) return false; @@ -3771,8 +3768,10 @@ static bool planes_changed_for_existing_stream(struct dc_state *context, } } - if (!stream_status) + if (!stream_status) { ASSERT(0); + return false; + } for (i = 0; i < set_count; i++) if (set[i].stream == stream) @@ -5164,7 +5163,7 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy( sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; if (sec_pipe->stream->timing.flags.DSC == 1) { #if defined(CONFIG_DRM_AMD_DC_FP) - dcn20_acquire_dsc(dc, &state->res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); + dcn20_acquire_dsc(dc, &state->res_ctx, &sec_pipe->stream_res.dsc, sec_pipe->stream_res.opp->inst); #endif ASSERT(sec_pipe->stream_res.dsc); if (sec_pipe->stream_res.dsc == NULL) @@ -5271,3 +5270,44 @@ void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuratio dml2_options->svp_pstate.callbacks.remove_phantom_streams_and_planes = &dc_state_remove_phantom_streams_and_planes; dml2_options->svp_pstate.callbacks.release_phantom_streams_and_planes = &dc_state_release_phantom_streams_and_planes; } + +/* Returns number of DET segments allocated for a given OTG_MASTER pipe */ +int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *otg_master) +{ + struct pipe_ctx *opp_heads[MAX_PIPES]; + struct pipe_ctx *dpp_pipes[MAX_PIPES]; + + int dpp_count = 0; + int det_segments = 0; + + if (!otg_master->stream) + return 0; + + int slice_count = resource_get_opp_heads_for_otg_master(otg_master, + &state->res_ctx, opp_heads); + + for (int slice_idx = 0; slice_idx < slice_count; slice_idx++) { + if (opp_heads[slice_idx]->plane_state) { + dpp_count = resource_get_dpp_pipes_for_opp_head( + opp_heads[slice_idx], + &state->res_ctx, + dpp_pipes); + for (int dpp_idx = 0; dpp_idx < dpp_count; dpp_idx++) + det_segments += dpp_pipes[dpp_idx]->hubp_regs.det_size; + } + } + return det_segments; +} + +bool resource_is_hpo_acquired(struct dc_state *context) +{ + int i; + + for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) { + if (context->res_ctx.is_hpo_dp_stream_enc_acquired[i]) { + return true; + } + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c index cd6570a1e20e..fe9f99f1bdf9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c @@ -61,6 +61,7 @@ void dc_stat_get_dmub_notification(const struct dc *dc, struct dmub_notification /* For HPD/HPD RX, convert dpia port index into link index */ if (notify->type == DMUB_NOTIFICATION_HPD || notify->type == DMUB_NOTIFICATION_HPD_IRQ || + notify->type == DMUB_NOTIFICATION_AUX_REPLY || notify->type == DMUB_NOTIFICATION_DPIA_NOTIFICATION || notify->type == DMUB_NOTIFICATION_SET_CONFIG_REPLY) { notify->link_index = diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index e990346e51f6..2597e3fd562b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -211,10 +211,16 @@ struct dc_state *dc_state_create(struct dc *dc, struct dc_state_create_params *p #ifdef CONFIG_DRM_AMD_DC_FP if (dc->debug.using_dml2) { dml2_opt->use_clock_dc_limits = false; - dml2_create(dc, dml2_opt, &state->bw_ctx.dml2); + if (!dml2_create(dc, dml2_opt, &state->bw_ctx.dml2)) { + dc_state_release(state); + return NULL; + } dml2_opt->use_clock_dc_limits = true; - dml2_create(dc, dml2_opt, &state->bw_ctx.dml2_dc_power_source); + if (!dml2_create(dc, dml2_opt, &state->bw_ctx.dml2_dc_power_source)) { + dc_state_release(state); + return NULL; + } } #endif @@ -961,10 +967,10 @@ bool dc_state_is_fams2_in_use( bool is_fams2_in_use = false; if (state) - is_fams2_in_use |= state->bw_ctx.bw.dcn.fams2_stream_count > 0; + is_fams2_in_use |= state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; if (dc->current_state) - is_fams2_in_use |= dc->current_state->bw_ctx.bw.dcn.fams2_stream_count > 0; + is_fams2_in_use |= dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; return is_fams2_in_use; } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 73cdebcd9f37..5bbc7d2daca6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.291" +#define DC_VER "3.2.297" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -261,10 +261,7 @@ struct dc_caps { bool zstate_support; bool ips_support; uint32_t num_of_internal_disp; - uint32_t max_dwb_htap; - uint32_t max_dwb_vtap; enum dp_protocol_version max_dp_protocol_version; - bool spdif_aud; unsigned int mall_size_per_mem_channel; unsigned int mall_size_total; unsigned int cursor_cache_size; @@ -309,8 +306,6 @@ struct dc_bug_wa { uint8_t dcfclk_ds: 1; } clock_update_disable_mask; bool skip_psr_ips_crtc_disable; - //Customer Specific WAs - uint32_t force_backlight_start_level; }; struct dc_dcc_surface_param { struct dc_size surface_size; @@ -466,6 +461,7 @@ struct dc_config { bool use_assr_psp_message; bool support_edp0_on_dp1; unsigned int enable_fpo_flicker_detection; + bool disable_hbr_audio_dp2; }; enum visual_confirm { @@ -513,6 +509,7 @@ enum in_game_fams_config { INGAME_FAMS_SINGLE_DISP_ENABLE, // enable in-game fams INGAME_FAMS_DISABLE, // disable in-game fams INGAME_FAMS_MULTI_DISP_ENABLE, //enable in-game fams for multi-display + INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY, //enable in-game fams for multi-display only for clamped RR strategies }; /** @@ -981,6 +978,7 @@ struct dc_debug_options { bool disable_z10; bool enable_z9_disable_interface; bool psr_skip_crtc_disable; + uint32_t ips_skip_crtc_disable_mask; union dpia_debug_options dpia_debug; bool disable_fixed_vs_aux_timeout_wa; uint32_t fixed_vs_aux_delay_config_wa; @@ -1055,6 +1053,7 @@ struct dc_debug_options { unsigned int force_sharpness; unsigned int force_lls; bool notify_dpia_hr_bw; + bool enable_ips_visual_confirm; }; @@ -1291,7 +1290,7 @@ struct dc_plane_state { struct dc_gamma gamma_correction; struct dc_transfer_func in_transfer_func; - struct dc_bias_and_scale *bias_and_scale; + struct dc_bias_and_scale bias_and_scale; struct dc_csc_transform input_csc_color_matrix; struct fixed31_32 coeff_reduction_factor; struct fixed31_32 hdr_mult; @@ -1368,7 +1367,6 @@ struct dc_plane_info { int global_alpha_value; bool input_csc_enabled; int layer_index; - bool front_buffer_rendering_active; enum chroma_cositing cositing; }; @@ -1585,6 +1583,12 @@ bool dc_acquire_release_mpc_3dlut( bool dc_resource_is_dsc_encoding_supported(const struct dc *dc); void get_audio_check(struct audio_info *aud_modes, struct audio_check *aud_chk); + +bool fast_nonaddr_updates_exist(struct dc_fast_update *fast_update, int surface_count); +void populate_fast_updates(struct dc_fast_update *fast_update, + struct dc_surface_update *srf_updates, + int surface_count, + struct dc_stream_update *stream_update); /* * Set up streams and links associated to drive sinks * The streams parameter is an absolute set of all active streams. @@ -1756,6 +1760,7 @@ struct dc_link { bool dongle_mode_timing_override; bool blank_stream_on_ocs_change; bool read_dpcd204h_on_irq_hpd; + bool disable_assr_for_uhbr; } wa_flags; struct link_mst_stream_allocation_table mst_stream_alloc_table; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index ded13026c8ff..b1265124608b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -979,6 +979,9 @@ void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv) DC_LOG_DEBUG(" inbox0_rptr : %08x", diag_data.inbox0_rptr); DC_LOG_DEBUG(" inbox0_wptr : %08x", diag_data.inbox0_wptr); DC_LOG_DEBUG(" inbox0_size : %08x", diag_data.inbox0_size); + DC_LOG_DEBUG(" outbox1_rptr : %08x", diag_data.outbox1_rptr); + DC_LOG_DEBUG(" outbox1_wptr : %08x", diag_data.outbox1_wptr); + DC_LOG_DEBUG(" outbox1_size : %08x", diag_data.outbox1_size); DC_LOG_DEBUG(" is_enabled : %d", diag_data.is_dmcub_enabled); DC_LOG_DEBUG(" is_soft_reset : %d", diag_data.is_dmcub_soft_reset); DC_LOG_DEBUG(" is_secure_reset : %d", diag_data.is_dmcub_secure_reset); @@ -1282,7 +1285,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) union dmub_shared_state_ips_driver_signals new_signals; DC_LOG_IPS( - "%s wait idle (ips1_commit=%d ips2_commit=%d)", + "%s wait idle (ips1_commit=%u ips2_commit=%u)", __func__, ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1328,7 +1331,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) } DC_LOG_IPS( - "%s send allow_idle=%d (ips1_commit=%d ips2_commit=%d)", + "%s send allow_idle=%d (ips1_commit=%u ips2_commit=%u)", __func__, allow_idle, ips_fw->signals.bits.ips1_commit, @@ -1371,7 +1374,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc_dmub_srv->driver_signals = ips_driver->signals; DC_LOG_IPS( - "%s (allow ips1=%d ips2=%d) (commit ips1=%d ips2=%d) (count rcg=%d ips1=%d ips2=%d)", + "%s (allow ips1=%u ips2=%u) (commit ips1=%u ips2=%u) (count rcg=%u ips1=%u ips2=%u)", __func__, ips_driver->signals.bits.allow_ips1, ips_driver->signals.bits.allow_ips2, @@ -1390,7 +1393,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) (!dc->debug.optimize_ips_handshake || ips_fw->signals.bits.ips2_commit || !ips_fw->signals.bits.in_idle)) { DC_LOG_IPS( - "wait IPS2 eval (ips1_commit=%d ips2_commit=%d)", + "wait IPS2 eval (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1399,7 +1402,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) if (ips_fw->signals.bits.ips2_commit) { DC_LOG_IPS( - "exit IPS2 #1 (ips1_commit=%d ips2_commit=%d)", + "exit IPS2 #1 (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1407,7 +1410,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); DC_LOG_IPS( - "wait IPS2 entry delay (ips1_commit=%d ips2_commit=%d)", + "wait IPS2 entry delay (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1415,14 +1418,14 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) udelay(dc->debug.ips2_entry_delay_us); DC_LOG_IPS( - "exit IPS2 #2 (ips1_commit=%d ips2_commit=%d)", + "exit IPS2 #2 (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); DC_LOG_IPS( - "wait IPS2 commit clear (ips1_commit=%d ips2_commit=%d)", + "wait IPS2 commit clear (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1430,7 +1433,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) udelay(1); DC_LOG_IPS( - "wait hw_pwr_up (ips1_commit=%d ips2_commit=%d)", + "wait hw_pwr_up (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1438,7 +1441,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) ASSERT(0); DC_LOG_IPS( - "resync inbox1 (ips1_commit=%d ips2_commit=%d)", + "resync inbox1 (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1449,7 +1452,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc_dmub_srv_notify_idle(dc, false); if (prev_driver_signals.bits.allow_ips1) { DC_LOG_IPS( - "wait for IPS1 commit clear (ips1_commit=%d ips2_commit=%d)", + "wait for IPS1 commit clear (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1457,7 +1460,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) udelay(1); DC_LOG_IPS( - "wait for IPS1 commit clear done (ips1_commit=%d ips2_commit=%d)", + "wait for IPS1 commit clear done (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); } @@ -1466,7 +1469,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true)) ASSERT(0); - DC_LOG_IPS("%s exit (count rcg=%d ips1=%d ips2=%d)", + DC_LOG_IPS("%s exit (count rcg=%u ips1=%u ips2=%u)", __func__, rcg_exit_count, ips1_exit_count, @@ -1672,22 +1675,17 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; global_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); - /* send global configuration parameters */ - global_cmd->config.global.max_allow_delay_us = 100 * 1000; //100ms - global_cmd->config.global.lock_wait_time_us = 5000; //5ms - global_cmd->config.global.recovery_timeout_us = 5000; //5ms - global_cmd->config.global.hwfq_flip_programming_delay_us = 100; //100us - - /* copy static feature configuration */ - global_cmd->config.global.features.all = dc->debug.fams2_config.all; - - /* apply feature configuration based on current driver state */ - global_cmd->config.global.features.bits.enable_visual_confirm = dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2; - global_cmd->config.global.features.bits.enable = enable; - - /* construct per-stream configs */ if (enable) { - for (i = 0; i < context->bw_ctx.bw.dcn.fams2_stream_count; i++) { + /* send global configuration parameters */ + memcpy(&global_cmd->config.global, &context->bw_ctx.bw.dcn.fams2_global_config, sizeof(struct dmub_cmd_fams2_global_config)); + + /* copy static feature configuration overrides */ + global_cmd->config.global.features.bits.enable_stall_recovery = dc->debug.fams2_config.bits.enable_stall_recovery; + global_cmd->config.global.features.bits.enable_debug = dc->debug.fams2_config.bits.enable_debug; + global_cmd->config.global.features.bits.enable_offload_flip = dc->debug.fams2_config.bits.enable_offload_flip; + + /* construct per-stream configs */ + for (i = 0; i < context->bw_ctx.bw.dcn.fams2_global_config.num_streams; i++) { struct dmub_rb_cmd_fams2 *stream_cmd = &cmd[i+1].fams2_config; /* configure command header */ @@ -1702,12 +1700,15 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, } } - if (enable && context->bw_ctx.bw.dcn.fams2_stream_count) { + /* apply feature configuration based on current driver state */ + global_cmd->config.global.features.bits.enable_visual_confirm = dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2; + global_cmd->config.global.features.bits.enable = enable; + + if (enable && context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) { /* set multi pending for global, and unset for last stream cmd */ - global_cmd->config.global.num_streams = context->bw_ctx.bw.dcn.fams2_stream_count; global_cmd->header.multi_cmd_pending = 1; - cmd[context->bw_ctx.bw.dcn.fams2_stream_count].fams2_config.header.multi_cmd_pending = 0; - num_cmds += context->bw_ctx.bw.dcn.fams2_stream_count; + cmd[context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config.header.multi_cmd_pending = 0; + num_cmds += context->bw_ctx.bw.dcn.fams2_global_config.num_streams; } dm_execute_dmub_cmd_list(dc->ctx, num_cmds, cmd, DM_DMUB_WAIT_TYPE_WAIT); diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 959ae0df1e56..c10567ec1c81 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -763,13 +763,6 @@ enum scanning_type { SCANNING_TYPE_UNDEFINED }; -enum chroma_cositing { - CHROMA_COSITING_NONE, - CHROMA_COSITING_LEFT, - CHROMA_COSITING_TOPLEFT, - CHROMA_COSITING_COUNT -}; - struct dc_crtc_timing_flags { uint32_t INTERLACE :1; uint32_t HSYNC_POSITIVE_POLARITY :1; /* when set to 1, diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index 582606319764..8f85a1db5eba 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -128,6 +128,7 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->basic_out.always_scale = pipe_ctx->stream->ctx->dc->debug.always_scale; // Make spl input basic output info alpha_en field point to plane res scl_data lb_params alpha_en spl_in->basic_out.alpha_en = pipe_ctx->plane_res.scl_data.lb_params.alpha_en; + spl_in->basic_out.use_two_pixels_per_container = pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing); // Make spl input basic input info scaling quality field point to plane state scaling_quality populate_spltaps_from_taps(&spl_in->scaling_quality, &plane_state->scaling_quality); // Translate edge adaptive scaler preference @@ -170,7 +171,6 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl /* Translate transfer function */ spl_in->basic_in.tf_type = (enum spl_transfer_func_type) plane_state->in_transfer_func.type; spl_in->basic_in.tf_predefined_type = (enum spl_transfer_func_predefined) plane_state->in_transfer_func.tf; - } /// @brief Translate SPL output parameters to pipe context diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 8ebd7e9e776e..de9bd72ca514 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -159,6 +159,12 @@ struct test_pattern { struct dc_stream_debug_options { char force_odm_combine_segments; + /* + * When force_odm_combine_segments is non zero, allow dc to + * temporarily transition to ODM bypass when minimal transition state + * is required to prevent visual glitches showing on the screen + */ + char allow_transition_for_forced_odm; }; #define LUMINANCE_DATA_TABLE_SIZE 10 @@ -260,6 +266,8 @@ struct dc_stream_state { struct dc_cursor_attributes cursor_attributes; struct dc_cursor_position cursor_position; + bool hw_cursor_req; + uint32_t sdr_white_level; // for boosting (SDR) cursor in HDR mode /* from stream struct */ @@ -344,6 +352,7 @@ struct dc_stream_update { struct dc_cursor_attributes *cursor_attributes; struct dc_cursor_position *cursor_position; + bool *hw_cursor_req; }; bool dc_is_stream_unchanged( diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index c550e8997033..97279b080f3e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -590,6 +590,7 @@ enum dc_psr_state { PSR_STATE5c, PSR_STATE_HWLOCK_MGR, PSR_STATE_POLLVUPDATE, + PSR_STATE_RELEASE_HWLOCK_MGR_FULL_FRAME, PSR_STATE_INVALID = 0xFF }; diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h index 1e0292861244..6ac2bd86c4db 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h @@ -346,11 +346,7 @@ type SYMCLK32_LE3_SRC_SEL;\ type SYMCLK32_LE2_EN;\ type SYMCLK32_LE3_EN;\ - type DP_DTO_ENABLE[MAX_PIPES];\ - type DSCCLK0_DTO_DB_EN;\ - type DSCCLK1_DTO_DB_EN;\ - type DSCCLK2_DTO_DB_EN;\ - type DSCCLK3_DTO_DB_EN; + type DP_DTO_ENABLE[MAX_PIPES]; struct dccg_shift { DCCG_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 68cd3258f4a9..7f91e48902e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -41,6 +41,1041 @@ #define DC_LOGGER \ dccg->ctx->logger +enum symclk_fe_source { + SYMCLK_FE_SYMCLK_A = 0, // Select functional clock from backend symclk A + SYMCLK_FE_SYMCLK_B, + SYMCLK_FE_SYMCLK_C, + SYMCLK_FE_SYMCLK_D, + SYMCLK_FE_SYMCLK_E, + SYMCLK_FE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum symclk_be_source { + SYMCLK_BE_PHYCLK = 0, // Select phy clk when sym_clk_enable = 1 + SYMCLK_BE_DPIACLK_810 = 4, + SYMCLK_BE_DPIACLK_162 = 5, + SYMCLK_BE_DPIACLK_540 = 6, + SYMCLK_BE_DPIACLK_270 = 7, + SYMCLK_BE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum physymclk_source { + PHYSYMCLK_PHYCLK = 0, // Select symclk as source of clock which is output to PHY through DCIO. + PHYSYMCLK_PHYD18CLK, // Select phyd18clk as the source of clock which is output to PHY through DCIO. + PHYSYMCLK_PHYD32CLK, // Select phyd32clk as the source of clock which is output to PHY through DCIO. + PHYSYMCLK_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum dtbclk_source { + DTBCLK_DPREFCLK = 0, // Selects source for DTBCLK_P# as DPREFCLK (src sel 0 and 1 are same) + DTBCLK_DPREFCLK_0, // Selects source for DTBCLK_P# as DPREFCLK (src sel 0 and 1 are same) + DTBCLK_DTBCLK0, // Selects source for DTBCLK_P# as DTBCLK0 + DTBCLK_DTBCLK1, // Selects source for DTBCLK_P# as DTBCLK0 + DTBCLK_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum dppclk_clock_source { + DPP_REFCLK = 0, // refclk is selected + DPP_DCCG_DTO, // Functional clock selected is DTO tuned DPPCLK +}; + +enum dp_stream_clk_source { + DP_STREAM_DTBCLK_P0 = 0, // Selects functional for DP_STREAM_CLK as DTBCLK_P# + DP_STREAM_DTBCLK_P1, + DP_STREAM_DTBCLK_P2, + DP_STREAM_DTBCLK_P3, + DP_STREAM_DTBCLK_P4, + DP_STREAM_DTBCLK_P5, + DP_STREAM_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum hdmi_char_clk { + HDMI_CHAR_PHYAD18CLK = 0, // Selects functional for hdmi_char_clk as UNIPHYA PHYD18CLK + HDMI_CHAR_PHYBD18CLK, + HDMI_CHAR_PHYCD18CLK, + HDMI_CHAR_PHYDD18CLK, + HDMI_CHAR_PHYED18CLK, + HDMI_CHAR_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum hdmi_stream_clk_source { + HDMI_STREAM_DTBCLK_P0 = 0, // Selects functional for HDMI_STREAM_CLK as DTBCLK_P# + HDMI_STREAM_DTBCLK_P1, + HDMI_STREAM_DTBCLK_P2, + HDMI_STREAM_DTBCLK_P3, + HDMI_STREAM_DTBCLK_P4, + HDMI_STREAM_DTBCLK_P5, + HDMI_STREAM_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum symclk32_se_clk_source { + SYMCLK32_SE_PHYAD32CLK = 0, // Selects functional for SYMCLK32 as UNIPHYA PHYD32CLK + SYMCLK32_SE_PHYBD32CLK, + SYMCLK32_SE_PHYCD32CLK, + SYMCLK32_SE_PHYDD32CLK, + SYMCLK32_SE_PHYED32CLK, + SYMCLK32_SE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum symclk32_le_clk_source { + SYMCLK32_LE_PHYAD32CLK = 0, // Selects functional for SYMCLK32 as UNIPHYA PHYD32CLK + SYMCLK32_LE_PHYBD32CLK, + SYMCLK32_LE_PHYCD32CLK, + SYMCLK32_LE_PHYDD32CLK, + SYMCLK32_LE_PHYED32CLK, + SYMCLK32_LE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum dsc_clk_source { + DSC_CLK_REF_CLK = 0, // Ref clock selected for DSC_CLK + DSC_DTO_TUNED_CK_GPU_DISCLK_3, // DTO divided clock selected as functional clock +}; + + +static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk32_se_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + return; + + /* SYMCLK32_ROOT_SE#_GATE_DISABLE will clock gate in DCCG */ + /* SYMCLK32_SE#_GATE_DISABLE will clock gate in HPO only */ + switch (inst) { + case 0: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE0_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE0_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE1_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE1_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE2_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE2_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE3_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE3_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk32_le_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + return; + + switch (inst) { + case 0: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_LE0_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_LE0_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_LE1_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_LE1_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_physymclk_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYASYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYBSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYCSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYDSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYESYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk_fe_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKA_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKB_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKC_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKD_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKE_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk_be_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* TBD add symclk_be in rcg control bits */ + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKA_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKB_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKC_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKD_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_dtbclk_p_rcg(struct dccg *dccg, int inst, bool enable) +{ + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } +} + +static void dccg35_set_dppclk_rcg(struct dccg *dccg, + int inst, bool enable) +{ + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } +} + +static void dccg35_set_dpstreamclk_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + return; + + switch (inst) { + case 0: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK0_GATE_DISABLE, enable ? 0 : 1, + DPSTREAMCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK1_GATE_DISABLE, enable ? 0 : 1, + DPSTREAMCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK2_GATE_DISABLE, enable ? 0 : 1, + DPSTREAMCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK3_GATE_DISABLE, enable ? 0 : 1, + DPSTREAMCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_smclk32_se_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + return; + + switch (inst) { + case 0: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE0_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE0_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE1_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE1_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE2_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE2_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE3_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE3_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_dsc_clk_src_new(struct dccg *dccg, int inst, enum dsc_clk_source src) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* DSCCLK#_EN=0 switches to refclock from functional clock */ + + switch (inst) { + case 0: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, src); + break; + case 1: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, src); + break; + case 2: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, src); + break; + case 3: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, src); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk32_se_src_new( + struct dccg *dccg, + int inst, + enum symclk32_se_clk_source src + ) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE0_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src, + SYMCLK32_SE0_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1); + break; + case 1: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE1_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src, + SYMCLK32_SE1_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1); + break; + case 2: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE2_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src, + SYMCLK32_SE2_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1); + break; + case 3: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE3_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src, + SYMCLK32_SE3_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static int +dccg35_is_symclk32_se_src_functional_le_new(struct dccg *dccg, int symclk_32_se_inst, int symclk_32_le_inst) +{ + uint32_t en; + uint32_t src_sel; + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + REG_GET_2(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, &src_sel, SYMCLK32_SE3_EN, &en); + + if (en == 1 && src_sel == symclk_32_le_inst) + return 1; + + return 0; +} + + +static void dccg35_set_symclk32_le_src_new( + struct dccg *dccg, + int inst, + enum symclk32_le_clk_source src) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(SYMCLK32_LE_CNTL, + SYMCLK32_LE0_SRC_SEL, (src == SYMCLK32_LE_REFCLK) ? 0 : src, + SYMCLK32_LE0_EN, (src == SYMCLK32_LE_REFCLK) ? 0 : 1); + break; + case 1: + REG_UPDATE_2(SYMCLK32_LE_CNTL, + SYMCLK32_LE1_SRC_SEL, (src == SYMCLK32_LE_REFCLK) ? 0 : src, + SYMCLK32_LE1_EN, (src == SYMCLK32_LE_REFCLK) ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dcn35_set_dppclk_src_new(struct dccg *dccg, + int inst, enum dppclk_clock_source src) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE(DPPCLK_CTRL, DPPCLK0_EN, src); + break; + case 1: + REG_UPDATE(DPPCLK_CTRL, DPPCLK1_EN, src); + break; + case 2: + REG_UPDATE(DPPCLK_CTRL, DPPCLK2_EN, src); + break; + case 3: + REG_UPDATE(DPPCLK_CTRL, DPPCLK3_EN, src); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } +} + +static void dccg35_set_dtbclk_p_src_new( + struct dccg *dccg, + enum dtbclk_source src, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* If DTBCLK_P#_EN is 0 refclock is selected as functional clock + * If DTBCLK_P#_EN is 1 functional clock is selected as DTBCLK_P#_SRC_SEL + */ + + switch (inst) { + case 0: + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P0_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src, + DTBCLK_P0_EN, (src == DTBCLK_REFCLK) ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P1_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src, + DTBCLK_P1_EN, (src == DTBCLK_REFCLK) ? 0 : 1); + break; + case 2: + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P2_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src, + DTBCLK_P2_EN, (src == DTBCLK_REFCLK) ? 0 : 1); + break; + case 3: + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P3_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src, + DTBCLK_P3_EN, (src == DTBCLK_REFCLK) ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_dpstreamclk_src_new( + struct dccg *dccg, + enum dp_stream_clk_source src, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, + (src == DP_STREAM_REFCLK) ? 0 : 1, + DPSTREAMCLK0_SRC_SEL, + (src == DP_STREAM_REFCLK) ? 0 : src); + break; + case 1: + REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, + (src == DP_STREAM_REFCLK) ? 0 : 1, + DPSTREAMCLK1_SRC_SEL, + (src == DP_STREAM_REFCLK) ? 0 : src); + + break; + case 2: + REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, + (src == DP_STREAM_REFCLK) ? 0 : 1, + DPSTREAMCLK2_SRC_SEL, + (src == DP_STREAM_REFCLK) ? 0 : src); + + break; + case 3: + REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, + (src == DP_STREAM_REFCLK) ? 0 : 1, + DPSTREAMCLK3_SRC_SEL, + (src == DP_STREAM_REFCLK) ? 0 : src); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_physymclk_src_new( + struct dccg *dccg, + enum physymclk_source src, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYASYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + case 1: + REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYBSYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + case 2: + REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYCSYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + case 3: + REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYDSYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + case 4: + REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYESYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk_be_src_new( + struct dccg *dccg, + enum symclk_be_source src, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE, + SYMCLKA_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKA_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 1: + REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE, + SYMCLKB_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKB_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 2: + REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE, + SYMCLKC_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKC_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 3: + REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE, + SYMCLKD_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKD_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 4: + REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE, + SYMCLKE_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKE_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + } +} + +static int dccg35_is_symclk_fe_src_functional_be(struct dccg *dccg, + int symclk_fe_inst, + int symclk_be_inst) +{ + + uint32_t en = 0; + uint32_t src_sel = 0; + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (symclk_fe_inst) { + case 0: + REG_GET_2(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_SRC_SEL, &src_sel, SYMCLKA_FE_EN, &en); + break; + case 1: + REG_GET_2(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, &src_sel, SYMCLKB_FE_EN, &en); + break; + case 2: + REG_GET_2(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, &src_sel, SYMCLKC_FE_EN, &en); + break; + case 3: + REG_GET_2(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, &src_sel, SYMCLKD_FE_EN, &en); + break; + case 4: + REG_GET_2(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, &src_sel, SYMCLKE_FE_EN, &en); + break; + } + + if (en == 1 && src_sel == symclk_be_inst) + return 1; + + return 0; +} + +static void dccg35_set_symclk_fe_src_new(struct dccg *dccg, enum symclk_fe_source src, int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE, + SYMCLKA_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKA_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); + break; + case 1: + REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE, + SYMCLKB_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKB_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); + break; + case 2: + REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE, + SYMCLKC_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKC_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); + break; + case 3: + REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE, + SYMCLKD_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKD_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); + break; + case 4: + REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE, + SYMCLKE_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKE_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); + break; + } +} + +static uint32_t dccg35_is_fe_rcg(struct dccg *dccg, int inst) +{ + uint32_t enable = 0; + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKA_FE_ROOT_GATE_DISABLE, &enable); + break; + case 1: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKB_FE_ROOT_GATE_DISABLE, &enable); + break; + case 2: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKC_FE_ROOT_GATE_DISABLE, &enable); + break; + case 3: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKD_FE_ROOT_GATE_DISABLE, &enable); + break; + case 4: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKE_FE_ROOT_GATE_DISABLE, &enable); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } + return enable; +} + +static uint32_t dccg35_is_symclk32_se_rcg(struct dccg *dccg, int inst) +{ + uint32_t disable_l1 = 0; + uint32_t disable_l2 = 0; + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE0_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE0_GATE_DISABLE, &disable_l2); + break; + case 1: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE1_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE1_GATE_DISABLE, &disable_l2); + break; + case 2: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE2_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE2_GATE_DISABLE, &disable_l2); + break; + case 3: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE3_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE3_GATE_DISABLE, &disable_l2); + break; + default: + BREAK_TO_DEBUGGER(); + return 0; + } + + /* return true if either block level or DCCG level gating is active */ + return (disable_l1 | disable_l2); +} + +static void dccg35_enable_symclk_fe_new( + struct dccg *dccg, + int inst, + enum symclk_fe_source src) +{ + dccg35_set_symclk_fe_rcg(dccg, inst, false); + dccg35_set_symclk_fe_src_new(dccg, src, inst); +} + +static void dccg35_disable_symclk_fe_new( + struct dccg *dccg, + int inst) +{ + dccg35_set_symclk_fe_src_new(dccg, SYMCLK_FE_REFCLK, inst); + dccg35_set_symclk_fe_rcg(dccg, inst, true); +} + +static void dccg35_enable_symclk_be_new( + struct dccg *dccg, + int inst, + enum symclk_be_source src) +{ + dccg35_set_symclk_be_rcg(dccg, inst, false); + dccg35_set_symclk_be_src_new(dccg, inst, src); +} + +static void dccg35_disable_symclk_be_new( + struct dccg *dccg, + int inst) +{ + int i; + + /* Switch from functional clock to refclock */ + dccg35_set_symclk_be_src_new(dccg, inst, SYMCLK_BE_REFCLK); + + /* Check if any other SE connected LE and disable them */ + for (i = 0; i < 4; i++) { + /* Make sure FE is not already in RCG */ + if (dccg35_is_fe_rcg(dccg, i) == 0) { + if (dccg35_is_symclk_fe_src_functional_be(dccg, i, inst)) + dccg35_disable_symclk_fe_new(dccg, i); + } + } + /* Safe to RCG SYMCLK*/ + dccg35_set_symclk_be_rcg(dccg, inst, true); +} + +static void dccg35_enable_symclk32_se_new( + struct dccg *dccg, + int inst, + enum symclk32_se_clk_source src) +{ + dccg35_set_symclk32_se_rcg(dccg, inst, false); + dccg35_set_symclk32_se_src_new(dccg, inst, src); +} + +static void dccg35_disable_symclk32_se_new( + struct dccg *dccg, + int inst) +{ + dccg35_set_symclk32_se_src_new(dccg, SYMCLK32_SE_REFCLK, inst); + dccg35_set_symclk32_se_rcg(dccg, inst, true); +} + +static void dccg35_enable_symclk32_le_new( + struct dccg *dccg, + int inst, + enum symclk32_le_clk_source src) +{ + dccg35_set_symclk32_le_rcg(dccg, inst, false); + dccg35_set_symclk32_le_src_new(dccg, inst, src); +} + +static void dccg35_disable_symclk32_le_new( + struct dccg *dccg, + int inst) +{ + int i; + + /* Switch from functional clock to refclock */ + dccg35_set_symclk32_le_src_new(dccg, inst, SYMCLK32_LE_REFCLK); + + /* Check if any SE are connected and disable SE as well */ + for (i = 0; i < 4; i++) { + /* Make sure FE is not already in RCG */ + if (dccg35_is_symclk32_se_rcg(dccg, i) == 0) { + /* Disable and SE connected to this LE before RCG */ + if (dccg35_is_symclk32_se_src_functional_le_new(dccg, i, inst)) + dccg35_disable_symclk32_se_new(dccg, i); + } + } + /* Safe to RCG SYM32_LE*/ + dccg35_set_symclk32_le_rcg(dccg, inst, true); +} + +static void dccg35_enable_physymclk_new(struct dccg *dccg, + int inst, + enum physymclk_source src) +{ + dccg35_set_physymclk_rcg(dccg, inst, false); + dccg35_set_physymclk_src_new(dccg, src, inst); +} + +static void dccg35_disable_physymclk_new(struct dccg *dccg, + int inst) +{ + dccg35_set_physymclk_src_new(dccg, PHYSYMCLK_REFCLK, inst); + dccg35_set_physymclk_rcg(dccg, inst, true); +} + +static void dccg35_enable_dpp_clk_new( + struct dccg *dccg, + int inst, + enum dppclk_clock_source src) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + /* Sanitize inst before use in array de-ref */ + if (inst < 0) { + BREAK_TO_DEBUGGER(); + return; + } + dccg35_set_dppclk_rcg(dccg, inst, false); + dcn35_set_dppclk_src_new(dccg, inst, src); + /* Switch DPP clock to DTO */ + REG_SET_2(DPPCLK_DTO_PARAM[inst], 0, + DPPCLK0_DTO_PHASE, 0xFF, + DPPCLK0_DTO_MODULO, 0xFF); +} + +static void dccg35_disable_dpp_clk_new( + struct dccg *dccg, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + /* Sanitize inst before use in array de-ref */ + if (inst < 0) { + BREAK_TO_DEBUGGER(); + return; + } + dcn35_set_dppclk_src_new(dccg, inst, DPP_REFCLK); + REG_SET_2(DPPCLK_DTO_PARAM[inst], 0, + DPPCLK0_DTO_PHASE, 0, + DPPCLK0_DTO_MODULO, 1); + dccg35_set_dppclk_rcg(dccg, inst, true); +} + +static void dccg35_disable_dscclk_new(struct dccg *dccg, + int inst) +{ + dccg35_set_dsc_clk_src_new(dccg, inst, DSC_CLK_REF_CLK); + dccg35_set_dsc_clk_rcg(dccg, inst, true); +} + +static void dccg35_enable_dscclk_new(struct dccg *dccg, + int inst, + enum dsc_clk_source src) +{ + dccg35_set_dsc_clk_rcg(dccg, inst, false); + dccg35_set_dsc_clk_src_new(dccg, inst, src); +} + +static void dccg35_enable_dtbclk_p_new(struct dccg *dccg, + enum dtbclk_source src, + int inst) +{ + dccg35_set_dtbclk_p_rcg(dccg, inst, false); + dccg35_set_dtbclk_p_src_new(dccg, src, inst); +} + +static void dccg35_disable_dtbclk_p_new(struct dccg *dccg, + int inst) +{ + dccg35_set_dtbclk_p_src_new(dccg, DTBCLK_REFCLK, inst); + dccg35_set_dtbclk_p_rcg(dccg, inst, true); +} + +static void dccg35_disable_dpstreamclk_new(struct dccg *dccg, + int inst) +{ + dccg35_set_dpstreamclk_src_new(dccg, DP_STREAM_REFCLK, inst); + dccg35_set_dpstreamclk_rcg(dccg, inst, true); +} + +static void dccg35_enable_dpstreamclk_new(struct dccg *dccg, + enum dp_stream_clk_source src, + int inst) +{ + dccg35_set_dpstreamclk_rcg(dccg, inst, false); + dccg35_set_dpstreamclk_src_new(dccg, src, inst); +} + static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); @@ -997,6 +2032,322 @@ static void dccg35_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst } } +static void dccg35_set_dpstreamclk_cb( + struct dccg *dccg, + enum streamclk_source src, + int otg_inst, + int dp_hpo_inst) +{ + + enum dtbclk_source dtb_clk_src; + enum dp_stream_clk_source dp_stream_clk_src; + + ASSERT(otg_inst >= DP_STREAM_DTBCLK_P5); + + switch (src) { + case REFCLK: + dtb_clk_src = DTBCLK_REFCLK; + dp_stream_clk_src = DP_STREAM_REFCLK; + break; + case DPREFCLK: + dtb_clk_src = DTBCLK_DPREFCLK; + dp_stream_clk_src = (enum dp_stream_clk_source)otg_inst; + break; + case DTBCLK0: + dtb_clk_src = DTBCLK_DTBCLK0; + dp_stream_clk_src = (enum dp_stream_clk_source)otg_inst; + break; + default: + BREAK_TO_DEBUGGER(); + return; + } + + if (dtb_clk_src == DTBCLK_REFCLK && + dp_stream_clk_src == DP_STREAM_REFCLK) { + dccg35_disable_dtbclk_p_new(dccg, otg_inst); + dccg35_disable_dpstreamclk_new(dccg, dp_hpo_inst); + } else { + dccg35_enable_dtbclk_p_new(dccg, dtb_clk_src, otg_inst); + dccg35_enable_dpstreamclk_new(dccg, + dp_stream_clk_src, + dp_hpo_inst); + } +} + +static void dccg35_set_dpstreamclk_root_clock_gating_cb( + struct dccg *dccg, + int dp_hpo_inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Instance 0 is implied here since only one streamclock resource + * Redundant as gating when enabled is acheived through set_dpstreamclk + */ + if (power_on) + dccg35_enable_dpstreamclk_new(dccg, + DP_STREAM_REFCLK, + dp_hpo_inst); + else + dccg35_disable_dpstreamclk_new(dccg, dp_hpo_inst); +} + +static void dccg35_update_dpp_dto_cb(struct dccg *dccg, int dpp_inst, + int req_dppclk) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (dccg->ref_dppclk && req_dppclk) { + int ref_dppclk = dccg->ref_dppclk; + int modulo, phase; + + // phase / modulo = dpp pipe clk / dpp global clk + modulo = 0xff; // use FF at the end + phase = ((modulo * req_dppclk) + ref_dppclk - 1) / ref_dppclk; + + if (phase > 0xff) { + ASSERT(false); + phase = 0xff; + } + + /* Enable DPP CLK DTO output */ + dccg35_enable_dpp_clk_new(dccg, dpp_inst, DPP_DCCG_DTO); + + /* Program DTO */ + REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, + DPPCLK0_DTO_PHASE, phase, + DPPCLK0_DTO_MODULO, modulo); + } else + dccg35_disable_dpp_clk_new(dccg, dpp_inst); + + dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk; +} + +static void dccg35_dpp_root_clock_control_cb( + struct dccg *dccg, + unsigned int dpp_inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Redundant as gating when enabled is acheived through update_dpp_dto + */ + if (power_on) + dccg35_enable_dpp_clk_new(dccg, dpp_inst, DPP_REFCLK); + else + dccg35_disable_dpp_clk_new(dccg, dpp_inst); +} + +static void dccg35_enable_symclk32_se_cb( + struct dccg *dccg, + int inst, + enum phyd32clk_clock_source phyd32clk) +{ + dccg35_enable_symclk32_se_new(dccg, inst, (enum symclk32_se_clk_source)phyd32clk); +} + +static void dccg35_disable_symclk32_se_cb(struct dccg *dccg, int inst) +{ + dccg35_disable_symclk32_se_new(dccg, inst); +} + +static void dccg35_enable_symclk32_le_cb( + struct dccg *dccg, + int inst, + enum phyd32clk_clock_source src) +{ + dccg35_enable_symclk32_le_new(dccg, inst, (enum symclk32_le_clk_source) src); +} + +static void dccg35_disable_symclk32_le_cb(struct dccg *dccg, int inst) +{ + dccg35_disable_symclk32_le_new(dccg, inst); +} + +static void dccg35_set_symclk32_le_root_clock_gating_cb( + struct dccg *dccg, + int inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Redundant as gating when enabled is acheived through disable_symclk32_le + */ + if (power_on) + dccg35_enable_symclk32_le_new(dccg, inst, SYMCLK32_LE_REFCLK); + else + dccg35_disable_symclk32_le_new(dccg, inst); +} + +static void dccg35_set_physymclk_cb( + struct dccg *dccg, + int inst, + enum physymclk_clock_source clk_src, + bool force_enable) +{ + /* force_enable = 0 indicates we can switch to ref clock */ + if (force_enable) + dccg35_enable_physymclk_new(dccg, inst, (enum physymclk_source)clk_src); + else + dccg35_disable_physymclk_new(dccg, inst); +} + +static void dccg35_set_physymclk_root_clock_gating_cb( + struct dccg *dccg, + int inst, + bool power_on) +{ + /* Redundant RCG already done in disable_physymclk + * power_on = 1 indicates we need to ungate + */ + if (power_on) + dccg35_enable_physymclk_new(dccg, inst, PHYSYMCLK_REFCLK); + else + dccg35_disable_physymclk_new(dccg, inst); +} + +static void dccg35_set_symclk32_le_root_clock_gating( + struct dccg *dccg, + int inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Redundant as gating when enabled is acheived through disable_symclk32_le + */ + if (power_on) + dccg35_enable_symclk32_le_new(dccg, inst, SYMCLK32_LE_REFCLK); + else + dccg35_disable_symclk32_le_new(dccg, inst); +} + +static void dccg35_set_dtbclk_p_src_cb( + struct dccg *dccg, + enum streamclk_source src, + uint32_t inst) +{ + if (src == DTBCLK0) + dccg35_enable_dtbclk_p_new(dccg, DTBCLK_DTBCLK0, inst); + else + dccg35_disable_dtbclk_p_new(dccg, inst); +} + +static void dccg35_set_dtbclk_dto_cb( + struct dccg *dccg, + const struct dtbclk_dto_params *params) +{ + /* set_dtbclk_p_src typ called earlier to switch to DTBCLK + * if params->ref_dtbclk_khz and req_dtbclk_khz are 0 switch to ref-clock + */ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + /* DTO Output Rate / Pixel Rate = 1/4 */ + int req_dtbclk_khz = params->pixclk_khz / 4; + + if (params->ref_dtbclk_khz && req_dtbclk_khz) { + uint32_t modulo, phase; + + dccg35_enable_dtbclk_p_new(dccg, DTBCLK_DTBCLK0, params->otg_inst); + + // phase / modulo = dtbclk / dtbclk ref + modulo = params->ref_dtbclk_khz * 1000; + phase = req_dtbclk_khz * 1000; + + REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], modulo); + REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], phase); + + REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLK_DTO_ENABLE[params->otg_inst], 1); + + REG_WAIT(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLKDTO_ENABLE_STATUS[params->otg_inst], 1, + 1, 100); + + /* program OTG_PIXEL_RATE_DIV for DIVK1 and DIVK2 fields */ + dccg35_set_pixel_rate_div(dccg, params->otg_inst, PIXEL_RATE_DIV_BY_1, PIXEL_RATE_DIV_BY_1); + + /* The recommended programming sequence to enable DTBCLK DTO to generate + * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should + * be set only after DTO is enabled + */ + REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], + PIPE_DTO_SRC_SEL[params->otg_inst], 2); + } else { + dccg35_disable_dtbclk_p_new(dccg, params->otg_inst); + + REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLK_DTO_ENABLE[params->otg_inst], 0, + PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1); + + REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0); + REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0); + } +} + +static void dccg35_disable_dscclk_cb(struct dccg *dccg, + int inst) +{ + dccg35_disable_dscclk_new(dccg, inst); +} + +static void dccg35_enable_dscclk_cb(struct dccg *dccg, int inst) +{ + dccg35_enable_dscclk_new(dccg, inst, DSC_DTO_TUNED_CK_GPU_DISCLK_3); +} + +static void dccg35_enable_symclk_se_cb(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst) +{ + /* Switch to functional clock if already not selected */ + dccg35_enable_symclk_be_new(dccg, SYMCLK_BE_PHYCLK, link_enc_inst); + + dccg35_enable_symclk_fe_new(dccg, stream_enc_inst, (enum symclk_fe_source) link_enc_inst); + +} + +static void dccg35_disable_symclk_se_cb( + struct dccg *dccg, + uint32_t stream_enc_inst, + uint32_t link_enc_inst) +{ + dccg35_disable_symclk_fe_new(dccg, stream_enc_inst); + + /* DMU PHY sequence switches SYMCLK_BE (link_enc_inst) to ref clock once PHY is turned off */ +} + +static const struct dccg_funcs dccg35_funcs_new = { + .update_dpp_dto = dccg35_update_dpp_dto_cb, + .dpp_root_clock_control = dccg35_dpp_root_clock_control_cb, + .get_dccg_ref_freq = dccg31_get_dccg_ref_freq, + .dccg_init = dccg35_init, + .set_dpstreamclk = dccg35_set_dpstreamclk_cb, + .set_dpstreamclk_root_clock_gating = dccg35_set_dpstreamclk_root_clock_gating_cb, + .enable_symclk32_se = dccg35_enable_symclk32_se_cb, + .disable_symclk32_se = dccg35_disable_symclk32_se_cb, + .enable_symclk32_le = dccg35_enable_symclk32_le_cb, + .disable_symclk32_le = dccg35_disable_symclk32_le_cb, + .set_symclk32_le_root_clock_gating = dccg35_set_symclk32_le_root_clock_gating_cb, + .set_physymclk = dccg35_set_physymclk_cb, + .set_physymclk_root_clock_gating = dccg35_set_physymclk_root_clock_gating_cb, + .set_dtbclk_dto = dccg35_set_dtbclk_dto_cb, + .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto, + .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en, + .otg_add_pixel = dccg31_otg_add_pixel, + .otg_drop_pixel = dccg31_otg_drop_pixel, + .set_dispclk_change_mode = dccg31_set_dispclk_change_mode, + .disable_dsc = dccg35_disable_dscclk_cb, + .enable_dsc = dccg35_enable_dscclk_cb, + .set_pixel_rate_div = dccg35_set_pixel_rate_div, + .get_pixel_rate_div = dccg35_get_pixel_rate_div, + .trigger_dio_fifo_resync = dccg35_trigger_dio_fifo_resync, + .set_valid_pixel_rate = dccg35_set_valid_pixel_rate, + .enable_symclk_se = dccg35_enable_symclk_se_cb, + .disable_symclk_se = dccg35_disable_symclk_se_cb, + .set_dtbclk_p_src = dccg35_set_dtbclk_p_src_cb, +}; + static const struct dccg_funcs dccg35_funcs = { .update_dpp_dto = dccg35_update_dpp_dto, .dpp_root_clock_control = dccg35_dpp_root_clock_control, @@ -1026,6 +2377,7 @@ static const struct dccg_funcs dccg35_funcs = { .enable_symclk_se = dccg35_enable_symclk_se, .disable_symclk_se = dccg35_disable_symclk_se, .set_dtbclk_p_src = dccg35_set_dtbclk_p_src, + }; struct dccg *dccg35_create( @@ -1041,6 +2393,10 @@ struct dccg *dccg35_create( BREAK_TO_DEBUGGER(); return NULL; } + (void)&dccg35_disable_symclk_be_new; + (void)&dccg35_set_symclk32_le_root_clock_gating; + (void)&dccg35_set_smclk32_se_rcg; + (void)&dccg35_funcs_new; base = &dccg_dcn->base; base->ctx = ctx; diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c index 07f1f396ba52..0b889004509a 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c @@ -730,35 +730,35 @@ void dccg401_init(struct dccg *dccg) } } -static void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst, bool enable) +static void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - uint32_t phase = enable ? 1 : 0; switch (inst) { case 0: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1, DSCCLK0_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK0_DTO_PARAM, - DSCCLK0_DTO_PHASE, phase, + DSCCLK0_DTO_PHASE, 1, DSCCLK0_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1); + break; case 1: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1, DSCCLK1_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK1_DTO_PARAM, - DSCCLK1_DTO_PHASE, phase, + DSCCLK1_DTO_PHASE, 1, DSCCLK1_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1); break; case 2: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1, DSCCLK2_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK2_DTO_PARAM, - DSCCLK2_DTO_PHASE, phase, + DSCCLK2_DTO_PHASE, 1, DSCCLK2_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1); break; case 3: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1, DSCCLK3_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK3_DTO_PARAM, - DSCCLK3_DTO_PHASE, phase, + DSCCLK3_DTO_PHASE, 1, DSCCLK3_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1); break; default: BREAK_TO_DEBUGGER(); @@ -774,15 +774,27 @@ static void dccg401_set_ref_dscclk(struct dccg *dccg, switch (dsc_inst) { case 0: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 0); + REG_UPDATE_2(DSCCLK0_DTO_PARAM, + DSCCLK0_DTO_PHASE, 0, + DSCCLK0_DTO_MODULO, 0); break; case 1: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 0); + REG_UPDATE_2(DSCCLK1_DTO_PARAM, + DSCCLK1_DTO_PHASE, 0, + DSCCLK1_DTO_MODULO, 0); break; case 2: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 0); + REG_UPDATE_2(DSCCLK2_DTO_PARAM, + DSCCLK2_DTO_PHASE, 0, + DSCCLK2_DTO_MODULO, 0); break; case 3: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 0); + REG_UPDATE_2(DSCCLK3_DTO_PARAM, + DSCCLK3_DTO_PHASE, 0, + DSCCLK3_DTO_MODULO, 0); break; default: return; diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h index 8bcddc836347..a196ce9e8127 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h @@ -117,10 +117,6 @@ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_EN, mask_sh),\ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_EN, mask_sh),\ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_DTO_DB_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_DTO_DB_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_DTO_DB_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_DTO_DB_EN, mask_sh),\ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, mask_sh),\ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_MODULO, mask_sh),\ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, mask_sh),\ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index cf5f84fb9c69..eeed840073fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -630,6 +630,11 @@ void dce_aud_az_enable(struct audio *audio) audio->inst, value); } +void dce_aud_az_disable_hbr_audio(struct audio *audio) +{ + set_high_bit_rate_capable(audio, false); +} + void dce_aud_az_disable(struct audio *audio) { uint32_t value; @@ -1293,6 +1298,7 @@ static const struct audio_funcs funcs = { .az_enable = dce_aud_az_enable, .az_disable = dce_aud_az_disable, .az_configure = dce_aud_az_configure, + .az_disable_hbr_audio = dce_aud_az_disable_hbr_audio, .destroy = dce_aud_destroy, }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h index 539f881928d1..1b7b8b079af4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h @@ -166,6 +166,7 @@ void dce_aud_hw_init(struct audio *audio); void dce_aud_az_enable(struct audio *audio); void dce_aud_az_disable(struct audio *audio); +void dce_aud_az_disable_hbr_audio(struct audio *audio); void dce_aud_az_configure(struct audio *audio, enum signal_type signal, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index b8996d285f00..bb4ac5042c80 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -735,7 +735,15 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, (unsigned int) payload->mot); if (payload->write) dce_aux_log_payload(" write", payload->data, payload->length, 16); - ret = dce_aux_transfer_raw(ddc, payload, &operation_result); + + /* Check whether aux to be processed via dmub or dcn directly */ + if (ddc->ctx->dc->debug.enable_dmub_aux_for_legacy_ddc + || ddc->ddc_pin == NULL) { + ret = dce_aux_transfer_dmub_raw(ddc, payload, &operation_result); + } else { + ret = dce_aux_transfer_raw(ddc, payload, &operation_result); + } + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, LOG_FLAG_I2cAux_DceAux, "dce_aux_transfer_with_retries: link_index=%u: END: retry %d of %d: address=0x%04x length=%u write=%d mot=%d: ret=%d operation_result=%d payload->reply=%u", diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index ccf153b7a467..cae18f8c1c9a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -94,6 +94,8 @@ static enum dc_psr_state convert_psr_state(uint32_t raw_state) state = PSR_STATE_HWLOCK_MGR; else if (raw_state == 0x61) state = PSR_STATE_POLLVUPDATE; + else if (raw_state == 0x62) + state = PSR_STATE_RELEASE_HWLOCK_MGR_FULL_FRAME; else state = PSR_STATE_INVALID; @@ -363,6 +365,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->debug.bitfields.visual_confirm = dc->dc->debug.visual_confirm == VISUAL_CONFIRM_PSR; copy_settings_data->debug.bitfields.use_hw_lock_mgr = 1; copy_settings_data->debug.bitfields.force_full_frame_update = 0; + copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm; if (psr_context->su_granularity_required == 0) copy_settings_data->su_y_granularity = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 4d960dc5ce89..14f935961672 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -167,6 +167,8 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, copy_settings_data->smu_optimizations_en = link->replay_settings.replay_smu_opt_enable; copy_settings_data->replay_timing_sync_supported = link->replay_settings.config.replay_timing_sync_supported; + copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm; + copy_settings_data->flags.u32All = 0; copy_settings_data->flags.bitfields.fec_enable_status = (link->fec_state == dc_link_fec_enabled); copy_settings_data->flags.bitfields.dsc_enable_status = (pipe_ctx->stream->timing.flags.DSC == 1); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index 49bcfe6ec999..fa422a8cbced 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -1955,6 +1955,7 @@ void dce110_tg_program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h index 28c58f1dff2d..ee4de740aceb 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h @@ -261,6 +261,7 @@ void dce110_tg_program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c index bf35dc65ca29..9837dec837ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c @@ -438,6 +438,7 @@ static void dce110_timing_generator_v_program_timing(struct timing_generator *tg int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index eb3557965781..fcf59348eb62 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -697,6 +697,7 @@ static void dce120_tg_program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c index c1a85ee374d9..e5fb0e8333e4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c @@ -111,13 +111,14 @@ static void program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { if (!use_vbios) program_pix_dur(tg, timing->pix_clk_100hz); - dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios); + dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, 0, use_vbios); } static void dce60_timing_generator_enable_advanced_request( diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c index 2df4654858be..003a9330c286 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c @@ -111,13 +111,14 @@ static void program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { if (!use_vbios) program_pix_dur(tg, timing->pix_clk_100hz); - dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios); + dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, 0, use_vbios); } static void dce80_timing_generator_enable_advanced_request( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 9923d0d620d4..e1f6623d4936 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -24,8 +24,6 @@ DCN10 = dcn10_ipp.o \ dcn10_hw_sequencer_debug.o \ - dcn10_opp.o \ - dcn10_mpc.o \ dcn10_cm_common.o \ AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index 0b49362f71b0..eaed5d1c398a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -591,6 +591,8 @@ bool cm_helper_translate_curve_to_degamma_hw_format( i += increment) { if (j == hw_points - 1) break; + if (i >= TRANSFER_FUNC_POINTS) + return false; rgb_resulted[j].red = output_tf->tf_pts.red[i]; rgb_resulted[j].green = output_tf->tf_pts.green[i]; rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index b3aeabc4d605..25ba0d310d46 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -1,8 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved. -DCN20 = dcn20_mpc.o dcn20_opp.o dcn20_mmhubbub.o \ - dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o +DCN20 = dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index 4c43af867d86..b17277de0340 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -23,15 +23,11 @@ # # -DCN30 := dcn30_mpc.o dcn30_vpg.o \ +DCN30 := dcn30_vpg.o \ dcn30_afmt.o \ - dcn30_dwb.o \ - dcn30_dwb_cm.o \ dcn30_cm_common.o \ dcn30_mmhubbub.o \ - - AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN30) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c index b8327237ed44..f31f0e3abfc0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c @@ -28,7 +28,7 @@ #include "reg_helper.h" #include "dcn30/dcn30_dpp.h" #include "basics/conversion.h" -#include "dcn30_cm_common.h" +#include "dcn30/dcn30_cm_common.h" #include "custom_float.h" #define REG(reg) reg @@ -177,6 +177,8 @@ bool cm3_helper_translate_curve_to_hw_format( i += increment) { if (j == hw_points) break; + if (i >= TRANSFER_FUNC_POINTS) + return false; rgb_resulted[j].red = output_tf->tf_pts.red[i]; rgb_resulted[j].green = output_tf->tf_pts.green[i]; rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; @@ -335,6 +337,8 @@ bool cm3_helper_translate_curve_to_degamma_hw_format( i += increment) { if (j == hw_points - 1) break; + if (i >= TRANSFER_FUNC_POINTS) + return false; rgb_resulted[j].red = output_tf->tf_pts.red[i]; rgb_resulted[j].green = output_tf->tf_pts.green[i]; rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile index dc37dbf870df..fb4814ab3f05 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile @@ -3,7 +3,7 @@ # # Makefile for dcn30. -DCN301 = dcn301_dio_link_encoder.o dcn301_panel_cntl.o +DCN301 = dcn301_panel_cntl.o AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile deleted file mode 100644 index a954e316aca2..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: MIT -# -# Copyright (C) 2021 Advanced Micro Devices, Inc. All the rights reserved -# -# Authors: AMD -# -# Makefile for dcn303. - -DCN3_03 = dcn303_init.o - -AMD_DAL_DCN3_03 = $(addprefix $(AMDDALPATH)/dc/dcn303/,$(DCN3_03)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_03) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile index e2601d0aba41..d510e4652c18 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile @@ -5,7 +5,7 @@ # Makefile for dcn31. DCN31 = dcn31_panel_cntl.o \ - dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \ + dcn31_apg.o \ dcn31_afmt.o dcn31_vpg.o AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile deleted file mode 100644 index 15fdcf7c6466..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright © 2024 Advanced Micro Devices, Inc. All rights reserved. -# -# Makefile for dcn314. - -DCN314 = dcn314_dio_stream_encoder.o - -AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN314) diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/Makefile b/drivers/gpu/drm/amd/display/dc/dcn401/Makefile deleted file mode 100644 index ded1f3140beb..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn401/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright © 2024 Advanced Micro Devices, Inc. All rights reserved. - -DCN401 += dcn401_dio_link_encoder.o -DCN401 += dcn401_dio_stream_encoder.o -DCN401 += dcn401_mpc.o - -AMD_DAL_DCN401 = $(addprefix $(AMDDALPATH)/dc/dcn401/,$(DCN401)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN401) diff --git a/drivers/gpu/drm/amd/display/dc/dio/Makefile b/drivers/gpu/drm/amd/display/dc/dio/Makefile index 67840e474d7a..0dfd480976f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dio/Makefile @@ -51,6 +51,15 @@ AMD_DAL_DIO_DCN30 = $(addprefix $(AMDDALPATH)/dc/dio/dcn30/,$(DIO_DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN30) +############################################################################### +# DCN301 +############################################################################### +DIO_DCN301 = dcn301_dio_link_encoder.o + +AMD_DAL_DIO_DCN301 = $(addprefix $(AMDDALPATH)/dc/dio/dcn301/,$(DIO_DCN301)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN301) + ############################################################################### # DCN31 ############################################################################### @@ -60,6 +69,15 @@ AMD_DAL_DIO_DCN31 = $(addprefix $(AMDDALPATH)/dc/dio/dcn31/,$(DIO_DCN31)) AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN31) +############################################################################### +# DCN314 +############################################################################### +DIO_DCN314 = dcn314_dio_stream_encoder.o + +AMD_DAL_DIO_DCN314 = $(addprefix $(AMDDALPATH)/dc/dio/dcn314/,$(DIO_DCN314)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN314) + ############################################################################### # DCN32 ############################################################################### diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c rename to drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h rename to drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c rename to drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.h rename to drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c index 05783daa62ac..2ed382a8e79c 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c @@ -23,7 +23,6 @@ * */ - #include "reg_helper.h" #include "core_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c index 6a179e5ab417..6ab2a218b769 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c @@ -22,7 +22,6 @@ * */ - #include "dc_bios_types.h" #include "dcn30/dcn30_dio_stream_encoder.h" #include "dcn314/dcn314_dio_stream_encoder.h" @@ -392,6 +391,14 @@ static void enc35_reset_fifo(struct stream_encoder *enc, bool reset) udelay(10); } +static bool enc35_is_fifo_enabled(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t reset_val; + + REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, &reset_val); + return (reset_val == 0) ? false : true; +} void enc35_disable_fifo(struct stream_encoder *enc) { struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); @@ -415,6 +422,24 @@ void enc35_enable_fifo(struct stream_encoder *enc) REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1); } +static uint32_t enc35_get_pixels_per_cycle(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t value; + + REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, &value); + + switch (value) { + case 0: + return 1; + case 1: + return 2; + default: + ASSERT_CRITICAL(false); + return 1; + } +} + static const struct stream_encoder_funcs dcn35_str_enc_funcs = { .dp_set_odm_combine = enc314_dp_set_odm_combine, @@ -465,7 +490,9 @@ static const struct stream_encoder_funcs dcn35_str_enc_funcs = { .set_input_mode = enc314_set_dig_input_mode, .enable_fifo = enc35_enable_fifo, .disable_fifo = enc35_disable_fifo, + .is_fifo_enabled = enc35_is_fifo_enabled, .map_stream_to_link = enc35_stream_encoder_map_to_link, + .get_pixels_per_cycle = enc35_get_pixels_per_cycle, }; void dcn35_dio_stream_encoder_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 34adae7ab6e8..2e4a46f1b499 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -210,4 +210,7 @@ enum adaptive_sync_type dm_get_adaptive_sync_support_type(struct dc_link *link); enum dc_edid_status dm_helpers_get_sbios_edid(struct dc_link *link, struct dc_edid *edid); +bool dm_helpers_is_fullscreen(struct dc_context *ctx, struct dc_stream_state *stream); +bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream); + #endif /* __DM_HELPERS__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 8a8efe408a9d..e9fea9c2162e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -1132,7 +1132,8 @@ static void dcn20_adjust_freesync_v_startup( patched_crtc_timing.v_addressable - patched_crtc_timing.v_border_top; - newVstartup = asic_blank_end + (patched_crtc_timing.v_total - asic_blank_start); + /* The newVStartUp is 1 line before vsync point */ + newVstartup = asic_blank_end + 1; *vstartup_start = ((newVstartup > *vstartup_start) ? newVstartup : *vstartup_start); } @@ -1562,6 +1563,8 @@ int dcn20_populate_dml_pipes_from_context(struct dc *dc, pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width; pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 255) / 256) * 256; pipes[pipe_cnt].pipe.src.source_format = dm_444_32; + pipes[pipe_cnt].pipe.src.cur0_src_width = 0; + pipes[pipe_cnt].pipe.src.cur1_src_width = 0; pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/ pipes[pipe_cnt].pipe.dest.recout_height = pipes[pipe_cnt].pipe.src.viewport_height; /*vp_height/vratio*/ pipes[pipe_cnt].pipe.dest.full_recout_width = pipes[pipe_cnt].pipe.dest.recout_width; /*when is_hsplit != 1*/ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c index 7c56ad0f8812..e7019c95ba79 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -78,7 +78,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c index 3d95bfa5aca2..ae5251041728 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -78,7 +78,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index 98502a4f0567..9e1c18b90805 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -53,7 +53,7 @@ static void calculate_ttu_cursor( static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9d399c4ce957..6f490d8d7038 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -160,8 +160,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { .pct_ideal_sdp_bw_after_urgent = 90.0, .pct_ideal_fabric_bw_after_urgent = 67.0, .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, .pct_ideal_dram_bw_after_urgent_strobe = 67.0, .max_avg_sdp_bw_use_normal_percent = 80.0, .max_avg_fabric_bw_use_normal_percent = 60.0, @@ -871,8 +871,9 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, * and the max of (VBLANK blanking time, MALL region)). */ - if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && - subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) + if (drr_timing && + stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && + subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) schedulable = true; return schedulable; @@ -937,7 +938,7 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN) subvp_pipe = pipe; } - if (found) { + if (found && subvp_pipe) { phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream); main_timing = &subvp_pipe->stream->timing; phantom_timing = &phantom_stream->timing; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 4297402bdab3..8839faf42207 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -139,8 +139,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .pct_ideal_sdp_bw_after_urgent = 90.0, .pct_ideal_fabric_bw_after_urgent = 67.0, .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, .pct_ideal_dram_bw_after_urgent_strobe = 67.0, .max_avg_sdp_bw_use_normal_percent = 80.0, .max_avg_fabric_bw_use_normal_percent = 60.0, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 410e4b671228..641a8cd019cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -523,6 +523,7 @@ struct _vcs_dpi_display_pipe_dest_params_st { unsigned int vupdate_offset; unsigned int vupdate_width; unsigned int vready_offset; + unsigned int pstate_keepout; unsigned char interlaced; double pixel_rate_mhz; unsigned char synchronized_vblank_all_planes; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c index dae13f202220..d8bfc85e5dcd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c @@ -39,7 +39,7 @@ static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index fea857214c0f..c4378e620cbf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -35,8 +35,6 @@ frame_warn_flag := -Wframe-larger-than=2048 endif endif -# DRIVER_BUILD is mostly used in DML2.1 source -subdir-ccflags-y += -DDRIVER_BUILD=1 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_core subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_mcg/ @@ -81,13 +79,11 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization := $(dml2_ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_ccflags) @@ -104,13 +100,11 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.o : CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_rcflags) @@ -126,13 +120,11 @@ DML21 += src/inc/dml2_debug.o DML21 += src/dml2_core/dml2_core_dcn4.o DML21 += src/dml2_core/dml2_core_factory.o DML21 += src/dml2_core/dml2_core_dcn4_calcs.o -DML21 += src/dml2_core/dml2_core_shared.o DML21 += src/dml2_dpmm/dml2_dpmm_dcn4.o DML21 += src/dml2_dpmm/dml2_dpmm_factory.o DML21 += src/dml2_mcg/dml2_mcg_dcn4.o DML21 += src/dml2_mcg/dml2_mcg_factory.o DML21 += src/dml2_pmo/dml2_pmo_dcn3.o -DML21 += src/dml2_pmo/dml2_pmo_dcn4.o DML21 += src/dml2_pmo/dml2_pmo_factory.o DML21 += src/dml2_pmo/dml2_pmo_dcn4_fams2.o DML21 += src/dml2_standalone_libraries/lib_float_math.o diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 06387b8b0aee..710a25dcfef0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -31,7 +31,7 @@ static void dml21_init_socbb_params(struct dml2_initialize_instance_in_out *dml_ else soc_bb = &dml2_socbb_dcn401; - qos_params = &dml_dcn401_soc_qos_params; + qos_params = &dml_dcn4_variant_a_soc_qos_params; } /* patch soc bb */ @@ -516,7 +516,7 @@ static void populate_dml21_stream_overrides_from_stream_state( if (!stream->ctx->dc->debug.enable_single_display_2to1_odm_policy || stream->debug.force_odm_combine_segments > 0) stream_desc->overrides.disable_dynamic_odm = true; - stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp; + stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp || stream->hw_cursor_req; } static enum dml2_swizzle_mode gfx_addr3_to_dml2_swizzle_mode(enum swizzle_mode_addr3_values addr3_mode) @@ -725,18 +725,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm const struct scaler_data *scaler_data = get_scaler_data_for_plane(dml_ctx, plane_state, context); struct dc_stream_state *stream = context->streams[stream_index]; - if (stream->cursor_attributes.color_format == CURSOR_MODE_MONO) - plane->cursor.cursor_bpp = 2; - else if (stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_1BIT_AND - || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA - || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - plane->cursor.cursor_bpp = 32; - } else if (stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED - || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED) { - plane->cursor.cursor_bpp = 64; - } else - plane->cursor.cursor_bpp = 32; - + plane->cursor.cursor_bpp = 32; plane->cursor.cursor_width = 256; plane->cursor.num_cursors = 1; @@ -827,6 +816,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm if (plane_state->mcm_luts.lut3d_data.lut3d_src == DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) { plane->tdlut.setup_for_tdlut = true; + switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.layout) { case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR: @@ -836,6 +826,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm plane->tdlut.tdlut_addressing_mode = dml2_tdlut_simple_linear; break; } + switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.size) { case DC_CM2_GPU_MEM_SIZE_171717: plane->tdlut.tdlut_width_mode = dml2_tdlut_width_17_cube; @@ -844,8 +835,8 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm //plane->tdlut.tdlut_width_mode = dml2_tdlut_width_flatten; // dml2_tdlut_width_flatten undefined break; } - } else - plane->tdlut.setup_for_tdlut = false; + } + plane->tdlut.setup_for_tdlut |= dml_ctx->config.force_tdlut_enable; plane->dynamic_meta_data.enable = false; plane->dynamic_meta_data.lines_before_active_required = 0; @@ -949,6 +940,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s int stream_index, plane_index; int disp_cfg_stream_location, disp_cfg_plane_location; struct dml2_display_cfg *dml_dispcfg = &dml_ctx->v21.display_config; + unsigned int plane_count = 0; memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping)); @@ -958,6 +950,11 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s dml_dispcfg->minimize_det_reallocation = true; dml_dispcfg->overrides.enable_subvp_implicit_pmo = true; + if (in_dc->debug.disable_unbounded_requesting) { + dml_dispcfg->overrides.hw.force_unbounded_requesting.enable = true; + dml_dispcfg->overrides.hw.force_unbounded_requesting.value = false; + } + for (stream_index = 0; stream_index < context->stream_count; stream_index++) { disp_cfg_stream_location = map_stream_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]); @@ -1002,33 +999,39 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s dml_dispcfg->plane_descriptors[disp_cfg_plane_location].overrides.uclk_pstate_change_strategy = dml21_force_pstate_method_to_uclk_state_change_strategy(dml_ctx->config.pmo.force_pstate_method_values[stream_index]); } + + plane_count++; } } } + if (plane_count == 0) { + dml_dispcfg->overrides.all_streams_blanked = true; + } + return true; } void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context) { /* TODO these should be the max of active, svp prefetch and idle should be tracked seperately */ - context->bw_ctx.bw.dcn.clk.dispclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dispclk_khz; - context->bw_ctx.bw.dcn.clk.dcfclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.dcfclk_khz; - context->bw_ctx.bw.dcn.clk.dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.uclk_khz; - context->bw_ctx.bw.dcn.clk.fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.fclk_khz; - context->bw_ctx.bw.dcn.clk.idle_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.idle.uclk_khz; - context->bw_ctx.bw.dcn.clk.idle_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.idle.fclk_khz; - context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.deepsleep_dcfclk_khz; + context->bw_ctx.bw.dcn.clk.dispclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dispclk_khz; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.dcfclk_khz; + context->bw_ctx.bw.dcn.clk.dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.uclk_khz; + context->bw_ctx.bw.dcn.clk.fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.fclk_khz; + context->bw_ctx.bw.dcn.clk.idle_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.uclk_khz; + context->bw_ctx.bw.dcn.clk.idle_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.fclk_khz; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.deepsleep_dcfclk_khz; context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = in_ctx->v21.mode_programming.programming->fclk_pstate_supported; context->bw_ctx.bw.dcn.clk.p_state_change_support = in_ctx->v21.mode_programming.programming->uclk_pstate_supported; - context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dtbrefclk_khz > 0; - context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dtbrefclk_khz; + context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz > 0; + context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz; } void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx) { struct dml2_core_internal_display_mode_lib *mode_lib = &in_ctx->v21.dml_init.dml2_instance->core_instance.clean_me_up.mode_lib; - double refclk_freq_in_mhz = (in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz > 0) ? (double)in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;; + double refclk_freq_in_mhz = (in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz > 0) ? (double)in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; if (reg_set_idx >= DML2_DCHUB_WATERMARK_SET_NUM) { /* invalid register set index */ @@ -1053,16 +1056,16 @@ static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_wat switch (wm_index) { case DML2_DCHUB_WATERMARK_SET_A: - wm_regs = &watermarks->dcn4.a; + wm_regs = &watermarks->dcn4x.a; break; case DML2_DCHUB_WATERMARK_SET_B: - wm_regs = &watermarks->dcn4.b; + wm_regs = &watermarks->dcn4x.b; break; case DML2_DCHUB_WATERMARK_SET_C: - wm_regs = &watermarks->dcn4.c; + wm_regs = &watermarks->dcn4x.c; break; case DML2_DCHUB_WATERMARK_SET_D: - wm_regs = &watermarks->dcn4.d; + wm_regs = &watermarks->dcn4x.d; break; case DML2_DCHUB_WATERMARK_SET_NUM: default: @@ -1110,10 +1113,11 @@ void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_ global_sync = &stream_programming->phantom_stream.global_sync; } - pipe_ctx->pipe_dlg_param.vstartup_start = global_sync->dcn4.vstartup_lines; - pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4.vupdate_offset_pixels; - pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4.vupdate_vupdate_width_pixels; - pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4.vready_offset_pixels; + pipe_ctx->pipe_dlg_param.vstartup_start = global_sync->dcn4x.vstartup_lines; + pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4x.vupdate_offset_pixels; + pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4x.vupdate_vupdate_width_pixels; + pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4x.vready_offset_pixels; + pipe_ctx->pipe_dlg_param.pstate_keepout = global_sync->dcn4x.pstate_keepout_start_lines; pipe_ctx->pipe_dlg_param.otg_inst = pipe_ctx->stream_res.tg->inst; @@ -1164,3 +1168,37 @@ void dml21_get_pipe_mcache_config( mcache_pipe_config->plane1_enabled = dml21_is_plane1_enabled(pln_prog->plane_descriptor->pixel_format); } + +void dml21_set_dc_p_state_type( + struct pipe_ctx *pipe_ctx, + struct dml2_per_stream_programming *stream_programming, + bool sub_vp_enabled) +{ + switch (stream_programming->uclk_pstate_method) { + case dml2_uclk_pstate_support_method_vactive: + case dml2_uclk_pstate_support_method_fw_vactive_drr: + pipe_ctx->p_state_type = P_STATE_V_ACTIVE; + break; + case dml2_uclk_pstate_support_method_vblank: + case dml2_uclk_pstate_support_method_fw_vblank_drr: + if (sub_vp_enabled) + pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP; + else + pipe_ctx->p_state_type = P_STATE_V_BLANK; + break; + case dml2_uclk_pstate_support_method_fw_subvp_phantom: + case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr: + pipe_ctx->p_state_type = P_STATE_SUB_VP; + break; + case dml2_uclk_pstate_support_method_fw_drr: + if (sub_vp_enabled) + pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP; + else + pipe_ctx->p_state_type = P_STATE_FPO; + break; + default: + pipe_ctx->p_state_type = P_STATE_UNKNOWN; + break; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h index 4cc0a1fbb93d..476a7f6e4875 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h @@ -26,4 +26,5 @@ void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_water void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx); void dml21_map_hw_resources(struct dml2_context *dml_ctx); void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config); +void dml21_set_dc_p_state_type(struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming, bool sub_vp_enabled); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c index d276458e50fd..51d491bffa32 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c @@ -11,7 +11,6 @@ #include "dml2_core_dcn4_calcs.h" - int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id) { int i; @@ -280,6 +279,23 @@ bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) dc_is_dp_signal(pipe_ctx->stream->signal)); } + +static bool is_sub_vp_enabled(struct dc *dc, struct dc_state *context) +{ + int i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && dc_state_get_paired_subvp_stream(context, pipe_ctx->stream) && + dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) { + return true; + } + } + return false; +} + + void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_per_stream_programming *stream_prog) { @@ -310,12 +326,16 @@ void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *contex pipe_ctx->det_buffer_size_kb = pln_prog->pipe_regs[pipe_reg_index]->det_size * 64; } - pipe_ctx->plane_res.bw.dppclk_khz = pln_prog->min_clocks.dcn4.dppclk_khz; + pipe_ctx->plane_res.bw.dppclk_khz = pln_prog->min_clocks.dcn4x.dppclk_khz; if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipe_ctx->plane_res.bw.dppclk_khz) context->bw_ctx.bw.dcn.clk.dppclk_khz = pipe_ctx->plane_res.bw.dppclk_khz; dml21_populate_mall_allocation_size(context, dml_ctx, pln_prog, pipe_ctx); memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[pipe_ctx->pipe_idx], &pln_prog->mcache_allocation, sizeof(struct dml2_mcache_surface_allocation)); + + bool sub_vp_enabled = is_sub_vp_enabled(pipe_ctx->stream->ctx->dc, context); + + dml21_set_dc_p_state_type(pipe_ctx, stream_prog, sub_vp_enabled); } static struct dc_stream_state *dml21_add_phantom_stream(struct dml2_context *dml_ctx, @@ -459,94 +479,103 @@ void dml21_build_fams2_programming(const struct dc *dc, struct dml2_context *dml_ctx) { int i, j, k; + unsigned int num_fams2_streams = 0; /* reset fams2 data */ - context->bw_ctx.bw.dcn.fams2_stream_count = 0; memset(&context->bw_ctx.bw.dcn.fams2_stream_params, 0, sizeof(struct dmub_fams2_stream_static_state) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_global_config, 0, sizeof(struct dmub_cmd_fams2_global_config)); - if (!dml_ctx->v21.mode_programming.programming->fams2_required) - return; + if (dml_ctx->v21.mode_programming.programming->fams2_required) { + for (i = 0; i < context->stream_count; i++) { + int dml_stream_idx; + struct dc_stream_state *phantom_stream; + struct dc_stream_status *phantom_status; - for (i = 0; i < context->stream_count; i++) { - int dml_stream_idx; - struct dc_stream_state *phantom_stream; - struct dc_stream_status *phantom_status; + struct dmub_fams2_stream_static_state *static_state = &context->bw_ctx.bw.dcn.fams2_stream_params[num_fams2_streams]; - struct dmub_fams2_stream_static_state *static_state = &context->bw_ctx.bw.dcn.fams2_stream_params[context->bw_ctx.bw.dcn.fams2_stream_count]; + struct dc_stream_state *stream = context->streams[i]; - struct dc_stream_state *stream = context->streams[i]; - - if (context->stream_status[i].plane_count == 0 || - dml_ctx->config.svp_pstate.callbacks.get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) { - /* can ignore blanked or phantom streams */ - continue; - } - - dml_stream_idx = dml21_helper_find_dml_pipe_idx_by_stream_id(dml_ctx, stream->stream_id); - if (dml_stream_idx < 0) { - ASSERT(dml_stream_idx >= 0); - continue; - } - - /* copy static state from PMO */ - memcpy(static_state, - &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_params, - sizeof(struct dmub_fams2_stream_static_state)); - - /* get information from context */ - static_state->num_planes = context->stream_status[i].plane_count; - static_state->otg_inst = context->stream_status[i].primary_otg_inst; - - /* populate pipe masks for planes */ - for (j = 0; j < context->stream_status[i].plane_count; j++) { - for (k = 0; k < dc->res_pool->pipe_count; k++) { - if (context->res_ctx.pipe_ctx[k].stream && - context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && - context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { - static_state->pipe_mask |= (1 << k); - static_state->plane_pipe_masks[j] |= (1 << k); - } + if (context->stream_status[i].plane_count == 0 || + dml_ctx->config.svp_pstate.callbacks.get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) { + /* can ignore blanked or phantom streams */ + continue; } - } - /* get per method programming */ - switch (static_state->type) { - case FAMS2_STREAM_TYPE_VBLANK: - case FAMS2_STREAM_TYPE_VACTIVE: - case FAMS2_STREAM_TYPE_DRR: - break; - case FAMS2_STREAM_TYPE_SUBVP: - phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, stream); - if (!phantom_stream) - break; + dml_stream_idx = dml21_helper_find_dml_pipe_idx_by_stream_id(dml_ctx, stream->stream_id); + if (dml_stream_idx < 0) { + ASSERT(dml_stream_idx >= 0); + continue; + } - phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream); + /* copy static state from PMO */ + memcpy(static_state, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_params, + sizeof(struct dmub_fams2_stream_static_state)); - /* phantom status should always be present */ - ASSERT(phantom_status); - static_state->sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; + /* get information from context */ + static_state->num_planes = context->stream_status[i].plane_count; + static_state->otg_inst = context->stream_status[i].primary_otg_inst; - /* populate pipe masks for phantom planes */ - for (j = 0; j < phantom_status->plane_count; j++) { + /* populate pipe masks for planes */ + for (j = 0; j < context->stream_status[i].plane_count; j++) { for (k = 0; k < dc->res_pool->pipe_count; k++) { if (context->res_ctx.pipe_ctx[k].stream && - context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && - context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { - static_state->sub_state.subvp.phantom_pipe_mask |= (1 << k); - static_state->sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { + static_state->pipe_mask |= (1 << k); + static_state->plane_pipe_masks[j] |= (1 << k); } } } - break; - default: - ASSERT(false); - break; - } - context->bw_ctx.bw.dcn.fams2_stream_count++; + /* get per method programming */ + switch (static_state->type) { + case FAMS2_STREAM_TYPE_VBLANK: + case FAMS2_STREAM_TYPE_VACTIVE: + case FAMS2_STREAM_TYPE_DRR: + break; + case FAMS2_STREAM_TYPE_SUBVP: + phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, stream); + if (!phantom_stream) + break; + + phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream); + + /* phantom status should always be present */ + ASSERT(phantom_status); + static_state->sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; + + /* populate pipe masks for phantom planes */ + for (j = 0; j < phantom_status->plane_count; j++) { + for (k = 0; k < dc->res_pool->pipe_count; k++) { + if (context->res_ctx.pipe_ctx[k].stream && + context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { + static_state->sub_state.subvp.phantom_pipe_mask |= (1 << k); + static_state->sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + } + } + } + break; + default: + ASSERT(false); + break; + } + + num_fams2_streams++; + } } - context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = context->bw_ctx.bw.dcn.fams2_stream_count > 0; + if (num_fams2_streams > 0) { + /* copy FAMS2 configuration */ + memcpy(&context->bw_ctx.bw.dcn.fams2_global_config, + &dml_ctx->v21.mode_programming.programming->fams2_global_config, + sizeof(struct dmub_cmd_fams2_global_config)); + + context->bw_ctx.bw.dcn.fams2_global_config.num_streams = num_fams2_streams; + } + + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; } bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 41ecf00ed196..d35dd507cb9f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -66,7 +66,9 @@ static void dml21_apply_debug_options(const struct dc *in_dc, struct dml2_contex disable_fams2; pmo_options->disable_fams2 = disable_fams2; - pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming; + pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE || + in_dc->debug.disable_fams_gaming == INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY; + pmo_options->disable_drr_clamped_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE; } static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h index 521f77b8ac44..d82c681a5402 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h @@ -72,7 +72,7 @@ static const struct dml2_soc_qos_parameters dml_dcn31_soc_qos_params = { .scaling_factor_mhz = 0, }, .qos_params = { - .dcn4 = { + .dcn4x = { .df_qos_response_time_fclk_cycles = 300, .max_round_trip_to_furthest_cs_fclk_cycles = 350, .mall_overhead_fclk_cycles = 50, @@ -128,7 +128,7 @@ static const struct dml2_soc_qos_parameters dml_dcn31_soc_qos_params = { }, }, }, - .qos_type = dml2_qos_param_type_dcn4, + .qos_type = dml2_qos_param_type_dcn4x, }; static const struct dml2_soc_bb dml2_socbb_dcn31 = { @@ -228,7 +228,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn31 = { .scaling_factor_mhz = 0, }, .qos_params = { - .dcn4 = { + .dcn4x = { .df_qos_response_time_fclk_cycles = 300, .max_round_trip_to_furthest_cs_fclk_cycles = 350, .mall_overhead_fclk_cycles = 50, @@ -332,7 +332,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn31 = { }, }, }, - .qos_type = dml2_qos_param_type_dcn4, + .qos_type = dml2_qos_param_type_dcn4x, }, .power_management_parameters = { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h index fe07fcc3d0d5..8ef7977841de 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h @@ -8,7 +8,7 @@ #include "dml_top_soc_parameter_types.h" -static const struct dml2_soc_qos_parameters dml_dcn401_soc_qos_params = { +static const struct dml2_soc_qos_parameters dml_dcn4_variant_a_soc_qos_params = { .derate_table = { .system_active_urgent = { .dram_derate_percent_pixel = 22, @@ -52,7 +52,7 @@ static const struct dml2_soc_qos_parameters dml_dcn401_soc_qos_params = { .scaling_factor_mhz = 0, }, .qos_params = { - .dcn4 = { + .dcn4x = { .df_qos_response_time_fclk_cycles = 300, .max_round_trip_to_furthest_cs_fclk_cycles = 350, .mall_overhead_fclk_cycles = 50, @@ -78,7 +78,7 @@ static const struct dml2_soc_qos_parameters dml_dcn401_soc_qos_params = { }, }, }, - .qos_type = dml2_qos_param_type_dcn4, + .qos_type = dml2_qos_param_type_dcn4x, }; static const struct dml2_soc_bb dml2_socbb_dcn401 = { @@ -178,7 +178,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn401 = { .scaling_factor_mhz = 0, }, .qos_params = { - .dcn4 = { + .dcn4x = { .df_qos_response_time_fclk_cycles = 300, .max_round_trip_to_furthest_cs_fclk_cycles = 350, .mall_overhead_fclk_cycles = 50, @@ -282,7 +282,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn401 = { }, }, }, - .qos_type = dml2_qos_param_type_dcn4, + .qos_type = dml2_qos_param_type_dcn4x, }, .power_management_parameters = { @@ -344,6 +344,9 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .config_return_buffer_segment_size_in_kbytes = 64, .meta_fifo_size_in_kentries = 22, .compressed_buffer_segment_size_in_kbytes = 64, + .max_flip_time_us = 80, + .max_flip_time_lines = 32, + .hostvm_mode = 0, .subvp_drr_scheduling_margin_us = 100, .subvp_prefetch_end_to_mall_start_us = 15, .subvp_fw_processing_delay = 15, @@ -351,14 +354,18 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .fams2 = { .max_allow_delay_us = 100 * 1000, - .scheduling_delay_us = 50, - .vertical_interrupt_ack_delay_us = 18, + .scheduling_delay_us = 125, + .vertical_interrupt_ack_delay_us = 40, .allow_programming_delay_us = 18, .min_allow_width_us = 20, .subvp_df_throttle_delay_us = 100, - .subvp_programming_delay_us = 18, + .subvp_programming_delay_us = 200, .subvp_prefetch_to_mall_delay_us = 18, - .drr_programming_delay_us = 18, + .drr_programming_delay_us = 35, + + .lock_timeout_us = 5000, + .recovery_timeout_us = 5000, + .flip_programming_delay_us = 300, }, }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h index a25f4e5977cf..a64ec4dcf11a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_H__ #define __DML_TOP_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h index 8247289ce7d3..83fc15bf13cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __dml2_TOP_DCHUB_REGISTERS_H__ #define __dml2_TOP_DCHUB_REGISTERS_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h index daae77f2672b..b132f676a68d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_DISPLAY_CFG_TYPES_H__ #define __DML_TOP_DISPLAY_CFG_TYPES_H__ @@ -411,7 +410,6 @@ struct dml2_stream_parameters { enum dml2_odm_mode odm_mode; bool disable_dynamic_odm; bool disable_subvp; - bool disable_fams2_drr; int minimum_vblank_idle_requirement_us; bool minimize_active_latency_hiding; @@ -478,6 +476,7 @@ struct dml2_display_cfg { bool max_outstanding_when_urgent_expected_disable; bool enable_subvp_implicit_pmo; //enables PMO to switch pipe uclk strategy to subvp, and generate phantom programming unsigned int best_effort_min_active_latency_hiding_us; + bool all_streams_blanked; } overrides; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h index 2f444f448770..8f624a912e78 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_POLICY_TYPES_H__ #define __DML_TOP_POLICY_TYPES_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h index 065b2afab6fb..ebd8abe894a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_SOC_PARAMETER_TYPES_H__ #define __DML_TOP_SOC_PARAMETER_TYPES_H__ @@ -27,7 +26,7 @@ struct dml2_soc_derates { struct dml2_soc_derate_values system_idle_average; }; -struct dml2_dcn3_soc_qos_params { +struct dml2_dcn32x_soc_qos_params { struct { unsigned int base_latency_us; unsigned int base_latency_pixel_vm_us; @@ -53,7 +52,7 @@ struct dml2_dcn4_uclk_dpm_dependent_qos_params { unsigned int average_latency_when_non_urgent_uclk_cycles; }; -struct dml2_dcn4_soc_qos_params { +struct dml2_dcn4x_soc_qos_params { unsigned int df_qos_response_time_fclk_cycles; unsigned int max_round_trip_to_furthest_cs_fclk_cycles; unsigned int mall_overhead_fclk_cycles; @@ -69,7 +68,7 @@ struct dml2_dcn4_soc_qos_params { enum dml2_qos_param_type { dml2_qos_param_type_dcn3, - dml2_qos_param_type_dcn4 + dml2_qos_param_type_dcn4x }; struct dml2_soc_qos_parameters { @@ -81,8 +80,8 @@ struct dml2_soc_qos_parameters { } writeback; union { - struct dml2_dcn3_soc_qos_params dcn3; - struct dml2_dcn4_soc_qos_params dcn4; + struct dml2_dcn32x_soc_qos_params dcn32x; + struct dml2_dcn4x_soc_qos_params dcn4x; } qos_params; enum dml2_qos_param_type qos_type; @@ -152,6 +151,7 @@ struct dml2_soc_bb { double phy_downspread_percent; double dcn_downspread_percent; double dispclk_dppclk_vco_speed_mhz; + bool no_dfs; bool do_urgent_latency_adjustment; unsigned int mem_word_bytes; unsigned int num_dcc_mcaches; @@ -173,6 +173,7 @@ struct dml2_ip_capabilities { unsigned int meta_fifo_size_in_kentries; unsigned int compressed_buffer_segment_size_in_kbytes; unsigned int max_flip_time_us; + unsigned int max_flip_time_lines; unsigned int hostvm_mode; unsigned int subvp_drr_scheduling_margin_us; unsigned int subvp_prefetch_end_to_mall_start_us; @@ -190,6 +191,10 @@ struct dml2_ip_capabilities { unsigned int subvp_programming_delay_us; unsigned int subvp_prefetch_to_mall_delay_us; unsigned int drr_programming_delay_us; + + unsigned int lock_timeout_us; + unsigned int recovery_timeout_us; + unsigned int flip_programming_delay_us; } fams2; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index 8aa77bb190ea..1c773bbb9992 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -74,6 +74,7 @@ struct dml2_pmo_options { bool disable_drr_var; bool disable_drr_clamped; bool disable_drr_var_when_var_active; + bool disable_drr_clamped_when_var_active; bool disable_fams2; bool disable_vactive_det_fill_bw_pad; /* dml2_project_dcn4x_stage2_auto_drr_svp and above only */ bool disable_dyn_odm; @@ -228,7 +229,7 @@ struct dml2_per_plane_programming { union { struct { unsigned long dppclk_khz; - } dcn4; + } dcn4x; } min_clocks; struct dml2_mcache_surface_allocation mcache_allocation; @@ -262,7 +263,8 @@ union dml2_global_sync_programming { unsigned int vupdate_offset_pixels; unsigned int vupdate_vupdate_width_pixels; unsigned int vready_offset_pixels; - } dcn4; + unsigned int pstate_keepout_start_lines; + } dcn4x; }; struct dml2_per_stream_programming { @@ -273,7 +275,7 @@ struct dml2_per_stream_programming { unsigned long dscclk_khz; unsigned long dtbclk_khz; unsigned long phyclk_khz; - } dcn4; + } dcn4x; } min_clocks; union dml2_global_sync_programming global_sync; @@ -374,7 +376,7 @@ struct dml2_display_cfg_programming { unsigned long dispclk_khz; unsigned long dcfclk_deepsleep_khz; unsigned long dpp_ref_khz; - } dcn3; + } dcn32x; struct { struct { unsigned long uclk_khz; @@ -403,7 +405,7 @@ struct dml2_display_cfg_programming { uint32_t dpprefclk_did; uint32_t dtbrefclk_did; } divider_ids; - } dcn4; + } dcn4x; } min_clocks; bool uclk_pstate_supported; @@ -411,6 +413,7 @@ struct dml2_display_cfg_programming { /* indicates this configuration requires FW to support */ bool fams2_required; + struct dmub_cmd_fams2_global_config fams2_global_config; struct { bool supported_in_blank; // Changing to configurations where this is false requires stutter to be disabled during the transition diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 04edcde423a9..0aa4e4d343b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_internal_shared_types.h" #include "dml2_core_shared_types.h" #include "dml2_core_dcn4.h" @@ -10,7 +9,7 @@ #include "dml2_debug.h" #include "lib_float_math.h" -struct dml2_core_ip_params core_dcn4_ip_caps_base = { +static const struct dml2_core_ip_params core_dcn4_ip_caps_base = { // Hardcoded values for DCN3x .vblank_nom_default_us = 668, .remote_iommu_outstanding_translations = 256, @@ -70,6 +69,7 @@ struct dml2_core_ip_params core_dcn4_ip_caps_base = { .max_num_dp2p0_streams = 4, .imall_supported = 1, .max_flip_time_us = 80, + .max_flip_time_lines = 32, .words_per_channel = 16, .subvp_fw_processing_delay_us = 15, @@ -77,84 +77,6 @@ struct dml2_core_ip_params core_dcn4_ip_caps_base = { .subvp_swath_height_margin_lines = 16, }; -struct dml2_core_ip_params core_dcn4sw_ip_caps_base = { - .vblank_nom_default_us = 668, - .remote_iommu_outstanding_translations = 256, - .rob_buffer_size_kbytes = 192, - .config_return_buffer_size_in_kbytes = 1280, - .config_return_buffer_segment_size_in_kbytes = 64, - .compressed_buffer_segment_size_in_kbytes = 64, - .dpte_buffer_size_in_pte_reqs_luma = 68, - .dpte_buffer_size_in_pte_reqs_chroma = 36, - .pixel_chunk_size_kbytes = 8, - .alpha_pixel_chunk_size_kbytes = 4, - .min_pixel_chunk_size_bytes = 1024, - .writeback_chunk_size_kbytes = 8, - .line_buffer_size_bits = 1171920, - .max_line_buffer_lines = 32, - .writeback_interface_buffer_size_kbytes = 90, - - //Number of pipes after DCN Pipe harvesting - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dispclk_ramp_margin_percent = 1, - .dppclk_delay_subtotal = 47, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 28, - .dppclk_delay_cnvc_cursor = 6, - .cursor_buffer_size = 24, - .cursor_chunk_size = 2, - .dispclk_delay_subtotal = 125, - .max_inter_dcn_tile_repeaters = 8, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .writeback_line_buffer_buffer_size = 0, - .num_dsc = 4, - .maximum_dsc_bits_per_component = 12, - .maximum_pixels_per_line_per_dsc_unit = 5760, - .dsc422_native_support = true, - .dcc_supported = true, - .ptoi_supported = false, - - .cursor_64bpp_support = true, - .dynamic_metadata_vm_enabled = false, - - .max_num_hdmi_frl_outputs = 1, - .max_num_dp2p0_outputs = 4, - .max_num_dp2p0_streams = 4, - .imall_supported = 1, - .max_flip_time_us = 80, - .words_per_channel = 16, - - .subvp_fw_processing_delay_us = 15, - .subvp_pstate_allow_width_us = 20, - .subvp_swath_height_margin_lines = 16, - - .dcn_mrq_present = 1, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_zs = 64, - .dcc_meta_buffer_size_bytes = 6272, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - - .dchub_arb_to_ret_delay = 102, - .hostvm_mode = 1, -}; - static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *ip_caps, const struct dml2_core_ip_params *ip_params) { ip_caps->pipe_count = ip_params->max_num_dpp; @@ -169,6 +91,7 @@ static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *i ip_caps->meta_fifo_size_in_kentries = ip_params->meta_fifo_size_in_kentries; ip_caps->compressed_buffer_segment_size_in_kbytes = ip_params->compressed_buffer_segment_size_in_kbytes; ip_caps->max_flip_time_us = ip_params->max_flip_time_us; + ip_caps->max_flip_time_lines = ip_params->max_flip_time_lines; ip_caps->hostvm_mode = ip_params->hostvm_mode; // FIXME_STAGE2: cleanup after adding all dv override to ip_caps @@ -192,6 +115,7 @@ static void patch_ip_params_with_ip_caps(struct dml2_core_ip_params *ip_params, ip_params->meta_fifo_size_in_kentries = ip_caps->meta_fifo_size_in_kentries; ip_params->compressed_buffer_segment_size_in_kbytes = ip_caps->compressed_buffer_segment_size_in_kbytes; ip_params->max_flip_time_us = ip_caps->max_flip_time_us; + ip_params->max_flip_time_lines = ip_caps->max_flip_time_lines; ip_params->hostvm_mode = ip_caps->hostvm_mode; } @@ -222,6 +146,7 @@ bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out) } memcpy(&core->clean_me_up.mode_lib.soc, in_out->soc_bb, sizeof(struct dml2_soc_bb)); + memcpy(&core->clean_me_up.mode_lib.ip_caps, in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); return true; } @@ -246,10 +171,12 @@ static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *p phantom->stream_index = phantom_stream_index; phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; - phantom->composition.viewport.plane0.height = (long int unsigned) math_ceil2( - (double)phantom->composition.viewport.plane0.height * (double)phantom_stream->timing.v_active / (double)main_stream->timing.v_active, 16.0); - phantom->composition.viewport.plane1.height = (long int unsigned) math_ceil2( - (double)phantom->composition.viewport.plane1.height * (double)phantom_stream->timing.v_active / (double)main_stream->timing.v_active, 16.0); + phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane0.height); + phantom->composition.viewport.plane1.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane1.height); phantom->immediate_flip = false; phantom->dynamic_meta_data.enable = false; phantom->cursor.num_cursors = 0; @@ -344,6 +271,8 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in // Check if FAMS2 is required if (display_cfg->stage3.performed && display_cfg->stage3.success) { programming->fams2_required = display_cfg->stage3.fams2_required; + + dml2_core_calcs_get_global_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, &programming->fams2_global_config); } // Only loop over all the main streams (the implicit svp streams will be packed as part of the main stream) @@ -621,7 +550,7 @@ bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out l->mode_programming_ex_params.min_clk_table = in_out->instance->minimum_clock_table; l->mode_programming_ex_params.cfg_support_info = in_out->cfg_support_info; l->mode_programming_ex_params.programming = in_out->programming; - l->mode_programming_ex_params.min_clk_index = lookup_uclk_dpm_index_by_freq(in_out->programming->min_clocks.dcn4.active.uclk_khz, + l->mode_programming_ex_params.min_clk_index = lookup_uclk_dpm_index_by_freq(in_out->programming->min_clocks.dcn4x.active.uclk_khz, &core->clean_me_up.mode_lib.soc); result = dml2_core_calcs_mode_programming_ex(&l->mode_programming_ex_params); @@ -641,20 +570,20 @@ bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out for (plane_index = 0; plane_index < in_out->programming->display_config.num_planes; plane_index++) { in_out->programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index]; - if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; - else if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; - else if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; - else { - if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vactive; - else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vblank; - else - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; - } + if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + else { + if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vactive; + else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vblank; + else + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; + } dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h index 235280c6dcf5..e62b2d3eeee6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_DCN4_H__ #define __DML2_CORE_DCN4_H__ bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 6f4026e396e0..805fd783131f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -8,35 +8,55 @@ #include "dml2_debug.h" #include "lib_float_math.h" #include "dml_top_types.h" -#include "dml2_core_shared.h" -#define DML_VM_PTE_ADL_PATCH_EN -//#define DML_TVM_UPDATE_EN -#define DML_TDLUT_ROW_BYTES_FIX_EN -#define DML_REG_LIMIT_CLAMP_EN #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4 -static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); + default: + return("dml2_core_internal_bw_unknown"); + } +} + +const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); + case (dml2_core_internal_soc_state_svp_prefetch): + return("dml2_core_internal_soc_state_svp_prefetch"); + case dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); + } +} + +static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder) +{ + *remainder = ((dividend / divisor) - (int)(dividend / divisor) > 0); + return dividend / divisor; +} + +static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) { dml2_printf("DML: ===================================== \n"); dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); - if (!fail_only || support->ImmediateFlipSupport == 0) - dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); - if (!fail_only || support->WritebackLatencySupport == 0) - dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); if (!fail_only || support->ScaleRatioAndTapsSupport == 0) dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); - if (!fail_only || support->P2IWith420 == 1) - dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); - if (!fail_only || support->DSCOnlyIfNecessaryWithBPP == 1) - dml2_printf("DML: support: DSCOnlyIfNecessaryWithBPP = %d\n", support->DSCOnlyIfNecessaryWithBPP); - if (!fail_only || support->DSC422NativeNotSupported == 1) - dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); - if (!fail_only || support->DSCSlicesODMModeSupported == 0) - dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + if (!fail_only || support->ViewportSizeSupport == 0) + dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) @@ -45,74 +65,87 @@ static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mod dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); if (!fail_only || support->MultistreamWithHDMIOreDP == 1) dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->ExceededMultistreamSlots == 1) + dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); if (!fail_only || support->NotEnoughLanesForMSO == 1) dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); - if (!fail_only || support->NumberOfOTGSupport == 0) - dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); - if (!fail_only || support->NumberOfHDMIFRLSupport == 0) - dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); - if (!fail_only || support->NumberOfDP2p0Support == 0) - dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); - if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); - if (!fail_only || support->CursorSupport == 0) - dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); - if (!fail_only || support->PitchSupport == 0) - dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); - if (!fail_only || support->ViewportExceedsSurface == 1) - dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); - if (!fail_only || support->ExceededMALLSize == 1) - dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); - if (!fail_only || support->EnoughWritebackUnits == 0) - dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); - if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) - dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); - if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); - if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); - if (!fail_only || support->ExceededMultistreamSlots == 1) - dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + if (!fail_only || support->P2IWith420 == 1) + dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + if (!fail_only || support->DSC422NativeNotSupported == 1) + dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + if (!fail_only || support->DSCSlicesODMModeSupported == 0) + dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); if (!fail_only || support->NotEnoughDSCUnits == 1) dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); if (!fail_only || support->NotEnoughDSCSlices == 1) dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); - if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) - dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); - if (!fail_only || support->LinkCapacitySupport == 0) - dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); if (!fail_only || support->ROBSupport == 0) dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); if (!fail_only || support->OutstandingRequestsSupport == 0) dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); - if (!fail_only || support->PTEBufferSizeNotExceeded == 0) - dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); - if (!fail_only || support->AvgBandwidthSupport == 0) - dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); - if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) - dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); - if (!fail_only || support->PrefetchSupported == 0) - dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); - if (!fail_only || support->DynamicMetadataSupported == 0) - dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); - if (!fail_only || support->VRatioInPrefetchSupported == 0) - dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); if (!fail_only || support->TotalAvailablePipesSupport == 0) dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->NumberOfOTGSupport == 0) + dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + if (!fail_only || support->EnoughWritebackUnits == 0) + dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + if (!fail_only || support->CursorSupport == 0) + dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + if (!fail_only || support->PrefetchSupported == 0) + dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->AvgBandwidthSupport == 0) + dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + if (!fail_only || support->DynamicMetadataSupported == 0) + dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + if (!fail_only || support->VRatioInPrefetchSupported == 0) + dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + if (!fail_only || support->PTEBufferSizeNotExceeded == 1) + dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1) + dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + if (!fail_only || support->ExceededMALLSize == 1) + dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + if (!fail_only || support->g6_temp_read_support == 0) + dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + if (!fail_only || support->ImmediateFlipSupport == 0) + dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + if (!fail_only || support->LinkCapacitySupport == 0) + dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + if (!fail_only || support->ModeSupport == 0) dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); - if (!fail_only || support->ViewportSizeSupport == 0) - dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); dml2_printf("DML: ===================================== \n"); } @@ -235,6 +268,7 @@ dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStart dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix); dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix); dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix); +dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines); dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY); dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC); dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte); @@ -480,7 +514,7 @@ static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode default: DML2_ASSERT(0); return 256; - }; + } } static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan) @@ -2343,16 +2377,16 @@ static void calculate_mcache_row_bytes( } if (p->gpuvm_enable) { - meta_per_mvmpg_per_channel = (float)vmpg_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans; //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic if (p->surf_vert && vmpg_bytes > blk_bytes) { - meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans; } *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom } else { - meta_per_mvmpg_per_channel = (float) blk_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans; if (!p->surf_vert) *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0; @@ -2851,16 +2885,9 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels); for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->display_cfg->hostvm_enable == true) { + if (p->display_cfg->gpuvm_enable == true) { p->vm_group_bytes[k] = 512; p->dpte_group_bytes[k] = 512; - } else if (p->display_cfg->gpuvm_enable == true) { - p->vm_group_bytes[k] = 2048; - if (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes >= 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) { - p->dpte_group_bytes[k] = 512; - } else { - p->dpte_group_bytes[k] = 2048; - } } else { p->vm_group_bytes[k] = 0; p->dpte_group_bytes[k] = 0; @@ -3185,7 +3212,7 @@ static double CalculateUrgentLatency( double fabric_max_transport_latency_margin) { double urgent_latency = 0; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0) + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0); @@ -3196,7 +3223,7 @@ static double CalculateUrgentLatency( } } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); @@ -3226,7 +3253,7 @@ static double CalculateTripToMemory( double fabric_max_transport_latency_margin) { double trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); @@ -3235,7 +3262,7 @@ static double CalculateTripToMemory( } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); @@ -3265,7 +3292,7 @@ static double CalculateMetaTripToMemory( double fabric_max_transport_latency_margin) { double meta_trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); } else { @@ -3273,7 +3300,7 @@ static double CalculateMetaTripToMemory( } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); @@ -3781,8 +3808,8 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; - p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; - p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; } if (p->SwathHeightC[k] == 0) @@ -3841,7 +3868,7 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64; if (*p->UnboundedRequestEnabled) { *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b, - (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / 64)), 1.0); + (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); dml2_printf("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); @@ -3852,21 +3879,20 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch #endif *p->hw_debug5 = false; - if (!p->mrq_present) { - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (!(*p->UnboundedRequestEnabled) - && p->display_cfg->plane_descriptors[k].surface.dcc.enable - && ((p->rob_buffer_size_kbytes * 1024 + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) - *p->hw_debug5 = true; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1) + && p->display_cfg->plane_descriptors[k].surface.dcc.enable + && ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1) + + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) + *p->hw_debug5 = true; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); - dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); - dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); - dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); - dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); - dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); + dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); + dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); + dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); + dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); + dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); #endif - } } } @@ -4559,15 +4585,6 @@ static void calculate_tdlut_setting( return; } - - if (!p->setup_for_tdlut) { - *p->tdlut_groups_per_2row_ub = 0; - *p->tdlut_opt_time = 0; - *p->tdlut_drain_time = 0; - *p->tdlut_bytes_per_group = 0; - return; - } - if (p->tdlut_mpc_width_flag) { tdlut_mpc_width = 33; tdlut_bytes_per_group_simple = 39*256; @@ -4616,7 +4633,7 @@ static void calculate_tdlut_setting( *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; //the delivery cycles is DispClk cycles per line * number of lines * number of slices tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; - tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / 9.0; + tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); } else { //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); @@ -4627,7 +4644,7 @@ static void calculate_tdlut_setting( //the tdlut is fetched during the 2 row times of prefetch. if (p->setup_for_tdlut) { - *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2(*p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); + *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; } @@ -4640,7 +4657,7 @@ static void calculate_tdlut_setting( dml2_printf("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); dml2_printf("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); - dml2_printf("DML::%s: tdlut_addressing_mode = %u\n", __func__, p->tdlut_addressing_mode); + dml2_printf("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear"); dml2_printf("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); dml2_printf("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); dml2_printf("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); @@ -4706,11 +4723,12 @@ static void CalculateTarb( static double CalculateTWait( long reserved_vblank_time_ns, double UrgentLatency, - double Ttrip) + double Ttrip, + double g6_temp_read_blackout_us) { double TWait; double t_urg_trip = math_max2(UrgentLatency, Ttrip); - TWait = reserved_vblank_time_ns/1000.0 + t_urg_trip; + TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: reserved_vblank_time_ns = %d\n", __func__, reserved_vblank_time_ns); @@ -4858,13 +4876,23 @@ static double get_urgent_bandwidth_required( } if (!exclude_this_plane) { - surface_required_bw[k] = math_max4(NumberOfDPP[k] * prefetch_vmrow_bw[k], - l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur, - l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre, - (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]); + l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k]; + l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur; + l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; + l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]; + surface_required_bw[k] = math_max4(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw); /* export peak required bandwidth for the surface */ surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); + dml2_printf("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); + dml2_printf("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); + dml2_printf("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); + dml2_printf("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); + dml2_printf("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); +#endif } else { surface_required_bw[k] = 0.0; } @@ -4873,6 +4901,8 @@ static double get_urgent_bandwidth_required( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); + dml2_printf("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); + dml2_printf("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); dml2_printf("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); dml2_printf("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); dml2_printf("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); @@ -4886,6 +4916,8 @@ static double get_urgent_bandwidth_required( dml2_printf("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); dml2_printf("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); dml2_printf("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]); + dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]); dml2_printf("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); dml2_printf("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); @@ -4964,7 +4996,7 @@ static void CalculateExtraLatency( max_request_size_bytes = request_size_bytes_chroma[k]; } - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK; *ExtraLatency = *ExtraLatency_sr; if (max_oustanding_when_urgent_expected) @@ -4980,11 +5012,14 @@ static void CalculateExtraLatency( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type); + dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); + dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips); dml2_printf("DML::%s: max_oustanding_when_urgent_expected=%u\n", __func__, max_oustanding_when_urgent_expected); dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock); dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb); dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); @@ -5037,7 +5072,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->bytes_pp = 0.0; s->dep_bytes = 0.0; s->min_Lsw_oto = 0.0; + s->min_Lsw_equ = 0.0; s->Tsw_est1 = 0.0; + s->Tsw_est2 = 0.0; s->Tsw_est3 = 0.0; s->cursor_prefetch_bytes = 0; *p->prefetch_cursor_bw = 0; @@ -5059,7 +5096,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); dml2_printf("DML::%s: VStartup = %u\n", __func__, p->VStartup); - dml2_printf("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup); dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait); @@ -5092,21 +5128,15 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock; s->trip_to_mem = p->Ttrip; -#ifdef DML_TVM_UPDATE_EN *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg); if (dcc_mrq_enable) *p->Tvm_trips_flip = *p->Tvm_trips; else *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; -#else - *p->Tvm_trips = p->ExtraLatencyPrefetch + s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)); - *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; -#endif *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1); *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2); -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataVMEnabled == true) { *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; @@ -5114,15 +5144,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->Tdmdl_vm = 0; *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex } -#else - if (p->DynamicMetadataVMEnabled == true) { - *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; - *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; - } else { - *p->Tdmdl_vm = 0; - *p->Tdmdl = p->TWait + p->ExtraLatencyPrefetch; // Tex - } -#endif if (p->DynamicMetadataEnable == true) { if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { @@ -5186,7 +5207,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); #endif - s->NoTimeToPrefetch = false; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); @@ -5199,14 +5219,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; } else { -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut) s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0); else - s->Tvm_trips_rounded = s->LineTime / 4.0; -#else - s->Tvm_trips_rounded = s->LineTime / 4.0; -#endif + s->Tvm_trips_rounded = s->LineTime / 4.0; *p->Tvm_trips_flip_rounded = s->LineTime / 4.0; } @@ -5235,16 +5251,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->Tno_bw = 0; } -#ifdef DML_TVM_UPDATE_EN if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3) *p->Tno_bw_flip = *p->Tno_bw; else *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip -#else - *p->Tno_bw_flip = 0; - if (p->display_cfg->gpuvm_enable == true) - *p->Tno_bw_flip = *p->Tno_bw; -#endif if (dml_is_420(p->myPipe->SourcePixelFormat)) { s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0; @@ -5258,32 +5268,28 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime); s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; -#ifdef DML_TDLUT_ROW_BYTES_FIX_EN s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor; s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; -#endif s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw); s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__; s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0); s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime); + s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__; + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0); + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime); + vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes; extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128); if (p->setup_for_tdlut) vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0); -#ifdef DML_TDLUT_ROW_BYTES_FIX_EN tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0); -#else - tdlut_row_bytes = p->tdlut_pte_bytes_per_frame; -#endif -#ifdef DML_REG_LIMIT_CLAMP_EN s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto, p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); -#endif s->Lsw_oto = math_ceil2(4.0 * math_max2(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0; if (p->display_cfg->gpuvm_enable == true) { @@ -5297,11 +5303,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); #endif } else { -#ifdef DML_TVM_UPDATE_EN s->Tvm_oto = s->Tvm_trips_rounded; -#else - s->Tvm_oto = s->LineTime / 4.0; -#endif } if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) { @@ -5325,19 +5327,16 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal); //Tpre_equ in line time -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable) s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo; else s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo; -#else - s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(s->TWait_p + p->TCalc, *p->Tdmdl - p->Ttrip)) / s->LineTime - Lo; -#endif s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); @@ -5375,6 +5374,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; +#ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); dml2_printf("DML::%s: LineTime: %f\n", __func__, s->LineTime); dml2_printf("DML::%s: VStartup: %u\n", __func__, p->VStartup); @@ -5395,18 +5395,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); dml2_printf("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); dml2_printf("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); - - s->dep_bytes = math_max2(vm_bytes * p->HostVMInefficiencyFactor, p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); - - dml2_printf("DML::%s: dep_bytes: %f\n", __func__, s->dep_bytes); - dml2_printf("DML::%s: prefetch_sw_bytes: %f\n", __func__, s->prefetch_sw_bytes); dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); - - if (s->prefetch_sw_bytes < s->dep_bytes) { - s->prefetch_sw_bytes = 2 * s->dep_bytes; - dml2_printf("DML::%s: bump prefetch_sw_bytes to %f\n", __func__, s->prefetch_sw_bytes); - } + dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, s->Tpre_rounded, (s->Tpre_rounded - Tpre)); + dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); +#endif *p->dst_y_per_vm_vblank = 0; *p->dst_y_per_row_vblank = 0; @@ -5419,7 +5413,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time // So that means prefetch bw calculated can be higher since the total time availabe for prefetch is less - if (s->dst_y_prefetch_equ > 1) { + bool min_Lsw_equ_ok = s->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime; + + if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok) { s->prefetch_bw1 = 0.; s->prefetch_bw2 = 0.; s->prefetch_bw3 = 0.; @@ -5436,28 +5432,35 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw1 = 0; dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); - if ((p->VStartup == p->MaxVStartup) && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { + if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / - (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); + (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, s->Tpre_rounded); - dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_oto * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); - dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); + dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); + dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); #endif } // prefetch_bw2: VM + SW - if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0) + if (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) { s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) / - (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded); - else + (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded); + s->Tsw_est2 = s->prefetch_sw_bytes / s->prefetch_bw2; + } else s->prefetch_bw2 = 0; + dml2_printf("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); + if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) { + s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime); + dml2_printf("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); + } + // prefetch_bw3: 2*R0 + SW if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) { s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + s->prefetch_sw_bytes) / @@ -5467,8 +5470,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw3 = 0; dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); - if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && ((s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { - s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); + if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { + s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3); } @@ -5484,6 +5487,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips)); dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); + dml2_printf("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2); dml2_printf("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); dml2_printf("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1); dml2_printf("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2); @@ -5504,9 +5508,18 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // here is to make sure equ bw wont be more agressive than the latency-based requirement. // check vm time >= vm_trips // check r0 time >= r0_trips + + double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); + + dml2_printf("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded); + dml2_printf("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded); + if (s->prefetch_bw1 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1 >= s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw1 >= s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1; + double row_transfer_time = total_row_bytes / s->prefetch_bw1; + dml2_printf("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case1OK = true; } } @@ -5516,8 +5529,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // check vm time >= vm_trips // check r0 time < r0_trips if (s->prefetch_bw2 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2 >= s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw2 < s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2; + double row_transfer_time = total_row_bytes / s->prefetch_bw2; + dml2_printf("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) { Case2OK = true; } } @@ -5526,8 +5542,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // check vm time < vm_trips // check r0 time >= r0_trips if (s->prefetch_bw3 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3 < s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw3 >= s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3; + double row_transfer_time = total_row_bytes / s->prefetch_bw3; + dml2_printf("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case3OK = true; } } @@ -5542,11 +5561,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw_equ = s->prefetch_bw4; } -#ifdef DML_REG_LIMIT_CLAMP_EN s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ, p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); -#endif #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Case1OK: %u\n", __func__, Case1OK); dml2_printf("DML::%s: Case2OK: %u\n", __func__, Case2OK); @@ -5595,13 +5612,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->TimeForFetchingVM = s->Tvm_equ; s->TimeForFetchingRowInVBlank = s->Tr0_equ; - if (p->VStartup == p->MaxVStartup) { - *p->dst_y_per_vm_vblank = math_floor2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_floor2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - } else { - *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - } + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__); #endif @@ -5645,7 +5658,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); + dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); *p->VRatioPrefetchY = 0; } #ifdef __DML_VBA_DEBUG__ @@ -5668,7 +5681,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); + dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); *p->VRatioPrefetchC = 0; } #ifdef __DML_VBA_DEBUG__ @@ -5690,14 +5703,13 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch #endif } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); - dml2_printf("DML::%s: MyErr set, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); + dml2_printf("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); + dml2_printf("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); *p->VRatioPrefetchY = 0; *p->VRatioPrefetchC = 0; *p->RequiredPrefetchPixelDataBWLuma = 0; *p->RequiredPrefetchPixelDataBWChroma = 0; } - dml2_printf("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); dml2_printf("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); dml2_printf("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); @@ -5708,7 +5720,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); } else { - dml2_printf("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + dml2_printf("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", + __func__, min_Lsw_equ_ok, s->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); s->NoTimeToPrefetch = true; s->TimeForFetchingVM = 0; s->TimeForFetchingRowInVBlank = 0; @@ -5740,7 +5754,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else { prefetch_vm_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); + dml2_printf("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); } if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { @@ -5756,7 +5770,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else { prefetch_row_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); } *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw); @@ -5773,11 +5787,16 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->VRatioPrefetchC = 0; *p->RequiredPrefetchPixelDataBWLuma = 0; *p->RequiredPrefetchPixelDataBWChroma = 0; + *p->prefetch_vmrow_bw = 0; } dml2_printf("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); dml2_printf("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); + dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); dml2_printf("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); + return s->NoTimeToPrefetch; } @@ -6169,6 +6188,7 @@ static void CalculateFlipSchedule( unsigned int dpte_row_height_chroma, bool use_one_row_for_frame_flip, unsigned int max_flip_time_us, + unsigned int max_flip_time_lines, unsigned int per_pipe_flip_bytes, unsigned int meta_row_bytes, unsigned int meta_row_height, @@ -6183,12 +6203,13 @@ static void CalculateFlipSchedule( { struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals; - l->dual_plane = dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; + l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; l->dpte_row_bytes = DPTEBytesPerRow; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); dml2_printf("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); + dml2_printf("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines); dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); dml2_printf("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); @@ -6239,7 +6260,8 @@ static void CalculateFlipSchedule( if (use_lb_flip_bw) { // For mode check, calculation the flip bw requirement with worst case flip time - l->max_flip_time = math_min2(l->min_row_time, math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us)); + l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio), + math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us)); //The lower bound on flip bandwidth // Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required @@ -6257,7 +6279,7 @@ static void CalculateFlipSchedule( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); - dml2_printf("DML::%s: total row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_row_bytes); + dml2_printf("DML::%s: total row bytes (%d row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes); dml2_printf("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); dml2_printf("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); dml2_printf("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded)); @@ -6268,6 +6290,7 @@ static void CalculateFlipSchedule( dml2_printf("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); dml2_printf("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); dml2_printf("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); + dml2_printf("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); } #endif l->lb_flip_bw = math_max3(l->lb_flip_bw, @@ -6284,7 +6307,7 @@ static void CalculateFlipSchedule( *dst_y_per_vm_flip = 1; // not used *dst_y_per_row_flip = 1; // not used - *ImmediateFlipSupportedForPipe = true; + *ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded); } else { if (iflip_enable) { l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i) @@ -6350,6 +6373,7 @@ static void CalculateFlipSchedule( dml2_printf("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); dml2_printf("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); dml2_printf("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); + dml2_printf("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); } dml2_printf("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); dml2_printf("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); @@ -6380,6 +6404,12 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) { + p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + } p->Watermark->g6_temp_read_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; #ifdef __DML_VBA_DEBUG__ @@ -6541,7 +6571,8 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_unsupported; if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) { - if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) + if (p->display_cfg->overrides.all_streams_blanked || + (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)) p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank_and_vactive; else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0) p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive; @@ -6585,13 +6616,13 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]); s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k]; - if (dml2_core_shared_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) + if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c)); else p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, p->meta_row_height_c[k]); + dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]); dml2_printf("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); dml2_printf("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); dml2_printf("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); @@ -6856,7 +6887,8 @@ struct dml2_core_internal_g6_temp_read_blackouts_table { } entries[DML_MAX_CLK_TABLE_SIZE]; }; -struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = { +static const struct dml2_core_internal_g6_temp_read_blackouts_table + core_dcn4_g6_temp_read_blackout_table = { .entries = { { .uclk_khz = 96000, @@ -6921,6 +6953,43 @@ static double get_g6_temp_read_blackout_us( return (double)blackout_us; } +static double get_max_urgent_latency_us( + struct dml2_dcn4x_soc_qos_params *dcn4x, + double uclk_freq_mhz, + double FabricClock, + unsigned int min_clk_index) +{ + double latency; + latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz + * (1 + dcn4x->umc_max_latency_margin / 100.0) + + dcn4x->mall_overhead_fclk_cycles / FabricClock + + dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock + * (1 + dcn4x->fabric_max_transport_latency_margin / 100.0); + return latency; +} + +static void calculate_pstate_keepout_dst_lines( + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_watermarks *watermarks, + unsigned int pstate_keepout_dst_lines[]) +{ + const struct dml2_stream_parameters *stream_descriptor; + unsigned int i; + + for (i = 0; i < display_cfg->num_planes; i++) { + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[i])) { + stream_descriptor = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[i].stream_index]; + + pstate_keepout_dst_lines[i] = + (unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / ((double)stream_descriptor->timing.h_total * 1000.0 / (double)stream_descriptor->timing.pixel_clock_khz)); + + if (pstate_keepout_dst_lines[i] > stream_descriptor->timing.v_total - 1) { + pstate_keepout_dst_lines[i] = stream_descriptor->timing.v_total - 1; + } + } + } +} + static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params) { struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; @@ -6963,7 +7032,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config); mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000); mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000); - mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); + mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table); #if defined(__DML_VBA_DEBUG__) @@ -6981,7 +7050,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); dml2_printf("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); dml2_printf("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); - dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); dml2_printf("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); dml2_printf("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); @@ -7126,7 +7194,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.WritebackLatencySupport = true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && - (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024.0 / mode_lib->soc.qos_parameters.writeback.base_latency_us)) { + (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->soc.qos_parameters.writeback.base_latency_us)) { mode_lib->ms.support.WritebackLatencySupport = false; } } @@ -7202,17 +7270,17 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out #if defined(DV_BUILD) // Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming. if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { - lb_buffer_size_bits_luma = 34620 * 57;; + lb_buffer_size_bits_luma = 34620 * 57; lb_buffer_size_bits_chroma = 13560 * 57; } #endif */ - mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ / + mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 / (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0)); if (mode_lib->ms.BytePerPixelC[k] == 0.0) { mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0; } else { - mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ / + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 / (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0)); } @@ -7231,10 +7299,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out /* Cursor Support Check */ mode_lib->ms.support.CursorSupport = true; for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].cursor.cursor_width > 0.0) { - if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false) { + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false) mode_lib->ms.support.CursorSupport = false; - } } } @@ -7295,7 +7362,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.ViewportExceedsSurface = false; if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) { for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || + display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { mode_lib->ms.support.ViewportExceedsSurface = true; #if defined(__DML_VBA_DEBUG__) dml2_printf("DML::%s: k=%u ViewportWidth = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); @@ -7304,11 +7372,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: k=%u SurfaceHeightY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); dml2_printf("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); #endif - if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { - if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || - display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { - mode_lib->ms.support.ViewportExceedsSurface = true; - } + } + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || + display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { + mode_lib->ms.support.ViewportExceedsSurface = true; } } } @@ -7466,6 +7534,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out &mode_lib->ms.OutputRate[k], &mode_lib->ms.RequiredSlots[k]); + if (s->OutputBpp[k] == 0.0) { + s->OutputBpp[k] = mode_lib->ms.OutputBpp[k]; + } + if (mode_lib->ms.RequiresDSC[k] == false) { mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC; mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC; @@ -7580,7 +7652,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_width, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, mode_lib->ip.writeback_line_buffer_buffer_size)); } @@ -7665,8 +7737,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true) mode_lib->ms.support.P2IWith420 = true; - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary && s->OutputBpp[k] != 0) - mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true; if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support) mode_lib->ms.support.DSC422NativeNotSupported = true; @@ -7819,7 +7889,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k], mode_lib->ms.ODMMode[k], mode_lib->ip.maximum_dsc_bits_per_component, - mode_lib->ms.OutputBpp[k], + s->OutputBpp[k], display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, mode_lib->ms.support.NumberOfDSCSlices[k], @@ -8059,59 +8129,63 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.excess_vactive_fill_bw_c); mode_lib->ms.UrgLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->ms.FabricClock, mode_lib->ms.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->ms.TripToMemory = CalculateTripToMemory( mode_lib->ms.UrgLatency, mode_lib->ms.FabricClock, mode_lib->ms.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory); for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); bool cursor_not_enough_urgent_latency_hiding = 0; - calculate_cursor_req_attributes( - display_cfg->plane_descriptors[k].cursor.cursor_width, - display_cfg->plane_descriptors[k].cursor.cursor_bpp, - // output - &s->cursor_lines_per_chunk[k], - &s->cursor_bytes_per_line[k], - &s->cursor_bytes_per_chunk[k], - &s->cursor_bytes[k]); + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + calculate_cursor_req_attributes( + display_cfg->plane_descriptors[k].cursor.cursor_width, + display_cfg->plane_descriptors[k].cursor.cursor_bpp, - calculate_cursor_urgent_burst_factor( - mode_lib->ip.cursor_buffer_size, - display_cfg->plane_descriptors[k].cursor.cursor_width, - s->cursor_bytes_per_chunk[k], - s->cursor_lines_per_chunk[k], - line_time_us, - mode_lib->ms.UrgLatency, + // output + &s->cursor_lines_per_chunk[k], + &s->cursor_bytes_per_line[k], + &s->cursor_bytes_per_chunk[k], + &s->cursor_bytes[k]); + + calculate_cursor_urgent_burst_factor( + mode_lib->ip.cursor_buffer_size, + display_cfg->plane_descriptors[k].cursor.cursor_width, + s->cursor_bytes_per_chunk[k], + s->cursor_lines_per_chunk[k], + line_time_us, + mode_lib->ms.UrgLatency, + + // output + &mode_lib->ms.UrgentBurstFactorCursor[k], + &cursor_not_enough_urgent_latency_hiding); + } - // output - &mode_lib->ms.UrgentBurstFactorCursor[k], - &cursor_not_enough_urgent_latency_hiding); mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k]; #ifdef __DML_VBA_DEBUG__ @@ -8254,20 +8328,20 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true; mode_lib->ms.support.avg_urgent_latency_us - = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); mode_lib->ms.support.avg_non_urgent_latency_us - = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k] / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); @@ -8287,7 +8361,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out #endif } - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4 && mode_lib->ms.BytePerPixelC[k] > 0) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) { outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k] / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); @@ -8460,7 +8534,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out { mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep; - calculate_hostvm_inefficiency_factor( &s->HostVMInefficiencyFactor, &s->HostVMInefficiencyFactorPrefetch, @@ -8501,10 +8574,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); + CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->ms.DCFCLK, mode_lib->ms.FabricClock, @@ -8540,7 +8618,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.TWait[k] = CalculateTWait( display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, mode_lib->ms.UrgLatency, - mode_lib->ms.TripToMemory); + mode_lib->ms.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? + get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; @@ -8587,7 +8667,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; - CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[k]; CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; @@ -8669,8 +8748,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); dml2_printf("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); dml2_printf("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); - dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); dml2_printf("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); + dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); } } @@ -8683,20 +8762,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.VRatioInPrefetchSupported = true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || - mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__) { + if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { mode_lib->ms.support.VRatioInPrefetchSupported = false; + dml2_printf("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); dml2_printf("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); } } - s->AnyLinesForVMOrRowTooLarge = false; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.LinesForDPTERow[k] >= 16 || mode_lib->ms.LinesForVM[k] >= 32) { - s->AnyLinesForVMOrRowTooLarge = true; - } - } - // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok if (mode_lib->ms.support.PrefetchSupported) { for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { @@ -8845,6 +8919,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.dpte_row_height_chroma[k], mode_lib->ms.use_one_row_for_frame_flip[k], mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, s->per_pipe_flip_bytes[k], mode_lib->ms.meta_row_bytes[k], s->meta_row_height_luma[k], @@ -8932,6 +9007,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out s->mSOCParameters.USRRetrainingLatency = 0; s->mSOCParameters.SMNLatency = 0; s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index); + s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); + s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; CalculateWatermarks_params->display_cfg = display_cfg; CalculateWatermarks_params->USRRetrainingRequired = false; @@ -8951,7 +9029,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; - //CalculateWatermarks_params->LBBitPerPixel = 57; // FIXME_STAGE2, need a new ip param? CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; @@ -8979,29 +9056,24 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs; CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); - } + calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); + } + dml2_printf("DML::%s: Done prefetch calculation\n", __func__); // End of Prefetch Check - dml2_printf("DML::%s: Done prefetch calculation\n", __func__); + mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us; //Re-ordering Buffer Support Check - mode_lib->ms.support.max_urgent_latency_us - = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); - - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 - / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= mode_lib->ms.support.max_urgent_latency_us) { + / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) { mode_lib->ms.support.ROBSupport = true; } else { mode_lib->ms.support.ROBSupport = false; } } else { - if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { + if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { mode_lib->ms.support.ROBSupport = true; } else { mode_lib->ms.support.ROBSupport = false; @@ -9024,15 +9096,12 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.dram_change_vactive_det_fill_delay_us); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.max_urgent_latency_us); + dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us); dml2_printf("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport); #endif /*Mode Support, Voltage State and SOC Configuration*/ { - // s->dram_clock_change_support = 1; - // s->f_clock_change_support = 1; - if (mode_lib->ms.support.ScaleRatioAndTapsSupport && mode_lib->ms.support.SourceFormatPixelAndScanSupport && mode_lib->ms.support.ViewportSizeSupport @@ -9043,9 +9112,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out && !mode_lib->ms.support.ExceededMultistreamSlots && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink && !mode_lib->ms.support.NotEnoughLanesForMSO - //&& mode_lib->ms.support.LinkCapacitySupport == true // FIXME_STAGE2 && !mode_lib->ms.support.P2IWith420 - && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP && !mode_lib->ms.support.DSC422NativeNotSupported && mode_lib->ms.support.DSCSlicesODMModeSupported && !mode_lib->ms.support.NotEnoughDSCUnits @@ -9113,7 +9180,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out #if defined(__DML_VBA_DEBUG__) if (!mode_lib->ms.support.ModeSupport) - dml2_print_dml_mode_support_info(&mode_lib->ms.support, true); + dml2_print_mode_support_info(&mode_lib->ms.support, true); dml2_printf("DML::%s: --- DONE --- \n", __func__); #endif @@ -9132,6 +9199,10 @@ unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support; dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); + + for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++) + dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); return result; @@ -9373,11 +9444,9 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE } else { dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0)); } -#ifdef DML_VM_PTE_ADL_PATCH_EN if (dpte_groups_per_row_luma_ub <= 2) { dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; } -#endif dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); @@ -9406,11 +9475,9 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE } else { dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0)); } -#ifdef DML_VM_PTE_ADL_PATCH_EN if (dpte_groups_per_row_chroma_ub <= 2) { dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; } -#endif dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); @@ -9535,17 +9602,16 @@ static void CalculateVMGroupAndRequestTimes( line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz; -#ifdef DML_VM_PTE_ADL_PATCH_EN - if (num_group_per_lower_vm_stage_flip <= 2) { - num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage_flip + 1; - } + if (num_group_per_lower_vm_stage_pref > 0) + TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; + else + TimePerVMGroupVBlank[k] = 0; + + if (num_group_per_lower_vm_stage_flip > 0) + TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; + else + TimePerVMGroupFlip[k] = 0; - if (num_group_per_lower_vm_stage_pref <= 2) { - num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage_pref + 1; - } -#endif - TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; - TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; if (num_req_per_lower_vm_stage_pref > 0) TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref; else @@ -9599,10 +9665,6 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc bool FoundCriticalSurface = false; double LastZ8StutterPeriod = 0; - unsigned int SwathSizeCriticalSurface; - unsigned int LastChunkOfSwathSize; - unsigned int MissingPartOfLastSwathOfDETSize; - memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals)); for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { @@ -9777,7 +9839,7 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) - / math_max2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + + / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)); @@ -9871,19 +9933,11 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); #endif - SwathSizeCriticalSurface = (unsigned int)(l->BytePerPixelYCriticalSurface * l->SwathHeightYCriticalSurface * math_ceil2(l->SwathWidthYCriticalSurface, l->BlockWidth256BytesYCriticalSurface)); - LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024); - MissingPartOfLastSwathOfDETSize = (unsigned int)(math_ceil2(l->DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) - l->DETBufferSizeYCriticalSurface); - - *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) && - (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); + *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface); dml2_printf("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); dml2_printf("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); - dml2_printf("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize); - dml2_printf("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize); dml2_printf("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); #endif } @@ -9928,14 +9982,14 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info); dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane); - mode_lib->mp.Dcfclk = programming->min_clocks.dcn4.active.dcfclk_khz / 1000.0; - mode_lib->mp.FabricClock = programming->min_clocks.dcn4.active.fclk_khz / 1000.0; - mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); - mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4.active.uclk_khz / 1000.0; - mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4.dpprefclk_khz / 1000.0; - s->SOCCLK = (double)programming->min_clocks.dcn4.socclk_khz / 1000; - mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); - mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table); + mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0; + mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0; + mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); + mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0; + mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0; + s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000; + mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); + mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table); for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; @@ -9970,18 +10024,18 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; - mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4.dppclk_khz / 1000.0; + mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dppclk[k] > 0); } for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; - mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4.dscclk_khz / 1000.0; + mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); } - mode_lib->mp.Dispclk = programming->min_clocks.dcn4.dispclk_khz / 1000.0; - mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4.deepsleep_dcfclk_khz / 1000.0; + mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; + mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dcfclk > 0); dml2_assert(mode_lib->mp.FabricClock > 0); @@ -10462,11 +10516,16 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); } + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); + CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, - s->ReorderBytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, + s->ReorderingBytes, mode_lib->mp.Dcfclk, mode_lib->mp.FabricClock, mode_lib->ip.pixel_chunk_size_kbytes, @@ -10551,32 +10610,32 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.excess_vactive_fill_bw_c); mode_lib->mp.UrgentLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = CalculateTripToMemory( mode_lib->mp.UrgentLatency, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory); @@ -10585,38 +10644,40 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.meta_trip_adder_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); for (k = 0; k < s->num_active_planes; ++k) { bool cursor_not_enough_urgent_latency_hiding = 0; - double line_time_us; + double line_time_us = 0.0; - calculate_cursor_req_attributes( - display_cfg->plane_descriptors[k].cursor.cursor_width, - display_cfg->plane_descriptors[k].cursor.cursor_bpp, + line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + calculate_cursor_req_attributes( + display_cfg->plane_descriptors[k].cursor.cursor_width, + display_cfg->plane_descriptors[k].cursor.cursor_bpp, - // output - &s->cursor_lines_per_chunk[k], - &s->cursor_bytes_per_line[k], - &s->cursor_bytes_per_chunk[k], - &s->cursor_bytes[k]); + // output + &s->cursor_lines_per_chunk[k], + &s->cursor_bytes_per_line[k], + &s->cursor_bytes_per_chunk[k], + &s->cursor_bytes[k]); - line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + calculate_cursor_urgent_burst_factor( + mode_lib->ip.cursor_buffer_size, + display_cfg->plane_descriptors[k].cursor.cursor_width, + s->cursor_bytes_per_chunk[k], + s->cursor_lines_per_chunk[k], + line_time_us, + mode_lib->mp.UrgentLatency, - calculate_cursor_urgent_burst_factor( - mode_lib->ip.cursor_buffer_size, - display_cfg->plane_descriptors[k].cursor.cursor_width, - s->cursor_bytes_per_chunk[k], - s->cursor_lines_per_chunk[k], - line_time_us, - mode_lib->mp.UrgentLatency, - - // output - &mode_lib->mp.UrgentBurstFactorCursor[k], - &cursor_not_enough_urgent_latency_hiding); + // output + &mode_lib->mp.UrgentBurstFactorCursor[k], + &cursor_not_enough_urgent_latency_hiding); + } mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k]; CalculateUrgentBurstFactor( @@ -10676,7 +10737,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.TWait[k] = CalculateTWait( display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, mode_lib->mp.UrgentLatency, - mode_lib->mp.TripToMemory); + mode_lib->mp.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? + get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); myPipe->Dppclk = mode_lib->mp.Dppclk[k]; myPipe->Dispclk = mode_lib->mp.Dispclk; @@ -10722,7 +10785,6 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k]; - CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k]; CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; @@ -10808,9 +10870,13 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex if (mode_lib->mp.dst_y_prefetch[k] < 2) s->DestinationLineTimesForPrefetchLessThan2 = true; - if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || - mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__) + if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { s->VRatioPrefetchMoreThanMax = true; + dml2_printf("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + } if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) { dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); @@ -10994,6 +11060,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.dpte_row_height_chroma[k], mode_lib->mp.use_one_row_for_frame_flip[k], mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, s->per_pipe_flip_bytes[k], mode_lib->mp.meta_row_bytes[k], mode_lib->mp.meta_row_height[k], @@ -11143,6 +11210,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->mmSOCParameters.USRRetrainingLatency = 0; s->mmSOCParameters.SMNLatency = 0; s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index); + s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); + s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; CalculateWatermarks_params->display_cfg = display_cfg; CalculateWatermarks_params->USRRetrainingRequired = false; @@ -11162,7 +11232,6 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY; CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC; - //CalculateWatermarks_params->LBBitPerPixel = 57; //FIXME_STAGE2 CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY; CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC; CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; @@ -11203,6 +11272,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex } } + calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines); + dml2_printf("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); dml2_printf("DML::%s: DEBUG PixelClock = %d kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); @@ -11491,9 +11562,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params) { + dml2_printf("DML::%s: ------------- START ----------\n", __func__); bool result = dml_core_mode_programming(in_out_params); - dml2_printf("DML::%s: ------------- START ----------\n", __func__); dml2_printf("DML::%s: result = %0d\n", __func__, result); dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); return result; @@ -12186,10 +12257,11 @@ void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *display_cfg, void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index) { - out->dcn4.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index); - out->dcn4.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index); - out->dcn4.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index); - out->dcn4.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index); + out->dcn4x.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index); + out->dcn4x.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index); + out->dcn4x.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index); + out->dcn4x.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index); + out->dcn4x.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index); } void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index) @@ -12197,6 +12269,25 @@ void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_disp dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index); } +void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, + const struct display_configuation_with_meta *display_cfg, + struct dmub_cmd_fams2_global_config *fams2_global_config) +{ + fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required; + + if (fams2_global_config->features.bits.enable) { + fams2_global_config->features.bits.enable_stall_recovery = true; + fams2_global_config->features.bits.allow_delay_check_mode = FAMS2_ALLOW_DELAY_CHECK_FROM_START; + + fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us; + fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us; + fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us; + fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us; + + fams2_global_config->num_streams = display_cfg->display_config.num_streams; + } +} + void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_fams2_stream_static_state *fams2_programming, @@ -12209,6 +12300,11 @@ void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_interna unsigned int i; + if (display_cfg->display_config.overrides.all_streams_blanked) { + /* stream is blanked, so do nothing */ + return; + } + /* from display configuration */ fams2_programming->htotal = (uint16_t)stream_descriptor->timing.h_total; fams2_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total; @@ -12368,6 +12464,7 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp { double phantom_processing_delay_pix; unsigned int phantom_processing_delay_lines; + unsigned int phantom_min_v_active_lines; unsigned int phantom_v_active_lines; unsigned int phantom_v_startup_lines; unsigned int phantom_v_blank_lines; @@ -12377,14 +12474,16 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) * ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000)); phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total); - dml2_core_shared_div_rem(phantom_processing_delay_pix, + dml2_core_div_rem(phantom_processing_delay_pix, display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total, &rem); if (rem) phantom_processing_delay_lines++; phantom_v_startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index); - phantom_v_active_lines = phantom_processing_delay_lines + dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) + mode_lib->ip.subvp_swath_height_margin_lines; + phantom_min_v_active_lines = (unsigned int)math_ceil((double)dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) / + display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio); + phantom_v_active_lines = phantom_processing_delay_lines + phantom_min_v_active_lines + mode_lib->ip.subvp_swath_height_margin_lines; // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank) phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1; @@ -12396,8 +12495,8 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp // phantom_vtotal = vactive + vblank out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines; - out->phantom_min_v_active = dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index); - out->phantom_v_startup = dml_get_plane_max_vstartup_lines(mode_lib, plane_index); + out->phantom_min_v_active = phantom_min_v_active_lines; + out->phantom_v_startup = phantom_v_startup_lines; out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; #if defined(__DML_VBA_DEBUG__) @@ -12418,7 +12517,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport; out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport; out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420; - out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP; + out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false; out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported; out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion; out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated; @@ -12611,7 +12710,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib); - out->min_clocks.dcn4.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib); + out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib); out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib); @@ -12724,13 +12823,13 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod } } - out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles - / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles + / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { out->informative.misc.ROBUrgencyAvoidance = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h index b280ab573fbb..df2d1550a14b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_DCN4_CALCS_H__ #define __DML2_CORE_DCN4_CALCS_H__ @@ -30,6 +29,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index); void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index); void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_fams2_stream_static_state *fams2_programming, enum dml2_uclk_pstate_support_method pstate_method, int plane_index); +void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_cmd_fams2_global_config *fams2_global_config); void dml2_core_calcs_get_dpte_row_height(unsigned int *dpte_row_height, struct dml2_core_internal_display_mode_lib *mode_lib, bool is_plane1, enum dml2_source_format_class SourcePixelFormat, enum dml2_swizzle_mode SurfaceTiling, enum dml2_rotation_angle ScanDirection, unsigned int pitch, unsigned int GPUVMMinPageSizeKBytes); void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c index f56abe9ab919..28394de02885 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_core_factory.h" #include "dml2_core_dcn4.h" #include "dml2_external_lib_deps.h" @@ -11,7 +10,7 @@ bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_core_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h index 53636a8f52aa..411c514fe65c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_FACTORY_H__ #define __DML2_CORE_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c index 81f0a6f19f87..8f3c1c0b1cc1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c @@ -779,7 +779,7 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dppclk / 1000; mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config); mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000); - mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); + mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table); #if defined(__DML_VBA_DEBUG__) @@ -1776,32 +1776,32 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou #endif mode_lib->ms.UrgLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->ms.FabricClock, mode_lib->ms.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->ms.TripToMemory = CalculateTripToMemory( mode_lib->ms.UrgLatency, mode_lib->ms.FabricClock, mode_lib->ms.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory); @@ -1995,21 +1995,21 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true; mode_lib->ms.support.avg_urgent_latency_us - = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); mode_lib->ms.support.avg_non_urgent_latency_us - = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); double outstanding_latency_us = 0; for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k] / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); @@ -2029,7 +2029,7 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou #endif } - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4 && mode_lib->ms.BytePerPixelC[k] > 0) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) { outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k] / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); @@ -2242,11 +2242,15 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou } double min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->ms.DCFCLK, mode_lib->ms.FabricClock, @@ -2713,13 +2717,13 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou //Re-ordering Buffer Support Check mode_lib->ms.support.max_urgent_latency_us - = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= mode_lib->ms.support.max_urgent_latency_us) { mode_lib->ms.support.ROBSupport = true; @@ -2727,7 +2731,7 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou mode_lib->ms.support.ROBSupport = false; } } else { - if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { + if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { mode_lib->ms.support.ROBSupport = true; } else { mode_lib->ms.support.ROBSupport = false; @@ -5050,7 +5054,7 @@ static void calculate_mcache_row_bytes( unsigned int meta_per_mvmpg_per_channel_ub = 0; if (p->gpuvm_enable) { - meta_per_mvmpg_per_channel = (float)vmpg_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans; //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic if (p->surf_vert && vmpg_bytes > blk_bytes) { @@ -5059,7 +5063,7 @@ static void calculate_mcache_row_bytes( *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom } else { - meta_per_mvmpg_per_channel = (float)blk_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)blk_bytes / (float)256 / p->num_chans; if (!p->surf_vert) *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0; @@ -5881,7 +5885,7 @@ static double CalculateUrgentLatency( double fabric_max_transport_latency_margin) { double urgent_latency = 0; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0) + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0); @@ -5892,7 +5896,7 @@ static double CalculateUrgentLatency( } } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); @@ -5922,7 +5926,7 @@ static double CalculateTripToMemory( double fabric_max_transport_latency_margin) { double trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); @@ -5931,7 +5935,7 @@ static double CalculateTripToMemory( } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); @@ -5961,7 +5965,7 @@ static double CalculateMetaTripToMemory( double fabric_max_transport_latency_margin) { double meta_trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); } else { @@ -5969,7 +5973,7 @@ static double CalculateMetaTripToMemory( } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); @@ -6460,8 +6464,8 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->SwathHeightC[k] = l->MaximumSwathHeightC[k] / 2; l->RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; l->RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; - p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; - p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; } if (p->SwathHeightC[k] == 0) @@ -7165,7 +7169,7 @@ static void calculate_tdlut_setting( *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; //the delivery cycles is DispClk cycles per line * number of lines * number of slices tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width / 2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; - tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / 9.0; + tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); } else { //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); @@ -7485,7 +7489,7 @@ static void CalculateExtraLatency( max_request_size_bytes = request_size_bytes_chroma[k]; } - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK; *ExtraLatency = *ExtraLatency_sr; if (max_oustanding_when_urgent_expected) @@ -7501,11 +7505,14 @@ static void CalculateExtraLatency( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type); + dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); + dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips); dml2_printf("DML::%s: max_oustanding_when_urgent_expected=%u\n", __func__, max_oustanding_when_urgent_expected); dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock); dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb); dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); @@ -7739,7 +7746,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime); s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; - s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor; s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw); @@ -9304,6 +9310,10 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0)); } + if (dpte_groups_per_row_luma_ub <= 2) { + dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; + } + dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); @@ -9332,6 +9342,9 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE } else { dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0)); } + if (dpte_groups_per_row_chroma_ub <= 2) { + dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; + } dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); @@ -9386,8 +9399,8 @@ static void CalculateVMGroupAndRequestTimes( double TimePerVMRequestVBlank[], double TimePerVMRequestFlip[]) { - unsigned int num_group_per_lower_vm_stage = 0; - unsigned int num_req_per_lower_vm_stage = 0; + unsigned int num_group_per_lower_vm_stage = 1; + unsigned int num_req_per_lower_vm_stage = 1; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); @@ -9451,6 +9464,14 @@ static void CalculateVMGroupAndRequestTimes( double line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz; + if (num_group_per_lower_vm_stage_flip <= 2) { + num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage_flip + 1; + } + + if (num_group_per_lower_vm_stage_pref <= 2) { + num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage_pref + 1; + } + TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref; @@ -9814,14 +9835,14 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info); dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane); - mode_lib->mp.Dcfclk = programming->min_clocks.dcn4.active.dcfclk_khz / 1000.0; - mode_lib->mp.FabricClock = programming->min_clocks.dcn4.active.fclk_khz / 1000.0; - mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); - mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4.active.uclk_khz / 1000.0; - mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4.dpprefclk_khz / 1000.0; - s->SOCCLK = (double)programming->min_clocks.dcn4.socclk_khz / 1000; - mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); - mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table); + mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0; + mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0; + mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); + mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0; + mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0; + s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000; + mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); + mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table); for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; @@ -9856,18 +9877,18 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; - mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4.dppclk_khz / 1000.0; + mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dppclk[k] > 0); } for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; - mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4.dscclk_khz / 1000.0; + mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); } - mode_lib->mp.Dispclk = programming->min_clocks.dcn4.dispclk_khz / 1000.0; - mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4.deepsleep_dcfclk_khz / 1000.0; + mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; + mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dcfclk > 0); dml2_assert(mode_lib->mp.FabricClock > 0); @@ -10388,11 +10409,16 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); } + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); + CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, - s->ReorderBytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, + s->ReorderingBytes, mode_lib->mp.Dcfclk, mode_lib->mp.FabricClock, mode_lib->ip.pixel_chunk_size_kbytes, @@ -10465,32 +10491,32 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e mode_lib->mp.WritebackDelay[k] = mode_lib->mp.WritebackDelay[j]; mode_lib->mp.UrgentLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = CalculateTripToMemory( mode_lib->mp.UrgentLatency, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory); @@ -10499,10 +10525,10 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.meta_trip_adder_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); for (k = 0; k < s->num_active_planes; ++k) { calculate_cursor_req_attributes( @@ -11945,14 +11971,14 @@ void dml2_core_shared_get_pipe_regs(const struct dml2_display_cfg *display_cfg, void dml2_core_shared_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index) { - // out->min_clocks.dcn4.dscclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); // FIXME_STAGE2 - // out->min_clocks.dcn4.dtbclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); - // out->min_clocks.dcn4.phyclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); + // out->min_clocks.dcn4x.dscclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); // FIXME_STAGE2 + // out->min_clocks.dcn4x.dtbclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); + // out->min_clocks.dcn4x.phyclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); - out->global_sync.dcn4.vready_offset_pixels = mode_lib->mp.VReadyOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; - out->global_sync.dcn4.vstartup_lines = mode_lib->mp.VStartup[mode_lib->mp.pipe_plane[pipe_index]]; - out->global_sync.dcn4.vupdate_offset_pixels = mode_lib->mp.VUpdateOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; - out->global_sync.dcn4.vupdate_vupdate_width_pixels = mode_lib->mp.VUpdateWidthPix[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vready_offset_pixels = mode_lib->mp.VReadyOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vstartup_lines = mode_lib->mp.VStartup[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vupdate_offset_pixels = mode_lib->mp.VUpdateOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vupdate_vupdate_width_pixels = mode_lib->mp.VUpdateWidthPix[mode_lib->mp.pipe_plane[pipe_index]]; } void dml2_core_shared_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx) @@ -12255,7 +12281,7 @@ void dml2_core_shared_get_informative(const struct dml2_core_internal_display_mo out->informative.misc.cstate_max_cap_mode = mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; - out->min_clocks.dcn4.dpprefclk_khz = (int unsigned)(mode_lib->mp.GlobalDPPCLK * 1000.0); + out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)(mode_lib->mp.GlobalDPPCLK * 1000.0); out->informative.qos.max_active_fclk_change_latency_supported = mode_lib->mp.MaxActiveFCLKChangeLatencySupported; @@ -12368,13 +12394,13 @@ void dml2_core_shared_get_informative(const struct dml2_core_internal_display_mo } } - out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles - / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles + / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { out->informative.misc.ROBUrgencyAvoidance = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h deleted file mode 100644 index d76bda907ec8..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - - -#ifndef __DML2_CORE_SHARED_H__ -#define __DML2_CORE_SHARED_H__ - -#define __DML_VBA_DEBUG__ -#define __DML2_CALCS_MAX_VRATIO_PRE_OTO__ 4.0 // 0); + return dividend / divisor; + +} + +const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); + default: + return("dml2_core_internal_bw_unknown"); + } +} + +bool dml2_core_utils_is_420(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 1; + break; + case dml2_420_10: + val = 1; + break; + case dml2_420_12: + val = 1; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + default: + DML2_ASSERT(0); + break; + } + return val; +} + +void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +{ + dml2_printf("DML: ===================================== \n"); + dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); + if (!fail_only || support->ScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) + dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + if (!fail_only || support->ViewportSizeSupport == 0) + dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) + dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) + dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + if (!fail_only || support->BPPForMultistreamNotIndicated == 1) + dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + if (!fail_only || support->MultistreamWithHDMIOreDP == 1) + dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->ExceededMultistreamSlots == 1) + dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) + dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + if (!fail_only || support->NotEnoughLanesForMSO == 1) + dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + if (!fail_only || support->P2IWith420 == 1) + dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + if (!fail_only || support->DSC422NativeNotSupported == 1) + dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + if (!fail_only || support->DSCSlicesODMModeSupported == 0) + dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + if (!fail_only || support->NotEnoughDSCUnits == 1) + dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + if (!fail_only || support->NotEnoughDSCSlices == 1) + dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) + dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) + dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + if (!fail_only || support->ROBSupport == 0) + dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); + if (!fail_only || support->OutstandingRequestsSupport == 0) + dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) + dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) + dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + if (!fail_only || support->TotalAvailablePipesSupport == 0) + dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->NumberOfOTGSupport == 0) + dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + if (!fail_only || support->EnoughWritebackUnits == 0) + dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + if (!fail_only || support->CursorSupport == 0) + dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + if (!fail_only || support->PrefetchSupported == 0) + dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->AvgBandwidthSupport == 0) + dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + if (!fail_only || support->DynamicMetadataSupported == 0) + dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + if (!fail_only || support->VRatioInPrefetchSupported == 0) + dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + if (!fail_only || support->PTEBufferSizeNotExceeded == 1) + dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1) + dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + if (!fail_only || support->ExceededMALLSize == 1) + dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + if (!fail_only || support->g6_temp_read_support == 0) + dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + if (!fail_only || support->ImmediateFlipSupport == 0) + dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + if (!fail_only || support->LinkCapacitySupport == 0) + dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + + if (!fail_only || support->ModeSupport == 0) + dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); + dml2_printf("DML: ===================================== \n"); +} + +const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); + case (dml2_core_internal_soc_state_svp_prefetch): + return("dml2_core_internal_soc_state_svp_prefetch"); + case dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); + } +} + + +void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) +{ + for (unsigned int k = 0; k < display_cfg->num_planes; k++) { + double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) { + switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) { + case dml2_444: + out_bpp[k] = bpc * 3; + break; + case dml2_s422: + out_bpp[k] = bpc * 2; + break; + case dml2_n422: + out_bpp[k] = bpc * 2; + break; + case dml2_420: + default: + out_bpp[k] = bpc * 1.5; + break; + } + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) { + out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16; + } else { + out_bpp[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); +#endif + } +} + +unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up) +{ + unsigned int remainder; + + if (multiple == 0) + return num; + + remainder = num % multiple; + if (remainder == 0) + return num; + + if (up) + return (num + multiple - remainder); + else + return (num - remainder); +} + +unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info) +{ + unsigned int num_active_pipes = 0; + + for (unsigned int k = 0; k < num_planes; k++) { + num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); +#endif + return num_active_pipes; +} + +void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane) +{ + unsigned int pipe_idx = 0; + + for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) { + pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__; + } + + for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) { + for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) { + pipe_plane[pipe_idx] = plane_idx; + pipe_idx++; + } + } +} + +bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg) +{ + bool is_phantom = false; + + if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe || + plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) { + is_phantom = true; + } + + return is_phantom; +} + +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) +{ + switch (sw_mode) { + case (dml2_sw_linear): + return 256; break; + case (dml2_sw_256b_2d): + return 256; break; + case (dml2_sw_4kb_2d): + return 4096; break; + case (dml2_sw_64kb_2d): + return 65536; break; + case (dml2_sw_256kb_2d): + return 262144; break; + case (dml2_gfx11_sw_linear): + return 256; break; + case (dml2_gfx11_sw_64kb_d): + return 65536; break; + case (dml2_gfx11_sw_64kb_d_t): + return 65536; break; + case (dml2_gfx11_sw_64kb_d_x): + return 65536; break; + case (dml2_gfx11_sw_64kb_r_x): + return 65536; break; + case (dml2_gfx11_sw_256kb_d_x): + return 262144; break; + case (dml2_gfx11_sw_256kb_r_x): + return 262144; break; + default: + DML2_ASSERT(0); + return 256; + }; +} + + +bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan) +{ + bool is_vert = false; + if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) { + is_vert = true; + } else { + is_vert = false; + } + return is_vert; +} + + +int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) +{ + int unsigned version = 0; + + if (sw_mode == dml2_sw_linear || + sw_mode == dml2_sw_256b_2d || + sw_mode == dml2_sw_4kb_2d || + sw_mode == dml2_sw_64kb_2d || + sw_mode == dml2_sw_256kb_2d) { + version = 12; + } else if (sw_mode == dml2_gfx11_sw_linear || + sw_mode == dml2_gfx11_sw_64kb_d || + sw_mode == dml2_gfx11_sw_64kb_d_t || + sw_mode == dml2_gfx11_sw_64kb_d_x || + sw_mode == dml2_gfx11_sw_64kb_r_x || + sw_mode == dml2_gfx11_sw_256kb_d_x || + sw_mode == dml2_gfx11_sw_256kb_r_x) { + version = 11; + } else { + dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); + DML2_ASSERT(0); + } + + return version; +} + +unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params) +{ + unsigned int i; + unsigned int index = 0; + + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + + if (i == 0) + index = 0; + else + index = i - 1; + + if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz || + per_uclk_dpm_params[i].minimum_uclk_khz == 0) { + break; + } + } +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz); + dml2_printf("DML::%s: index = %d\n", __func__, index); +#endif + return index; +} + +unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) +{ + unsigned int i; + bool clk_entry_found = 0; + + for (i = 0; i < clk_table->uclk.num_clk_values; i++) { + dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + + if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { + clk_entry_found = 1; + break; + } + } + + dml2_assert(clk_entry_found); +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + dml2_printf("DML::%s: index = %d\n", __func__, i); +#endif + return i; +} + +bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format) +{ + bool ret_val = 0; + + if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) + ret_val = 1; + + return ret_val; +} + +unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend) +{ + if (a == 0) + return 0; + + return (math_log2_approx(a) - subtrahend); +} + +static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main, + const struct dml2_implicit_svp_meta *meta) +{ + memcpy(phantom, main, sizeof(struct dml2_stream_parameters)); + + phantom->timing.v_total = meta->v_total; + phantom->timing.v_active = meta->v_active; + phantom->timing.v_front_porch = meta->v_front_porch; + phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active; + phantom->timing.drr_config.enabled = false; +} + +static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main, + const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream) +{ + memcpy(phantom, main, sizeof(struct dml2_plane_parameters)); + + phantom->stream_index = phantom_stream_index; + phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; + phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; + phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane0.height); + phantom->composition.viewport.plane1.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane1.height); + phantom->immediate_flip = false; + phantom->dynamic_meta_data.enable = false; + phantom->cursor.num_cursors = 0; + phantom->cursor.cursor_width = 0; + phantom->tdlut.setup_for_tdlut = false; +} + +void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch) +{ + unsigned int stream_index, plane_index; + const struct dml2_plane_parameters *main_plane; + const struct dml2_stream_parameters *main_stream; + const struct dml2_stream_parameters *phantom_stream; + + memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); + memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES); + + if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo) + return; + + /* disable unbounded requesting for all planes until stage 3 has been performed */ + if (!display_cfg->stage3.performed) { + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true; + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false; + } + // Create the phantom streams + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + main_stream = &display_cfg->display_config.stream_descriptors[stream_index]; + scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index; + + if (display_cfg->stage3.stream_svp_meta[stream_index].valid) { + // Create the phantom stream + create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams], + main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]); + + // Associate this phantom stream to the main stream + scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = svp_expanded_display_cfg->num_streams; + + // Increment num streams + svp_expanded_display_cfg->num_streams++; + } + } + + // Create the phantom planes + for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) { + main_plane = &display_cfg->display_config.plane_descriptors[plane_index]; + + if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) { + main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index]; + phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]]; + create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes], + main_plane, phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream); + + // Associate this phantom plane to the main plane + scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index; + scratch->main_plane_index_to_phantom_plane_index[plane_index] = svp_expanded_display_cfg->num_planes; + + // Increment num planes + svp_expanded_display_cfg->num_planes++; + + // Adjust the main plane settings + svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe; + } + } +} + +bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_dp2p0: + case dml2_edp: + case dml2_hdmi: + case dml2_hdmifrl: + return true; + case dml2_none: + default: + return false; + } +} +bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_dp2p0: + case dml2_edp: + case dml2_hdmifrl: + return true; + case dml2_hdmi: + case dml2_none: + default: + return false; + } +} + + +bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_edp: + return true; + case dml2_dp2p0: + case dml2_hdmi: + case dml2_hdmifrl: + case dml2_none: + default: + return false; + } +} + +bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp2p0: + return true; + case dml2_dp: + case dml2_edp: + case dml2_hdmi: + case dml2_hdmifrl: + case dml2_none: + default: + return false; + } +} + +bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + return dml2_core_utils_is_dio_dp_encoder(stream_descriptor) + || dml2_core_utils_is_hpo_dp_encoder(stream_descriptor); +} + + +bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate) +{ + switch (rate) { + case dml2_dp_rate_hbr: + case dml2_dp_rate_hbr2: + case dml2_dp_rate_hbr3: + return true; + case dml2_dp_rate_na: + case dml2_dp_rate_uhbr10: + case dml2_dp_rate_uhbr13p5: + case dml2_dp_rate_uhbr20: + default: + return false; + } +} + +bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate) +{ + switch (rate) { + case dml2_dp_rate_uhbr10: + case dml2_dp_rate_uhbr13p5: + case dml2_dp_rate_uhbr20: + return true; + case dml2_dp_rate_hbr: + case dml2_dp_rate_hbr2: + case dml2_dp_rate_hbr3: + case dml2_dp_rate_na: + default: + return false; + } +} + +bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode) +{ + switch (odm_mode) { + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + return true; + case dml2_odm_mode_auto: + case dml2_odm_mode_bypass: + case dml2_odm_mode_combine_2to1: + case dml2_odm_mode_combine_3to1: + case dml2_odm_mode_combine_4to1: + default: + return false; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h new file mode 100644 index 000000000000..a5cc6a07167a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_CORE_UTILS_H__ +#define __DML2_CORE_UTILS_H__ +#include "dml2_internal_shared_types.h" +#include "dml2_debug.h" +#include "lib_float_math.h" + +double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder); +const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); +bool dml2_core_utils_is_420(enum dml2_source_format_class source_format); +void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only); +const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); +void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg); +unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up); +unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info); +void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane); +bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg); +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode); +bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan); +int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode); +unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params); +unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table); +bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format); +unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend); +void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch); +bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate); +bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate); +bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode); + +#endif /* __DML2_CORE_UTILS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index c94c4f32c957..8869ea089312 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_dpmm_dcn4.h" #include "dml2_internal_shared_types.h" #include "dml_top_types.h" @@ -83,9 +82,9 @@ static void calculate_system_active_minimums(struct dml2_dpmm_map_mode_to_soc_dp get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - in_out->programming->min_clocks.dcn4.active.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); - in_out->programming->min_clocks.dcn4.active.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); - in_out->programming->min_clocks.dcn4.active.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); + in_out->programming->min_clocks.dcn4x.active.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.active.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); } static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) @@ -123,9 +122,9 @@ static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); - in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); - in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); } static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) @@ -147,9 +146,9 @@ static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_ get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - in_out->programming->min_clocks.dcn4.idle.uclk_khz = dml_round_up(min_uclk_avg > min_uclk_latency ? min_uclk_avg : min_uclk_latency); - in_out->programming->min_clocks.dcn4.idle.fclk_khz = dml_round_up(min_fclk_avg > min_fclk_latency ? min_fclk_avg : min_fclk_latency); - in_out->programming->min_clocks.dcn4.idle.dcfclk_khz = dml_round_up(min_dcfclk_avg > min_dcfclk_latency ? min_dcfclk_avg : min_dcfclk_latency); + in_out->programming->min_clocks.dcn4x.idle.uclk_khz = dml_round_up(min_uclk_avg > min_uclk_latency ? min_uclk_avg : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.idle.fclk_khz = dml_round_up(min_fclk_avg > min_fclk_latency ? min_fclk_avg : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.idle.dcfclk_khz = dml_round_up(min_dcfclk_avg > min_dcfclk_latency ? min_dcfclk_avg : min_dcfclk_latency); } static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double margin, unsigned long vco_freq_khz, unsigned long *rounded_khz, uint32_t *divider_id) @@ -204,6 +203,26 @@ static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double ma return true; } +static bool round_to_non_dfs_granularity(unsigned long dispclk_khz, unsigned long dpprefclk_khz, unsigned long dtbrefclk_khz, + unsigned long *rounded_dispclk_khz, unsigned long *rounded_dpprefclk_khz, unsigned long *rounded_dtbrefclk_khz) +{ + unsigned long pll_frequency_khz; + + pll_frequency_khz = (unsigned long) math_max2(600000, math_ceil2(math_max3(dispclk_khz, dpprefclk_khz, dtbrefclk_khz), 1000)); + + *rounded_dispclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dispclk_khz, 32); + + *rounded_dpprefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dpprefclk_khz, 32); + + if (dtbrefclk_khz > 0) { + *rounded_dtbrefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dtbrefclk_khz, 32); + } else { + *rounded_dtbrefclk_khz = 0; + } + + return true; +} + static bool round_up_and_copy_to_next_dpm(unsigned long min_value, unsigned long *rounded_value, const struct dml2_clk_table *clock_table) { bool result = false; @@ -233,25 +252,25 @@ static bool map_soc_min_clocks_to_dpm_fine_grained(struct dml2_display_cfg_progr { bool result; - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.dcfclk_khz, &state_table->dcfclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.fclk_khz, &state_table->fclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.fclk_khz, &state_table->fclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.uclk_khz, &state_table->uclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.uclk_khz, &state_table->uclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz, &state_table->dcfclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.fclk_khz, &state_table->fclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz, &state_table->fclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.uclk_khz, &state_table->uclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz, &state_table->uclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.dcfclk_khz, &state_table->dcfclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.fclk_khz, &state_table->fclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.fclk_khz, &state_table->fclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.uclk_khz, &state_table->uclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.uclk_khz, &state_table->uclk); return result; } @@ -263,12 +282,12 @@ static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_pro result = false; for (index = 0; index < state_table->uclk.num_clk_values; index++) { - if (display_cfg->min_clocks.dcn4.active.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.active.fclk_khz <= state_table->fclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.active.uclk_khz <= state_table->uclk.clk_values_khz[index]) { - display_cfg->min_clocks.dcn4.active.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.active.fclk_khz = state_table->fclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.active.uclk_khz = state_table->uclk.clk_values_khz[index]; + if (display_cfg->min_clocks.dcn4x.active.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.active.fclk_khz <= state_table->fclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.active.uclk_khz <= state_table->uclk.clk_values_khz[index]) { + display_cfg->min_clocks.dcn4x.active.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.active.fclk_khz = state_table->fclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.active.uclk_khz = state_table->uclk.clk_values_khz[index]; result = true; break; } @@ -277,12 +296,12 @@ static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_pro if (result) { result = false; for (index = 0; index < state_table->uclk.num_clk_values; index++) { - if (display_cfg->min_clocks.dcn4.idle.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.idle.fclk_khz <= state_table->fclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.idle.uclk_khz <= state_table->uclk.clk_values_khz[index]) { - display_cfg->min_clocks.dcn4.idle.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.idle.fclk_khz = state_table->fclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.idle.uclk_khz = state_table->uclk.clk_values_khz[index]; + if (display_cfg->min_clocks.dcn4x.idle.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.idle.fclk_khz <= state_table->fclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.idle.uclk_khz <= state_table->uclk.clk_values_khz[index]) { + display_cfg->min_clocks.dcn4x.idle.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.idle.fclk_khz = state_table->fclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.idle.uclk_khz = state_table->uclk.clk_values_khz[index]; result = true; break; } @@ -290,9 +309,9 @@ static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_pro } // SVP is not supported on any coarse grained SoCs - display_cfg->min_clocks.dcn4.svp_prefetch.dcfclk_khz = 0; - display_cfg->min_clocks.dcn4.svp_prefetch.fclk_khz = 0; - display_cfg->min_clocks.dcn4.svp_prefetch.uclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz = 0; return result; } @@ -325,30 +344,30 @@ static bool map_min_clocks_to_dpm(const struct dml2_core_mode_support_result *mo result = map_soc_min_clocks_to_dpm_coarse_grained(display_cfg, state_table); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dispclk_khz, &state_table->dispclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dispclk_khz, &state_table->dispclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.deepsleep_dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.deepsleep_dcfclk_khz, &state_table->dcfclk); for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { if (result) - result = round_up_to_next_dpm(&display_cfg->plane_programming[i].min_clocks.dcn4.dppclk_khz, &state_table->dppclk); + result = round_up_to_next_dpm(&display_cfg->plane_programming[i].min_clocks.dcn4x.dppclk_khz, &state_table->dppclk); } for (i = 0; i < display_cfg->display_config.num_streams; i++) { if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dscclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.dscclk_khz, &state_table->dscclk); + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dscclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dscclk_khz, &state_table->dscclk); if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dtbclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.dtbclk_khz, &state_table->dtbclk); + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dtbclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dtbclk_khz, &state_table->dtbclk); if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].phyclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.phyclk_khz, &state_table->phyclk); + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].phyclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.phyclk_khz, &state_table->phyclk); } if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dpprefclk_khz, &state_table->dppclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dpprefclk_khz, &state_table->dppclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dtbrefclk_khz, &state_table->dtbclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dtbrefclk_khz, &state_table->dtbclk); return result; } @@ -516,15 +535,15 @@ static bool determine_power_management_features_with_fams(struct dml2_dpmm_map_m static void clamp_uclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) { - in_out->programming->min_clocks.dcn4.active.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4.idle.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.idle.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; } static void clamp_fclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) { - in_out->programming->min_clocks.dcn4.active.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4.idle.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.active.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.idle.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; } static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) @@ -540,14 +559,14 @@ static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_o // In NV4, there's no support for FCLK or DCFCLK DPM change before SVP prefetch starts, therefore // active minimums must be boosted to prefetch minimums - if (in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz > in_out->programming->min_clocks.dcn4.active.uclk_khz) - in_out->programming->min_clocks.dcn4.active.uclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz; + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz > in_out->programming->min_clocks.dcn4x.active.uclk_khz) + in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz; - if (in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz > in_out->programming->min_clocks.dcn4.active.fclk_khz) - in_out->programming->min_clocks.dcn4.active.fclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz; + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz > in_out->programming->min_clocks.dcn4x.active.fclk_khz) + in_out->programming->min_clocks.dcn4x.active.fclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz; - if (in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz > in_out->programming->min_clocks.dcn4.active.dcfclk_khz) - in_out->programming->min_clocks.dcn4.active.dcfclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz; + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz > in_out->programming->min_clocks.dcn4x.active.dcfclk_khz) + in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz; // need some massaging for the dispclk ramping cases: dispclk_khz = mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0) * (1.0 + in_out->ip->dispclk_ramp_margin_percent / 100.0); @@ -556,34 +575,42 @@ static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_o // but still the required dispclk can be more than the maximum dispclk speed: dispclk_khz = math_max2(dispclk_khz, mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); - add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dispclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dispclk_did); - // DPP Ref is always set to max of all DPP clocks for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - if (in_out->programming->min_clocks.dcn4.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) - in_out->programming->min_clocks.dcn4.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; - } - - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4.dpprefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dpprefclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dpprefclk_did); - - for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - in_out->programming->plane_programming[i].min_clocks.dcn4.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4.dpprefclk_khz / 255.0 - * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4.dpprefclk_khz, 1.0)); + if (in_out->programming->min_clocks.dcn4x.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) + in_out->programming->min_clocks.dcn4x.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; } + in_out->programming->min_clocks.dcn4x.dpprefclk_khz = (unsigned long) (in_out->programming->min_clocks.dcn4x.dpprefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); // DTB Ref is always set to max of all DTB clocks for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - if (in_out->programming->min_clocks.dcn4.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) - in_out->programming->min_clocks.dcn4.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; + if (in_out->programming->min_clocks.dcn4x.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) + in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; + } + in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); + + if (in_out->soc_bb->no_dfs) { + round_to_non_dfs_granularity((unsigned long)dispclk_khz, in_out->programming->min_clocks.dcn4x.dpprefclk_khz, in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, + &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz); + } else { + add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dispclk_did); + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dpprefclk_did); + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dtbrefclk_did); } - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4.dtbrefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dtbrefclk_did); - in_out->programming->min_clocks.dcn4.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; - in_out->programming->min_clocks.dcn4.socclk_khz = mode_support_result->global.socclk_khz; + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + in_out->programming->plane_programming[i].min_clocks.dcn4x.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dpprefclk_khz / 255.0 + * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 1.0)); + } + + in_out->programming->min_clocks.dcn4x.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; + in_out->programming->min_clocks.dcn4x.socclk_khz = mode_support_result->global.socclk_khz; result = map_min_clocks_to_dpm(mode_support_result, in_out->programming, &in_out->soc_bb->clk_table); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h index 3afb69dfd040..b165c58dfd11 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_DPMM_DCN4_H__ #define __DML2_DPMM_DCN4_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c index 2c983daf2dad..3861bc6c9621 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_dpmm_factory.h" #include "dml2_dpmm_dcn4.h" #include "dml2_external_lib_deps.h" @@ -21,7 +20,7 @@ bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_dpmm_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h index 80b44b4c2e68..20ba2e446f1d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_DPMM_FACTORY_H__ #define __DML2_DPMM_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c index 5d8887ac766d..f4b1a7d02d42 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_mcg_dcn4.h" #include "dml_top_soc_parameter_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h index 19d178651435..02da6f45cbf7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_MCG_DCN4_H__ #define __DML2_MCG_DCN4_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c index 55085b85f8ed..c60b8fe90819 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_mcg_factory.h" #include "dml2_mcg_dcn4.h" #include "dml2_external_lib_deps.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h index 5dfdfed04e22..ad307deca3b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_MCG_FACTORY_H__ #define __DML2_MCG_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c index 671f9ac2627c..8e68a8094658 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -2,22 +2,17 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_pmo_factory.h" #include "dml2_pmo_dcn3.h" static void sort(double *list_a, int list_a_size) { - double temp; // For all elements b[i] in list_b[] for (int i = 0; i < list_a_size - 1; i++) { // Find the first element of list_a that's larger than b[i] for (int j = i; j < list_a_size - 1; j++) { - if (list_a[j] > list_a[j + 1]) { - temp = list_a[j]; - list_a[j] = list_a[j + 1]; - list_a[j + 1] = temp; - } + if (list_a[j] > list_a[j + 1]) + swap(list_a[j], list_a[j + 1]); } } } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h index cc350f88d4d2..f00bd9e72a86 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_PMO_DCN3_H__ #define __DML2_PMO_DCN3_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c deleted file mode 100644 index 8952dd7e36cb..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c +++ /dev/null @@ -1,1250 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - - -#include "dml2_pmo_factory.h" -#include "dml2_pmo_dcn4.h" - -static const int MIN_VACTIVE_MARGIN_US = 100; // We need more than non-zero margin because DET buffer granularity can alter vactive latency hiding -static const int SUBVP_DRR_MARGIN_US = 100; - -static const enum dml2_pmo_pstate_strategy full_strategy_list_1_display[][4] = { - // VActive Preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then VBlank - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Finally DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, -}; - -static const int full_strategy_list_1_display_size = sizeof(full_strategy_list_1_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static const enum dml2_pmo_pstate_strategy full_strategy_list_2_display[][4] = { - // VActive only is preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then VActive + VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then VBlank only - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then SVP + VBlank - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then SVP + SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Finally DRR + DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, -}; - -static const int full_strategy_list_2_display_size = sizeof(full_strategy_list_2_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static const enum dml2_pmo_pstate_strategy full_strategy_list_3_display[][4] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // All VActive - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 1 VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, - -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 2 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, - -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // VActive + 3 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // All VBlank - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // All DRR -}; - -static const int full_strategy_list_3_display_size = sizeof(full_strategy_list_3_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static const enum dml2_pmo_pstate_strategy full_strategy_list_4_display[][4] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // All VActive - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // VActive + 1 VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, - -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // VActive + 2 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, - -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 3 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // All Vblank - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // All DRR -}; - -static const int full_strategy_list_4_display_size = sizeof(full_strategy_list_4_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) -{ - bool result = true; - - if (*odm_mode == dml2_odm_mode_auto) { - switch (odms_calculated) { - case 1: - *odm_mode = dml2_odm_mode_bypass; - break; - case 2: - *odm_mode = dml2_odm_mode_combine_2to1; - break; - case 3: - *odm_mode = dml2_odm_mode_combine_3to1; - break; - case 4: - *odm_mode = dml2_odm_mode_combine_4to1; - break; - default: - result = false; - break; - } - } - - if (result) { - if (*odm_mode == dml2_odm_mode_bypass) { - *odm_mode = dml2_odm_mode_combine_2to1; - } else if (*odm_mode == dml2_odm_mode_combine_2to1) { - *odm_mode = dml2_odm_mode_combine_3to1; - } else if (*odm_mode == dml2_odm_mode_combine_3to1) { - *odm_mode = dml2_odm_mode_combine_4to1; - } else { - result = false; - } - } - - return result; -} - -static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit) -{ - if (*mpc_combine_factor < limit) { - (*mpc_combine_factor)++; - return true; - } - - return false; -} - -static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int stream_index) -{ - unsigned int i; - int count; - - count = 0; - for (i = 0; i < display_cfg->num_planes; i++) { - if (display_cfg->plane_descriptors[i].stream_index == stream_index) - count++; - } - - return count; -} - -static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out, - int free_pipes) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int i; - bool result = true; - - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - // For pipes that failed dcc mcache check, we want to increase the pipe count. - // The logic for doing this depends on how many pipes is already being used, - // and whether it's mpcc or odm combine. - if (!in_out->dcc_mcache_supported[i]) { - // For the general case of "n displays", we can only optimize streams with an ODM combine factor of 1 - if (in_out->cfg_support_info->stream_support_info[in_out->optimized_display_cfg->plane_descriptors[i].stream_index].odms_used == 1) { - in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor = - in_out->cfg_support_info->plane_support_info[i].dpps_used; - // For each plane that is not passing mcache validation, just add another pipe to it, up to the limit. - if (free_pipes > 0) { - if (!increase_mpc_combine_factor(&in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor, - pmo->mpc_combine_limit)) { - // We've reached max pipes allocatable to a single plane, so we fail. - result = false; - break; - } else { - // Successfully added another pipe to this failing plane. - free_pipes--; - } - } else { - // No free pipes to add. - result = false; - break; - } - } else { - // If the stream of this plane needs ODM combine, no further optimization can be done. - result = false; - break; - } - } - } - - return result; -} - -bool pmo_dcn4_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int i, used_pipes, free_pipes, planes_on_stream; - bool result; - - if (in_out->display_config != in_out->optimized_display_cfg) { - memcpy(in_out->optimized_display_cfg, in_out->display_config, sizeof(struct dml2_display_cfg)); - } - - //Count number of free pipes, and check if any odm combine is in use. - used_pipes = 0; - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used; - } - free_pipes = pmo->ip_caps->pipe_count - used_pipes; - - // Optimization loop - // The goal here is to add more pipes to any planes - // which are failing mcache admissibility - result = true; - - // The optimization logic depends on whether ODM combine is enabled, and the stream count. - if (in_out->optimized_display_cfg->num_streams > 1) { - // If there are multiple streams, we are limited to only be able to optimize mcache failures on planes - // which are not ODM combined. - - result = optimize_dcc_mcache_no_odm(in_out, free_pipes); - } else if (in_out->optimized_display_cfg->num_streams == 1) { - // In single stream cases, we still optimize mcache failures when there's ODM combine with some - // additional logic. - - if (in_out->cfg_support_info->stream_support_info[0].odms_used > 1) { - // If ODM combine is enabled, then the logic is to increase ODM combine factor. - - // Optimization for streams with > 1 ODM combine factor is only supported for single display. - planes_on_stream = count_planes_with_stream_index(in_out->optimized_display_cfg, 0); - - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - // For pipes that failed dcc mcache check, we want to increase the pipe count. - // The logic for doing this depends on how many pipes is already being used, - // and whether it's mpcc or odm combine. - if (!in_out->dcc_mcache_supported[i]) { - // Increasing ODM combine factor on a stream requires a free pipe for each plane on the stream. - if (free_pipes >= planes_on_stream) { - if (!increase_odm_combine_factor(&in_out->optimized_display_cfg->stream_descriptors[i].overrides.odm_mode, - in_out->cfg_support_info->plane_support_info[i].dpps_used)) { - result = false; - } else { - free_pipes -= planes_on_stream; - break; - } - } else { - result = false; - break; - } - } - } - } else { - // If ODM combine is not enabled, then we can actually use the same logic as before. - - result = optimize_dcc_mcache_no_odm(in_out, free_pipes); - } - } else { - result = true; - } - - return result; -} - -bool pmo_dcn4_initialize(struct dml2_pmo_initialize_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - pmo->soc_bb = in_out->soc_bb; - pmo->ip_caps = in_out->ip_caps; - pmo->mpc_combine_limit = 2; - pmo->odm_combine_limit = 4; - pmo->mcg_clock_table_size = in_out->mcg_clock_table_size; - - pmo->fams_params.v1.subvp.fw_processing_delay_us = 10; - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us = 50; - pmo->fams_params.v1.subvp.refresh_rate_limit_max = 175; - pmo->fams_params.v1.subvp.refresh_rate_limit_min = 0; - - pmo->options = in_out->options; - - return true; -} - -static bool is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator) -{ - /* - * Htotal, Hblank start/end, and Hsync start/end all must be divisible - * in order for the horizontal timing params to be considered divisible - * by 2. Hsync start is always 0. - */ - unsigned long h_blank_start = timing->h_total - timing->h_front_porch; - - return (timing->h_total % denominator == 0) && - (h_blank_start % denominator == 0) && - (timing->h_blank_end % denominator == 0) && - (timing->h_sync_width % denominator == 0); -} - -static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type) -{ - switch (encoder_type) { - case dml2_dp: - case dml2_edp: - case dml2_dp2p0: - case dml2_none: - return true; - case dml2_hdmi: - case dml2_hdmifrl: - default: - return false; - } -} - -bool pmo_dcn4_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) -{ - unsigned int i; - const struct dml2_display_cfg *display_config = - &in_out->base_display_config->display_config; - const struct dml2_core_mode_support_result *mode_support_result = - &in_out->base_display_config->mode_support_result; - - if (in_out->instance->options->disable_dyn_odm || - (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) - return false; - - for (i = 0; i < display_config->num_planes; i++) - /* - * vmin optimization is required to be seamlessly switched off - * at any time when the new configuration is no longer - * supported. However switching from ODM combine to MPC combine - * is not always seamless. When there not enough free pipes, we - * will have to use the same secondary OPP heads as secondary - * DPP pipes in MPC combine in new state. This transition is - * expected to cause glitches. To avoid the transition, we only - * allow vmin optimization if the stream's base configuration - * doesn't require MPC combine. This condition checks if MPC - * combine is enabled. If so do not optimize the stream. - */ - if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && - mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) - in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; - - for (i = 0; i < display_config->num_streams; i++) { - if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && - in_out->instance->options->disable_dyn_odm_for_stream_with_svp) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - /* - * ODM Combine requires horizontal timing divisible by 2 so each - * ODM segment has the same size. - */ - else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - /* - * Our hardware support seamless ODM transitions for DP encoders - * only. - */ - else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - } - - return true; -} - -bool pmo_dcn4_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out) -{ - bool is_vmin = true; - - if (in_out->vmin_limits->dispclk_khz > 0 && - in_out->display_config->mode_support_result.global.dispclk_khz > in_out->vmin_limits->dispclk_khz) - is_vmin = false; - - return is_vmin; -} - -static int find_highest_odm_load_stream_index( - const struct dml2_display_cfg *display_config, - const struct dml2_core_mode_support_result *mode_support_result) -{ - unsigned int i; - int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; - - for (i = 0; i < display_config->num_streams; i++) { - odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz - / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; - if (odm_load > highest_odm_load) { - highest_odm_load_index = i; - highest_odm_load = odm_load; - } - } - - return highest_odm_load_index; -} - -bool pmo_dcn4_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out) -{ - int stream_index; - const struct dml2_display_cfg *display_config = - &in_out->base_display_config->display_config; - const struct dml2_core_mode_support_result *mode_support_result = - &in_out->base_display_config->mode_support_result; - unsigned int odms_used; - struct dml2_stream_parameters *stream_descriptor; - bool optimizable = false; - - /* - * highest odm load stream must be optimizable to continue as dispclk is - * bounded by it. - */ - stream_index = find_highest_odm_load_stream_index(display_config, - mode_support_result); - - if (stream_index < 0 || - in_out->base_display_config->stage4.unoptimizable_streams[stream_index]) - return false; - - odms_used = mode_support_result->cfg_support_info.stream_support_info[stream_index].odms_used; - if ((int)odms_used >= in_out->instance->odm_combine_limit) - return false; - - memcpy(in_out->optimized_display_config, - in_out->base_display_config, - sizeof(struct display_configuation_with_meta)); - - stream_descriptor = &in_out->optimized_display_config->display_config.stream_descriptors[stream_index]; - while (!optimizable && increase_odm_combine_factor( - &stream_descriptor->overrides.odm_mode, - odms_used)) { - switch (stream_descriptor->overrides.odm_mode) { - case dml2_odm_mode_combine_2to1: - optimizable = true; - break; - case dml2_odm_mode_combine_3to1: - /* - * In ODM Combine 3:1 OTG_valid_pixel rate is 1/4 of - * actual pixel rate. Therefore horizontal timing must - * be divisible by 4. - */ - if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { - /* - * DSC h slice count must be divisible - * by 3. - */ - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 3 == 0) - optimizable = true; - } else { - optimizable = true; - } - } - break; - case dml2_odm_mode_combine_4to1: - /* - * In ODM Combine 4:1 OTG_valid_pixel rate is 1/4 of - * actual pixel rate. Therefore horizontal timing must - * be divisible by 4. - */ - if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { - /* - * DSC h slice count must be divisible - * by 4. - */ - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 4 == 0) - optimizable = true; - } else { - optimizable = true; - } - } - break; - case dml2_odm_mode_auto: - case dml2_odm_mode_bypass: - case dml2_odm_mode_split_1to2: - case dml2_odm_mode_mso_1to2: - case dml2_odm_mode_mso_1to4: - default: - break; - } - } - - return optimizable; -} - -static bool are_timings_trivially_synchronizable(const struct display_configuation_with_meta *display_config, int mask) -{ - unsigned char i; - bool identical = true; - bool contains_drr = false; - unsigned char remap_array[DML2_MAX_PLANES]; - unsigned char remap_array_size = 0; - - // Create a remap array to enable simple iteration through only masked stream indicies - for (i = 0; i < display_config->display_config.num_streams; i++) { - if (mask & (0x1 << i)) { - remap_array[remap_array_size++] = i; - } - } - - // 0 or 1 display is always trivially synchronizable - if (remap_array_size <= 1) - return true; - - for (i = 1; i < remap_array_size; i++) { - if (memcmp(&display_config->display_config.stream_descriptors[remap_array[i - 1]].timing, - &display_config->display_config.stream_descriptors[remap_array[i]].timing, - sizeof(struct dml2_timing_cfg))) { - identical = false; - break; - } - } - - for (i = 0; i < remap_array_size; i++) { - if (display_config->display_config.stream_descriptors[remap_array[i]].timing.drr_config.enabled) { - contains_drr = true; - break; - } - } - - return !contains_drr && identical; -} - -static void set_bit_in_bitfield(unsigned int *bit_field, unsigned int bit_offset) -{ - *bit_field = *bit_field | (0x1 << bit_offset); -} - -static bool is_bit_set_in_bitfield(unsigned int bit_field, unsigned int bit_offset) -{ - if (bit_field & (0x1 << bit_offset)) - return true; - - return false; -} - -static bool are_all_timings_drr_enabled(const struct display_configuation_with_meta *display_config, int mask) -{ - unsigned char i; - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(mask, i)) { - if (!display_config->display_config.stream_descriptors[i].timing.drr_config.enabled) - return false; - } - } - - return true; -} - -static void insert_into_candidate_list(const enum dml2_pmo_pstate_strategy *per_stream_pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) -{ - int stream_index; - - scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = true; - - for (stream_index = 0; stream_index < stream_count; stream_index++) { - scratch->pmo_dcn4.per_stream_pstate_strategy[scratch->pmo_dcn4.num_pstate_candidates][stream_index] = per_stream_pstate_strategy[stream_index]; - - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank) - scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = false; - } - - scratch->pmo_dcn4.num_pstate_candidates++; -} - -static bool all_planes_match_strategy(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_strategy strategy) -{ - unsigned char i; - enum dml2_uclk_pstate_change_strategy matching_strategy = (enum dml2_uclk_pstate_change_strategy) dml2_pmo_pstate_strategy_na; - - if (strategy == dml2_pmo_pstate_strategy_vactive) - matching_strategy = dml2_uclk_pstate_change_strategy_force_vactive; - else if (strategy == dml2_pmo_pstate_strategy_vblank) - matching_strategy = dml2_uclk_pstate_change_strategy_force_vblank; - else if (strategy == dml2_pmo_pstate_strategy_fw_svp) - matching_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; - else if (strategy == dml2_pmo_pstate_strategy_fw_drr) - matching_strategy = dml2_uclk_pstate_change_strategy_force_drr; - - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(plane_mask, i)) { - if (display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto && - display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != matching_strategy) - return false; - } - } - - return true; -} - -static bool subvp_subvp_schedulable(struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - unsigned char *svp_stream_indicies, char svp_stream_count) -{ - struct dml2_pmo_scratch *s = &pmo->scratch; - int i; - int microschedule_lines, time_us, refresh_hz; - int max_microschedule_us = 0; - int vactive1_us, vactive2_us, vblank1_us, vblank2_us; - - const struct dml2_timing_cfg *svp_timing1 = 0; - const struct dml2_implicit_svp_meta *svp_meta1 = 0; - - const struct dml2_timing_cfg *svp_timing2 = 0; - - if (svp_stream_count <= 1) - return true; - else if (svp_stream_count > 2) - return false; - - /* Loop to calculate the maximum microschedule time between the two SubVP pipes, - * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. - */ - for (i = 0; i < svp_stream_count; i++) { - svp_timing1 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[i]].timing; - svp_meta1 = &s->pmo_dcn4.stream_svp_meta[svp_stream_indicies[i]]; - - microschedule_lines = svp_meta1->v_active; - - // Round up when calculating microschedule time (+ 1 at the end) - time_us = (int)((microschedule_lines * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000 + - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us + pmo->fams_params.v1.subvp.fw_processing_delay_us + 1); - - if (time_us > max_microschedule_us) - max_microschedule_us = time_us; - - refresh_hz = (int)((double)(svp_timing1->pixel_clock_khz * 1000) / (svp_timing1->v_total * svp_timing1->h_total)); - - if (refresh_hz < pmo->fams_params.v1.subvp.refresh_rate_limit_min || - refresh_hz > pmo->fams_params.v1.subvp.refresh_rate_limit_max) { - return false; - } - } - - svp_timing1 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[0]].timing; - svp_meta1 = &s->pmo_dcn4.stream_svp_meta[svp_stream_indicies[0]]; - - vactive1_us = (int)((svp_timing1->v_active * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000); - - vblank1_us = (int)(((svp_timing1->v_total - svp_timing1->v_active) * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000); - - svp_timing2 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[1]].timing; - - vactive2_us = (int)((svp_timing2->v_active * svp_timing2->h_total) / (double)(svp_timing2->pixel_clock_khz * 1000) * 1000000); - - vblank2_us = (int)(((svp_timing2->v_total - svp_timing2->v_active) * svp_timing2->h_total) / (double)(svp_timing2->pixel_clock_khz * 1000) * 1000000); - - if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && - (vactive2_us - vblank1_us) / 2 > max_microschedule_us) - return true; - - return false; -} - -static bool validate_svp_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int svp_stream_mask) -{ - bool result = false; - unsigned char stream_index; - - unsigned char svp_stream_indicies[2] = { 0 }; - unsigned char svp_stream_count = 0; - - // Find the SVP streams, store only the first 2, but count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { - if (svp_stream_count < 2) - svp_stream_indicies[svp_stream_count] = stream_index; - - svp_stream_count++; - } - } - - if (svp_stream_count == 1) { - result = true; // 1 SVP is always co_functional - } else if (svp_stream_count == 2) { - result = subvp_subvp_schedulable(pmo, display_cfg, svp_stream_indicies, svp_stream_count); - } - - return result; -} - -static bool validate_drr_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int drr_stream_mask) -{ - unsigned char stream_index; - int drr_stream_count = 0; - - // Find the SVP streams and count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(drr_stream_mask, stream_index)) { - drr_stream_count++; - } - } - - return drr_stream_count <= 4; -} - -static bool validate_svp_drr_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int svp_stream_mask, int drr_stream_mask) -{ - unsigned char stream_index; - int drr_stream_count = 0; - int svp_stream_count = 0; - - int prefetch_us = 0; - int mall_region_us = 0; - int drr_frame_us = 0; // nominal frame time - int subvp_active_us = 0; - int stretched_drr_us = 0; - int drr_stretched_vblank_us = 0; - int max_vblank_mallregion = 0; - - const struct dml2_timing_cfg *svp_timing = 0; - const struct dml2_timing_cfg *drr_timing = 0; - const struct dml2_implicit_svp_meta *svp_meta = 0; - - bool schedulable = false; - - // Find the SVP streams and count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { - svp_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; - svp_stream_count++; - } - if (is_bit_set_in_bitfield(drr_stream_mask, stream_index)) { - drr_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - drr_stream_count++; - } - } - - if (svp_stream_count == 1 && drr_stream_count == 1 && svp_timing != drr_timing) { - prefetch_us = (int)((svp_meta->v_total - svp_meta->v_front_porch) - * svp_timing->h_total / (double)(svp_timing->pixel_clock_khz * 1000) * 1000000 + - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us); - - subvp_active_us = (int)(svp_timing->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - drr_frame_us = (int)(drr_timing->v_total * drr_timing->h_total / - (double)(drr_timing->pixel_clock_khz * 1000) * 1000000); - - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = (int)(svp_meta->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; - - drr_stretched_vblank_us = (int)((drr_timing->v_total - drr_timing->v_active) * drr_timing->h_total / - (double)(drr_timing->pixel_clock_khz * 1000) * 1000000 + (stretched_drr_us - drr_frame_us)); - - max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; - - /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the - * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis - * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - * and the max of (VBLANK blanking time, MALL region)). - */ - if (stretched_drr_us < (1 / (double)drr_timing->drr_config.min_refresh_uhz) * 1000000 * 1000000 && - subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) - schedulable = true; - } - - return schedulable; -} - -static bool validate_svp_vblank_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int svp_stream_mask, int vblank_stream_mask) -{ - unsigned char stream_index; - int vblank_stream_count = 0; - int svp_stream_count = 0; - - const struct dml2_timing_cfg *svp_timing = 0; - const struct dml2_timing_cfg *vblank_timing = 0; - const struct dml2_implicit_svp_meta *svp_meta = 0; - - int prefetch_us = 0; - int mall_region_us = 0; - int vblank_frame_us = 0; - int subvp_active_us = 0; - int vblank_blank_us = 0; - int max_vblank_mallregion = 0; - - bool schedulable = false; - - // Find the SVP streams and count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { - svp_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; - svp_stream_count++; - } - if (is_bit_set_in_bitfield(vblank_stream_mask, stream_index)) { - vblank_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - vblank_stream_count++; - } - } - - if (svp_stream_count == 1 && vblank_stream_count > 0) { - // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe - // Also include the prefetch end to mallstart delay time - prefetch_us = (int)((svp_meta->v_total - svp_meta->v_front_porch) * svp_timing->h_total - / (double)(svp_timing->pixel_clock_khz * 1000) * 1000000 + - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us); - - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = (int)(svp_meta->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - vblank_frame_us = (int)(vblank_timing->v_total * vblank_timing->h_total / - (double)(vblank_timing->pixel_clock_khz * 1000) * 1000000); - - vblank_blank_us = (int)((vblank_timing->v_total - vblank_timing->v_active) * vblank_timing->h_total / - (double)(vblank_timing->pixel_clock_khz * 1000) * 1000000); - - subvp_active_us = (int)(svp_timing->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us; - - // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - // and the max of (VBLANK blanking time, MALL region) - // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0) - if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) - schedulable = true; - } - return schedulable; -} - -static bool validate_drr_vblank_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int drr_stream_mask, int vblank_stream_mask) -{ - return false; -} - -static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[4]) -{ - struct dml2_pmo_scratch *s = &pmo->scratch; - - unsigned char stream_index = 0; - - unsigned int svp_count = 0; - unsigned int svp_stream_mask = 0; - unsigned int drr_count = 0; - unsigned int drr_stream_mask = 0; - unsigned int vactive_count = 0; - unsigned int vactive_stream_mask = 0; - unsigned int vblank_count = 0; - unsigned int vblank_stream_mask = 0; - - bool strategy_matches_forced_requirements = true; - - bool admissible = false; - - // Tabulate everything - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - - if (!all_planes_match_strategy(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], - per_stream_pstate_strategy[stream_index])) { - strategy_matches_forced_requirements = false; - break; - } - - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp) { - svp_count++; - set_bit_in_bitfield(&svp_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - drr_count++; - set_bit_in_bitfield(&drr_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vactive) { - vactive_count++; - set_bit_in_bitfield(&vactive_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank) { - vblank_count++; - set_bit_in_bitfield(&vblank_stream_mask, stream_index); - } - } - - if (!strategy_matches_forced_requirements) - return false; - - // Check for trivial synchronization for vblank - if (vblank_count > 0 && (pmo->options->disable_vblank || !are_timings_trivially_synchronizable(display_cfg, vblank_stream_mask))) - return false; - - if (svp_count > 0 && pmo->options->disable_svp) - return false; - - if (drr_count > 0 && (pmo->options->disable_drr_var || !are_all_timings_drr_enabled(display_cfg, drr_stream_mask))) - return false; - - // Validate for FAMS admissibiliy - if (svp_count == 0 && drr_count == 0) { - // No FAMS - admissible = true; - } else { - admissible = false; - if (svp_count > 0 && drr_count == 0 && vactive_count == 0 && vblank_count == 0) { - // All SVP - admissible = validate_svp_cofunctionality(pmo, display_cfg, svp_stream_mask); - } else if (svp_count == 0 && drr_count > 0 && vactive_count == 0 && vblank_count == 0) { - // All DRR - admissible = validate_drr_cofunctionality(pmo, display_cfg, drr_stream_mask); - } else if (svp_count > 0 && drr_count > 0 && vactive_count == 0 && vblank_count == 0) { - // SVP + DRR - admissible = validate_svp_drr_cofunctionality(pmo, display_cfg, svp_stream_mask, drr_stream_mask); - } else if (svp_count > 0 && drr_count == 0 && vactive_count == 0 && vblank_count > 0) { - // SVP + VBlank - admissible = validate_svp_vblank_cofunctionality(pmo, display_cfg, svp_stream_mask, vblank_stream_mask); - } else if (svp_count == 0 && drr_count > 0 && vactive_count == 0 && vblank_count > 0) { - // DRR + VBlank - admissible = validate_drr_vblank_cofunctionality(pmo, display_cfg, drr_stream_mask, vblank_stream_mask); - } - } - - return admissible; -} - -static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) -{ - unsigned char i; - int min_vactive_margin_us = 0xFFFFFFF; - - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(plane_mask, i)) { - if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active < min_vactive_margin_us) - min_vactive_margin_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active; - } - } - - return min_vactive_margin_us; -} - -bool pmo_dcn4_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3; - struct dml2_pmo_scratch *s = &pmo->scratch; - - struct display_configuation_with_meta *display_config; - const struct dml2_plane_parameters *plane_descriptor; - const enum dml2_pmo_pstate_strategy (*strategy_list)[4] = 0; - unsigned int strategy_list_size = 0; - unsigned int plane_index, stream_index, i; - - state->performed = true; - - display_config = in_out->base_display_config; - display_config->display_config.overrides.enable_subvp_implicit_pmo = true; - - memset(s, 0, sizeof(struct dml2_pmo_scratch)); - - pmo->scratch.pmo_dcn4.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; - pmo->scratch.pmo_dcn4.max_latency_index = pmo->mcg_clock_table_size - 1; - pmo->scratch.pmo_dcn4.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; - - // First build the stream plane mask (array of bitfields indexed by stream, indicating plane mapping) - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - plane_descriptor = &display_config->display_config.plane_descriptors[plane_index]; - - set_bit_in_bitfield(&s->pmo_dcn4.stream_plane_mask[plane_descriptor->stream_index], plane_index); - - state->pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; - } - - // Figure out which streams can do vactive, and also build up implicit SVP meta - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (get_vactive_pstate_margin(display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) >= - MIN_VACTIVE_MARGIN_US) - set_bit_in_bitfield(&s->pmo_dcn4.stream_vactive_capability_mask, stream_index); - - s->pmo_dcn4.stream_svp_meta[stream_index].valid = true; - s->pmo_dcn4.stream_svp_meta[stream_index].v_active = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_active; - s->pmo_dcn4.stream_svp_meta[stream_index].v_total = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_total; - s->pmo_dcn4.stream_svp_meta[stream_index].v_front_porch = 1; - } - - switch (display_config->display_config.num_streams) { - case 1: - strategy_list = full_strategy_list_1_display; - strategy_list_size = full_strategy_list_1_display_size; - break; - case 2: - strategy_list = full_strategy_list_2_display; - strategy_list_size = full_strategy_list_2_display_size; - break; - case 3: - strategy_list = full_strategy_list_3_display; - strategy_list_size = full_strategy_list_3_display_size; - break; - case 4: - strategy_list = full_strategy_list_4_display; - strategy_list_size = full_strategy_list_4_display_size; - break; - default: - strategy_list_size = 0; - break; - } - - if (strategy_list_size == 0) - return false; - - s->pmo_dcn4.num_pstate_candidates = 0; - - for (i = 0; i < strategy_list_size && i < DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE; i++) { - if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, strategy_list[i])) { - insert_into_candidate_list(strategy_list[i], display_config->display_config.num_streams, s); - } - } - - if (s->pmo_dcn4.num_pstate_candidates > 0) { - // There's this funny case... - // If the first entry in the candidate list is all vactive, then we can consider it "tested", so the current index is 0 - // Otherwise the current index should be -1 because we run the optimization at least once - s->pmo_dcn4.cur_pstate_candidate = 0; - for (i = 0; i < display_config->display_config.num_streams; i++) { - if (s->pmo_dcn4.per_stream_pstate_strategy[0][i] != dml2_pmo_pstate_strategy_vactive) { - s->pmo_dcn4.cur_pstate_candidate = -1; - break; - } - } - return true; - } else { - return false; - } -} - -static void reset_display_configuration(struct display_configuation_with_meta *display_config) -{ - unsigned int plane_index; - unsigned int stream_index; - struct dml2_plane_parameters *plane; - - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - display_config->stage3.stream_svp_meta[stream_index].valid = false; - } - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - - // Unset SubVP - plane->overrides.legacy_svp_config = dml2_svp_mode_override_auto; - - // Remove reserve time - plane->overrides.reserved_vblank_time_ns = 0; - - // Reset strategy to auto - plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_auto; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_not_supported; - } -} - -static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - struct dml2_plane_parameters *plane; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - - // Setup DRR - plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_force_drr; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_drr; - } - } -} - -static void setup_planes_for_svp_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - int stream_index = -1; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_subvp_phantom; - } - } - - if (stream_index >= 0) { - display_config->stage3.stream_svp_meta[stream_index].valid = true; - display_config->stage3.stream_svp_meta[stream_index].v_active = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_active; - display_config->stage3.stream_svp_meta[stream_index].v_total = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_total; - display_config->stage3.stream_svp_meta[stream_index].v_front_porch = 1; - } -} - -static void setup_planes_for_vblank_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - struct dml2_plane_parameters *plane; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - - // Setup reserve time - plane->overrides.reserved_vblank_time_ns = 400 * 1000; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vblank; - } - } -} - -static void setup_planes_for_vactive_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; - } - } -} - -static bool setup_display_config(struct display_configuation_with_meta *display_config, struct dml2_pmo_scratch *scratch, int strategy_index) -{ - bool success = true; - unsigned char stream_index; - - reset_display_configuration(display_config); - - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_na) { - success = false; - break; - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vblank) { - setup_planes_for_vblank_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { - setup_planes_for_svp_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - setup_planes_for_drr_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vactive) { - setup_planes_for_vactive_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } - } - - return success; -} - -static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask) -{ - int min_time_us = 0xFFFFFF; - unsigned char plane_index = 0; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - if (min_time_us > (display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000)) - min_time_us = display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; - } - } - return min_time_us; -} - -bool pmo_dcn4_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out) -{ - bool p_state_supported = true; - unsigned int stream_index; - struct dml2_pmo_scratch *s = &in_out->instance->scratch; - - if (s->pmo_dcn4.cur_pstate_candidate < 0) - return false; - - for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { - - if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vactive) { - if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_US) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vblank) { - if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < - in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { - if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - if (!all_planes_match_strategy(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr)) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_na) { - p_state_supported = false; - break; - } - } - - return p_state_supported; -} - -bool pmo_dcn4_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out) -{ - bool success = false; - struct dml2_pmo_scratch *s = &in_out->instance->scratch; - - memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); - - if (in_out->last_candidate_failed) { - if (s->pmo_dcn4.allow_state_increase_for_strategy[s->pmo_dcn4.cur_pstate_candidate] && - s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index) { - s->pmo_dcn4.cur_latency_index++; - - success = true; - } - } - - if (!success) { - s->pmo_dcn4.cur_latency_index = s->pmo_dcn4.min_latency_index; - s->pmo_dcn4.cur_pstate_candidate++; - - if (s->pmo_dcn4.cur_pstate_candidate < s->pmo_dcn4.num_pstate_candidates) { - success = true; - } - } - - if (success) { - in_out->optimized_display_config->stage3.min_clk_index_for_latency = s->pmo_dcn4.cur_latency_index; - setup_display_config(in_out->optimized_display_config, &in_out->instance->scratch, in_out->instance->scratch.pmo_dcn4.cur_pstate_candidate); - } - - return success; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h deleted file mode 100644 index 09cacc933d21..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - - -#ifndef __DML2_PMO_DCN4_H__ -#define __DML2_PMO_DCN4_H__ - -#include "dml2_internal_shared_types.h" - -bool pmo_dcn4_initialize(struct dml2_pmo_initialize_in_out *in_out); - -bool pmo_dcn4_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); - -bool pmo_dcn4_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out); -bool pmo_dcn4_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out); -bool pmo_dcn4_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out); - -bool pmo_dcn4_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out); -bool pmo_dcn4_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out); -bool pmo_dcn4_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); - -bool pmo_dcn4_unit_test(void); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 603036df68ba..30767f330fd4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -1,122 +1,181 @@ -/* -* Copyright 2022 Advanced Micro Devices, Inc. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE. -* -* Authors: AMD -* -*/ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. #include "dml2_pmo_factory.h" -#include "dml2_pmo_dcn4.h" #include "dml2_debug.h" #include "lib_float_math.h" #include "dml2_pmo_dcn4_fams2.h" static const double MIN_VACTIVE_MARGIN_PCT = 0.25; // We need more than non-zero margin because DET buffer granularity can alter vactive latency hiding -static const enum dml2_pmo_pstate_strategy base_strategy_list_1_display[][PMO_DCN4_MAX_DISPLAYS] = { +static const struct dml2_pmo_pstate_strategy base_strategy_list_1_display[] = { // VActive Preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then VBlank - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, - // Finally DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + // Then DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + + // Finally VBlank, but allow base clocks for latency to increase + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1_display) / sizeof(struct dml2_pmo_pstate_strategy); -static const enum dml2_pmo_pstate_strategy base_strategy_list_2_display[][PMO_DCN4_MAX_DISPLAYS] = { +static const struct dml2_pmo_pstate_strategy base_strategy_list_2_display[] = { // VActive only is preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then VActive + VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, // Then VBlank only - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, // Then SVP + VBlank - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, // Then SVP + DRR - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then SVP + SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then DRR + VActive - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, - // Then DRR + VBlank - //{ dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + // Then DRR + DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, - // Finally DRR + DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + // Finally VBlank, but allow base clocks for latency to increase + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_2_display_size = sizeof(base_strategy_list_2_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static const int base_strategy_list_2_display_size = sizeof(base_strategy_list_2_display) / sizeof(struct dml2_pmo_pstate_strategy); -static const enum dml2_pmo_pstate_strategy base_strategy_list_3_display[][PMO_DCN4_MAX_DISPLAYS] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // All VActive +static const struct dml2_pmo_pstate_strategy base_strategy_list_3_display[] = { + // All VActive + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // VActive + 1 VBlank + // VActive + 1 VBlank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 2 VBlank + // All VBlank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // All VBlank + // All DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // VBlank + 1 DRR - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // VBlank + 2 DRR - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // All DRR + // All VBlank, with state increase allowed + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3_display) / sizeof(struct dml2_pmo_pstate_strategy); -static const enum dml2_pmo_pstate_strategy base_strategy_list_4_display[][PMO_DCN4_MAX_DISPLAYS] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // All VActive +static const struct dml2_pmo_pstate_strategy base_strategy_list_4_display[] = { + // All VActive + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, + .allow_state_increase = true, + }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, // VActive + 1 VBlank + // VActive + 1 VBlank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, + .allow_state_increase = false, + }, - //{ dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 2 VBlank + // All Vblank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, + .allow_state_increase = false, + }, - //{ dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 3 VBlank + // All DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, + .allow_state_increase = true, + }, - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // All Vblank - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 1 DRR - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 2 DRR - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 3 DRR - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // All DRR + // All VBlank, with state increase allowed + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_4_display_size = sizeof(base_strategy_list_4_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static const int base_strategy_list_4_display_size = sizeof(base_strategy_list_4_display) / sizeof(struct dml2_pmo_pstate_strategy); static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) @@ -296,9 +355,9 @@ bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_o return result; } -static enum dml2_pmo_pstate_strategy convert_strategy_to_drr_variant(const enum dml2_pmo_pstate_strategy base_strategy) +static enum dml2_pmo_pstate_method convert_strategy_to_drr_variant(const enum dml2_pmo_pstate_method base_strategy) { - enum dml2_pmo_pstate_strategy variant_strategy = 0; + enum dml2_pmo_pstate_method variant_strategy = 0; switch (base_strategy) { case dml2_pmo_pstate_strategy_vactive: @@ -327,11 +386,9 @@ static enum dml2_pmo_pstate_strategy convert_strategy_to_drr_variant(const enum return variant_strategy; } -static enum dml2_pmo_pstate_strategy(*get_expanded_strategy_list( - struct dml2_pmo_init_data *init_data, - int stream_count))[PMO_DCN4_MAX_DISPLAYS] +static struct dml2_pmo_pstate_strategy *get_expanded_strategy_list(struct dml2_pmo_init_data *init_data, int stream_count) { - enum dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + struct dml2_pmo_pstate_strategy *expanded_strategy_list = NULL; switch (stream_count) { case 1: @@ -361,23 +418,23 @@ static unsigned int get_num_expanded_strategies( } static void insert_strategy_into_expanded_list( - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *per_stream_pstate_strategy, int stream_count, struct dml2_pmo_init_data *init_data) { - enum dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + struct dml2_pmo_pstate_strategy *expanded_strategy_list = NULL; expanded_strategy_list = get_expanded_strategy_list(init_data, stream_count); if (expanded_strategy_list) { - memcpy(&expanded_strategy_list[init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]++], - per_stream_pstate_strategy, - sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); + memcpy(&expanded_strategy_list[init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]], per_stream_pstate_strategy, sizeof(struct dml2_pmo_pstate_strategy)); + + init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]++; } } static void expand_base_strategy(struct dml2_pmo_instance *pmo, - const enum dml2_pmo_pstate_strategy base_strategy_list[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *base_strategy, unsigned int stream_count) { bool skip_to_next_stream; @@ -386,19 +443,21 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo, unsigned int i, j; unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; unsigned int stream_iteration_indices[PMO_DCN4_MAX_DISPLAYS] = { 0 }; - enum dml2_pmo_pstate_strategy cur_strategy_list[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + struct dml2_pmo_pstate_strategy cur_strategy_list = { 0 }; /* determine number of displays per method */ for (i = 0; i < stream_count; i++) { /* increment the count of the earliest index with the same method */ for (j = 0; j < stream_count; j++) { - if (base_strategy_list[i] == base_strategy_list[j]) { + if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) { num_streams_per_method[j] = num_streams_per_method[j] + 1; break; } } } + cur_strategy_list.allow_state_increase = base_strategy->allow_state_increase; + i = 0; /* uses a while loop instead of recursion to build permutations of base strategy */ while (stream_iteration_indices[0] < stream_count) { @@ -409,12 +468,12 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo, /* determine what to do for this iteration */ if (stream_iteration_indices[i] < stream_count && num_streams_per_method[stream_iteration_indices[i]] != 0) { /* decrement count and assign method */ - cur_strategy_list[i] = base_strategy_list[stream_iteration_indices[i]]; + cur_strategy_list.per_stream_pstate_method[i] = base_strategy->per_stream_pstate_method[stream_iteration_indices[i]]; num_streams_per_method[stream_iteration_indices[i]] -= 1; if (i >= stream_count - 1) { /* insert into strategy list */ - insert_strategy_into_expanded_list(cur_strategy_list, stream_count, &pmo->init_data); + insert_strategy_into_expanded_list(&cur_strategy_list, stream_count, &pmo->init_data); expanded_strategy_added = true; } else { /* skip to next stream */ @@ -450,55 +509,122 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo, } } -static void expand_variant_strategy(struct dml2_pmo_instance *pmo, - const enum dml2_pmo_pstate_strategy base_strategy_list[PMO_DCN4_MAX_DISPLAYS], + +static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_strategy, + const struct dml2_pmo_pstate_strategy *variant_strategy, + unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS], + unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS], unsigned int stream_count) { + bool valid = true; unsigned int i; - bool variant_found = false; - enum dml2_pmo_pstate_strategy cur_strategy_list[PMO_DCN4_MAX_DISPLAYS] = { 0 }; - - /* setup variant list as base to start */ - memcpy(cur_strategy_list, base_strategy_list, sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); - + /* check all restrictions are met */ for (i = 0; i < stream_count; i++) { - cur_strategy_list[i] = convert_strategy_to_drr_variant(base_strategy_list[i]); + /* vblank + vblank_drr variants are invalid */ + if (base_strategy->per_stream_pstate_method[i] == dml2_pmo_pstate_strategy_vblank && + ((num_streams_per_base_method[i] > 0 && num_streams_per_variant_method[i] > 0) || + num_streams_per_variant_method[i] > 1)) { + valid = false; + break; + } + } - if (cur_strategy_list[i] != base_strategy_list[i]) { - variant_found = true; + return valid; +} + +static void expand_variant_strategy(struct dml2_pmo_instance *pmo, + const struct dml2_pmo_pstate_strategy *base_strategy, + unsigned int stream_count) +{ + bool variant_found; + unsigned int i, j; + unsigned int method_index; + unsigned int stream_index; + unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + enum dml2_pmo_pstate_method per_stream_variant_method[DML2_MAX_PLANES]; + struct dml2_pmo_pstate_strategy variant_strategy = { 0 }; + + /* determine number of displays per method */ + for (i = 0; i < stream_count; i++) { + /* increment the count of the earliest index with the same method */ + for (j = 0; j < stream_count; j++) { + if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) { + num_streams_per_method[j] = num_streams_per_method[j] + 1; + break; + } } - if (i == stream_count - 1 && variant_found) { - insert_strategy_into_expanded_list(cur_strategy_list, stream_count, &pmo->init_data); + per_stream_variant_method[i] = convert_strategy_to_drr_variant(base_strategy->per_stream_pstate_method[i]); + } + memcpy(num_streams_per_base_method, num_streams_per_method, sizeof(unsigned int) * PMO_DCN4_MAX_DISPLAYS); + + memcpy(&variant_strategy, base_strategy, sizeof(struct dml2_pmo_pstate_strategy)); + + method_index = 0; + /* uses a while loop instead of recursion to build permutations of base strategy */ + while (num_streams_per_base_method[0] > 0 || method_index != 0) { + if (method_index == stream_count) { + /* construct variant strategy */ + variant_found = false; + stream_index = 0; + + for (i = 0; i < stream_count; i++) { + for (j = 0; j < num_streams_per_base_method[i]; j++) { + variant_strategy.per_stream_pstate_method[stream_index++] = base_strategy->per_stream_pstate_method[i]; + } + + for (j = 0; j < num_streams_per_variant_method[i]; j++) { + variant_strategy.per_stream_pstate_method[stream_index++] = per_stream_variant_method[i]; + if (base_strategy->per_stream_pstate_method[i] != per_stream_variant_method[i]) { + variant_found = true; + } + } + } + + if (variant_found && is_variant_method_valid(base_strategy, &variant_strategy, num_streams_per_base_method, num_streams_per_variant_method, stream_count)) { + expand_base_strategy(pmo, &variant_strategy, stream_count); + } + + /* rollback to earliest method with bases remaining */ + for (method_index = stream_count - 1; method_index > 0; method_index--) { + if (num_streams_per_base_method[method_index]) { + /* bases remaining */ + break; + } else { + /* reset counters */ + num_streams_per_base_method[method_index] = num_streams_per_method[method_index]; + num_streams_per_variant_method[method_index] = 0; + } + } + } + + if (num_streams_per_base_method[method_index]) { + num_streams_per_base_method[method_index]--; + num_streams_per_variant_method[method_index]++; + + method_index++; + } else if (method_index != 0) { + method_index++; } } } static void expand_base_strategies( struct dml2_pmo_instance *pmo, - const enum dml2_pmo_pstate_strategy(*base_strategies_list)[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *base_strategies_list, const unsigned int num_base_strategies, unsigned int stream_count) { unsigned int i; - unsigned int num_pre_variant_strategies; - enum dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS]; /* expand every explicit base strategy (except all DRR) */ - for (i = 0; i < num_base_strategies - 1; i++) { - expand_base_strategy(pmo, base_strategies_list[i], stream_count); + for (i = 0; i < num_base_strategies; i++) { + expand_base_strategy(pmo, &base_strategies_list[i], stream_count); + expand_variant_strategy(pmo, &base_strategies_list[i], stream_count); } - - /* expand base strategies to DRR variants */ - num_pre_variant_strategies = get_num_expanded_strategies(&pmo->init_data, stream_count); - expanded_strategy_list = get_expanded_strategy_list(&pmo->init_data, stream_count); - for (i = 0; i < num_pre_variant_strategies; i++) { - expand_variant_strategy(pmo, expanded_strategy_list[i], stream_count); - } - - /* add back all DRR */ - insert_strategy_into_expanded_list(base_strategies_list[num_base_strategies - 1], stream_count, &pmo->init_data); } bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) @@ -591,6 +717,8 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) &in_out->base_display_config->display_config; const struct dml2_core_mode_support_result *mode_support_result = &in_out->base_display_config->mode_support_result; + struct dml2_optimization_stage4_state *state = + &in_out->base_display_config->stage4; if (in_out->instance->options->disable_dyn_odm || (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) @@ -611,28 +739,30 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) */ if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) - in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; + state->unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; for (i = 0; i < display_config->num_streams; i++) { if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && in_out->instance->options->disable_dyn_odm_for_stream_with_svp) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; /* * ODM Combine requires horizontal timing divisible by 2 so each * ODM segment has the same size. */ else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; /* * Our hardware support seamless ODM transitions for DP encoders * only. */ else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; } + state->performed = true; + return true; } @@ -783,6 +913,7 @@ static void build_synchronized_timing_groups( /* clear all group masks */ memset(s->pmo_dcn4.synchronized_timing_group_masks, 0, sizeof(s->pmo_dcn4.synchronized_timing_group_masks)); memset(s->pmo_dcn4.group_is_drr_enabled, 0, sizeof(s->pmo_dcn4.group_is_drr_enabled)); + memset(s->pmo_dcn4.group_is_drr_active, 0, sizeof(s->pmo_dcn4.group_is_drr_active)); memset(s->pmo_dcn4.group_line_time_us, 0, sizeof(s->pmo_dcn4.group_line_time_us)); s->pmo_dcn4.num_timing_groups = 0; @@ -804,6 +935,8 @@ static void build_synchronized_timing_groups( /* if drr is in use, timing is not sychnronizable */ if (master_timing->drr_config.enabled) { s->pmo_dcn4.group_is_drr_enabled[timing_group_idx] = true; + s->pmo_dcn4.group_is_drr_active[timing_group_idx] = !master_timing->drr_config.disallowed && + (master_timing->drr_config.drr_active_fixed || master_timing->drr_config.drr_active_variable); continue; } @@ -883,7 +1016,7 @@ static bool all_timings_support_drr(const struct dml2_pmo_instance *pmo, stream_descriptor = &display_config->display_config.stream_descriptors[i]; stream_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[i]; - if (!stream_descriptor->timing.drr_config.enabled || stream_descriptor->overrides.disable_fams2_drr) + if (!stream_descriptor->timing.drr_config.enabled) return false; /* cannot support required vtotal */ @@ -966,35 +1099,24 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, return true; } -static void insert_into_candidate_list(const enum dml2_pmo_pstate_strategy *per_stream_pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) +static void insert_into_candidate_list(const struct dml2_pmo_pstate_strategy *pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) { - int stream_index; - - scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = true; - - for (stream_index = 0; stream_index < stream_count; stream_index++) { - scratch->pmo_dcn4.per_stream_pstate_strategy[scratch->pmo_dcn4.num_pstate_candidates][stream_index] = per_stream_pstate_strategy[stream_index]; - - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) - scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = false; - } - + scratch->pmo_dcn4.pstate_strategy_candidates[scratch->pmo_dcn4.num_pstate_candidates] = *pstate_strategy; scratch->pmo_dcn4.num_pstate_candidates++; } -static bool all_planes_match_strategy(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_strategy strategy) +static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_method method) { unsigned char i; enum dml2_uclk_pstate_change_strategy matching_strategy = (enum dml2_uclk_pstate_change_strategy) dml2_pmo_pstate_strategy_na; - if (strategy == dml2_pmo_pstate_strategy_vactive || strategy == dml2_pmo_pstate_strategy_fw_vactive_drr) + if (method == dml2_pmo_pstate_strategy_vactive || method == dml2_pmo_pstate_strategy_fw_vactive_drr) matching_strategy = dml2_uclk_pstate_change_strategy_force_vactive; - else if (strategy == dml2_pmo_pstate_strategy_vblank || strategy == dml2_pmo_pstate_strategy_fw_vblank_drr) + else if (method == dml2_pmo_pstate_strategy_vblank || method == dml2_pmo_pstate_strategy_fw_vblank_drr) matching_strategy = dml2_uclk_pstate_change_strategy_force_vblank; - else if (strategy == dml2_pmo_pstate_strategy_fw_svp) + else if (method == dml2_pmo_pstate_strategy_fw_svp) matching_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; - else if (strategy == dml2_pmo_pstate_strategy_fw_drr) + else if (method == dml2_pmo_pstate_strategy_fw_drr) matching_strategy = dml2_uclk_pstate_change_strategy_force_drr; for (i = 0; i < DML2_MAX_PLANES; i++) { @@ -1026,12 +1148,12 @@ static void build_method_scheduling_params( static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta( struct dml2_pmo_instance *pmo, - enum dml2_pmo_pstate_strategy stream_pstate_strategy, + enum dml2_pmo_pstate_method stream_pstate_method, int stream_idx) { struct dml2_fams2_per_method_common_meta *stream_method_fams2_meta = NULL; - switch (stream_pstate_strategy) { + switch (stream_pstate_method) { case dml2_pmo_pstate_strategy_vactive: case dml2_pmo_pstate_strategy_fw_vactive_drr: stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vactive.common; @@ -1062,7 +1184,7 @@ static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta( static bool is_timing_group_schedulable( struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *pstate_strategy, const unsigned int timing_group_idx, struct dml2_fams2_per_method_common_meta *group_fams2_meta) { @@ -1081,7 +1203,7 @@ static bool is_timing_group_schedulable( } /* init allow start and end lines for timing group */ - stream_method_fams2_meta = get_per_method_common_meta(pmo, per_stream_pstate_strategy[base_stream_idx], base_stream_idx); + stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[base_stream_idx], base_stream_idx); if (!stream_method_fams2_meta) return false; @@ -1090,9 +1212,9 @@ static bool is_timing_group_schedulable( group_fams2_meta->period_us = stream_method_fams2_meta->period_us; for (i = base_stream_idx + 1; i < display_cfg->display_config.num_streams; i++) { if (is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i)) { - stream_method_fams2_meta = get_per_method_common_meta(pmo, per_stream_pstate_strategy[i], i); + stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i); if (!stream_method_fams2_meta) - continue; + return false; if (group_fams2_meta->allow_start_otg_vline < stream_method_fams2_meta->allow_start_otg_vline) { /* set group allow start to larger otg vline */ @@ -1122,7 +1244,7 @@ static bool is_timing_group_schedulable( static bool is_config_schedulable( struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS]) + const struct dml2_pmo_pstate_strategy *pstate_strategy) { unsigned int i, j; bool schedulable; @@ -1145,7 +1267,7 @@ static bool is_config_schedulable( for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { s->pmo_dcn4.sorted_group_gtl_disallow_index[i] = i; s->pmo_dcn4.sorted_group_gtl_period_index[i] = i; - if (!is_timing_group_schedulable(pmo, display_cfg, per_stream_pstate_strategy, i, &s->pmo_dcn4.group_common_fams2_meta[i])) { + if (!is_timing_group_schedulable(pmo, display_cfg, pstate_strategy, i, &s->pmo_dcn4.group_common_fams2_meta[i])) { /* synchronized timing group was not schedulable */ schedulable = false; break; @@ -1247,7 +1369,7 @@ static bool is_config_schedulable( unsigned int sorted_ip1 = s->pmo_dcn4.sorted_group_gtl_period_index[i + 1]; if (s->pmo_dcn4.group_common_fams2_meta[sorted_i].allow_time_us < s->pmo_dcn4.group_common_fams2_meta[sorted_ip1].period_us || - s->pmo_dcn4.group_is_drr_enabled[sorted_ip1]) { + (s->pmo_dcn4.group_is_drr_enabled[sorted_ip1] && s->pmo_dcn4.group_is_drr_active[sorted_ip1])) { schedulable = false; break; } @@ -1259,8 +1381,8 @@ static bool is_config_schedulable( /* STAGE 4: When using HW exclusive modes, check disallow alignments are within allowed threshold */ if (s->pmo_dcn4.num_timing_groups == 2 && - !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, per_stream_pstate_strategy[0]) && - !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, per_stream_pstate_strategy[1])) { + !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[0]) && + !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[1])) { double period_ratio; double max_shift_us; double shift_per_period; @@ -1289,44 +1411,48 @@ static bool is_config_schedulable( } static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy stream_pstate_strategy, - unsigned int stream_index) + const struct display_configuation_with_meta *display_cfg, + const enum dml2_pmo_pstate_method stream_pstate_method, + unsigned int stream_index) { const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[stream_index]; bool strategy_matches_drr_requirements = true; /* check if strategy is compatible with stream drr capability and strategy */ - if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_strategy) && + if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method) && display_cfg->display_config.num_streams > 1 && stream_descriptor->timing.drr_config.enabled && (stream_descriptor->timing.drr_config.drr_active_fixed || stream_descriptor->timing.drr_config.drr_active_variable)) { /* DRR is active, so config may become unschedulable */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_strategy) && - is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method) && + is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && stream_descriptor->timing.drr_config.enabled && stream_descriptor->timing.drr_config.drr_active_variable) { /* DRR is variable, fw exclusive methods require DRR to be clamped */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method) && pmo->options->disable_drr_var_when_var_active && stream_descriptor->timing.drr_config.enabled && stream_descriptor->timing.drr_config.drr_active_variable) { /* DRR variable is active, but policy blocks DRR for p-state when this happens */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method) && (pmo->options->disable_drr_var || !stream_descriptor->timing.drr_config.enabled || stream_descriptor->timing.drr_config.disallowed)) { /* DRR variable strategies are disallowed due to settings or policy */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_strategy) && - (pmo->options->disable_drr_clamped || - !stream_descriptor->timing.drr_config.enabled)) { + } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_method) && + (pmo->options->disable_drr_clamped || + (!stream_descriptor->timing.drr_config.enabled || + (!stream_descriptor->timing.drr_config.drr_active_fixed && !stream_descriptor->timing.drr_config.drr_active_variable)) || + (pmo->options->disable_drr_clamped_when_var_active && + stream_descriptor->timing.drr_config.enabled && + stream_descriptor->timing.drr_config.drr_active_variable))) { /* DRR fixed strategies are disallowed due to settings or policy */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && pmo->options->disable_fams2) { /* FW modes require FAMS2 */ strategy_matches_drr_requirements = false; @@ -1337,7 +1463,7 @@ static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS]) + const struct dml2_pmo_pstate_strategy *pstate_strategy) { struct dml2_pmo_scratch *s = &pmo->scratch; @@ -1358,28 +1484,28 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins // Tabulate everything for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (!all_planes_match_strategy(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], - per_stream_pstate_strategy[stream_index])) { + if (!all_planes_match_method(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], + pstate_strategy->per_stream_pstate_method[stream_index])) { strategy_matches_forced_requirements = false; break; } strategy_matches_drr_requirements &= - stream_matches_drr_policy(pmo, display_cfg, per_stream_pstate_strategy[stream_index], stream_index); + stream_matches_drr_policy(pmo, display_cfg, pstate_strategy->per_stream_pstate_method[stream_index], stream_index); - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { svp_count++; set_bit_in_bitfield(&svp_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { drr_count++; set_bit_in_bitfield(&drr_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vactive || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { vactive_count++; set_bit_in_bitfield(&vactive_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { vblank_count++; set_bit_in_bitfield(&vblank_stream_mask, stream_index); } @@ -1388,7 +1514,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins if (!strategy_matches_forced_requirements || !strategy_matches_drr_requirements) return false; - if (vactive_count > 0 && (pmo->options->disable_vblank || !all_timings_support_vactive(pmo, display_cfg, vactive_stream_mask))) + if (vactive_count > 0 && !all_timings_support_vactive(pmo, display_cfg, vactive_stream_mask)) return false; if (vblank_count > 0 && (pmo->options->disable_vblank || !all_timings_support_vblank(pmo, display_cfg, vblank_stream_mask))) @@ -1400,7 +1526,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins if (svp_count > 0 && (pmo->options->disable_svp || !all_timings_support_svp(pmo, display_cfg, svp_stream_mask))) return false; - return is_config_schedulable(pmo, display_cfg, per_stream_pstate_strategy); + return is_config_schedulable(pmo, display_cfg, pstate_strategy); } static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) @@ -1456,6 +1582,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, (stream_fams2_meta->nom_vtotal * timing->h_total); stream_fams2_meta->nom_frame_time_us = (double)stream_fams2_meta->nom_vtotal * stream_fams2_meta->otg_vline_time_us; + stream_fams2_meta->vblank_start = timing->v_blank_end + timing->v_active; if (stream_descriptor->timing.drr_config.enabled == true) { if (stream_descriptor->timing.drr_config.min_refresh_uhz != 0.0) { @@ -1509,7 +1636,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, stream_fams2_meta->method_vactive.common.allow_start_otg_vline = timing->v_blank_end + stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; stream_fams2_meta->method_vactive.common.allow_end_otg_vline = - timing->v_blank_end + timing->v_active - + stream_fams2_meta->vblank_start - stream_fams2_meta->dram_clk_change_blackout_otg_vlines; } else { stream_fams2_meta->method_vactive.common.allow_start_otg_vline = 0; @@ -1519,8 +1646,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, build_method_scheduling_params(&stream_fams2_meta->method_vactive.common, stream_fams2_meta); /* vblank */ - stream_fams2_meta->method_vblank.common.allow_start_otg_vline = - timing->v_blank_end + timing->v_active; + stream_fams2_meta->method_vblank.common.allow_start_otg_vline = stream_fams2_meta->vblank_start; stream_fams2_meta->method_vblank.common.allow_end_otg_vline = stream_fams2_meta->method_vblank.common.allow_start_otg_vline + 1; stream_fams2_meta->method_vblank.common.period_us = stream_fams2_meta->nom_frame_time_us; @@ -1554,8 +1680,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + stream_fams2_meta->allow_to_target_delay_otg_vlines; stream_fams2_meta->method_subvp.common.allow_end_otg_vline = - stream_fams2_meta->nom_vtotal - - timing->v_front_porch - + stream_fams2_meta->vblank_start - stream_fams2_meta->dram_clk_change_blackout_otg_vlines; stream_fams2_meta->method_subvp.common.period_us = stream_fams2_meta->nom_frame_time_us; build_method_scheduling_params(&stream_fams2_meta->method_subvp.common, stream_fams2_meta); @@ -1564,20 +1689,21 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, stream_fams2_meta->method_drr.programming_delay_otg_vlines = (unsigned int)math_ceil(ip_caps->fams2.drr_programming_delay_us / stream_fams2_meta->otg_vline_time_us); stream_fams2_meta->method_drr.common.allow_start_otg_vline = - stream_fams2_meta->nom_vtotal + + stream_fams2_meta->vblank_start + stream_fams2_meta->allow_to_target_delay_otg_vlines; stream_fams2_meta->method_drr.common.period_us = stream_fams2_meta->nom_frame_time_us; if (display_config->display_config.num_streams <= 1) { /* only need to stretch vblank for blackout time */ stream_fams2_meta->method_drr.stretched_vtotal = - stream_fams2_meta->method_drr.common.allow_start_otg_vline + + stream_fams2_meta->nom_vtotal + + stream_fams2_meta->allow_to_target_delay_otg_vlines + stream_fams2_meta->min_allow_width_otg_vlines + stream_fams2_meta->dram_clk_change_blackout_otg_vlines; } else { /* multi display needs to always be schedulable */ stream_fams2_meta->method_drr.stretched_vtotal = - stream_fams2_meta->method_drr.common.allow_start_otg_vline + - stream_fams2_meta->nom_vtotal + + stream_fams2_meta->nom_vtotal * 2 + + stream_fams2_meta->allow_to_target_delay_otg_vlines + stream_fams2_meta->min_allow_width_otg_vlines + stream_fams2_meta->dram_clk_change_blackout_otg_vlines; } @@ -1610,7 +1736,7 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp struct display_configuation_with_meta *display_config; const struct dml2_plane_parameters *plane_descriptor; - const enum dml2_pmo_pstate_strategy(*strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + const struct dml2_pmo_pstate_strategy *strategy_list = NULL; unsigned int strategy_list_size = 0; unsigned char plane_index, stream_index, i; @@ -1622,6 +1748,10 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp memset(s, 0, sizeof(struct dml2_pmo_scratch)); + if (display_config->display_config.overrides.all_streams_blanked) { + return true; + } + pmo->scratch.pmo_dcn4.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; pmo->scratch.pmo_dcn4.max_latency_index = pmo->mcg_clock_table_size; pmo->scratch.pmo_dcn4.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; @@ -1651,6 +1781,9 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp build_synchronized_timing_groups(pmo, display_config); strategy_list = get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams); + if (!strategy_list) + return false; + strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams); if (strategy_list_size == 0) @@ -1659,8 +1792,8 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp s->pmo_dcn4.num_pstate_candidates = 0; for (i = 0; i < strategy_list_size && s->pmo_dcn4.num_pstate_candidates < DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE; i++) { - if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, strategy_list[i])) { - insert_into_candidate_list(strategy_list[i], display_config->display_config.num_streams, s); + if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, &strategy_list[i])) { + insert_into_candidate_list(&strategy_list[i], display_config->display_config.num_streams, s); } } @@ -1777,7 +1910,8 @@ static void setup_planes_for_vblank_by_mask(struct display_configuation_with_met if (is_bit_set_in_bitfield(plane_mask, plane_index)) { plane = &display_config->display_config.plane_descriptors[plane_index]; - plane->overrides.reserved_vblank_time_ns = (long)(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000); + plane->overrides.reserved_vblank_time_ns = (long)math_max2(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000.0, + plane->overrides.reserved_vblank_time_ns); display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vblank; @@ -1856,26 +1990,26 @@ static bool setup_display_config(struct display_configuation_with_meta *display_ for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (pmo->scratch.pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_na) { + if (pmo->scratch.pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_na) { success = false; break; - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vactive) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive) { setup_planes_for_vactive_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vblank) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank) { setup_planes_for_vblank_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp) { fams2_required = true; setup_planes_for_svp_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { fams2_required = true; setup_planes_for_vactive_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { fams2_required = true; setup_planes_for_vblank_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { fams2_required = true; setup_planes_for_svp_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { fams2_required = true; setup_planes_for_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); } @@ -1916,6 +2050,10 @@ bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_supp int MIN_VACTIVE_MARGIN_DRR = 0; int REQUIRED_RESERVED_TIME = 0; + if (in_out->base_display_config->display_config.overrides.all_streams_blanked) { + return true; + } + MIN_VACTIVE_MARGIN_VBLANK = INT_MIN; MIN_VACTIVE_MARGIN_DRR = INT_MIN; REQUIRED_RESERVED_TIME = (int)in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us; @@ -1926,34 +2064,34 @@ bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_supp for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { struct dml2_fams2_meta *stream_fams2_meta = &s->pmo_dcn4.stream_fams2_meta[stream_index]; - if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vactive || - s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) || get_vactive_det_fill_latency_delay_us(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) > stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_us) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vblank || - s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < REQUIRED_RESERVED_TIME || get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_VBLANK) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp || - s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - if (!all_planes_match_strategy(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr) || + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + if (!all_planes_match_method(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr) || get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_DRR) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_na) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_na) { p_state_supported = false; break; } @@ -1970,8 +2108,8 @@ bool pmo_dcn4_fams2_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pst memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); if (in_out->last_candidate_failed) { - if (s->pmo_dcn4.allow_state_increase_for_strategy[s->pmo_dcn4.cur_pstate_candidate] && - s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index) { + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].allow_state_increase && + s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index - 1) { s->pmo_dcn4.cur_latency_index++; success = true; @@ -2059,15 +2197,15 @@ bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in unsigned int i; - for (i = 0; i < in_out->base_display_config->display_config.num_streams; i++) { + for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && pmo->scratch.pmo_dcn4.z8_vblank_optimizable && - in_out->base_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us) { + in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us * 1000) { success = false; break; } if (pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && - in_out->base_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us) { + in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us * 1000) { success = false; break; } @@ -2086,8 +2224,11 @@ bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in if (!in_out->last_candidate_failed) { if (pmo->scratch.pmo_dcn4.cur_stutter_candidate < pmo->scratch.pmo_dcn4.num_stutter_candidates) { - for (i = 0; i < in_out->optimized_display_config->display_config.num_streams; i++) { - in_out->optimized_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us = pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.cur_stutter_candidate]; + for (i = 0; i < in_out->optimized_display_config->display_config.num_planes; i++) { + /* take the max of the current and the optimal reserved time */ + in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns = + (long)math_max2(pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.cur_stutter_candidate] * 1000, + in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns); } success = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h index 75175d93add4..0c25bd3e9ac0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_PMO_FAMS2_DCN4_H__ #define __DML2_PMO_FAMS2_DCN4_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c index e0b9ece7901d..add51d41a515 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -2,10 +2,8 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_pmo_factory.h" #include "dml2_pmo_dcn4_fams2.h" -#include "dml2_pmo_dcn4.h" #include "dml2_pmo_dcn3.h" #include "dml2_external_lib_deps.h" @@ -28,15 +26,15 @@ bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance * { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_pmo_instance)); switch (project_id) { case dml2_project_dcn4x_stage1: - out->initialize = pmo_dcn4_initialize; - out->optimize_dcc_mcache = pmo_dcn4_optimize_dcc_mcache; + out->initialize = pmo_dcn4_fams2_initialize; + out->optimize_dcc_mcache = pmo_dcn4_fams2_optimize_dcc_mcache; result = true; break; case dml2_project_dcn4x_stage2: diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h index 9d3dc5e94be1..7218de1824cc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_PMO_FACTORY_H__ #define __DML2_PMO_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c index e73579f1a88e..e17b5ceba447 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "lib_float_math.h" #define ASSERT(condition) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h index 537cf6fd4c15..e13b0c5939b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __LIB_FLOAT_MATH_H__ #define __LIB_FLOAT_MATH_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c index 1b6dbfaa7ae8..dc8af4dd0410 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_top_optimization.h" #include "dml2_internal_shared_types.h" #include "dml_top_mcache.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h index 1536afcbf73a..9f22ab33eab1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_TOP_OPTIMIZATION_H__ #define __DML2_TOP_OPTIMIZATION_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c index 2fb3e2f45e07..f9f8869cd8b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_internal_shared_types.h" #include "dml_top.h" #include "dml2_mcg_factory.h" @@ -28,6 +27,7 @@ bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out) bool result = false; memset(l, 0, sizeof(struct dml2_initialize_instance_locals)); + memset(dml, 0, sizeof(struct dml2_instance)); memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); memcpy(&dml->soc_bbox, &in_out->soc_bb, sizeof(struct dml2_soc_bb)); @@ -96,14 +96,12 @@ bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) { struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; struct dml2_check_mode_supported_locals *l = &dml->scratch.check_mode_supported_locals; - /* Borrow the build_mode_programming_locals programming struct for DPMM call. */ - struct dml2_display_cfg_programming *dpmm_programming = dml->scratch.build_mode_programming_locals.mode_programming_params.programming; + struct dml2_display_cfg_programming *dpmm_programming = &dml->dpmm_instance.dpmm_scratch.programming; bool result = false; bool mcache_success = false; - if (dpmm_programming) - memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming)); + memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming)); setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); @@ -130,7 +128,7 @@ bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) /* * Call DPMM to map all requirements to minimum clock state */ - if (result && dpmm_programming) { + if (result) { l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table; l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta; l->dppm_map_mode_params.programming = dpmm_programming; @@ -268,9 +266,18 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase); - if (vmin_success) { + if (l->optimized_display_config_with_meta.stage4.performed) { + /* + * when performed is true, optimization has applied to + * optimized_display_config_with_meta and it has passed mode + * support. However it may or may not pass the test function to + * reach actual Vmin. As long as voltage is optimized even if it + * doesn't reach Vmin level, there is still power benefit so in + * this case we will still copy this optimization into base + * display config. + */ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage4.success = true; + l->base_display_config_with_meta.stage4.success = vmin_success; } /* diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c index 7afd417071a5..a342ebfbe4e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_debug.h" #include "dml_top_mcache.h" @@ -143,12 +142,12 @@ static unsigned int count_elements_in_span(int *array, unsigned int array_size, while (span_start_index < array_size) { for (i = span_start_index; i < array_size; i++) { - if (array[i] - span_start_value > span) { + if (array[i] - span_start_value <= span) { if (i - span_start_index + 1 > greatest_element_count) { greatest_element_count = i - span_start_index + 1; } + } else break; - } } span_start_index++; @@ -208,9 +207,9 @@ bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissabi int temp, p0shift, p1shift; unsigned int plane_index = 0; unsigned int i; - char odm_combine_factor = 1; - char mpc_combine_factor = 1; - char num_dpps; + unsigned int odm_combine_factor; + unsigned int mpc_combine_factor; + unsigned int num_dpps; unsigned int num_boundaries; enum dml2_scaling_transform scaling_transform; const struct dml2_plane_parameters *plane; @@ -227,10 +226,10 @@ bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissabi plane = ¶ms->display_cfg->plane_descriptors[plane_index]; stream = ¶ms->display_cfg->stream_descriptors[plane->stream_index]; - odm_combine_factor = (char)params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; + num_dpps = odm_combine_factor = params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; if (odm_combine_factor == 1) - mpc_combine_factor = (char)params->cfg_support_info->plane_support_info[plane_index].dpps_used; + num_dpps = mpc_combine_factor = (unsigned int)params->cfg_support_info->plane_support_info[plane_index].dpps_used; else mpc_combine_factor = 1; @@ -260,13 +259,13 @@ bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissabi // The last element in the unshifted boundary array will always be the first pixel outside the // plane, which means theres no mcache associated with it, so -1 num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1; - if (count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, - num_boundaries, max_per_pipe_vp_p0) <= 1) { + if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, + num_boundaries, max_per_pipe_vp_p0) <= 1) && (num_boundaries <= num_dpps)) { p0pass = true; } num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1; - if (count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, - num_boundaries, max_per_pipe_vp_p1) <= 1) { + if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, + num_boundaries, max_per_pipe_vp_p1) <= 1) && (num_boundaries <= num_dpps)) { p1pass = true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h index bb12e4c30690..7b1f6f7143d0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_MCACHE_H__ #define __DML_TOP_MCACHE_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c index de7d8a6a2d3d..e9b8e10695ae 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_debug.h" int dml2_printf(const char *format, ...) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h index 0403238df107..d51a1b6c62f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_DEBUG_H__ #define __DML2_DEBUG_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h index 5632cdacb7f4..aeac9f159fa5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_INTERNAL_SHARED_TYPES_H__ #define __DML2_INTERNAL_SHARED_TYPES_H__ @@ -107,10 +106,16 @@ struct dml2_dpmm_map_watermarks_params_in_out { struct dml2_display_cfg_programming *programming; }; +struct dml2_dpmm_scratch { + struct dml2_display_cfg_programming programming; +}; + struct dml2_dpmm_instance { bool (*map_mode_to_soc_dpm)(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out); bool (*map_watermarks)(struct dml2_dpmm_map_watermarks_params_in_out *in_out); bool (*unit_test)(void); + + struct dml2_dpmm_scratch dpmm_scratch; }; /* @@ -266,6 +271,7 @@ struct dml2_fams2_meta { unsigned int contention_delay_otg_vlines; unsigned int min_allow_width_otg_vlines; unsigned int nom_vtotal; + unsigned int vblank_start; double nom_refresh_rate_hz; double nom_frame_time_us; unsigned int max_vtotal; @@ -594,7 +600,7 @@ struct dml2_pmo_optimize_for_stutter_in_out { struct display_configuation_with_meta *optimized_display_config; }; -enum dml2_pmo_pstate_strategy { +enum dml2_pmo_pstate_method { dml2_pmo_pstate_strategy_na = 0, /* hw exclusive modes */ dml2_pmo_pstate_strategy_vactive = 1, @@ -612,6 +618,11 @@ enum dml2_pmo_pstate_strategy { dml2_pmo_pstate_strategy_reserved_fw_drr_var = 22, }; +struct dml2_pmo_pstate_strategy { + enum dml2_pmo_pstate_method per_stream_pstate_method[DML2_MAX_PLANES]; + bool allow_state_increase; +}; + #define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw - dml2_pmo_pstate_strategy_na + 1)) - 1) << dml2_pmo_pstate_strategy_na) #define PMO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr) #define PMO_DRR_CLAMPED_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_clamped - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr) @@ -634,8 +645,7 @@ struct dml2_pmo_scratch { int stream_mask; } pmo_dcn3; struct { - enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[DML2_MAX_PLANES][DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; - bool allow_state_increase_for_strategy[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; + struct dml2_pmo_pstate_strategy pstate_strategy_candidates[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; int num_pstate_candidates; int cur_pstate_candidate; @@ -661,6 +671,7 @@ struct dml2_pmo_scratch { unsigned int num_timing_groups; unsigned int synchronized_timing_group_masks[DML2_MAX_PLANES]; bool group_is_drr_enabled[DML2_MAX_PLANES]; + bool group_is_drr_active[DML2_MAX_PLANES]; double group_line_time_us[DML2_MAX_PLANES]; /* scheduling check locals */ @@ -676,10 +687,10 @@ struct dml2_pmo_init_data { union { struct { /* populated once during initialization */ - enum dml2_pmo_pstate_strategy expanded_strategy_list_1_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2][PMO_DCN4_MAX_DISPLAYS]; - enum dml2_pmo_pstate_strategy expanded_strategy_list_2_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2 * 2][PMO_DCN4_MAX_DISPLAYS]; - enum dml2_pmo_pstate_strategy expanded_strategy_list_3_display[PMO_DCN4_MAX_BASE_STRATEGIES * 6 * 2][PMO_DCN4_MAX_DISPLAYS]; - enum dml2_pmo_pstate_strategy expanded_strategy_list_4_display[PMO_DCN4_MAX_BASE_STRATEGIES * 24 * 2][PMO_DCN4_MAX_DISPLAYS]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_1_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_2_display[PMO_DCN4_MAX_BASE_STRATEGIES * 4 * 4]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_3_display[PMO_DCN4_MAX_BASE_STRATEGIES * 6 * 6 * 6]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_4_display[PMO_DCN4_MAX_BASE_STRATEGIES * 8 * 8 * 8 * 8]; unsigned int num_expanded_strategies_per_list[PMO_DCN4_MAX_DISPLAYS]; } pmo_dcn4; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h index b566f53608c6..3ba184be25d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h @@ -101,6 +101,7 @@ struct dml2_wrapper_scratch { struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; bool enable_flexible_pipe_mapping; bool plane_duplicate_exists; + unsigned int dp2_mst_stream_count; }; struct dml2_helper_det_policy_scratch { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 8b9dcee77266..7e39873832bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -733,7 +733,8 @@ static void populate_dml_timing_cfg_from_stream_state(struct dml_timing_cfg_st * } static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *out, unsigned int location, - const struct dc_stream_state *in, const struct pipe_ctx *pipe) + const struct dc_stream_state *in, const struct pipe_ctx *pipe, + unsigned int dp2_mst_stream_count) { unsigned int output_bpc; @@ -746,7 +747,7 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st * case SIGNAL_TYPE_DISPLAY_PORT_MST: case SIGNAL_TYPE_DISPLAY_PORT: out->OutputEncoder[location] = dml_dp; - if (is_dp2p0_output_encoder(pipe)) + if (is_dp2p0_output_encoder(pipe, dp2_mst_stream_count)) out->OutputEncoder[location] = dml_dp2p0; break; case SIGNAL_TYPE_EDP: @@ -953,7 +954,9 @@ static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc memcpy(out, &temp_pipe->plane_res.scl_data, sizeof(*out)); } -static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_stream_state *in) +static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, + const struct dc_stream_state *in, + const struct soc_bounding_box_st *soc) { dml_uint_t width, height; @@ -970,7 +973,7 @@ static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; - out->GPUVMMinPageSizeKBytes[location] = 256; + out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes; out->ViewportWidth[location] = width; out->ViewportHeight[location] = height; @@ -1007,7 +1010,9 @@ static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned out->ScalerEnabled[location] = false; } -static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, struct dc_state *context) +static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, + const struct dc_plane_state *in, struct dc_state *context, + const struct soc_bounding_box_st *soc) { struct scaler_data *scaler_data = kzalloc(sizeof(*scaler_data), GFP_KERNEL); if (!scaler_data) @@ -1018,7 +1023,7 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; - out->GPUVMMinPageSizeKBytes[location] = 256; + out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes; out->ViewportWidth[location] = scaler_data->viewport.width; out->ViewportHeight[location] = scaler_data->viewport.height; @@ -1193,6 +1198,37 @@ static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, plane_index = 0; } } + +static unsigned int calculate_dp2_mst_stream_count(struct dc_state *context) +{ + int i, j; + unsigned int dp2_mst_stream_count = 0; + + for (i = 0; i < context->stream_count; i++) { + struct dc_stream_state *stream = context->streams[i]; + + if (!stream || stream->signal != SIGNAL_TYPE_DISPLAY_PORT_MST) + continue; + + for (j = 0; j < MAX_PIPES; j++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; + + if (!pipe_ctx || !pipe_ctx->stream) + continue; + + if (stream != pipe_ctx->stream) + continue; + + if (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc) { + dp2_mst_stream_count++; + break; + } + } + } + + return dp2_mst_stream_count; +} + static void populate_dml_writeback_cfg_from_stream_state(struct dml_writeback_cfg_st *out, unsigned int location, const struct dc_stream_state *in) { @@ -1255,6 +1291,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (dml2->v20.dml_core_ctx.ip.hostvm_enable) dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter; + dml2->v20.scratch.dp2_mst_stream_count = calculate_dp2_mst_stream_count(context); dml2_populate_pipe_to_plane_index_mapping(dml2, context); for (i = 0; i < context->stream_count; i++) { @@ -1276,7 +1313,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2->v20.scratch.dp2_mst_stream_count); /*Call site for populate_dml_writeback_cfg_from_stream_state*/ populate_dml_writeback_cfg_from_stream_state(&dml_dispcfg->writeback, disp_cfg_stream_location, context->streams[i]); @@ -1299,7 +1336,8 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat disp_cfg_plane_location = dml_dispcfg->num_surfaces++; populate_dummy_dml_surface_cfg(&dml_dispcfg->surface, disp_cfg_plane_location, context->streams[i]); - populate_dummy_dml_plane_cfg(&dml_dispcfg->plane, disp_cfg_plane_location, context->streams[i]); + populate_dummy_dml_plane_cfg(&dml_dispcfg->plane, disp_cfg_plane_location, + context->streams[i], &dml2->v20.dml_core_ctx.soc); dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location; @@ -1315,7 +1353,10 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]); - populate_dml_plane_cfg_from_plane_state(&dml_dispcfg->plane, disp_cfg_plane_location, context->stream_status[i].plane_states[j], context); + populate_dml_plane_cfg_from_plane_state( + &dml_dispcfg->plane, disp_cfg_plane_location, + context->stream_status[i].plane_states[j], context, + &dml2->v20.dml_core_ctx.soc); if (stream_mall_type == SUBVP_MAIN) { dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport; @@ -1337,7 +1378,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (j >= 1) { populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context, dml2->v20.scratch.dp2_mst_stream_count); switch (context->streams[i]->debug.force_odm_combine_segments) { case 2: dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h index d764773938f4..55659b22d87f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h @@ -36,6 +36,6 @@ void dml2_translate_socbb_params(const struct dc *in_dc, struct soc_bounding_box void dml2_translate_soc_states(const struct dc *in_dc, struct soc_states_st *out, int num_states); void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg); void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, struct pipe_ctx *out); -bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe); +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe, unsigned int dp2_mst_stream_count); #endif //__DML2_TRANSLATION_HELPER_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 92238ff333a4..9e8ff3a9718e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -153,7 +153,7 @@ unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, e } } -bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx, unsigned int dp2_mst_stream_count) { if (pipe_ctx == NULL || pipe_ctx->stream == NULL) return false; @@ -162,7 +162,7 @@ bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); /* Count MST hubs once by treating only 1st remote sink in topology as an encoder */ - if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) { + if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0] && dp2_mst_stream_count > 1) { return (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc && dc_is_dp_signal(pipe_ctx->stream->signal) && @@ -181,7 +181,7 @@ bool is_dtbclk_required(const struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; - if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i])) + if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i], context->bw_ctx.dml2->v20.scratch.dp2_mst_stream_count)) return true; } return false; @@ -421,7 +421,7 @@ unsigned int dml2_calc_max_scaled_time( void dml2_extract_writeback_wm(struct dc_state *context, struct display_mode_lib_st *dml_core_ctx) { - int i, j = 0;; + int i, j = 0; struct mcif_arb_params *wb_arb_params = NULL; struct dcn_bw_writeback *bw_writeback = NULL; enum mmhubbub_wbif_mode wbif_mode = PACKED_444_FP16; /*for now*/ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index d5dcc8b77281..866b0abcff1b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -575,7 +575,7 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s unsigned int lowest_state_idx = 0; out_clks.p_state_supported = true; - out_clks.dispclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dispclk_mhz * 1000; + out_clks.dispclk_khz = 0; /* No requirement, and lowest index will generally be maximum dispclk. */ out_clks.dcfclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dcfclk_mhz * 1000; out_clks.fclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].fabricclk_mhz * 1000; out_clks.uclk_mts = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dram_speed_mts; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index 023325e8f6e2..0f944fcfd5a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -236,6 +236,7 @@ struct dml2_configuration_options { bool use_clock_dc_limits; bool gpuvm_enable; + bool force_tdlut_enable; struct dml2_soc_bb *bb_from_dmub; }; diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c index f2a2d53e9689..f8f6019d8304 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c @@ -684,9 +684,6 @@ void dpp1_set_degamma( BREAK_TO_DEBUGGER(); break; } - - REG_SEQ_SUBMIT(); - REG_SEQ_WAIT_DONE(); } void dpp1_degamma_ram_select( diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c index e16274fee31d..8473c694bfdc 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c @@ -59,6 +59,31 @@ void dpp35_dppclk_control( DISPCLK_R_GATE_DISABLE, 0); } +void dpp35_program_bias_and_scale_fcnv( + struct dpp *dpp_base, + struct dc_bias_and_scale *params) +{ + struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base); + + if (!params->bias_and_scale_valid) { + REG_SET(FCNV_FP_BIAS_R, 0, FCNV_FP_BIAS_R, 0); + REG_SET(FCNV_FP_BIAS_G, 0, FCNV_FP_BIAS_G, 0); + REG_SET(FCNV_FP_BIAS_B, 0, FCNV_FP_BIAS_B, 0); + + REG_SET(FCNV_FP_SCALE_R, 0, FCNV_FP_SCALE_R, 0x1F000); + REG_SET(FCNV_FP_SCALE_G, 0, FCNV_FP_SCALE_G, 0x1F000); + REG_SET(FCNV_FP_SCALE_B, 0, FCNV_FP_SCALE_B, 0x1F000); + } else { + REG_SET(FCNV_FP_BIAS_R, 0, FCNV_FP_BIAS_R, params->bias_red); + REG_SET(FCNV_FP_BIAS_G, 0, FCNV_FP_BIAS_G, params->bias_green); + REG_SET(FCNV_FP_BIAS_B, 0, FCNV_FP_BIAS_B, params->bias_blue); + + REG_SET(FCNV_FP_SCALE_R, 0, FCNV_FP_SCALE_R, params->scale_red); + REG_SET(FCNV_FP_SCALE_G, 0, FCNV_FP_SCALE_G, params->scale_green); + REG_SET(FCNV_FP_SCALE_B, 0, FCNV_FP_SCALE_B, params->scale_blue); + } +} + static struct dpp_funcs dcn35_dpp_funcs = { .dpp_program_gamcor_lut = dpp3_program_gamcor_lut, .dpp_read_state = dpp30_read_state, @@ -81,7 +106,7 @@ static struct dpp_funcs dcn35_dpp_funcs = { .dpp_program_shaper_lut = NULL, // CM SHAPER block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) .dpp_program_3dlut = NULL, // CM 3DLUT block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) - .dpp_program_bias_and_scale = NULL, + .dpp_program_bias_and_scale = dpp35_program_bias_and_scale_fcnv, .dpp_cnv_set_alpha_keyer = dpp2_cnv_set_alpha_keyer, .set_cursor_attributes = dpp3_set_cursor_attributes, .set_cursor_position = dpp1_set_cursor_position, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h index 135872d88219..3ca339a16e5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h @@ -61,4 +61,7 @@ bool dpp35_construct(struct dcn3_dpp *dpp3, struct dc_context *ctx, void dpp35_set_fgcg(struct dcn3_dpp *dpp, bool enable); +void dpp35_program_bias_and_scale_fcnv(struct dpp *dpp_base, + struct dc_bias_and_scale *bias_and_scale); + #endif // __DCN35_DPP_H diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c index 7cae18fd7be9..97bf26fa3573 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c @@ -30,6 +30,7 @@ #include "basics/conversion.h" #include "dcn30/dcn30_cm_common.h" #include "dcn32/dcn32_dpp.h" +#include "dcn35/dcn35_dpp.h" #define REG(reg)\ dpp->tf_regs->reg @@ -240,7 +241,7 @@ static struct dpp_funcs dcn401_dpp_funcs = { .dpp_program_shaper_lut = NULL, // CM SHAPER block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) .dpp_program_3dlut = NULL, // CM 3DLUT block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) - .dpp_program_bias_and_scale = NULL, + .dpp_program_bias_and_scale = dpp35_program_bias_and_scale_fcnv, .dpp_cnv_set_alpha_keyer = dpp2_cnv_set_alpha_keyer, .set_cursor_attributes = dpp401_set_cursor_attributes, .set_cursor_position = dpp401_set_cursor_position, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index d0f8c9ff5232..3b6ca7974e18 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -120,11 +120,10 @@ void dpp401_set_cursor_attributes( enum dc_cursor_color_format color_format = cursor_attributes->color_format; int cur_rom_en = 0; + // DCN4 should always do Cursor degamma for Cursor Color modes if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA || color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) { - cur_rom_en = 1; - } + cur_rom_en = 1; } REG_UPDATE_3(CURSOR0_CONTROL, @@ -246,16 +245,6 @@ void dpp401_set_cursor_matrix( enum dc_color_space color_space, struct dc_csc_transform cursor_csc_color_matrix) { - struct dpp_input_csc_matrix cursor_tbl_entry; - unsigned int i; - - if (cursor_csc_color_matrix.enable_adjustment == true) { - for (i = 0; i < 12; i++) - cursor_tbl_entry.regval[i] = cursor_csc_color_matrix.matrix[i]; - - cursor_tbl_entry.color_space = color_space; - dpp401_program_cursor_csc(dpp_base, color_space, &cursor_tbl_entry); - } else { - dpp401_program_cursor_csc(dpp_base, color_space, NULL); - } + //Since we don't have cursor matrix information, force bypass mode by passing in unknown color space + dpp401_program_cursor_csc(dpp_base, COLOR_SPACE_UNKNOWN, NULL); } diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c index 6acb6699f146..61678b0a5a1e 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c @@ -27,7 +27,7 @@ static void dsc401_disconnect(struct display_stream_compressor *dsc); static void dsc401_wait_disconnect_pending_clear(struct display_stream_compressor *dsc); static void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz); -const struct dsc_funcs dcn401_dsc_funcs = { +static const struct dsc_funcs dcn401_dsc_funcs = { .dsc_get_enc_caps = dsc401_get_enc_caps, .dsc_read_state = dsc401_read_state, .dsc_validate_stream = dsc401_validate_stream, diff --git a/drivers/gpu/drm/amd/display/dc/dwb/Makefile b/drivers/gpu/drm/amd/display/dc/dwb/Makefile index 16f7a454fed9..3952ba4cd508 100644 --- a/drivers/gpu/drm/amd/display/dc/dwb/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dwb/Makefile @@ -24,6 +24,15 @@ # ifdef CONFIG_DRM_AMD_DC_FP +############################################################################### +# DCN30 +############################################################################### +DWB_DCN30 = dcn30_dwb.o dcn30_dwb_cm.o + +AMD_DAL_DWB_DCN30 = $(addprefix $(AMDDALPATH)/dc/dwb/dcn30/,$(DWB_DCN30)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DWB_DCN30) + ############################################################################### # DCN35 ############################################################################### diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.h rename to drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c rename to drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h rename to drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb_cm.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c rename to drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb_cm.c diff --git a/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c index b23a809999ed..d5e8294f5a16 100644 --- a/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c +++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c @@ -21,7 +21,6 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ - #include "reg_helper.h" #include "dcn35_dwb.h" diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c index 46415cab23ab..928abca18a18 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c @@ -86,7 +86,13 @@ static const struct ddc_registers ddc_data_regs_dcn[] = { ddc_data_regs_dcn2(2), ddc_data_regs_dcn2(3), ddc_data_regs_dcn2(4), -// ddc_data_regs_dcn2(5), + { + // add a dummy entry for cases no such port + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, + .ddc_setup = 0, + .phy_aux_cntl = 0, + .dc_gpio_aux_ctrl_5 = 0 + }, { // add a dummy entry for cases no such port {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, @@ -107,7 +113,13 @@ static const struct ddc_registers ddc_clk_regs_dcn[] = { ddc_clk_regs_dcn2(2), ddc_clk_regs_dcn2(3), ddc_clk_regs_dcn2(4), -// ddc_clk_regs_dcn2(5), + { + // add a dummy entry for cases no such port + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, + .ddc_setup = 0, + .phy_aux_cntl = 0, + .dc_gpio_aux_ctrl_5 = 0 + }, { // add a dummy entry for cases no such port {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, diff --git a/drivers/gpu/drm/amd/display/dc/hpo/Makefile b/drivers/gpu/drm/amd/display/dc/hpo/Makefile index c248bd86b477..7f2c9ee0dff1 100644 --- a/drivers/gpu/drm/amd/display/dc/hpo/Makefile +++ b/drivers/gpu/drm/amd/display/dc/hpo/Makefile @@ -25,6 +25,21 @@ ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### +# DCN30 +############################################################################### + +AMD_DAL_HPO_DCN30 = $(addprefix $(AMDDALPATH)/dc/hpo/dcn30/,$(HPO_DCN30)) + +AMD_DISPLAY_FILES += $(AMD_DAL_HPO_DCN30) +############################################################################### +# DCN31 +############################################################################### +HPO_DCN31 = dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o + +AMD_DAL_HPO_DCN31 = $(addprefix $(AMDDALPATH)/dc/hpo/dcn31/,$(HPO_DCN31)) + +AMD_DISPLAY_FILES += $(AMD_DAL_HPO_DCN31) +############################################################################### # DCN32 ############################################################################### HPO_DCN32 = dcn32_hpo_dp_link_encoder.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c rename to drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h rename to drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c rename to drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h rename to drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.h diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c index 181041d6d177..37d26fa0b6fb 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c @@ -75,108 +75,108 @@ bool hubbub401_program_urgent_watermarks( /* Repeat for water mark set A and B */ /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.urgent > hubbub2->watermarks.dcn4.a.urgent) { - hubbub2->watermarks.dcn4.a.urgent = watermarks->dcn4.a.urgent; + if (safe_to_lower || watermarks->dcn4x.a.urgent > hubbub2->watermarks.dcn4x.a.urgent) { + hubbub2->watermarks.dcn4x.a.urgent = watermarks->dcn4x.a.urgent; REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, 0, - DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, watermarks->dcn4.a.urgent); + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, watermarks->dcn4x.a.urgent); DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.a.urgent, watermarks->dcn4.a.urgent); - } else if (watermarks->dcn4.a.urgent < hubbub2->watermarks.dcn4.a.urgent) + watermarks->dcn4x.a.urgent, watermarks->dcn4x.a.urgent); + } else if (watermarks->dcn4x.a.urgent < hubbub2->watermarks.dcn4x.a.urgent) wm_pending = true; /* determine the transfer time for a quantity of data for a particular requestor.*/ - if (safe_to_lower || watermarks->dcn4.a.frac_urg_bw_flip - > hubbub2->watermarks.dcn4.a.frac_urg_bw_flip) { - hubbub2->watermarks.dcn4.a.frac_urg_bw_flip = watermarks->dcn4.a.frac_urg_bw_flip; + if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_flip + > hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip) { + hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip = watermarks->dcn4x.a.frac_urg_bw_flip; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, 0, - DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, watermarks->dcn4.a.frac_urg_bw_flip); - } else if (watermarks->dcn4.a.frac_urg_bw_flip - < hubbub2->watermarks.dcn4.a.frac_urg_bw_flip) + DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, watermarks->dcn4x.a.frac_urg_bw_flip); + } else if (watermarks->dcn4x.a.frac_urg_bw_flip + < hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.frac_urg_bw_nom - > hubbub2->watermarks.dcn4.a.frac_urg_bw_nom) { - hubbub2->watermarks.dcn4.a.frac_urg_bw_nom = watermarks->dcn4.a.frac_urg_bw_nom; + if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_nom + > hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom) { + hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom = watermarks->dcn4x.a.frac_urg_bw_nom; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, 0, - DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, watermarks->dcn4.a.frac_urg_bw_nom); - } else if (watermarks->dcn4.a.frac_urg_bw_nom - < hubbub2->watermarks.dcn4.a.frac_urg_bw_nom) + DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, watermarks->dcn4x.a.frac_urg_bw_nom); + } else if (watermarks->dcn4x.a.frac_urg_bw_nom + < hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.frac_urg_bw_mall - > hubbub2->watermarks.dcn4.a.frac_urg_bw_mall) { - hubbub2->watermarks.dcn4.a.frac_urg_bw_mall = watermarks->dcn4.a.frac_urg_bw_mall; + if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_mall + > hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall) { + hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall = watermarks->dcn4x.a.frac_urg_bw_mall; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, 0, - DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, watermarks->dcn4.a.frac_urg_bw_mall); - } else if (watermarks->dcn4.a.frac_urg_bw_mall < hubbub2->watermarks.dcn4.a.frac_urg_bw_mall) + DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, watermarks->dcn4x.a.frac_urg_bw_mall); + } else if (watermarks->dcn4x.a.frac_urg_bw_mall < hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem) { - hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem = watermarks->dcn4.a.refcyc_per_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.a.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem) { + hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem = watermarks->dcn4x.a.refcyc_per_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, 0, - DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, watermarks->dcn4.a.refcyc_per_trip_to_mem); - } else if (watermarks->dcn4.a.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, watermarks->dcn4x.a.refcyc_per_trip_to_mem); + } else if (watermarks->dcn4x.a.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem) { - hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem = watermarks->dcn4.a.refcyc_per_meta_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem) { + hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem = watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, 0, - DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, watermarks->dcn4.a.refcyc_per_meta_trip_to_mem); - } else if (watermarks->dcn4.a.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem); + } else if (watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.urgent > hubbub2->watermarks.dcn4.b.urgent) { - hubbub2->watermarks.dcn4.b.urgent = watermarks->dcn4.b.urgent; + if (safe_to_lower || watermarks->dcn4x.b.urgent > hubbub2->watermarks.dcn4x.b.urgent) { + hubbub2->watermarks.dcn4x.b.urgent = watermarks->dcn4x.b.urgent; REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, 0, - DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, watermarks->dcn4.b.urgent); + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, watermarks->dcn4x.b.urgent); DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.b.urgent, watermarks->dcn4.b.urgent); - } else if (watermarks->dcn4.b.urgent < hubbub2->watermarks.dcn4.b.urgent) + watermarks->dcn4x.b.urgent, watermarks->dcn4x.b.urgent); + } else if (watermarks->dcn4x.b.urgent < hubbub2->watermarks.dcn4x.b.urgent) wm_pending = true; /* determine the transfer time for a quantity of data for a particular requestor.*/ - if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_flip - > hubbub2->watermarks.dcn4.b.frac_urg_bw_flip) { - hubbub2->watermarks.dcn4.b.frac_urg_bw_flip = watermarks->dcn4.b.frac_urg_bw_flip; + if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_flip + > hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip) { + hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip = watermarks->dcn4x.b.frac_urg_bw_flip; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, 0, - DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, watermarks->dcn4.b.frac_urg_bw_flip); - } else if (watermarks->dcn4.b.frac_urg_bw_flip - < hubbub2->watermarks.dcn4.b.frac_urg_bw_flip) + DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, watermarks->dcn4x.b.frac_urg_bw_flip); + } else if (watermarks->dcn4x.b.frac_urg_bw_flip + < hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_nom - > hubbub2->watermarks.dcn4.b.frac_urg_bw_nom) { - hubbub2->watermarks.dcn4.b.frac_urg_bw_nom = watermarks->dcn4.b.frac_urg_bw_nom; + if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_nom + > hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom) { + hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom = watermarks->dcn4x.b.frac_urg_bw_nom; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, 0, - DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, watermarks->dcn4.b.frac_urg_bw_nom); - } else if (watermarks->dcn4.b.frac_urg_bw_nom - < hubbub2->watermarks.dcn4.b.frac_urg_bw_nom) + DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, watermarks->dcn4x.b.frac_urg_bw_nom); + } else if (watermarks->dcn4x.b.frac_urg_bw_nom + < hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_mall - > hubbub2->watermarks.dcn4.b.frac_urg_bw_mall) { - hubbub2->watermarks.dcn4.b.frac_urg_bw_mall = watermarks->dcn4.b.frac_urg_bw_mall; + if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_mall + > hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall) { + hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall = watermarks->dcn4x.b.frac_urg_bw_mall; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, 0, - DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, watermarks->dcn4.b.frac_urg_bw_mall); - } else if (watermarks->dcn4.b.frac_urg_bw_mall < hubbub2->watermarks.dcn4.b.frac_urg_bw_mall) + DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, watermarks->dcn4x.b.frac_urg_bw_mall); + } else if (watermarks->dcn4x.b.frac_urg_bw_mall < hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem) { - hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem = watermarks->dcn4.b.refcyc_per_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.b.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem) { + hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem = watermarks->dcn4x.b.refcyc_per_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, 0, - DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, watermarks->dcn4.b.refcyc_per_trip_to_mem); - } else if (watermarks->dcn4.b.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, watermarks->dcn4x.b.refcyc_per_trip_to_mem); + } else if (watermarks->dcn4x.b.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem) { - hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem = watermarks->dcn4.b.refcyc_per_meta_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem) { + hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem = watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, 0, - DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, watermarks->dcn4.b.refcyc_per_meta_trip_to_mem); - } else if (watermarks->dcn4.b.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem); + } else if (watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem) wm_pending = true; return wm_pending; @@ -192,89 +192,89 @@ bool hubbub401_program_stutter_watermarks( bool wm_pending = false; /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.sr_enter - > hubbub2->watermarks.dcn4.a.sr_enter) { - hubbub2->watermarks.dcn4.a.sr_enter = - watermarks->dcn4.a.sr_enter; + if (safe_to_lower || watermarks->dcn4x.a.sr_enter + > hubbub2->watermarks.dcn4x.a.sr_enter) { + hubbub2->watermarks.dcn4x.a.sr_enter = + watermarks->dcn4x.a.sr_enter; REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, watermarks->dcn4x.a.sr_enter); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.a.sr_enter, watermarks->dcn4.a.sr_enter); + watermarks->dcn4x.a.sr_enter, watermarks->dcn4x.a.sr_enter); // On dGPU Z states are N/A, so program all other 3 Stutter Enter wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, watermarks->dcn4x.a.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, watermarks->dcn4x.a.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, watermarks->dcn4x.a.sr_enter); - } else if (watermarks->dcn4.a.sr_enter - < hubbub2->watermarks.dcn4.a.sr_enter) + } else if (watermarks->dcn4x.a.sr_enter + < hubbub2->watermarks.dcn4x.a.sr_enter) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.sr_exit - > hubbub2->watermarks.dcn4.a.sr_exit) { - hubbub2->watermarks.dcn4.a.sr_exit = - watermarks->dcn4.a.sr_exit; + if (safe_to_lower || watermarks->dcn4x.a.sr_exit + > hubbub2->watermarks.dcn4x.a.sr_exit) { + hubbub2->watermarks.dcn4x.a.sr_exit = + watermarks->dcn4x.a.sr_exit; REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, watermarks->dcn4x.a.sr_exit); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.a.sr_exit, watermarks->dcn4.a.sr_exit); + watermarks->dcn4x.a.sr_exit, watermarks->dcn4x.a.sr_exit); // On dGPU Z states are N/A, so program all other 3 Stutter Exit wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, watermarks->dcn4x.a.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, watermarks->dcn4x.a.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, watermarks->dcn4x.a.sr_exit); - } else if (watermarks->dcn4.a.sr_exit - < hubbub2->watermarks.dcn4.a.sr_exit) + } else if (watermarks->dcn4x.a.sr_exit + < hubbub2->watermarks.dcn4x.a.sr_exit) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.sr_enter - > hubbub2->watermarks.dcn4.b.sr_enter) { - hubbub2->watermarks.dcn4.b.sr_enter = - watermarks->dcn4.b.sr_enter; + if (safe_to_lower || watermarks->dcn4x.b.sr_enter + > hubbub2->watermarks.dcn4x.b.sr_enter) { + hubbub2->watermarks.dcn4x.b.sr_enter = + watermarks->dcn4x.b.sr_enter; REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, watermarks->dcn4x.b.sr_enter); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.b.sr_enter, watermarks->dcn4.b.sr_enter); + watermarks->dcn4x.b.sr_enter, watermarks->dcn4x.b.sr_enter); // On dGPU Z states are N/A, so program all other 3 Stutter Enter wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, watermarks->dcn4x.b.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, watermarks->dcn4x.b.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, watermarks->dcn4x.b.sr_enter); - } else if (watermarks->dcn4.b.sr_enter - < hubbub2->watermarks.dcn4.b.sr_enter) + } else if (watermarks->dcn4x.b.sr_enter + < hubbub2->watermarks.dcn4x.b.sr_enter) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.sr_exit - > hubbub2->watermarks.dcn4.b.sr_exit) { - hubbub2->watermarks.dcn4.b.sr_exit = - watermarks->dcn4.b.sr_exit; + if (safe_to_lower || watermarks->dcn4x.b.sr_exit + > hubbub2->watermarks.dcn4x.b.sr_exit) { + hubbub2->watermarks.dcn4x.b.sr_exit = + watermarks->dcn4x.b.sr_exit; REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, watermarks->dcn4x.b.sr_exit); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.b.sr_exit, watermarks->dcn4.b.sr_exit); + watermarks->dcn4x.b.sr_exit, watermarks->dcn4x.b.sr_exit); // On dGPU Z states are N/A, so program all other 3 Stutter Exit wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, watermarks->dcn4x.b.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, watermarks->dcn4x.b.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, watermarks->dcn4x.b.sr_exit); - } else if (watermarks->dcn4.b.sr_exit - < hubbub2->watermarks.dcn4.b.sr_exit) + } else if (watermarks->dcn4x.b.sr_exit + < hubbub2->watermarks.dcn4x.b.sr_exit) wm_pending = true; return wm_pending; @@ -292,116 +292,116 @@ bool hubbub401_program_pstate_watermarks( /* Section for UCLK_PSTATE_CHANGE_WATERMARKS */ /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.uclk_pstate - > hubbub2->watermarks.dcn4.a.uclk_pstate) { - hubbub2->watermarks.dcn4.a.uclk_pstate = - watermarks->dcn4.a.uclk_pstate; + if (safe_to_lower || watermarks->dcn4x.a.uclk_pstate + > hubbub2->watermarks.dcn4x.a.uclk_pstate) { + hubbub2->watermarks.dcn4x.a.uclk_pstate = + watermarks->dcn4x.a.uclk_pstate; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4.a.uclk_pstate); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4x.a.uclk_pstate); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.uclk_pstate, watermarks->dcn4.a.uclk_pstate); - } else if (watermarks->dcn4.a.uclk_pstate - < hubbub2->watermarks.dcn4.a.uclk_pstate) + watermarks->dcn4x.a.uclk_pstate, watermarks->dcn4x.a.uclk_pstate); + } else if (watermarks->dcn4x.a.uclk_pstate + < hubbub2->watermarks.dcn4x.a.uclk_pstate) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.uclk_pstate - > hubbub2->watermarks.dcn4.b.uclk_pstate) { - hubbub2->watermarks.dcn4.b.uclk_pstate = - watermarks->dcn4.b.uclk_pstate; + if (safe_to_lower || watermarks->dcn4x.b.uclk_pstate + > hubbub2->watermarks.dcn4x.b.uclk_pstate) { + hubbub2->watermarks.dcn4x.b.uclk_pstate = + watermarks->dcn4x.b.uclk_pstate; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4.b.uclk_pstate); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4x.b.uclk_pstate); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.uclk_pstate, watermarks->dcn4.b.uclk_pstate); - } else if (watermarks->dcn4.b.uclk_pstate - < hubbub2->watermarks.dcn4.b.uclk_pstate) + watermarks->dcn4x.b.uclk_pstate, watermarks->dcn4x.b.uclk_pstate); + } else if (watermarks->dcn4x.b.uclk_pstate + < hubbub2->watermarks.dcn4x.b.uclk_pstate) wm_pending = true; /* Section for UCLK_PSTATE_CHANGE_WATERMARKS1 (DUMMY_PSTATE/TEMP_READ/PPT) */ - if (safe_to_lower || watermarks->dcn4.a.temp_read_or_ppt - > hubbub2->watermarks.dcn4.a.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.a.temp_read_or_ppt = - watermarks->dcn4.a.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.a.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.a.temp_read_or_ppt = + watermarks->dcn4x.a.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4.a.temp_read_or_ppt); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4x.a.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK1_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.temp_read_or_ppt, watermarks->dcn4.a.temp_read_or_ppt); - } else if (watermarks->dcn4.a.temp_read_or_ppt - < hubbub2->watermarks.dcn4.a.temp_read_or_ppt) + watermarks->dcn4x.a.temp_read_or_ppt, watermarks->dcn4x.a.temp_read_or_ppt); + } else if (watermarks->dcn4x.a.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.temp_read_or_ppt - > hubbub2->watermarks.dcn4.b.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.b.temp_read_or_ppt = - watermarks->dcn4.b.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.b.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.b.temp_read_or_ppt = + watermarks->dcn4x.b.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4.b.temp_read_or_ppt); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4x.b.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK1_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.temp_read_or_ppt, watermarks->dcn4.b.temp_read_or_ppt); - } else if (watermarks->dcn4.b.temp_read_or_ppt - < hubbub2->watermarks.dcn4.b.temp_read_or_ppt) + watermarks->dcn4x.b.temp_read_or_ppt, watermarks->dcn4x.b.temp_read_or_ppt); + } else if (watermarks->dcn4x.b.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) wm_pending = true; /* Section for FCLK_PSTATE_CHANGE_WATERMARKS */ /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.fclk_pstate - > hubbub2->watermarks.dcn4.a.fclk_pstate) { - hubbub2->watermarks.dcn4.a.fclk_pstate = - watermarks->dcn4.a.fclk_pstate; + if (safe_to_lower || watermarks->dcn4x.a.fclk_pstate + > hubbub2->watermarks.dcn4x.a.fclk_pstate) { + hubbub2->watermarks.dcn4x.a.fclk_pstate = + watermarks->dcn4x.a.fclk_pstate; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4.a.fclk_pstate); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4x.a.fclk_pstate); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.fclk_pstate, watermarks->dcn4.a.fclk_pstate); - } else if (watermarks->dcn4.a.fclk_pstate - < hubbub2->watermarks.dcn4.a.fclk_pstate) + watermarks->dcn4x.a.fclk_pstate, watermarks->dcn4x.a.fclk_pstate); + } else if (watermarks->dcn4x.a.fclk_pstate + < hubbub2->watermarks.dcn4x.a.fclk_pstate) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.fclk_pstate - > hubbub2->watermarks.dcn4.b.fclk_pstate) { - hubbub2->watermarks.dcn4.b.fclk_pstate = - watermarks->dcn4.b.fclk_pstate; + if (safe_to_lower || watermarks->dcn4x.b.fclk_pstate + > hubbub2->watermarks.dcn4x.b.fclk_pstate) { + hubbub2->watermarks.dcn4x.b.fclk_pstate = + watermarks->dcn4x.b.fclk_pstate; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4.b.fclk_pstate); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4x.b.fclk_pstate); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.fclk_pstate, watermarks->dcn4.b.fclk_pstate); - } else if (watermarks->dcn4.b.fclk_pstate - < hubbub2->watermarks.dcn4.b.fclk_pstate) + watermarks->dcn4x.b.fclk_pstate, watermarks->dcn4x.b.fclk_pstate); + } else if (watermarks->dcn4x.b.fclk_pstate + < hubbub2->watermarks.dcn4x.b.fclk_pstate) wm_pending = true; /* Section for FCLK_CHANGE_WATERMARKS1 (DUMMY_PSTATE/TEMP_READ/PPT) */ - if (safe_to_lower || watermarks->dcn4.a.temp_read_or_ppt - > hubbub2->watermarks.dcn4.a.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.a.temp_read_or_ppt = - watermarks->dcn4.a.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.a.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.a.temp_read_or_ppt = + watermarks->dcn4x.a.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4.a.temp_read_or_ppt); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4x.a.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK1_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.temp_read_or_ppt, watermarks->dcn4.a.temp_read_or_ppt); - } else if (watermarks->dcn4.a.temp_read_or_ppt - < hubbub2->watermarks.dcn4.a.temp_read_or_ppt) + watermarks->dcn4x.a.temp_read_or_ppt, watermarks->dcn4x.a.temp_read_or_ppt); + } else if (watermarks->dcn4x.a.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.temp_read_or_ppt - > hubbub2->watermarks.dcn4.b.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.b.temp_read_or_ppt = - watermarks->dcn4.b.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.b.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.b.temp_read_or_ppt = + watermarks->dcn4x.b.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4.b.temp_read_or_ppt); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4x.b.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK1_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.temp_read_or_ppt, watermarks->dcn4.b.temp_read_or_ppt); - } else if (watermarks->dcn4.b.temp_read_or_ppt - < hubbub2->watermarks.dcn4.b.temp_read_or_ppt) + watermarks->dcn4x.b.temp_read_or_ppt, watermarks->dcn4x.b.temp_read_or_ppt); + } else if (watermarks->dcn4x.b.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) wm_pending = true; return wm_pending; @@ -418,29 +418,29 @@ bool hubbub401_program_usr_watermarks( bool wm_pending = false; /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.usr - > hubbub2->watermarks.dcn4.a.usr) { - hubbub2->watermarks.dcn4.a.usr = watermarks->dcn4.a.usr; + if (safe_to_lower || watermarks->dcn4x.a.usr + > hubbub2->watermarks.dcn4x.a.usr) { + hubbub2->watermarks.dcn4x.a.usr = watermarks->dcn4x.a.usr; REG_SET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, 0, - DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, watermarks->dcn4.a.usr); + DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, watermarks->dcn4x.a.usr); DC_LOG_BANDWIDTH_CALCS("USR_RETRAINING_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.usr, watermarks->dcn4.a.usr); - } else if (watermarks->dcn4.a.usr - < hubbub2->watermarks.dcn4.a.usr) + watermarks->dcn4x.a.usr, watermarks->dcn4x.a.usr); + } else if (watermarks->dcn4x.a.usr + < hubbub2->watermarks.dcn4x.a.usr) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.usr - > hubbub2->watermarks.dcn4.b.usr) { - hubbub2->watermarks.dcn4.b.usr = watermarks->dcn4.b.usr; + if (safe_to_lower || watermarks->dcn4x.b.usr + > hubbub2->watermarks.dcn4x.b.usr) { + hubbub2->watermarks.dcn4x.b.usr = watermarks->dcn4x.b.usr; REG_SET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, 0, - DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, watermarks->dcn4.b.usr); + DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, watermarks->dcn4x.b.usr); DC_LOG_BANDWIDTH_CALCS("USR_RETRAINING_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.usr, watermarks->dcn4.b.usr); - } else if (watermarks->dcn4.b.usr - < hubbub2->watermarks.dcn4.b.usr) + watermarks->dcn4x.b.usr, watermarks->dcn4x.b.usr); + } else if (watermarks->dcn4x.b.usr + < hubbub2->watermarks.dcn4x.b.usr) wm_pending = true; return wm_pending; @@ -1170,6 +1170,28 @@ static void dcn401_program_compbuf_segments(struct hubbub *hubbub, unsigned comp } } +static void dcn401_wait_for_det_update(struct hubbub *hubbub, int hubp_inst) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + + switch (hubp_inst) { + case 0: + REG_WAIT(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, hubbub2->det0_size, 1, 100000); /* 1 vupdate at 10hz */ + break; + case 1: + REG_WAIT(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, hubbub2->det1_size, 1, 100000); + break; + case 2: + REG_WAIT(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, hubbub2->det2_size, 1, 100000); + break; + case 3: + REG_WAIT(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, hubbub2->det3_size, 1, 100000); + break; + default: + break; + } +} + static const struct hubbub_funcs hubbub4_01_funcs = { .update_dchub = hubbub2_update_dchub, .init_dchub_sys_ctx = hubbub3_init_dchub_sys_ctx, @@ -1192,6 +1214,7 @@ static const struct hubbub_funcs hubbub4_01_funcs = { .set_request_limit = hubbub32_set_request_limit, .program_det_segments = dcn401_program_det_segments, .program_compbuf_segments = dcn401_program_compbuf_segments, + .wait_for_det_update = dcn401_wait_for_det_update, }; void hubbub401_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c index bf399819ca80..22ac2b7e49ae 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c @@ -749,7 +749,8 @@ bool hubp1_is_flip_pending(struct hubp *hubp) if (flip_pending) return true; - if (earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) + if (hubp && + earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) return true; return false; diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c index 6bba020ad6fb..0637e4c552d8 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c @@ -927,7 +927,8 @@ bool hubp2_is_flip_pending(struct hubp *hubp) if (flip_pending) return true; - if (earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) + if (hubp && + earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) return true; return false; diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c index 771fcd0d3b99..d1f05b82b3dd 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c @@ -188,7 +188,7 @@ void hubp35_program_surface_config( hubp35_program_pixel_format(hubp, format); } -struct hubp_funcs dcn35_hubp_funcs = { +static struct hubp_funcs dcn35_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, .hubp_program_surface_flip_and_addr = hubp3_program_surface_flip_and_addr, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index eb0da6c6b87c..b1ebf5053b4f 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -725,8 +725,8 @@ void hubp401_cursor_set_position( CURSOR_ENABLE, cur_en); REG_SET_2(CURSOR_POSITION, 0, - CURSOR_X_POSITION, pos->x, - CURSOR_Y_POSITION, pos->y); + CURSOR_X_POSITION, x_pos, + CURSOR_Y_POSITION, y_pos); REG_SET_2(CURSOR_HOT_SPOT, 0, CURSOR_HOT_SPOT_X, pos->x_hotspot, @@ -990,7 +990,6 @@ static struct hubp_funcs dcn401_hubp_funcs = { .hubp_soft_reset = hubp31_soft_reset, .hubp_set_flip_int = hubp401_set_flip_int, .hubp_in_blank = hubp401_in_blank, - .hubp_update_force_pstate_disallow = hubp32_update_force_pstate_disallow, .phantom_hubp_post_enable = hubp32_phantom_hubp_post_enable, .hubp_update_mall_sel = hubp401_update_mall_sel, .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 1f2eb2f727dc..d52ce58c6a98 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -949,7 +949,7 @@ void dce110_edp_backlight_control( { struct dc_context *ctx = link->ctx; struct bp_transmitter_control cntl = { 0 }; - uint8_t pwrseq_instance; + uint8_t pwrseq_instance = 0; unsigned int pre_T11_delay = OLED_PRE_T11_DELAY; unsigned int post_T7_delay = OLED_POST_T7_DELAY; @@ -1002,7 +1002,8 @@ void dce110_edp_backlight_control( */ /* dc_service_sleep_in_milliseconds(50); */ /*edp 1.2*/ - pwrseq_instance = link->panel_cntl->pwrseq_inst; + if (link->panel_cntl) + pwrseq_instance = link->panel_cntl->pwrseq_inst; if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) { if (!link->dc->config.edp_no_power_sequencing) @@ -1231,20 +1232,21 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx) * has changed or they enter protection state and hang. */ msleep(60); - } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) { - if (!link->dc->config.edp_no_power_sequencing) { - /* - * Sometimes, DP receiver chip power-controlled externally by an - * Embedded Controller could be treated and used as eDP, - * if it drives mobile display. In this case, - * we shouldn't be doing power-sequencing, hence we can skip - * waiting for T9-ready. - */ - link->dc->link_srv->edp_receiver_ready_T9(link); - } } } + if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && + !link->dc->config.edp_no_power_sequencing) { + /* + * Sometimes, DP receiver chip power-controlled externally by an + * Embedded Controller could be treated and used as eDP, + * if it drives mobile display. In this case, + * we shouldn't be doing power-sequencing, hence we can skip + * waiting for T9-ready. + */ + link->dc->link_srv->edp_receiver_ready_T9(link); + } + } @@ -1549,6 +1551,7 @@ static enum dc_status dce110_enable_stream_timing( 0, 0, 0, + 0, pipe_ctx->stream->signal, true); } @@ -1597,6 +1600,11 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( &audio_output.crtc_info, &pipe_ctx->stream->audio_info, &audio_output.dp_link_info); + + if (dc->config.disable_hbr_audio_dp2) + if (pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio && + dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) + pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio(pipe_ctx->stream_res.audio); } /* make sure no pipes syncd to the pipe being enabled */ @@ -1838,6 +1846,7 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) bool can_apply_edp_fast_boot = false; bool can_apply_seamless_boot = false; bool keep_edp_vdd_on = false; + struct dc_bios *dcb = dc->ctx->dc_bios; DC_LOGGER_INIT(); @@ -1914,13 +1923,15 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) hws->funcs.edp_backlight_control(edp_link_with_sink, false); } /*resume from S3, no vbios posting, no need to power down again*/ - clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); + if (dcb && dcb->funcs && !dcb->funcs->is_accelerated_mode(dcb)) + clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); power_down_all_hw_blocks(dc); disable_vga_and_power_gate_all_controllers(dc); if (edp_link_with_sink && !keep_edp_vdd_on) dc->hwss.edp_power_control(edp_link_with_sink, false); - clk_mgr_optimize_pwr_state(dc, dc->clk_mgr); + if (dcb && dcb->funcs && !dcb->funcs->is_accelerated_mode(dcb)) + clk_mgr_optimize_pwr_state(dc, dc->clk_mgr); } bios_set_scratch_acc_mode_change(dc->ctx->dc_bios, 1); } @@ -2340,19 +2351,6 @@ static void dce110_setup_audio_dto( } } -static bool dce110_is_hpo_enabled(struct dc_state *context) -{ - int i; - - for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) { - if (context->res_ctx.is_hpo_dp_stream_enc_acquired[i]) { - return true; - } - } - - return false; -} - enum dc_status dce110_apply_ctx_to_hw( struct dc *dc, struct dc_state *context) @@ -2361,8 +2359,8 @@ enum dc_status dce110_apply_ctx_to_hw( struct dc_bios *dcb = dc->ctx->dc_bios; enum dc_status status; int i; - bool was_hpo_enabled = dce110_is_hpo_enabled(dc->current_state); - bool is_hpo_enabled = dce110_is_hpo_enabled(context); + bool was_hpo_acquired = resource_is_hpo_acquired(dc->current_state); + bool is_hpo_acquired = resource_is_hpo_acquired(context); /* reset syncd pipes from disabled pipes */ if (dc->config.use_pipe_ctx_sync_logic) @@ -2405,8 +2403,8 @@ enum dc_status dce110_apply_ctx_to_hw( dce110_setup_audio_dto(dc, context); - if (dc->hwseq->funcs.setup_hpo_hw_control && was_hpo_enabled != is_hpo_enabled) { - dc->hwseq->funcs.setup_hpo_hw_control(dc->hwseq, is_hpo_enabled); + if (dc->hwseq->funcs.setup_hpo_hw_control && was_hpo_acquired != is_hpo_acquired) { + dc->hwseq->funcs.setup_hpo_hw_control(dc->hwseq, is_hpo_acquired); } for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -2438,7 +2436,7 @@ enum dc_status dce110_apply_ctx_to_hw( #ifdef CONFIG_DRM_AMD_DC_FP if (hws->funcs.resync_fifo_dccg_dio) - hws->funcs.resync_fifo_dccg_dio(hws, dc, context); + hws->funcs.resync_fifo_dccg_dio(hws, dc, context, i); #endif } @@ -3312,7 +3310,6 @@ static const struct hw_sequencer_funcs dce110_funcs = { static const struct hwseq_private_funcs dce110_private_funcs = { .init_pipes = init_pipes, - .update_plane_addr = update_plane_addr, .set_input_transfer_func = dce110_set_input_transfer_func, .set_output_transfer_func = dce110_set_output_transfer_func, .power_down = dce110_power_down, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 14a902ff3b8a..01dffed4d30b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1005,6 +1005,7 @@ enum dc_status dcn10_enable_stream_timing( pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout, pipe_ctx->stream->signal, true); @@ -1554,7 +1555,7 @@ void dcn10_init_hw(struct dc *dc) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); /* Align bw context with hw config when system resume. */ - if (dc->clk_mgr->clks.dispclk_khz != 0 && dc->clk_mgr->clks.dppclk_khz != 0) { + if (dc->clk_mgr && dc->clk_mgr->clks.dispclk_khz != 0 && dc->clk_mgr->clks.dppclk_khz != 0) { dc->current_state->bw_ctx.bw.dcn.clk.dispclk_khz = dc->clk_mgr->clks.dispclk_khz; dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz = dc->clk_mgr->clks.dppclk_khz; } @@ -1674,7 +1675,7 @@ void dcn10_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); } @@ -1697,10 +1698,10 @@ void dcn10_power_down_on_boot(struct dc *dc) if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwseq->funcs.edp_backlight_control && - dc->hwss.power_down && + dc->hwseq->funcs.power_down && dc->hwss.edp_power_control) { dc->hwseq->funcs.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { @@ -1708,8 +1709,8 @@ void dcn10_power_down_on_boot(struct dc *dc) if (link->link_enc && link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down) { + dc->hwseq->funcs.power_down(dc); break; } @@ -2585,8 +2586,11 @@ static bool dcn10_is_rear_mpo_fix_required(struct pipe_ctx *pipe_ctx, enum dc_co while (top->top_pipe) top = top->top_pipe; // Traverse to top pipe_ctx - if (top->plane_state && top->plane_state->layer_index == 0) - return true; // Front MPO plane not hidden + if (top->plane_state && top->plane_state->layer_index == 0 && !top->plane_state->global_alpha) + // Global alpha used by top plane for PIP overlay + // Pre-multiplied/per-pixel alpha used by MPO + // Check top plane's global alpha to ensure layer_index > 0 not caused by PIP + return true; // MPO in use and front plane not hidden } } return false; @@ -2914,7 +2918,7 @@ static void dcn10_update_dchubp_dpp( hubp->power_gated = false; - hws->funcs.update_plane_addr(dc, pipe_ctx); + dc->hwss.update_plane_addr(dc, pipe_ctx); if (is_pipe_tree_visible(pipe_ctx)) hubp->funcs->set_blank(hubp, false); @@ -2997,7 +3001,8 @@ void dcn10_program_pipe( calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout); pipe_ctx->stream_res.tg->funcs->set_vtg_params( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c index a5bdac79a744..5e51e1761707 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c @@ -78,7 +78,6 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .get_clock = dcn10_get_clock, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dce110_set_backlight_level, .set_abm_immediate_disable = dce110_set_abm_immediate_disable, .set_pipe = dce110_set_pipe, @@ -92,7 +91,6 @@ static const struct hw_sequencer_funcs dcn10_funcs = { static const struct hwseq_private_funcs dcn10_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn10_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .program_pipe = dcn10_program_pipe, .update_mpcc = dcn10_update_mpcc, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 2532ad410cb5..a80c08582932 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -909,6 +909,7 @@ enum dc_status dcn20_enable_stream_timing( pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout, pipe_ctx->stream->signal, true); @@ -1044,7 +1045,8 @@ bool dcn20_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx, /* * if above if is not executed then 'params' equal to 0 and set in bypass */ - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); return true; } @@ -1698,7 +1700,7 @@ static void dcn20_update_dchubp_dpp( plane_state->update_flags.bits.input_csc_change || plane_state->update_flags.bits.color_space_change || plane_state->update_flags.bits.coeff_reduction_change) { - struct dc_bias_and_scale bns_params = {0}; + struct dc_bias_and_scale bns_params = plane_state->bias_and_scale; // program the input csc dpp->funcs->dpp_setup(dpp, @@ -1715,7 +1717,6 @@ static void dcn20_update_dchubp_dpp( } if (dpp->funcs->dpp_program_bias_and_scale) { //TODO :for CNVC set scale and bias registers if necessary - build_prescale_params(&bns_params, plane_state); dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); } } @@ -1825,7 +1826,7 @@ static void dcn20_update_dchubp_dpp( params.subvp_save_surf_addr.subvp_index = pipe_ctx->subvp_index; hwss_subvp_save_surf_addr(¶ms); } - hws->funcs.update_plane_addr(dc, pipe_ctx); + dc->hwss.update_plane_addr(dc, pipe_ctx); } if (pipe_ctx->update_flags.bits.enable) @@ -1886,7 +1887,8 @@ static void dcn20_program_pipe( calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout); if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); @@ -1921,22 +1923,28 @@ static void dcn20_program_pipe( dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); } - if (pipe_ctx->update_flags.raw || pipe_ctx->plane_state->update_flags.raw || pipe_ctx->stream->update_flags.raw) + if (pipe_ctx->update_flags.raw || + (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) || + pipe_ctx->stream->update_flags.raw) dcn20_update_dchubp_dpp(dc, pipe_ctx, context); - if (pipe_ctx->update_flags.bits.enable - || pipe_ctx->plane_state->update_flags.bits.hdr_mult) + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || + pipe_ctx->plane_state->update_flags.bits.hdr_mult)) hws->funcs.set_hdr_multiplier(pipe_ctx); if (hws->funcs.populate_mcm_luts) { - hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, - pipe_ctx->plane_state->lut_bank_a); - pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; + if (pipe_ctx->plane_state) { + hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, + pipe_ctx->plane_state->lut_bank_a); + pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; + } } - if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || + + if (pipe_ctx->plane_state && + (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || pipe_ctx->plane_state->update_flags.bits.gamma_change || - pipe_ctx->plane_state->update_flags.bits.lut_3d) + pipe_ctx->plane_state->update_flags.bits.lut_3d || + pipe_ctx->update_flags.bits.enable)) hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); /* dcn10_translate_regamma_to_hw_format takes 750us to finish @@ -1946,7 +1954,8 @@ static void dcn20_program_pipe( if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.plane_changed || pipe_ctx->stream->update_flags.bits.out_tf || - pipe_ctx->plane_state->update_flags.bits.output_tf_change) + (pipe_ctx->plane_state && + pipe_ctx->plane_state->update_flags.bits.output_tf_change)) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we @@ -1970,7 +1979,7 @@ static void dcn20_program_pipe( } /* Set ABM pipe after other pipe configurations done */ - if (pipe_ctx->plane_state->visible) { + if ((pipe_ctx->plane_state && pipe_ctx->plane_state->visible)) { if (pipe_ctx->stream_res.abm) { dc->hwss.set_pipe(pipe_ctx); pipe_ctx->stream_res.abm->funcs->set_abm_level(pipe_ctx->stream_res.abm, @@ -2186,9 +2195,9 @@ static void post_unlock_reset_opp(struct dc *dc, * yet power gated. */ dsc->funcs->dsc_wait_disconnect_pending_clear(dsc); + dsc->funcs->dsc_disable(dsc); if (dccg->funcs->set_ref_dscclk) dccg->funcs->set_ref_dscclk(dccg, dsc->inst); - dsc->funcs->dsc_disable(dsc); } } } @@ -2283,6 +2292,9 @@ void dcn20_post_unlock_program_front_end( } } + if (!hwseq) + return; + /* P-State support transitions: * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally) @@ -2290,7 +2302,7 @@ void dcn20_post_unlock_program_front_end( * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes */ - if (hwseq && hwseq->funcs.update_force_pstate) + if (hwseq->funcs.update_force_pstate) dc->hwseq->funcs.update_force_pstate(dc, context); /* Only program the MALL registers after all the main and phantom pipes @@ -2459,7 +2471,8 @@ bool dcn20_update_bandwidth( calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout); pipe_ctx->stream_res.tg->funcs->set_vtg_params( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false); @@ -2529,6 +2542,9 @@ bool dcn20_wait_for_blank_complete( { int counter; + if (!opp) + return false; + for (counter = 0; counter < 1000; counter++) { if (!opp->funcs->dpg_is_pending(opp)) break; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c index ef6488165b8f..32707b344f0b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c @@ -105,7 +105,6 @@ static const struct hw_sequencer_funcs dcn20_funcs = { static const struct hwseq_private_funcs dcn20_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn20_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c index a13bf6c9386e..78351408e864 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c @@ -96,7 +96,6 @@ static const struct hw_sequencer_funcs dcn201_funcs = { static const struct hwseq_private_funcs dcn201_private_funcs = { .init_pipes = NULL, - .update_plane_addr = dcn201_update_plane_addr, .plane_atomic_disconnect = dcn201_plane_atomic_disconnect, .program_pipe = dcn10_program_pipe, .update_mpcc = dcn201_update_mpcc, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c index 3dfac372d165..e044e9e0a3a1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c @@ -93,7 +93,6 @@ static const struct hw_sequencer_funcs dcn21_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -109,7 +108,6 @@ static const struct hw_sequencer_funcs dcn21_funcs = { static const struct hwseq_private_funcs dcn21_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn20_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index eaeeade31ed7..42c52284a868 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -398,7 +398,11 @@ bool dcn30_set_output_transfer_func(struct dc *dc, } } - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + else + DC_LOG_ERROR("%s: set_output_gamma function pointer is NULL.\n", __func__); + return ret; } @@ -625,7 +629,7 @@ void dcn30_init_hw(struct dc *dc) uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; - if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); // Initialize the dccg @@ -731,10 +735,10 @@ void dcn30_init_hw(struct dc *dc) if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwss.edp_backlight_control && - dc->hwss.power_down && + hws->funcs.power_down && dc->hwss.edp_power_control) { dc->hwss.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + hws->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { @@ -742,8 +746,8 @@ void dcn30_init_hw(struct dc *dc) if (link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + hws->funcs.power_down) { + hws->funcs.power_down(dc); break; } @@ -786,11 +790,12 @@ void dcn30_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); //if softmax is enabled then hardmax will be set by a different call - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->set_hard_max_memclk && + !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); if (dc->res_pool->hubbub->funcs->force_pstate_change_control) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c index 4b32497c09d0..2a8dc40d2847 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c @@ -113,7 +113,6 @@ static const struct hw_sequencer_funcs dcn30_funcs = { static const struct hwseq_private_funcs dcn30_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c index 97e33eb7ac5a..93e49d87a67c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c @@ -111,7 +111,6 @@ static const struct hw_sequencer_funcs dcn301_funcs = { static const struct hwseq_private_funcs dcn301_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 746c522adf84..3d4b31bd9946 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -256,10 +256,10 @@ void dcn31_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); if (dc->res_pool->hubbub->funcs->force_pstate_change_control) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c index 9cb7afe0e731..56f3c70d4b55 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c @@ -98,7 +98,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -112,11 +111,11 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .optimize_pwr_state = dcn21_optimize_pwr_state, .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, + .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn31_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index 388404cdeeaa..4e93eeedfc1b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -355,14 +355,18 @@ void dcn314_calculate_pix_rate_divider( } } -void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context) +void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx) { unsigned int i; struct pipe_ctx *pipe = NULL; bool otg_disabled[MAX_PIPES] = {false}; for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) { + pipe = &context->res_ctx.pipe_ctx[i]; + } else { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + } if (pipe->top_pipe || pipe->prev_odm_pipe) continue; @@ -377,7 +381,10 @@ void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc hws->ctx->dc->res_pool->dccg->funcs->trigger_dio_fifo_resync(hws->ctx->dc->res_pool->dccg); for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) + pipe = &context->res_ctx.pipe_ctx[i]; + else + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (otg_disabled[i]) { int opp_inst[MAX_PIPES] = { pipe->stream_res.opp->inst }; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h index fb4f90f61b22..2305ad282f21 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h @@ -41,7 +41,7 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig void dcn314_calculate_pix_rate_divider(struct dc *dc, struct dc_state *context, const struct dc_stream_state *stream); -void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context); +void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx); void dcn314_dpp_root_clock_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool clock_on); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index 7a8db4b81471..68e6de6b5758 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -100,7 +100,6 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -115,11 +114,11 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .calculate_pix_rate_divider = dcn314_calculate_pix_rate_divider, + .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn314_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 05d8f81daa06..a36e11606f90 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -582,7 +582,9 @@ bool dcn32_set_output_transfer_func(struct dc *dc, } } - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + return ret; } @@ -779,7 +781,7 @@ void dcn32_init_hw(struct dc *dc) uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; - if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); // Initialize the dccg @@ -901,10 +903,10 @@ void dcn32_init_hw(struct dc *dc) if (edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwss.edp_backlight_control && - dc->hwss.power_down && + hws->funcs.power_down && dc->hwss.edp_power_control) { dc->hwss.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + hws->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } } @@ -914,8 +916,8 @@ void dcn32_init_hw(struct dc *dc) if (link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + hws->funcs.power_down) { + hws->funcs.power_down(dc); break; } @@ -958,10 +960,11 @@ void dcn32_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->set_hard_max_memclk && + !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); if (dc->res_pool->hubbub->funcs->force_pstate_change_control) @@ -982,8 +985,19 @@ void dcn32_init_hw(struct dc *dc) dc->caps.dmub_caps.gecc_enable = dc->ctx->dmub_srv->dmub->feature_caps.gecc_enable; dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; - if (dc->ctx->dmub_srv->dmub->fw_version < + /* for DCN401 testing only */ + dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; + if (dc->caps.dmub_caps.fams_ver == 2) { + /* FAMS2 is enabled */ + dc->debug.fams2_config.bits.enable &= true; + } else if (dc->ctx->dmub_srv->dmub->fw_version < DMUB_FW_VERSION(7, 0, 35)) { + /* FAMS2 is disabled */ + dc->debug.fams2_config.bits.enable = false; + if (dc->debug.using_dml2 && dc->res_pool->funcs->update_bw_bounding_box) { + /* update bounding box if FAMS2 disabled */ + dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); + } dc->debug.force_disable_subvp = true; dc->debug.disable_fpo_optimizations = true; } @@ -1029,24 +1043,20 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0); dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, dsc->inst); dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, dsc->inst, true); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc; ASSERT(odm_dsc); + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst); odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg); odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst, true); } - dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt; - dsc_cfg.pic_width *= opp_cnt; - optc_dsc_mode = dsc_optc_cfg.is_pixel_format_444 ? OPTC_DSC_ENABLED_444 : OPTC_DSC_ENABLED_NATIVE_SUBSAMPLED; - /* Enable DSC in OPTC */ DC_LOG_DSC("Setting optc DSC config for tg instance %d:", pipe_ctx->stream_res.tg->inst); pipe_ctx->stream_res.tg->funcs->set_dsc_config(pipe_ctx->stream_res.tg, @@ -1060,13 +1070,9 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) OPTC_DSC_DISABLED, 0, 0); /* only disconnect DSC block, DSC is disabled when OPP head pipe is reset */ - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, pipe_ctx->stream_res.dsc->inst, false); - dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc); + dsc->funcs->dsc_disconnect(pipe_ctx->stream_res.dsc); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { ASSERT(odm_pipe->stream_res.dsc); - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_pipe->stream_res.dsc->inst, false); odm_pipe->stream_res.dsc->funcs->dsc_disconnect(odm_pipe->stream_res.dsc); } } @@ -1137,10 +1143,7 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * if (!pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe->stream_res.dsc) { struct display_stream_compressor *dsc = current_pipe_ctx->next_odm_pipe->stream_res.dsc; - struct dccg *dccg = dc->res_pool->dccg; - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, dsc->inst, false); /* disconnect DSC block from stream */ dsc->funcs->dsc_disconnect(dsc); } @@ -1212,20 +1215,27 @@ void dcn32_calculate_pix_rate_divider( } } -void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context) +void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx) { unsigned int i; struct pipe_ctx *pipe = NULL; bool otg_disabled[MAX_PIPES] = {false}; + struct dc_state *dc_state = NULL; for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) { + pipe = &context->res_ctx.pipe_ctx[i]; + dc_state = context; + } else { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + dc_state = dc->current_state; + } if (!resource_is_pipe_type(pipe, OTG_MASTER)) continue; if ((pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal)) - && dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_PHANTOM) { + && dc_state_get_pipe_subvp_type(dc_state, pipe) != SUBVP_PHANTOM) { pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); otg_disabled[i] = true; @@ -1235,7 +1245,10 @@ void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_ hws->ctx->dc->res_pool->dccg->funcs->trigger_dio_fifo_resync(hws->ctx->dc->res_pool->dccg); for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) + pipe = &context->res_ctx.pipe_ctx[i]; + else + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (otg_disabled[i]) { int opp_inst[MAX_PIPES] = { pipe->stream_res.opp->inst }; @@ -1583,7 +1596,7 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context) #ifdef CONFIG_DRM_AMD_DC_FP if (hws->funcs.resync_fifo_dccg_dio) - hws->funcs.resync_fifo_dccg_dio(hws, dc, context); + hws->funcs.resync_fifo_dccg_dio(hws, dc, context, i); #endif } } @@ -1717,6 +1730,28 @@ void dcn32_blank_phantom(struct dc *dc, hws->funcs.wait_for_blank_complete(opp); } +/* phantom stream id's can change often, but can be identical between contexts. +* This function checks for the condition the streams are identical to avoid +* redundant pipe transitions. +*/ +static bool is_subvp_phantom_topology_transition_seamless( + const struct dc_state *cur_ctx, + const struct dc_state *new_ctx, + const struct pipe_ctx *cur_pipe, + const struct pipe_ctx *new_pipe) +{ + enum mall_stream_type cur_pipe_type = dc_state_get_pipe_subvp_type(cur_ctx, cur_pipe); + enum mall_stream_type new_pipe_type = dc_state_get_pipe_subvp_type(new_ctx, new_pipe); + + const struct dc_stream_state *cur_paired_stream = dc_state_get_paired_subvp_stream(cur_ctx, cur_pipe->stream); + const struct dc_stream_state *new_paired_stream = dc_state_get_paired_subvp_stream(new_ctx, new_pipe->stream); + + return cur_pipe_type == SUBVP_PHANTOM && + cur_pipe_type == new_pipe_type && + cur_paired_stream && new_paired_stream && + cur_paired_stream->stream_id == new_paired_stream->stream_id; +} + bool dcn32_is_pipe_topology_transition_seamless(struct dc *dc, const struct dc_state *cur_ctx, const struct dc_state *new_ctx) @@ -1735,7 +1770,8 @@ bool dcn32_is_pipe_topology_transition_seamless(struct dc *dc, continue; else if (resource_is_pipe_type(cur_pipe, OTG_MASTER)) { if (resource_is_pipe_type(new_pipe, OTG_MASTER)) - if (cur_pipe->stream->stream_id == new_pipe->stream->stream_id) + if (cur_pipe->stream->stream_id == new_pipe->stream->stream_id || + is_subvp_phantom_topology_transition_seamless(cur_ctx, new_ctx, cur_pipe, new_pipe)) /* OTG master with the same stream is seamless */ continue; } else if (resource_is_pipe_type(cur_pipe, OPP_HEAD)) { @@ -1821,3 +1857,13 @@ void dcn32_interdependent_update_lock(struct dc *dc, dc->hwss.pipe_control_lock(dc, pipe, false); } } + +void dcn32_program_outstanding_updates(struct dc *dc, + struct dc_state *context) +{ + struct hubbub *hubbub = dc->res_pool->hubbub; + + /* update compbuf if required */ + if (hubbub->funcs->program_compbuf_size) + hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h index db562e45d6ff..cac4a08b92a4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h @@ -75,7 +75,7 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable); unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); -void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context); +void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx); void dcn32_subvp_pipe_control_lock(struct dc *dc, struct dc_state *context, @@ -133,4 +133,8 @@ void dcn32_prepare_bandwidth(struct dc *dc, void dcn32_interdependent_update_lock(struct dc *dc, struct dc_state *context, bool lock); + +void dcn32_program_outstanding_updates(struct dc *dc, + struct dc_state *context); + #endif /* __DC_HWSS_DCN32_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index 5c50458b12cb..3422b564ae98 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -120,11 +120,11 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .blank_phantom = dcn32_blank_phantom, .is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, + .program_outstanding_updates = dcn32_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn32_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, @@ -163,7 +163,6 @@ static const struct hwseq_private_funcs dcn32_private_funcs = { .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe, - .populate_mcm_luts = dcn401_populate_mcm_luts, }; void dcn32_hw_sequencer_init_functions(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index f115c7a285e7..fbbb20b9dbee 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -235,7 +235,7 @@ void dcn35_init_hw(struct dc *dc) if (hws->funcs.enable_power_gating_plane) hws->funcs.enable_power_gating_plane(dc->hwseq, true); */ - if (res_pool->hubbub->funcs->dchubbub_init) + if (res_pool->hubbub && res_pool->hubbub->funcs->dchubbub_init) res_pool->hubbub->funcs->dchubbub_init(dc->res_pool->hubbub); /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which @@ -328,10 +328,10 @@ void dcn35_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); @@ -629,10 +629,10 @@ void dcn35_power_down_on_boot(struct dc *dc) if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwseq->funcs.edp_backlight_control && - dc->hwss.power_down && + dc->hwseq->funcs.power_down && dc->hwss.edp_power_control) { dc->hwseq->funcs.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { @@ -640,8 +640,8 @@ void dcn35_power_down_on_boot(struct dc *dc) if (link->link_enc && link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down) { + dc->hwseq->funcs.power_down(dc); break; } @@ -1024,9 +1024,6 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (!hpo_frl_stream_enc_acquired && !hpo_dp_stream_enc_acquired) update_state->pg_res_update[PG_HPO] = true; - if (hpo_frl_stream_enc_acquired) - update_state->pg_pipe_res_update[PG_HDMISTREAM][0] = true; - update_state->pg_res_update[PG_DWB] = true; for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -1041,7 +1038,7 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (pipe_ctx->plane_res.hubp) update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->plane_res.hubp->inst] = false; - if (pipe_ctx->plane_res.dpp) + if (pipe_ctx->plane_res.dpp && pipe_ctx->plane_res.hubp) update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->plane_res.hubp->inst] = false; if (pipe_ctx->plane_res.dpp || pipe_ctx->stream_res.opp) @@ -1463,10 +1460,9 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx, for (i = 0; i < num_pipes; i++) { if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { - struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing; - struct dc *dc = pipe_ctx[i]->stream->ctx->dc; - - if (dc->debug.static_screen_wait_frames) { + if (pipe_ctx[i]->stream && pipe_ctx[i]->stream->ctx->dc->debug.static_screen_wait_frames) { + struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing; + struct dc *dc = pipe_ctx[i]->stream->ctx->dc; unsigned int frame_rate = timing->pix_clk_100hz / (timing->h_total * timing->v_total); if (frame_rate >= 120 && dc->caps.ips_support && diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index 428912f37129..2bbf1fef94fd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -101,7 +101,6 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -124,11 +123,11 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .root_clock_control = dcn35_root_clock_control, .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, + .program_outstanding_updates = dcn32_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn35_private_funcs = { .init_pipes = dcn35_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 55e791552bca..d00822e8daa5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -100,7 +100,6 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -123,11 +122,12 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .root_clock_control = dcn35_root_clock_control, .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, + .program_outstanding_updates = dcn32_program_outstanding_updates, + .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn351_private_funcs = { .init_pipes = dcn35_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 2c50c0f745a0..0b743669f23b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -221,8 +221,9 @@ void dcn401_init_hw(struct dc *dc) int edp_num; uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; + int current_dchub_ref_freq = 0; - if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) { + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) { dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); // mark dcmode limits present if any clock has distinct AC and DC values from SMU @@ -264,6 +265,8 @@ void dcn401_init_hw(struct dc *dc) dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency, &res_pool->ref_clocks.dccg_ref_clock_inKhz); + current_dchub_ref_freq = res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; + (res_pool->hubbub->funcs->get_dchub_ref_freq)(res_pool->hubbub, res_pool->ref_clocks.dccg_ref_clock_inKhz, &res_pool->ref_clocks.dchub_ref_clock_inKhz); @@ -354,10 +357,10 @@ void dcn401_init_hw(struct dc *dc) if (edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwss.edp_backlight_control && - dc->hwss.power_down && + hws->funcs.power_down && dc->hwss.edp_power_control) { dc->hwss.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + hws->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } } @@ -367,8 +370,8 @@ void dcn401_init_hw(struct dc *dc) if (link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + hws->funcs.power_down) { + hws->funcs.power_down(dc); break; } @@ -413,12 +416,9 @@ void dcn401_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) - dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); - if (dc->res_pool->hubbub->funcs->force_pstate_change_control) dc->res_pool->hubbub->funcs->force_pstate_change_control( dc->res_pool->hubbub, false, false); @@ -436,9 +436,12 @@ void dcn401_init_hw(struct dc *dc) dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver > 0; dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; dc->debug.fams2_config.bits.enable &= dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver == 2; - if (!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box) { - /* update bounding box if FAMS2 disabled */ - dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); + if ((!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box) + || res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000 != current_dchub_ref_freq) { + /* update bounding box if FAMS2 disabled, or if dchub clk has changed */ + if (dc->clk_mgr) + dc->res_pool->funcs->update_bw_bounding_box(dc, + dc->clk_mgr->bw_params); } } } @@ -498,6 +501,7 @@ void dcn401_populate_mcm_luts(struct dc *dc, enum MCM_LUT_XABLE lut3d_xable = MCM_LUT_DISABLE; enum MCM_LUT_XABLE lut1d_xable = MCM_LUT_DISABLE; bool is_17x17x17 = true; + bool rval; dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable); @@ -507,11 +511,10 @@ void dcn401_populate_mcm_luts(struct dc *dc, if (mcm_luts.lut1d_func->type == TF_TYPE_HWPWL) m_lut_params.pwl = &mcm_luts.lut1d_func->pwl; else if (mcm_luts.lut1d_func->type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format( - dc->ctx, + rval = cm3_helper_translate_curve_to_hw_format( mcm_luts.lut1d_func, &dpp_base->regamma_params, false); - m_lut_params.pwl = &dpp_base->regamma_params; + m_lut_params.pwl = rval ? &dpp_base->regamma_params : NULL; } if (m_lut_params.pwl) { if (mpc->funcs->populate_lut) @@ -528,11 +531,10 @@ void dcn401_populate_mcm_luts(struct dc *dc, m_lut_params.pwl = &mcm_luts.shaper->pwl; else if (mcm_luts.shaper->type == TF_TYPE_DISTRIBUTED_POINTS) { ASSERT(false); - cm_helper_translate_curve_to_hw_format( - dc->ctx, + rval = cm3_helper_translate_curve_to_hw_format( mcm_luts.shaper, &dpp_base->regamma_params, true); - m_lut_params.pwl = &dpp_base->regamma_params; + m_lut_params.pwl = rval ? &dpp_base->regamma_params : NULL; } if (m_lut_params.pwl) { if (mpc->funcs->populate_lut) @@ -668,47 +670,40 @@ bool dcn401_set_mcm_luts(struct pipe_ctx *pipe_ctx, struct dpp *dpp_base = pipe_ctx->plane_res.dpp; int mpcc_id = pipe_ctx->plane_res.hubp->inst; struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; - bool result = true; + bool result; const struct pwl_params *lut_params = NULL; + bool rval; mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id); pipe_ctx->plane_state->mcm_location = MPCC_MOVABLE_CM_LOCATION_BEFORE; // 1D LUT - if (plane_state->mcm_shaper_3dlut_setting == DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL) { - if (plane_state->blend_tf.type == TF_TYPE_HWPWL) - lut_params = &plane_state->blend_tf.pwl; - else if (plane_state->blend_tf.type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - &plane_state->blend_tf, - &dpp_base->regamma_params, false); - lut_params = &dpp_base->regamma_params; - } - result = mpc->funcs->program_1dlut(mpc, lut_params, mpcc_id); - lut_params = NULL; + if (plane_state->blend_tf.type == TF_TYPE_HWPWL) + lut_params = &plane_state->blend_tf.pwl; + else if (plane_state->blend_tf.type == TF_TYPE_DISTRIBUTED_POINTS) { + rval = cm3_helper_translate_curve_to_hw_format(&plane_state->blend_tf, + &dpp_base->regamma_params, false); + lut_params = rval ? &dpp_base->regamma_params : NULL; } + result = mpc->funcs->program_1dlut(mpc, lut_params, mpcc_id); + lut_params = NULL; // Shaper - if (plane_state->mcm_shaper_3dlut_setting == DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL) { - if (plane_state->in_shaper_func.type == TF_TYPE_HWPWL) - lut_params = &plane_state->in_shaper_func.pwl; - else if (plane_state->in_shaper_func.type == TF_TYPE_DISTRIBUTED_POINTS) { - // TODO: dpp_base replace - ASSERT(false); - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - &plane_state->in_shaper_func, - &dpp_base->shaper_params, true); - lut_params = &dpp_base->shaper_params; - } - - result = mpc->funcs->program_shaper(mpc, lut_params, mpcc_id); + if (plane_state->in_shaper_func.type == TF_TYPE_HWPWL) + lut_params = &plane_state->in_shaper_func.pwl; + else if (plane_state->in_shaper_func.type == TF_TYPE_DISTRIBUTED_POINTS) { + // TODO: dpp_base replace + rval = cm3_helper_translate_curve_to_hw_format(&plane_state->in_shaper_func, + &dpp_base->shaper_params, true); + lut_params = rval ? &dpp_base->shaper_params : NULL; } + result &= mpc->funcs->program_shaper(mpc, lut_params, mpcc_id); // 3D - if (plane_state->mcm_shaper_3dlut_setting == DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL) { + if (mpc->funcs->program_3dlut) { if (plane_state->lut3d_func.state.bits.initialized == 1) - result = mpc->funcs->program_3dlut(mpc, &plane_state->lut3d_func.lut_3d, mpcc_id); + result &= mpc->funcs->program_3dlut(mpc, &plane_state->lut3d_func.lut_3d, mpcc_id); else - result = mpc->funcs->program_3dlut(mpc, NULL, mpcc_id); + result &= mpc->funcs->program_3dlut(mpc, NULL, mpcc_id); } return result; @@ -742,7 +737,9 @@ bool dcn401_set_output_transfer_func(struct dc *dc, } } - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + return ret; } @@ -871,6 +868,7 @@ enum dc_status dcn401_enable_stream_timing( pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout, pipe_ctx->stream->signal, true); @@ -1115,10 +1113,10 @@ void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx) .mirror = pipe_ctx->plane_state->horizontal_mirror, .stream = pipe_ctx->stream }; + struct rect odm_slice_src = { 0 }; bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) || (pipe_ctx->prev_odm_pipe != NULL); int prev_odm_width = 0; - int prev_odm_offset = 0; struct pipe_ctx *prev_odm_pipe = NULL; bool mpc_combine_on = false; int bottom_pipe_x_pos = 0; @@ -1183,12 +1181,12 @@ void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx) prev_odm_pipe = pipe_ctx->prev_odm_pipe; while (prev_odm_pipe != NULL) { - prev_odm_width += prev_odm_pipe->plane_res.scl_data.recout.width; - prev_odm_offset += prev_odm_pipe->plane_res.scl_data.recout.x; + odm_slice_src = resource_get_odm_slice_src_rect(prev_odm_pipe); + prev_odm_width += odm_slice_src.width; prev_odm_pipe = prev_odm_pipe->prev_odm_pipe; } - x_pos -= (prev_odm_width + prev_odm_offset); + x_pos -= (prev_odm_width); } /* If the position is negative then we need to add to the hotspot @@ -1311,8 +1309,10 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable) for (i = 0; i < dc->current_state->stream_count; i++) { /* MALL SS messaging is not supported with PSR at this time */ if (dc->current_state->streams[i] != NULL && - dc->current_state->streams[i]->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) + dc->current_state->streams[i]->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) { + DC_LOG_MALL("MALL SS not supported with PSR at this time\n"); return false; + } } memset(&cmd, 0, sizeof(cmd)); @@ -1322,8 +1322,9 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable) if (enable) { if (dcn401_check_no_memory_request_for_cab(dc)) { /* 1. Check no memory request case for CAB. - * If no memory request case, send CAB_ACTION NO_DF_REQ DMUB message + * If no memory request case, send CAB_ACTION NO_DCN_REQ DMUB message */ + DC_LOG_MALL("sending CAB action NO_DCN_REQ\n"); cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_DCN_REQ; } else { /* 2. Check if all surfaces can fit in CAB. @@ -1351,13 +1352,16 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable) if (ways <= dc->caps.cache_num_ways && !mall_ss_unsupported) { cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB; cmd.cab.cab_alloc_ways = ways; + DC_LOG_MALL("cab allocation: %d ways. CAB action: DCN_SS_FIT_IN_CAB\n", ways); } else { cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_NOT_FIT_IN_CAB; + DC_LOG_MALL("frame does not fit in CAB: %d ways required. CAB action: DCN_SS_NOT_FIT_IN_CAB\n", ways); } } } else { /* Disable CAB */ cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_IDLE_OPTIMIZATION; + DC_LOG_MALL("idle optimization disabled\n"); } dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); @@ -1395,10 +1399,10 @@ void dcn401_prepare_bandwidth(struct dc *dc, { struct hubbub *hubbub = dc->res_pool->hubbub; bool p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support; - unsigned int compbuf_size_kb = 0; + unsigned int compbuf_size = 0; - /* Any transition into or out of a FAMS config should disable MCLK switching first to avoid hangs */ - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) { + /* Any transition into P-State support should disable MCLK switching first to avoid hangs */ + if (p_state_change_support) { dc->optimized_required = true; context->bw_ctx.bw.dcn.clk.p_state_change_support = false; } @@ -1425,10 +1429,10 @@ void dcn401_prepare_bandwidth(struct dc *dc, /* decrease compbuf size */ if (hubbub->funcs->program_compbuf_segments) { - compbuf_size_kb = context->bw_ctx.bw.dcn.arb_regs.compbuf_size; - dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); + compbuf_size = context->bw_ctx.bw.dcn.arb_regs.compbuf_size; + dc->wm_optimized_required |= (compbuf_size != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); - hubbub->funcs->program_compbuf_segments(hubbub, compbuf_size_kb, false); + hubbub->funcs->program_compbuf_segments(hubbub, compbuf_size, false); } if (dc->debug.fams2_config.bits.enable) { @@ -1437,7 +1441,7 @@ void dcn401_prepare_bandwidth(struct dc *dc, dcn401_fams2_global_control_lock(dc, context, false); } - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) { + if (p_state_change_support != context->bw_ctx.bw.dcn.clk.p_state_change_support) { /* After disabling P-State, restore the original value to ensure we get the correct P-State * on the next optimize. */ context->bw_ctx.bw.dcn.clk.p_state_change_support = p_state_change_support; @@ -1530,7 +1534,7 @@ void dcn401_fams2_update_config(struct dc *dc, struct dc_state *context, bool en if (!dc->ctx || !dc->ctx->dmub_srv || !dc->debug.fams2_config.bits.enable) return; - fams2_required = context->bw_ctx.bw.dcn.fams2_stream_count > 0; + fams2_required = context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; dc_dmub_srv_fams2_update_config(dc, context, enable && fams2_required); } @@ -1542,7 +1546,6 @@ static void update_dsc_for_odm_change(struct dc *dc, struct dc_state *context, struct pipe_ctx *old_pipe; struct pipe_ctx *new_pipe; struct pipe_ctx *old_opp_heads[MAX_PIPES]; - struct dccg *dccg = dc->res_pool->dccg; struct pipe_ctx *old_otg_master; int old_opp_head_count = 0; @@ -1568,12 +1571,9 @@ static void update_dsc_for_odm_change(struct dc *dc, struct dc_state *context, for (i = 0; i < old_opp_head_count; i++) { old_pipe = old_opp_heads[i]; new_pipe = &context->res_ctx.pipe_ctx[old_pipe->pipe_idx]; - if (old_pipe->stream_res.dsc && !new_pipe->stream_res.dsc) { - dccg->funcs->set_dto_dscclk(dccg, - old_pipe->stream_res.dsc->inst, false); + if (old_pipe->stream_res.dsc && !new_pipe->stream_res.dsc) old_pipe->stream_res.dsc->funcs->dsc_disconnect( old_pipe->stream_res.dsc); - } } } } @@ -1659,7 +1659,7 @@ void dcn401_hardware_release(struct dc *dc) */ if (dc->current_state) { if ((!dc->clk_mgr->clks.p_state_change_support || - dc->current_state->bw_ctx.bw.dcn.fams2_stream_count > 0) && + dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) && dc->res_pool->hubbub->funcs->force_pstate_change_control) dc->res_pool->hubbub->funcs->force_pstate_change_control( dc->res_pool->hubbub, true, true); @@ -1669,3 +1669,104 @@ void dcn401_hardware_release(struct dc *dc) } } +void dcn401_wait_for_det_buffer_update(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master) +{ + struct pipe_ctx *opp_heads[MAX_PIPES]; + struct pipe_ctx *dpp_pipes[MAX_PIPES]; + struct hubbub *hubbub = dc->res_pool->hubbub; + int dpp_count = 0; + + if (!otg_master->stream) + return; + + int slice_count = resource_get_opp_heads_for_otg_master(otg_master, + &context->res_ctx, opp_heads); + + for (int slice_idx = 0; slice_idx < slice_count; slice_idx++) { + if (opp_heads[slice_idx]->plane_state) { + dpp_count = resource_get_dpp_pipes_for_opp_head( + opp_heads[slice_idx], + &context->res_ctx, + dpp_pipes); + for (int dpp_idx = 0; dpp_idx < dpp_count; dpp_idx++) { + struct pipe_ctx *dpp_pipe = dpp_pipes[dpp_idx]; + if (dpp_pipe && hubbub && + dpp_pipe->plane_res.hubp && + hubbub->funcs->wait_for_det_update) + hubbub->funcs->wait_for_det_update(hubbub, dpp_pipe->plane_res.hubp->inst); + } + } + } +} + +void dcn401_interdependent_update_lock(struct dc *dc, + struct dc_state *context, bool lock) +{ + unsigned int i = 0; + struct pipe_ctx *pipe = NULL; + struct timing_generator *tg = NULL; + bool pipe_unlocked[MAX_PIPES] = {0}; + + if (lock) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + tg = pipe->stream_res.tg; + + if (!resource_is_pipe_type(pipe, OTG_MASTER) || + !tg->funcs->is_tg_enabled(tg) || + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) + continue; + dc->hwss.pipe_control_lock(dc, pipe, true); + } + } else { + /* Unlock pipes based on the change in DET allocation instead of pipe index + * Prevents over allocation of DET during unlock process + * e.g. 2 pipe config with different streams with a max of 20 DET segments + * Before: After: + * - Pipe0: 10 DET segments - Pipe0: 12 DET segments + * - Pipe1: 10 DET segments - Pipe1: 8 DET segments + * If Pipe0 gets updated first, 22 DET segments will be allocated + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + tg = pipe->stream_res.tg; + int current_pipe_idx = i; + + if (!resource_is_pipe_type(pipe, OTG_MASTER) || + !tg->funcs->is_tg_enabled(tg) || + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { + pipe_unlocked[i] = true; + continue; + } + + // If the same stream exists in old context, ensure the OTG_MASTER pipes for the same stream get compared + struct pipe_ctx *old_otg_master = resource_get_otg_master_for_stream(&dc->current_state->res_ctx, pipe->stream); + + if (old_otg_master) + current_pipe_idx = old_otg_master->pipe_idx; + if (resource_calculate_det_for_stream(context, pipe) < + resource_calculate_det_for_stream(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[current_pipe_idx])) { + dc->hwss.pipe_control_lock(dc, pipe, false); + pipe_unlocked[i] = true; + dcn401_wait_for_det_buffer_update(dc, context, pipe); + } + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (pipe_unlocked[i]) + continue; + pipe = &context->res_ctx.pipe_ctx[i]; + dc->hwss.pipe_control_lock(dc, pipe, false); + } + } +} + +void dcn401_program_outstanding_updates(struct dc *dc, + struct dc_state *context) +{ + struct hubbub *hubbub = dc->res_pool->hubbub; + + /* update compbuf if required */ + if (hubbub->funcs->program_compbuf_segments) + hubbub->funcs->program_compbuf_segments(hubbub, context->bw_ctx.bw.dcn.arb_regs.compbuf_size, true); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 8e9c1c17aa66..a27e62081685 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -81,4 +81,7 @@ void dcn401_hardware_release(struct dc *dc); void dcn401_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); void adjust_hotspot_between_slices_for_2x_magnify(uint32_t cursor_width, struct dc_cursor_position *pos_cpy); +void dcn401_wait_for_det_buffer_update(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); +void dcn401_interdependent_update_lock(struct dc *dc, struct dc_state *context, bool lock); +void dcn401_program_outstanding_updates(struct dc *dc, struct dc_state *context); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 6a768702c7bd..a2ca07235c83 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -38,7 +38,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .disable_audio_stream = dce110_disable_audio_stream, .disable_plane = dcn20_disable_plane, .pipe_control_lock = dcn20_pipe_control_lock, - .interdependent_update_lock = dcn32_interdependent_update_lock, + .interdependent_update_lock = dcn401_interdependent_update_lock, .cursor_lock = dcn10_cursor_lock, .prepare_bandwidth = dcn401_prepare_bandwidth, .optimize_bandwidth = dcn401_optimize_bandwidth, @@ -99,12 +99,11 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .fams2_global_control_lock = dcn401_fams2_global_control_lock, .fams2_update_config = dcn401_fams2_update_config, .fams2_global_control_lock_fast = dcn401_fams2_global_control_lock_fast, - .power_down = dce110_power_down, + .program_outstanding_updates = dcn401_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn401_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, @@ -115,8 +114,6 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, .enable_stream_timing = dcn401_enable_stream_timing, .edp_backlight_control = dce110_edp_backlight_control, - .disable_stream_gating = dcn20_disable_stream_gating, - .enable_stream_gating = dcn20_enable_stream_gating, .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, .did_underflow_occur = dcn10_did_underflow_occur, .init_blank = dcn32_init_blank, @@ -136,12 +133,11 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .dccg_init = dcn20_dccg_init, .set_mcm_luts = dcn401_set_mcm_luts, .program_mall_pipe_config = dcn32_program_mall_pipe_config, - .update_force_pstate = dcn32_update_force_pstate, .update_mall_sel = dcn32_update_mall_sel, .calculate_dccg_k1_k2_values = NULL, .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe, - .populate_mcm_luts = dcn401_populate_mcm_luts, + .populate_mcm_luts = NULL, }; void dcn401_hw_sequencer_init_functions(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index d05be65a2256..ac9205625623 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -240,7 +240,6 @@ struct hw_sequencer_funcs { void (*program_triplebuffer)(const struct dc *dc, struct pipe_ctx *pipe_ctx, bool enableTripleBuffer); void (*update_pending_status)(struct pipe_ctx *pipe_ctx); - void (*power_down)(struct dc *dc); void (*update_dsc_pg)(struct dc *dc, struct dc_state *context, bool safe_to_disable); /* Pipe Lock Related */ @@ -460,6 +459,9 @@ struct hw_sequencer_funcs { bool enable); void (*fams2_global_control_lock_fast)(union block_sequence_params *params); void (*set_long_vtotal)(struct pipe_ctx **pipe_ctx, int num_pipes, uint32_t v_total_min, uint32_t v_total_max); + void (*program_outstanding_updates)(struct dc *dc, + struct dc_state *context); + void (*setup_hpo_hw_control)(const struct dce_hwseq *hws, bool enable); }; void color_space_to_black_color( @@ -520,6 +522,21 @@ void hwss_build_fast_sequence(struct dc *dc, struct dc_stream_status *stream_status, struct dc_state *context); +void hwss_wait_for_all_blank_complete(struct dc *dc, + struct dc_state *context); + +void hwss_wait_for_odm_update_pending_complete(struct dc *dc, + struct dc_state *context); + +void hwss_wait_for_no_pipes_pending(struct dc *dc, + struct dc_state *context); + +void hwss_wait_for_outstanding_hw_updates(struct dc *dc, + struct dc_state *dc_context); + +void hwss_process_outstanding_hw_updates(struct dc *dc, + struct dc_state *dc_context); + void hwss_send_dmcub_cmd(union block_sequence_params *params); void hwss_program_manual_trigger(union block_sequence_params *params); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 7ac3f2a09487..0ac675456979 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -76,8 +76,6 @@ struct hwseq_private_funcs { void (*enable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx); void (*init_pipes)(struct dc *dc, struct dc_state *context); void (*reset_hw_ctx_wrap)(struct dc *dc, struct dc_state *context); - void (*update_plane_addr)(const struct dc *dc, - struct pipe_ctx *pipe_ctx); void (*plane_atomic_disconnect)(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); @@ -170,7 +168,8 @@ struct hwseq_private_funcs { unsigned int *k1_div, unsigned int *k2_div); void (*resync_fifo_dccg_dio)(struct dce_hwseq *hws, struct dc *dc, - struct dc_state *context); + struct dc_state *context, + unsigned int current_pipe_idx); enum dc_status (*apply_single_controller_ctx_to_hw)( struct pipe_ctx *pipe_ctx, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 4c8e6436c7e1..bfb8b8502d20 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -534,8 +534,8 @@ struct dcn_bw_output { unsigned int legacy_svp_drr_stream_index; bool legacy_svp_drr_stream_index_valid; struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES]; + struct dmub_cmd_fams2_global_config fams2_global_config; struct dmub_fams2_stream_static_state fams2_stream_params[DML2_MAX_PLANES]; - unsigned fams2_stream_count; struct dml2_display_arb_regs arb_regs; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h b/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h index b6203253111c..8c18efc2aa70 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h @@ -46,6 +46,8 @@ struct audio_funcs { const struct audio_info *audio_info, const struct audio_dp_link_info *dp_link_info); + void (*az_disable_hbr_audio)(struct audio *audio); + void (*wall_dto_setup)(struct audio *audio, enum signal_type signal, const struct audio_crtc_info *crtc_info, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index d5fefce3e74b..2d06067ff36d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -29,9 +29,6 @@ #include "dc.h" #include "dm_pp_smu.h" -#define DCN_MINIMUM_DISPCLK_Khz 100000 -#define DCN_MINIMUM_DPPCLK_Khz 100000 - /* Constants */ #define DDR4_DRAM_WIDTH 64 #define WM_A 0 @@ -180,6 +177,7 @@ struct clk_state_registers_and_bypass { uint32_t dispclk; uint32_t dppclk; uint32_t dtbclk; + uint32_t fclk; uint32_t dppclk_bypass; uint32_t dcfclk_bypass; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index 12282f96dfe1..c2dd061892f4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -191,7 +191,8 @@ enum dentist_divider_range { CLK_SR_DCN401(CLK0_CLK1_DFS_CNTL, CLK01, 0), \ CLK_SR_DCN401(CLK0_CLK2_DFS_CNTL, CLK01, 0), \ CLK_SR_DCN401(CLK0_CLK3_DFS_CNTL, CLK01, 0), \ - CLK_SR_DCN401(CLK0_CLK4_DFS_CNTL, CLK01, 0) + CLK_SR_DCN401(CLK0_CLK4_DFS_CNTL, CLK01, 0), \ + CLK_SR_DCN401(CLK2_CLK2_DFS_CNTL, CLK20, 0) #define CLK_COMMON_MASK_SH_LIST_DCN401(mask_sh) \ CLK_COMMON_MASK_SH_LIST_DCN321(mask_sh) @@ -235,6 +236,7 @@ struct clk_mgr_registers { uint32_t CLK1_CLK2_DFS_CNTL; uint32_t CLK1_CLK3_DFS_CNTL; uint32_t CLK1_CLK4_DFS_CNTL; + uint32_t CLK2_CLK2_DFS_CNTL; uint32_t CLK1_CLK0_CURRENT_CNT; uint32_t CLK1_CLK1_CURRENT_CNT; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index 4fb1aacee894..d619eb229a62 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -211,10 +211,7 @@ struct dccg_funcs { struct dccg *dccg, enum streamclk_source src, uint32_t otg_inst); - void (*set_dto_dscclk)( - struct dccg *dccg, - uint32_t dsc_inst, - bool enable); + void (*set_dto_dscclk)(struct dccg *dccg, uint32_t dsc_inst); void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index dd2b2864876c..67c32401893e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -227,6 +227,7 @@ struct hubbub_funcs { void (*get_mall_en)(struct hubbub *hubbub, unsigned int *mall_in_use); void (*program_det_segments)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg); void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase); + void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst); }; struct hubbub { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index 27bba47186e9..41c76ba9ba56 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -217,12 +217,13 @@ enum optc_dsc_mode { }; struct dc_bias_and_scale { - uint16_t scale_red; - uint16_t bias_red; - uint16_t scale_green; - uint16_t bias_green; - uint16_t scale_blue; - uint16_t bias_blue; + uint32_t scale_red; + uint32_t bias_red; + uint32_t scale_green; + uint32_t bias_green; + uint32_t scale_blue; + uint32_t bias_blue; + bool bias_and_scale_valid; }; enum test_pattern_dyn_range { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h index 5f6c7daa14d9..a8b44f398ce6 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h @@ -63,7 +63,7 @@ union dcn_watermark_set { struct dml2_dchub_watermark_regs b; struct dml2_dchub_watermark_regs c; struct dml2_dchub_watermark_regs d; - } dcn4; //dcn4+ + } dcn4x; //dcn4+ }; struct dce_watermarks { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h index 287bf8a90ff6..03cbcbb36f1c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h @@ -65,6 +65,7 @@ struct optc { int vupdate_offset; int vupdate_width; int vready_offset; + int pstate_keepout; struct dc_crtc_timing orginal_patched_timing; enum signal_type signal; }; @@ -110,6 +111,7 @@ void optc1_program_timing(struct timing_generator *optc, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios); @@ -127,7 +129,8 @@ void optc1_program_global_sync(struct timing_generator *optc, int vready_offset, int vstartup_start, int vupdate_offset, - int vupdate_width); + int vupdate_width, + int pstate_keepout); bool optc1_disable_crtc(struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index e5e11c84e9e2..fe7f3137f228 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -271,7 +271,9 @@ struct stream_encoder_funcs { struct stream_encoder *enc, unsigned int pix_per_container); void (*enable_fifo)(struct stream_encoder *enc); void (*disable_fifo)(struct stream_encoder *enc); + bool (*is_fifo_enabled)(struct stream_encoder *enc); void (*map_stream_to_link)(struct stream_encoder *enc, uint32_t stream_enc_inst, uint32_t link_enc_inst); + uint32_t (*get_pixels_per_cycle)(struct stream_encoder *enc); }; struct hpo_dp_stream_encoder_state { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 0f453452234c..3d4c8bd42b49 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -172,6 +172,7 @@ struct timing_generator_funcs { int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios ); @@ -256,7 +257,8 @@ struct timing_generator_funcs { int vready_offset, int vstartup_start, int vupdate_offset, - int vupdate_width); + int vupdate_width, + int pstate_keepout); void (*enable_optc_clock)(struct timing_generator *tg, bool enable); void (*program_stereo)(struct timing_generator *tg, const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index 28da1dddf0a0..45262cba675e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -245,16 +245,6 @@ struct transform_funcs { void (*set_cursor_attributes)( struct transform *xfm_base, const struct dc_cursor_attributes *attr); - - bool (*transform_program_blnd_lut)( - struct transform *xfm, - const struct pwl_params *params); - bool (*transform_program_shaper_lut)( - struct transform *xfm, - const struct pwl_params *params); - bool (*transform_program_3dlut)( - struct transform *xfm, - struct tetrahedral_params *params); }; const uint16_t *get_filter_2tap_16p(void); diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 96d40d33a1f9..cd1157d225ab 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -639,4 +639,11 @@ struct dscl_prog_data *resource_get_dscl_prog_data(struct pipe_ctx *pipe_ctx); * @dml2_options: struct to hold callbacks */ void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuration_options *dml2_options); + +/* + *Calculate total DET allocated for all pipes for a given OTG_MASTER pipe + */ +int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *otg_master); + +bool resource_is_hpo_acquired(struct dc_state *context); #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 555c1c484cfd..ff8fe1a94965 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -67,6 +67,8 @@ static void dp_retrain_link_dp_test(struct dc_link *link, { struct pipe_ctx *pipes[MAX_PIPES]; struct dc_state *state = link->dc->current_state; + bool was_hpo_acquired = resource_is_hpo_acquired(link->dc->current_state); + bool is_hpo_acquired; uint8_t count; int i; @@ -83,6 +85,12 @@ static void dp_retrain_link_dp_test(struct dc_link *link, pipes[i]); } + if (link->dc->hwss.setup_hpo_hw_control) { + is_hpo_acquired = resource_is_hpo_acquired(state); + if (was_hpo_acquired != is_hpo_acquired) + link->dc->hwss.setup_hpo_hw_control(link->dc->hwseq, is_hpo_acquired); + } + for (i = count-1; i >= 0; i--) link_set_dpms_on(state, pipes[i]); } @@ -804,8 +812,11 @@ bool dp_set_test_pattern( break; } + if (!pipe_ctx->stream) + return false; + if (pipe_ctx->stream_res.tg->funcs->lock_doublebuffer_enable) { - if (pipe_ctx->stream && should_use_dmub_lock(pipe_ctx->stream->link)) { + if (should_use_dmub_lock(pipe_ctx->stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c index b76737b7b9e4..3e47a6735912 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c @@ -74,7 +74,10 @@ void reset_dio_stream_encoder(struct pipe_ctx *pipe_ctx) struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link); struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; - if (stream_enc && stream_enc->funcs->disable_fifo) + if (!stream_enc) + return; + + if (stream_enc->funcs->disable_fifo) stream_enc->funcs->disable_fifo(stream_enc); if (stream_enc->funcs->set_input_mode) stream_enc->funcs->set_input_mode(stream_enc, 0); diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c index e1257404357b..cec68c5dba13 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c @@ -28,6 +28,8 @@ #include "dccg.h" #include "clk_mgr.h" +#define DC_LOGGER link->ctx->logger + void set_hpo_dp_throttled_vcp_size(struct pipe_ctx *pipe_ctx, struct fixed31_32 throttled_vcp_size) { @@ -108,6 +110,11 @@ void enable_hpo_dp_link_output(struct dc_link *link, enum clock_source_id clock_source, const struct dc_link_settings *link_settings) { + if (!link_res->hpo_dp_link_enc) { + DC_LOG_ERROR("%s: invalid hpo_dp_link_enc\n", __func__); + return; + } + if (link->dc->res_pool->dccg->funcs->set_symclk32_le_root_clock_gating) link->dc->res_pool->dccg->funcs->set_symclk32_le_root_clock_gating( link->dc->res_pool->dccg, @@ -124,6 +131,11 @@ void disable_hpo_dp_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal) { + if (!link_res->hpo_dp_link_enc) { + DC_LOG_ERROR("%s: invalid hpo_dp_link_enc\n", __func__); + return; + } + link_res->hpo_dp_link_enc->funcs->link_disable(link_res->hpo_dp_link_enc); link_res->hpo_dp_link_enc->funcs->disable_link_phy( link_res->hpo_dp_link_enc, signal); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index bba644024780..d21ee9d12d26 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -863,7 +863,6 @@ static bool detect_link_and_local_sink(struct dc_link *link, struct dc_sink *prev_sink = NULL; struct dpcd_caps prev_dpcd_caps; enum dc_connection_type new_connection_type = dc_connection_none; - enum dc_connection_type pre_connection_type = link->type; const uint32_t post_oui_delay = 30; // 30ms DC_LOGGER_INIT(link->ctx->logger); @@ -965,7 +964,6 @@ static bool detect_link_and_local_sink(struct dc_link *link, } if (!detect_dp(link, &sink_caps, reason)) { - link->type = pre_connection_type; if (prev_sink) dc_sink_release(prev_sink); @@ -1191,8 +1189,7 @@ static bool detect_link_and_local_sink(struct dc_link *link, //sink only can use supported link rate table, we are foreced to enable it if (link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN) link->panel_config.ilr.optimize_edp_link_rate = true; - if (edp_is_ilr_optimization_enabled(link)) - link->reported_link_cap.link_rate = get_max_link_rate_from_ilr_table(link); + link->reported_link_cap.link_rate = get_max_edp_link_rate(link); } } else { @@ -1299,8 +1296,7 @@ bool link_detect(struct dc_link *link, enum dc_detect_reason reason) link->dpcd_caps.is_mst_capable) is_delegated_to_mst_top_mgr = discover_dp_mst_topology(link, reason); - if (is_local_sink_detect_success && - pre_link_type == dc_connection_mst_branch && + if (pre_link_type == dc_connection_mst_branch && link->type != dc_connection_mst_branch) is_delegated_to_mst_top_mgr = link_reset_cur_dp_mst_topology(link); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 65607589495f..c4e03482ba9a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -817,17 +817,17 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0); dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, dsc->inst); dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, dsc->inst, true); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc; + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst); odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg); odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst, true); } dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt; dsc_cfg.pic_width *= opp_cnt; @@ -879,19 +879,32 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) } /* disable DSC block */ - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, pipe_ctx->stream_res.dsc->inst, false); - pipe_ctx->stream_res.dsc->funcs->dsc_disconnect(pipe_ctx->stream_res.dsc); - if (dccg->funcs->set_ref_dscclk) - dccg->funcs->set_ref_dscclk(dccg, pipe_ctx->stream_res.dsc->inst); - pipe_ctx->stream_res.dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc); - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_pipe->stream_res.dsc->inst, false); + for (odm_pipe = pipe_ctx; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.dsc->funcs->dsc_disconnect(odm_pipe->stream_res.dsc); + /* + * TODO - dsc_disconnect is a double buffered register. + * by the time we call dsc_disable, dsc may still remain + * connected to OPP. In this case OPTC will no longer + * get correct pixel data because DSCC is off. However + * we also can't wait for the disconnect pending + * complete, because this function can be called + * with/without OTG master lock acquired. When the lock + * is acquired we will never get pending complete until + * we release the lock later. So there is no easy way to + * solve this problem especially when the lock is + * acquired. DSC is a front end hw block it should be + * programmed as part of front end sequence, where the + * commit sequence without lock and update sequence + * with lock are completely separated. However because + * we are programming dsc as part of back end link + * programming sequence, we don't know if front end OPTC + * master lock is acquired. The back end should be + * agnostic to front end lock. DSC programming shouldn't + * belong to this sequence. + */ + odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc); if (dccg->funcs->set_ref_dscclk) dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst); - odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc); } } } @@ -2345,7 +2358,7 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); - else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && + else if (dc_is_dp_sst_signal(pipe_ctx->stream->signal) && dp_is_128b_132b_signal(pipe_ctx)) update_sst_payload(pipe_ctx, false); @@ -2578,7 +2591,7 @@ void link_set_dpms_on( if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) allocate_mst_payload(pipe_ctx); - else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && + else if (dc_is_dp_sst_signal(pipe_ctx->stream->signal) && dp_is_128b_132b_signal(pipe_ctx)) update_sst_payload(pipe_ctx, true); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 8246006857b3..5e1b5ab9fbc6 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -385,7 +385,7 @@ static void link_destruct(struct dc_link *link) if (link->panel_cntl) link->panel_cntl->funcs->destroy(&link->panel_cntl); - if (link->link_enc) { + if (link->link_enc && !link->is_dig_mapping_flexible) { /* Update link encoder resource tracking variables. These are used for * the dynamic assignment of link encoders to streams. Virtual links * are not assigned encoder resources on creation. @@ -524,6 +524,7 @@ static bool construct_phy(struct dc_link *link, link->connector_signal = SIGNAL_TYPE_DVI_DUAL_LINK; break; case CONNECTOR_ID_DISPLAY_PORT: + case CONNECTOR_ID_MXM: case CONNECTOR_ID_USBC: link->connector_signal = SIGNAL_TYPE_DISPLAY_PORT; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 46bb7a855bc2..34a618a7278b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -212,6 +212,13 @@ static enum dc_link_rate linkRateInKHzToLinkRateMultiplier(uint32_t link_rate_in case 10000000: link_rate = LINK_RATE_UHBR10; // UHBR10 - 10.0 Gbps/Lane break; + case 13500000: + link_rate = LINK_RATE_UHBR13_5; // UHBR13.5 - 13.5 Gbps/Lane + break; + case 20000000: + link_rate = LINK_RATE_UHBR20; // UHBR20 - 20.0 Gbps/Lane + break; + default: link_rate = LINK_RATE_UNKNOWN; break; @@ -541,6 +548,23 @@ static enum dc_link_rate increase_link_rate(struct dc_link *link, } } +static void increase_edp_link_rate(struct dc_link *link, + struct dc_link_settings *current_link_setting) +{ + if (current_link_setting->use_link_rate_set) { + if (current_link_setting->link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { + current_link_setting->link_rate_set++; + current_link_setting->link_rate = + link->dpcd_caps.edp_supported_link_rates[current_link_setting->link_rate_set]; + } else { + current_link_setting->use_link_rate_set = false; + current_link_setting->link_rate = LINK_RATE_UHBR10; + } + } else { + current_link_setting->link_rate = increase_link_rate(link, current_link_setting->link_rate); + } +} + static bool decide_fallback_link_setting_max_bw_policy( struct dc_link *link, const struct dc_link_settings *max, @@ -759,14 +783,7 @@ bool edp_decide_link_settings(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; - current_link_setting.lane_count = - initial_link_setting.lane_count; - } else - break; + increase_edp_link_rate(link, ¤t_link_setting); } } return false; @@ -818,9 +835,7 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, if (policy) { /* minimize lane */ if (current_link_setting.link_rate < max_link_rate) { - current_link_setting.link_rate = - increase_link_rate(link, - current_link_setting.link_rate); + increase_edp_link_rate(link, ¤t_link_setting); } else { if (current_link_setting.lane_count < link->verified_link_cap.lane_count) { @@ -839,9 +854,7 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - current_link_setting.link_rate = - increase_link_rate(link, - current_link_setting.link_rate); + increase_edp_link_rate(link, ¤t_link_setting); current_link_setting.lane_count = initial_link_setting.lane_count; } @@ -874,18 +887,15 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, } if (policy) { /* minimize lane */ - if (current_link_setting.link_rate_set < - link->dpcd_caps.edp_supported_link_rates_count - && current_link_setting.link_rate < max_link_rate) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; + if (current_link_setting.link_rate < max_link_rate) { + increase_edp_link_rate(link, ¤t_link_setting); } else { if (current_link_setting.lane_count < link->verified_link_cap.lane_count) { current_link_setting.lane_count = increase_lane_count( current_link_setting.lane_count); current_link_setting.link_rate_set = initial_link_setting.link_rate_set; + current_link_setting.use_link_rate_set = initial_link_setting.use_link_rate_set; current_link_setting.link_rate = link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; } else @@ -899,13 +909,8 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; - current_link_setting.lane_count = - initial_link_setting.lane_count; - } else + increase_edp_link_rate(link, ¤t_link_setting); + if (current_link_setting.link_rate == LINK_RATE_UNKNOWN) break; } } @@ -1166,6 +1171,8 @@ static void get_active_converter_info( link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps = intersect_frl_link_bw_support( link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps, hdmi_encoded_link_bw); + DC_LOG_DC("%s: pcon frl link bw = %u\n", __func__, + link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps); } if (link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps > 0) @@ -1541,7 +1548,11 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) * Override count to 1 if we receive a known bad count (0 or an invalid value) */ if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == 0)) { - ASSERT(0); + /* If you see this message consistently, either the host platform has FIXED_VS flag + * incorrectly configured or the sink device is returning an invalid count. + */ + DC_LOG_ERROR("lttpr_caps phy_repeater_cnt is 0x%x, forcing it to 0x80.", + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); link->dpcd_caps.lttpr_caps.phy_repeater_cnt = 0x80; DC_LOG_DC("lttpr_caps forced phy_repeater_cnt = %d\n", link->dpcd_caps.lttpr_caps.phy_repeater_cnt); } @@ -2254,7 +2265,7 @@ bool dp_verify_link_cap_with_retries( memset(&link->verified_link_cap, 0, sizeof(struct dc_link_settings)); - if (!link_detect_connection_type(link, &type) || type == dc_connection_none) { + if (link->link_enc && (!link_detect_connection_type(link, &type) || type == dc_connection_none)) { link->verified_link_cap = fail_safe_link_settings; break; } else if (dp_verify_link_cap(link, known_limit_link_setting, &fail_count)) { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index bf820d2b4dc4..3aa05a2be6c0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -305,16 +305,17 @@ bool edp_is_ilr_optimization_enabled(struct dc_link *link) return true; } -enum dc_link_rate get_max_link_rate_from_ilr_table(struct dc_link *link) +enum dc_link_rate get_max_edp_link_rate(struct dc_link *link) { - enum dc_link_rate link_rate = link->reported_link_cap.link_rate; + enum dc_link_rate max_ilr_rate = LINK_RATE_UNKNOWN; + enum dc_link_rate max_non_ilr_rate = dp_get_max_link_cap(link).link_rate; for (int i = 0; i < link->dpcd_caps.edp_supported_link_rates_count; i++) { - if (link_rate < link->dpcd_caps.edp_supported_link_rates[i]) - link_rate = link->dpcd_caps.edp_supported_link_rates[i]; + if (max_ilr_rate < link->dpcd_caps.edp_supported_link_rates[i]) + max_ilr_rate = link->dpcd_caps.edp_supported_link_rates[i]; } - return link_rate; + return (max_ilr_rate > max_non_ilr_rate ? max_ilr_rate : max_non_ilr_rate); } bool edp_is_ilr_optimization_required(struct dc_link *link, @@ -1167,6 +1168,9 @@ static void edp_set_assr_enable(const struct dc *pDC, struct dc_link *link, link_enc_index = link->link_enc->transmitter - TRANSMITTER_UNIPHY_A; if (link_res->hpo_dp_link_enc) { + if (link->wa_flags.disable_assr_for_uhbr) + return; + link_enc_index = link_res->hpo_dp_link_enc->inst; use_hpo_dp_link_enc = true; } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index 8df8ac5bde5b..30dc8c24c008 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -69,7 +69,7 @@ bool edp_wait_for_t12(struct dc_link *link); bool edp_is_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing); bool edp_is_ilr_optimization_enabled(struct dc_link *link); -enum dc_link_rate get_max_link_rate_from_ilr_table(struct dc_link *link); +enum dc_link_rate get_max_edp_link_rate(struct dc_link *link); bool edp_backlight_enable_aux(struct dc_link *link, bool enable); void edp_add_delay_for_T9(struct dc_link *link); bool edp_receiver_ready_T9(struct dc_link *link); diff --git a/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile b/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile index 505bc0517e08..eab196c57c6c 100644 --- a/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile +++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile @@ -24,6 +24,15 @@ # ifdef CONFIG_DRM_AMD_DC_FP +############################################################################### +# DCN20 +############################################################################### +MMHUBBUB_DCN20 = dcn20_mmhubbub.o + +AMD_DAL_MMHUBBUB_DCN20 = $(addprefix $(AMDDALPATH)/dc/mmhubbub/dcn20/,$(MMHUBBUB_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MMHUBBUB_DCN20) + ############################################################################### # DCN32 ############################################################################### diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c rename to drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h rename to drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.h diff --git a/drivers/gpu/drm/amd/display/dc/mpc/Makefile b/drivers/gpu/drm/amd/display/dc/mpc/Makefile index 7f7458c07e2a..5402c3529f5e 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/mpc/Makefile @@ -24,6 +24,33 @@ # ifdef CONFIG_DRM_AMD_DC_FP +############################################################################### +# DCN10 +############################################################################### +MPC_DCN10 = dcn10_mpc.o + +AMD_DAL_MPC_DCN10 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn10/,$(MPC_DCN10)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN10) + +############################################################################### +# DCN20 +############################################################################### +MPC_DCN20 = dcn20_mpc.o + +AMD_DAL_MPC_DCN20 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn20/,$(MPC_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN20) + +############################################################################### +# DCN30 +############################################################################### +MPC_DCN30 = dcn30_mpc.o + +AMD_DAL_MPC_DCN30 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn30/,$(MPC_DCN30)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN30) + ############################################################################### # DCN32 ############################################################################### diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c rename to drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h rename to drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c rename to drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h rename to drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c similarity index 99% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c rename to drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c index 3aeb85ec40b0..fe26fde12eeb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c @@ -25,7 +25,7 @@ #include "reg_helper.h" #include "dcn30_mpc.h" -#include "dcn30_cm_common.h" +#include "dcn30/dcn30_cm_common.h" #include "basics/conversion.h" #include "dcn10/dcn10_cm_common.h" #include "dc.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h rename to drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h diff --git a/drivers/gpu/drm/amd/display/dc/opp/Makefile b/drivers/gpu/drm/amd/display/dc/opp/Makefile index fbfb3c3ad819..1be76754db30 100644 --- a/drivers/gpu/drm/amd/display/dc/opp/Makefile +++ b/drivers/gpu/drm/amd/display/dc/opp/Makefile @@ -25,6 +25,22 @@ ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### +# DCN10 +############################################################################### +OPP_DCN10 = dcn10_opp.o + +AMD_DAL_OPP_DCN10 = $(addprefix $(AMDDALPATH)/dc/opp/dcn10/,$(OPP_DCN10)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPP_DCN10) +############################################################################### +# DCN20 +############################################################################### +OPP_DCN20 = dcn20_opp.o + +AMD_DAL_OPP_DCN20 = $(addprefix $(AMDDALPATH)/dc/opp/dcn20/,$(OPP_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPP_DCN20) +############################################################################### # DCN35 ############################################################################### OPP_DCN35 = dcn35_opp.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c rename to drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h rename to drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c rename to drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h rename to drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c index 94427875bcdd..097d06023e64 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c @@ -65,7 +65,8 @@ void optc1_program_global_sync( int vready_offset, int vstartup_start, int vupdate_offset, - int vupdate_width) + int vupdate_width, + int pstate_keepout) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -73,6 +74,7 @@ void optc1_program_global_sync( optc1->vstartup_start = vstartup_start; optc1->vupdate_offset = vupdate_offset; optc1->vupdate_width = vupdate_width; + optc1->pstate_keepout = pstate_keepout; if (optc1->vstartup_start == 0) { BREAK_TO_DEBUGGER(); @@ -146,6 +148,7 @@ void optc1_setup_vertical_interrupt2( * @vstartup_start: Vstartup period. * @vupdate_offset: Vupdate starting position. * @vupdate_width: Vupdate duration. + * @pstate_keepout: determines low power mode timing during refresh * @signal: DC signal types. * @use_vbios: to program timings from BIOS command table. * @@ -157,6 +160,7 @@ void optc1_program_timing( int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { @@ -177,6 +181,7 @@ void optc1_program_timing( optc1->vstartup_start = vstartup_start; optc1->vupdate_offset = vupdate_offset; optc1->vupdate_width = vupdate_width; + optc1->pstate_keepout = pstate_keepout; patched_crtc_timing = *dc_crtc_timing; apply_front_porch_workaround(&patched_crtc_timing); optc1->orginal_patched_timing = patched_crtc_timing; @@ -282,7 +287,8 @@ void optc1_program_timing( vready_offset, vstartup_start, vupdate_offset, - vupdate_width); + vupdate_width, + pstate_keepout); optc->funcs->set_vtg_params(optc, dc_crtc_timing, true); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h index 369a13244e5e..b7a57f98553d 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h @@ -201,6 +201,7 @@ struct dcn_optc_registers { uint32_t OTG_CRC1_WINDOWB_Y_CONTROL_READBACK; uint32_t OPTC_CLOCK_CONTROL; uint32_t OPTC_WIDTH_CONTROL2; + uint32_t OTG_PSTATE_REGISTER; }; #define TG_COMMON_MASK_SH_LIST_DCN(mask_sh)\ @@ -590,7 +591,11 @@ struct dcn_optc_registers { type OTG_V_COUNT_STOP_TIMER; #define TG_REG_FIELD_LIST_DCN401(type) \ - type OPTC_SEGMENT_WIDTH_LAST; + type OPTC_SEGMENT_WIDTH_LAST;\ + type OTG_PSTATE_KEEPOUT_START;\ + type OTG_PSTATE_EXTEND;\ + type OTG_UNBLANK;\ + type OTG_PSTATE_ALLOW_WIDTH_MIN; struct dcn_optc_shift { diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c index 6bbbf313b2bb..4b6446ed4ce4 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c @@ -149,7 +149,9 @@ static bool optc31_disable_crtc(struct timing_generator *optc) return true; } - +/* + * Immediate_Disable_Crtc - this is to temp disable Timing generator without reset ODM. + */ bool optc31_immediate_disable_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -162,10 +164,12 @@ bool optc31_immediate_disable_crtc(struct timing_generator *optc) VTG0_ENABLE, 0); /* CRTC disabled, so disable clock. */ - REG_WAIT(OTG_CLOCK_CONTROL, + if (optc->ctx->dce_environment != DCE_ENV_DIAG) + REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); + /* clear the false state */ optc1_clear_optc_underflow(optc); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c index 9f5c2efa7560..a5d6a7dca554 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c @@ -396,13 +396,47 @@ void optc401_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, i } } +static void optc401_program_global_sync( + struct timing_generator *optc, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + int pstate_keepout) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + optc1->vready_offset = vready_offset; + optc1->vstartup_start = vstartup_start; + optc1->vupdate_offset = vupdate_offset; + optc1->vupdate_width = vupdate_width; + optc1->pstate_keepout = pstate_keepout; + + if (optc1->vstartup_start == 0) { + BREAK_TO_DEBUGGER(); + return; + } + + REG_SET(OTG_VSTARTUP_PARAM, 0, + VSTARTUP_START, optc1->vstartup_start); + + REG_SET_2(OTG_VUPDATE_PARAM, 0, + VUPDATE_OFFSET, optc1->vupdate_offset, + VUPDATE_WIDTH, optc1->vupdate_width); + + REG_SET(OTG_VREADY_PARAM, 0, + VREADY_OFFSET, optc1->vready_offset); + + REG_UPDATE(OTG_PSTATE_REGISTER, OTG_PSTATE_KEEPOUT_START, pstate_keepout); +} + static struct timing_generator_funcs dcn401_tg_funcs = { .validate_timing = optc1_validate_timing, .program_timing = optc1_program_timing, .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, - .program_global_sync = optc1_program_global_sync, + .program_global_sync = optc401_program_global_sync, .enable_crtc = optc401_enable_crtc, .disable_crtc = optc401_disable_crtc, .phantom_crtc_post_enable = optc401_phantom_crtc_post_enable, diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h index 3114ecef332a..bb13a645802d 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h @@ -155,7 +155,11 @@ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, mask_sh),\ SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) + SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_KEEPOUT_START, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_EXTEND, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_UNBLANK, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_ALLOW_WIDTH_MIN, mask_sh) void dcn401_timing_generator_init(struct optc *optc1); diff --git a/drivers/gpu/drm/amd/display/dc/resource/Makefile b/drivers/gpu/drm/amd/display/dc/resource/Makefile index 4860bb2531a1..09320344d8e9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/Makefile +++ b/drivers/gpu/drm/amd/display/dc/resource/Makefile @@ -198,8 +198,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN351) ############################################################################### -############################################################################### - RESOURCE_DCN401 = dcn401_resource.o AMD_DAL_RESOURCE_DCN401 = $(addprefix $(AMDDALPATH)/dc/resource/dcn401/,$(RESOURCE_DCN401)) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c index fe518fd27b08..91da5cf85b69 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c @@ -1163,6 +1163,7 @@ static struct pipe_ctx *dce110_acquire_underlay( 0, 0, 0, + 0, pipe_ctx->stream->signal, false); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c index 88afb2a30eef..162856c523e4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c @@ -1067,7 +1067,10 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) struct dm_pp_clock_levels clks = {0}; int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ; - if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm) + if (!dc->bw_vbios) + return; + + if (dc->bw_vbios->memory_type == bw_def_hbm) memory_type_multiplier = MEMORY_TYPE_HBM; /*do system clock TODO PPLIB: after PPLIB implement, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 5e7cfa8e8ec9..eea2b3b307cd 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -2040,6 +2040,7 @@ bool dcn20_fast_validate_bw( { bool out = false; int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; int pipe_cnt, i, pipe_idx, vlevel; ASSERT(pipes); @@ -2064,7 +2065,7 @@ bool dcn20_fast_validate_bw( if (vlevel > context->bw_ctx.dml.soc.num_states) goto validate_fail; - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL); + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); /*initialize pipe_just_split_from to invalid idx*/ for (i = 0; i < MAX_PIPES; i++) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index 131d98025bd4..fc54483b9104 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -1007,8 +1007,10 @@ static struct pipe_ctx *dcn201_acquire_free_pipe_for_layer( struct pipe_ctx *head_pipe = resource_get_otg_master_for_stream(res_ctx, opp_head_pipe->stream); struct pipe_ctx *idle_pipe = resource_find_free_secondary_pipe_legacy(res_ctx, pool, head_pipe); - if (!head_pipe) + if (!head_pipe) { ASSERT(0); + return NULL; + } if (!idle_pipe) return NULL; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 8663cbc3d1cf..347e6aaea582 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -774,6 +774,7 @@ bool dcn21_fast_validate_bw(struct dc *dc, { bool out = false; int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; int pipe_cnt, i, pipe_idx, vlevel; ASSERT(pipes); @@ -816,7 +817,7 @@ bool dcn21_fast_validate_bw(struct dc *dc, goto validate_fail; } - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL); + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 5d1801dce273..ac8cb20e2e3b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1948,6 +1948,7 @@ static bool dcn31_resource_construct( /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 969658313fd6..a124ad9bd108 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1651,6 +1651,9 @@ static void dcn32_enable_phantom_plane(struct dc *dc, else phantom_plane = dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state); + if (!phantom_plane) + continue; + memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, sizeof(phantom_plane->scaling_quality)); @@ -1717,6 +1720,9 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, // be a valid candidate for SubVP (i.e. has a plane, stream, doesn't // already have phantom pipe assigned, etc.) by previous checks. phantom_stream = dcn32_enable_phantom_stream(dc, context, pipes, pipe_cnt, index); + if (!phantom_stream) + return; + dcn32_enable_phantom_plane(dc, context, phantom_stream, index); for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -2220,6 +2226,7 @@ static bool dcn32_resource_construct( dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { @@ -2671,8 +2678,10 @@ static struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer( struct resource_context *old_ctx = &stream->ctx->dc->current_state->res_ctx; int head_index; - if (!head_pipe) + if (!head_pipe) { ASSERT(0); + return NULL; + } /* * Modified from dcn20_acquire_idle_pipe_for_layer diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index fee67fbab8e2..7901792afb7b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -505,6 +505,8 @@ unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned SRI_ARR(CM_POST_CSC_B_C11_C12, CM, id), \ SRI_ARR(CM_POST_CSC_B_C33_C34, CM, id), \ SRI_ARR(CM_MEM_PWR_CTRL, CM, id), SRI_ARR(CM_CONTROL, CM, id), \ + SRI_ARR(CM_TEST_DEBUG_INDEX, CM, id), \ + SRI_ARR(CM_TEST_DEBUG_DATA, CM, id), \ SRI_ARR(FORMAT_CONTROL, CNVC_CFG, id), \ SRI_ARR(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \ SRI_ARR(CURSOR0_CONTROL, CNVC_CUR, id), \ @@ -761,6 +763,7 @@ unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned SRI_ARR(DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, DSCC, id), \ SRI_ARR(DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, DSCC, id), \ SRI_ARR(DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id), \ + SRI_ARR(DSCC_TEST_DEBUG_BUS_ROTATE, DSCC, id), \ SRI_ARR(DSCCIF_CONFIG0, DSCCIF, id), \ SRI_ARR(DSCCIF_CONFIG1, DSCCIF, id), \ SRI_ARR(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id) @@ -1185,6 +1188,8 @@ unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned SR(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL), \ SR(DCHUBBUB_ARB_DRAM_STATE_CNTL), SR(DCHUBBUB_ARB_SAT_LEVEL), \ SR(DCHUBBUB_ARB_DF_REQ_OUTSTAND), SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DCHUBBUB_TEST_DEBUG_INDEX), \ + SR(DCHUBBUB_TEST_DEBUG_DATA), \ SR(DCHUBBUB_SOFT_RESET), SR(DCHUBBUB_CRC_CTRL), \ SR(DCN_VM_FB_LOCATION_BASE), SR(DCN_VM_FB_LOCATION_TOP), \ SR(DCN_VM_FB_OFFSET), SR(DCN_VM_AGP_BOT), SR(DCN_VM_AGP_TOP), \ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c index d184105ce2b3..f5a4e97c40ce 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c @@ -218,12 +218,12 @@ bool dcn32_is_center_timing(struct pipe_ctx *pipe) pipe->stream->timing.v_addressable != pipe->stream->src.height) { is_center_timing = true; } - } - if (pipe->plane_state) { - if (pipe->stream->timing.v_addressable != pipe->plane_state->dst_rect.height && - pipe->stream->timing.v_addressable != pipe->plane_state->src_rect.height) { - is_center_timing = true; + if (pipe->plane_state) { + if (pipe->stream->timing.v_addressable != pipe->plane_state->dst_rect.height && + pipe->stream->timing.v_addressable != pipe->plane_state->src_rect.height) { + is_center_timing = true; + } } } @@ -663,7 +663,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context) subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe); refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + - pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); + pipe->stream->timing.v_total * (unsigned long long)pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); } @@ -724,7 +724,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe); refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + - pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); + pipe->stream->timing.v_total * (unsigned long long)pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 8e0588b1cf30..827a94f84f10 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1783,6 +1783,7 @@ static bool dcn321_resource_construct( dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index ddf251901fb3..46ad684fe192 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -786,6 +786,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dmub_reallow_idle = false, .static_screen_wait_frames = 2, .disable_timeout = true, + .min_disp_clk_khz = 50000, }; static const struct dc_panel_config panel_config_defaults = { @@ -1899,6 +1900,7 @@ static bool dcn35_resource_construct( /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 34b02147881d..02e63b95c36d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -1188,7 +1188,7 @@ static struct stream_encoder *dcn401_stream_encoder_create( vpg = dcn401_vpg_create(ctx, vpg_inst); afmt = dcn401_afmt_create(ctx, afmt_inst); - if (!enc1 || !vpg || !afmt) { + if (!enc1 || !vpg || !afmt || eng_id >= ARRAY_SIZE(stream_enc_regs)) { kfree(enc1); kfree(vpg); kfree(afmt); @@ -1822,6 +1822,7 @@ static bool dcn401_resource_construct( dc->caps.edp_dsc_support = true; dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; + dc->caps.max_v_total = (1 << 15) - 1; if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) dc->caps.dcc_plane_width_limit = 7680; @@ -2099,6 +2100,7 @@ static bool dcn401_resource_construct( dc->dml2_options.use_native_soc_bb_construction = true; dc->dml2_options.minimize_dispclk_using_odm = true; dc->dml2_options.map_dc_pipes_with_callbacks = true; + dc->dml2_options.force_tdlut_enable = true; resource_init_common_dml2_callbacks(dc, &dc->dml2_options); dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index bb46f30d11d0..514d1ce20df9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -536,7 +536,8 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst), \ SRI_ARR(OPTC_WIDTH_CONTROL2, ODM, inst), \ SRI_ARR(OPTC_MEMORY_CONFIG, ODM, inst), \ - SRI_ARR(OTG_DRR_CONTROL, OTG, inst) + SRI_ARR(OTG_DRR_CONTROL, OTG, inst), \ + SRI_ARR(OTG_PSTATE_REGISTER, OTG, inst) /* HUBBUB */ #define HUBBUB_REG_LIST_DCN4_01_RI(id) \ diff --git a/drivers/gpu/drm/amd/display/dc/spl/Makefile b/drivers/gpu/drm/amd/display/dc/spl/Makefile index 89cad60b1a10..f8df85ea4d32 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/Makefile +++ b/drivers/gpu/drm/amd/display/dc/spl/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'spl' sub-component of DAL. # It provides the scaling library interface. -SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_isharp_filters.o +SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_filters_old.o dc_spl_isharp_filters.o AMD_DAL_SPL = $(addprefix $(AMDDALPATH)/dc/spl/,$(SPL)) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index e3e20cd86af6..9eccdb38bed4 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -910,6 +910,7 @@ static const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_3 return NULL; else { /* should never happen, bug */ + BREAK_TO_DEBUGGER(); return NULL; } } @@ -925,13 +926,14 @@ static void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p( data->taps.v_taps_c, data->ratios.vert_c); } - +#ifdef CONFIG_DRM_AMD_DC_FP static const uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) { if ((taps == 3) || (taps == 4) || (taps == 6)) return spl_get_filter_isharp_bs_4tap_64p(); else { /* should never happen, bug */ + BREAK_TO_DEBUGGER(); return NULL; } } @@ -943,7 +945,7 @@ static void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->filter_blur_scale_v = spl_dscl_get_blur_scale_coeffs_64p( data->taps.v_taps); } - +#endif /* Populate dscl prog data structure from scaler data calculated by SPL */ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_out) { @@ -1321,7 +1323,7 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, break; case SHARPNESS_HIGH: dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_2p0x(); - break; + break; default: BREAK_TO_DEBUGGER(); } @@ -1344,7 +1346,9 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, } // Set the values as per lookup table +#ifdef CONFIG_DRM_AMD_DC_FP spl_set_blur_scale_data(dscl_prog_data, data); +#endif } static bool spl_get_isharp_en(struct adaptive_sharpness adp_sharpness, int vscale_ratio, int hscale_ratio, struct spl_taps taps, @@ -1407,6 +1411,8 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) bool enable_easf_v = false; bool enable_easf_h = false; bool lls_enable_easf = true; + int vratio = 0; + int hratio = 0; const struct spl_scaler_data *data = &spl_out->scl_data; // All SPL calls /* recout calculation */ @@ -1446,8 +1452,8 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) // Save all calculated parameters in dscl_prog_data structure to program hw registers spl_set_dscl_prog_data(spl_in, spl_out); - int vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - int hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); + hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); if (!lls_enable_easf || spl_in->disable_easf) { enable_easf_v = false; enable_easf_h = false; diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h index f1fd3eb92f8a..205e59a2a8ee 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h @@ -9,16 +9,8 @@ #define BLACK_OFFSET_RGB_Y 0x0 #define BLACK_OFFSET_CBCR 0x8000 -#ifdef __cplusplus -extern "C" { -#endif - /* SPL interfaces */ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out); -#ifdef __cplusplus -} -#endif - #endif /* __DC_SPL_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c index 8bc838c7c3c5..a5d9a6223d06 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. -#include "dc_spl_types.h" #include "dc_spl_isharp_filters.h" //======================================== diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c index c174b2e8a150..e2baaf584139 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. -#include "dc_spl_types.h" #include "dc_spl_scl_filters.h" //========================================= // = 2 diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c new file mode 100644 index 000000000000..bb0e1b80ec3c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c @@ -0,0 +1,25 @@ +/* + * Copyright 2012-16 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 201201d3f55b..36d10b0f2eed 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -2,31 +2,14 @@ // // Copyright 2024 Advanced Micro Devices, Inc. -#include "os_types.h" -#include "dc_hw_types.h" +#include "os_types.h" // swap #ifndef ASSERT -#define ASSERT(_bool) (void *)0 +#define ASSERT(_bool) ((void *)0) #endif #include "include/fixed31_32.h" // fixed31_32 and related functions #ifndef __DC_SPL_TYPES_H__ #define __DC_SPL_TYPES_H__ -enum lb_memory_config { - /* Enable all 3 pieces of memory */ - LB_MEMORY_CONFIG_0 = 0, - - /* Enable only the first piece of memory */ - LB_MEMORY_CONFIG_1 = 1, - - /* Enable only the second piece of memory */ - LB_MEMORY_CONFIG_2 = 2, - - /* Only applicable in 4:2:0 mode, enable all 3 pieces of memory and the - * last piece of chroma memory used for the luma storage - */ - LB_MEMORY_CONFIG_3 = 3 -}; - struct spl_size { uint32_t width; uint32_t height; @@ -88,6 +71,22 @@ enum spl_pixel_format { SPL_PIXEL_FORMAT_UNKNOWN }; +enum lb_memory_config { + /* Enable all 3 pieces of memory */ + LB_MEMORY_CONFIG_0 = 0, + + /* Enable only the first piece of memory */ + LB_MEMORY_CONFIG_1 = 1, + + /* Enable only the second piece of memory */ + LB_MEMORY_CONFIG_2 = 2, + + /* Only applicable in 4:2:0 mode, enable all 3 pieces of memory and the + * last piece of chroma memory used for the luma storage + */ + LB_MEMORY_CONFIG_3 = 3 +}; + /* Rotation angle */ enum spl_rotation_angle { SPL_ROTATION_ANGLE_0 = 0, @@ -120,6 +119,13 @@ enum spl_color_space { SPL_COLOR_SPACE_YCBCR709_BLACK, }; +enum chroma_cositing { + CHROMA_COSITING_NONE, + CHROMA_COSITING_LEFT, + CHROMA_COSITING_TOPLEFT, + CHROMA_COSITING_COUNT +}; + // Scratch space for calculating scaler params struct spl_scaler_data { int h_active; diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h b/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h new file mode 100644 index 000000000000..7ebea91c84f6 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h @@ -0,0 +1,77 @@ +/* + * Copyright 2012-16 Advanced Micro Devices, Inc. + * Copyright 2019 Raptor Engineering, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _SPL_OS_TYPES_H_ +#define _SPL_OS_TYPES_H_ + +#include +#include +#include +#include +#include +#include + +/* + * + * general debug capabilities + * + */ +// TODO: need backport +#define SPL_BREAK_TO_DEBUGGER() ASSERT(0) + +static inline uint64_t spl_div_u64_rem(uint64_t dividend, uint32_t divisor, uint32_t *remainder) +{ + return div_u64_rem(dividend, divisor, remainder); +} + +static inline uint64_t spl_div_u64(uint64_t dividend, uint32_t divisor) +{ + return div_u64(dividend, divisor); +} + +static inline uint64_t spl_div64_u64(uint64_t dividend, uint64_t divisor) +{ + return div64_u64(dividend, divisor); +} + +static inline uint64_t spl_div64_u64_rem(uint64_t dividend, uint64_t divisor, uint64_t *remainder) +{ + return div64_u64_rem(dividend, divisor, remainder); +} + +static inline int64_t spl_div64_s64(int64_t dividend, int64_t divisor) +{ + return div64_s64(dividend, divisor); +} + +#define spl_swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +#ifndef spl_min +#define spl_min(a, b) (((a) < (b)) ? (a):(b)) +#endif + +#endif /* _SPL_OS_TYPES_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 6589bb9aea6b..cd70453aeae0 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -330,6 +330,9 @@ struct dmub_diagnostic_data { uint32_t inbox0_rptr; uint32_t inbox0_wptr; uint32_t inbox0_size; + uint32_t outbox1_rptr; + uint32_t outbox1_wptr; + uint32_t outbox1_size; uint32_t gpint_datain0; struct dmub_srv_debug timeout_info; uint8_t is_dmcub_enabled : 1; diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 5ff0a865705f..c5f99cbff0b6 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -336,6 +336,10 @@ union dmub_psr_debug_flags { */ uint32_t back_to_back_flip : 1; + /** + * Enable visual confirm for IPS + */ + uint32_t enable_ips_visual_confirm : 1; } bitfields; /** @@ -3024,14 +3028,6 @@ struct dmub_cmd_update_dirty_rect_data { * Currently the support is only for 0 or 1 */ uint8_t panel_inst; - /** - * 16-bit value dicated by driver that indicates the coasting vtotal high byte part. - */ - uint16_t coasting_vtotal_high; - /** - * Explicit padding to 4 byte boundary. - */ - uint8_t pad[2]; }; /** diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index 662c34e9495c..d9f31b191c69 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -449,6 +449,10 @@ void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c index e1da270502cc..9600b7f858b0 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c @@ -459,6 +459,10 @@ void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c index 916ed022e96b..746696b6f09a 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c @@ -502,6 +502,10 @@ void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c index cf139e9cc20e..39a8cb6d7523 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c @@ -444,6 +444,10 @@ void dmub_dcn401_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnost diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h index d4cf7ead1d87..990fa1f19c22 100644 --- a/drivers/gpu/drm/amd/display/include/fixed31_32.h +++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h @@ -531,4 +531,10 @@ static inline struct fixed31_32 dc_fixpt_truncate(struct fixed31_32 arg, unsigne return arg; } +struct fixed31_32 dc_fixpt_from_ux_dy(unsigned int value, unsigned int integer_bits, unsigned int fractional_bits); +struct fixed31_32 dc_fixpt_from_int_dy(unsigned int int_value, + unsigned int frac_value, + unsigned int integer_bits, + unsigned int fractional_bits); + #endif diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h index 83479951732a..a48d564d1660 100644 --- a/drivers/gpu/drm/amd/display/include/logger_types.h +++ b/drivers/gpu/drm/amd/display/include/logger_types.h @@ -61,6 +61,7 @@ #define DC_LOG_ALL_TF_CHANNELS(...) pr_debug("[GAMMA]:"__VA_ARGS__) #define DC_LOG_DSC(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_SMU(...) pr_debug("[SMU_MSG]:"__VA_ARGS__) +#define DC_LOG_MALL(...) pr_debug("[MALL]:"__VA_ARGS__) #define DC_LOG_DWB(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_DP2(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_AUTO_DPM_TEST(...) pr_debug("[AutoDPMTest]: "__VA_ARGS__) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index 1e495e884484..8bc377560787 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -432,18 +432,18 @@ static enum mod_hdcp_status authenticated_dp(struct mod_hdcp *hdcp, goto out; } - if (!mod_hdcp_execute_and_set(mod_hdcp_read_bstatus, + mod_hdcp_execute_and_set(mod_hdcp_read_bstatus, &input->bstatus_read, &status, - hdcp, "bstatus_read")) - goto out; - if (!mod_hdcp_execute_and_set(check_link_integrity_dp, + hdcp, "bstatus_read"); + + mod_hdcp_execute_and_set(check_link_integrity_dp, &input->link_integrity_check, &status, - hdcp, "link_integrity_check")) - goto out; - if (!mod_hdcp_execute_and_set(check_no_reauthentication_request_dp, + hdcp, "link_integrity_check"); + + mod_hdcp_execute_and_set(check_no_reauthentication_request_dp, &input->reauth_request_check, &status, - hdcp, "reauth_request_check")) - goto out; + hdcp, "reauth_request_check"); + out: return status; } diff --git a/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h index 8ee3149df5b7..2ef1273e65ab 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h @@ -340,8 +340,6 @@ #define UVD_LMI_CTRL__REQ_MODE_MASK 0x00000200L #define UVD_LMI_CTRL__REQ_MODE__SHIFT 0x00000009 #define UVD_LMI_CTRL__RFU_MASK 0xf8000000L -#define UVD_LMI_CTRL__RFU_MASK 0xfc000000L -#define UVD_LMI_CTRL__RFU__SHIFT 0x0000001a #define UVD_LMI_CTRL__RFU__SHIFT 0x0000001b #define UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK 0x00200000L #define UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN__SHIFT 0x00000015 diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index 46bf19c9c5c4..710e328fad48 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -258,6 +258,48 @@ struct gc_info_v1_2 { uint32_t gc_gl2c_per_gpu; }; +struct gc_info_v1_3 { + struct gpu_info_header header; + uint32_t gc_num_se; + uint32_t gc_num_wgp0_per_sa; + uint32_t gc_num_wgp1_per_sa; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_gl2c; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_sa_per_se; + uint32_t gc_num_packer_per_sc; + uint32_t gc_num_gl2a; + uint32_t gc_num_tcp_per_sa; + uint32_t gc_num_sdp_interface; + uint32_t gc_num_tcps; + uint32_t gc_num_tcp_per_wpg; + uint32_t gc_tcp_l1_size; + uint32_t gc_num_sqc_per_wgp; + uint32_t gc_l1_instruction_cache_size_per_sqc; + uint32_t gc_l1_data_cache_size_per_sqc; + uint32_t gc_gl1c_per_sa; + uint32_t gc_gl1c_size_per_instance; + uint32_t gc_gl2c_per_gpu; + uint32_t gc_tcp_size_per_cu; + uint32_t gc_tcp_cache_line_size; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_instruction_cache_line_size; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_line_size; + uint32_t gc_tcc_size; + uint32_t gc_tcc_cache_line_size; +}; + struct gc_info_v2_0 { struct gpu_info_header header; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 6d094cf3587d..7744ca3ef4b1 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -318,6 +318,12 @@ struct kfd2kgd_calls { void (*program_trap_handler_settings)(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst); + uint64_t (*hqd_get_pq_addr)(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst); + uint64_t (*hqd_reset)(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout); }; #endif /* KGD_KFD_INTERFACE_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 4b20e2274313..19a48d98830a 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -218,6 +218,7 @@ enum pp_mp1_state { PP_MP1_STATE_SHUTDOWN, PP_MP1_STATE_UNLOAD, PP_MP1_STATE_RESET, + PP_MP1_STATE_FLR, }; enum pp_df_cstate { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 8b7d6ed7e2ed..9dc82f4d7c93 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -168,7 +168,11 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, int ret = 0; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - if (pp_funcs && pp_funcs->set_mp1_state) { + if (mp1_state == PP_MP1_STATE_FLR) { + /* VF lost access to SMU */ + if (amdgpu_sriov_vf(adev)) + adev->pm.dpm_enabled = false; + } else if (pp_funcs && pp_funcs->set_mp1_state) { mutex_lock(&adev->pm.mutex); ret = pp_funcs->set_mp1_state( diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c index ca1c7ae8d146..f06b29e33ba4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c @@ -1183,6 +1183,8 @@ static int init_overdrive_limits(struct pp_hwmgr *hwmgr, fw_info = smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, FirmwareInfo), &size, &frev, &crev); + PP_ASSERT_WITH_CODE(fw_info != NULL, + "Missing firmware info!", return -EINVAL); if ((fw_info->ucTableFormatRevision == 1) && (le16_to_cpu(fw_info->usStructureSize) >= sizeof(ATOM_FIRMWARE_INFO_V1_4))) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index 6e717ddbb029..9ace863792d4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -2934,9 +2934,7 @@ static int vega10_stop_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap) } } - vega10_enable_smc_features(hwmgr, false, feature_mask); - - return 0; + return vega10_enable_smc_features(hwmgr, false, feature_mask); } /** diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h index de2e442281ff..87ca5ceb1ece 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h @@ -92,7 +92,6 @@ //Resets #define PPSMC_MSG_PrepareMp1ForUnload 0x2E -#define PPSMC_MSG_Mode1Reset 0x2F //Set SystemVirtual DramAddrHigh #define PPSMC_MSG_SetSystemVirtualDramAddrHigh 0x30 @@ -119,11 +118,12 @@ //STB to dram log #define PPSMC_MSG_DumpSTBtoDram 0x3D -#define PPSMC_MSG_STBtoDramLogSetDramAddrHigh 0x3E -#define PPSMC_MSG_STBtoDramLogSetDramAddrLow 0x3F +#define PPSMC_MSG_STBtoDramLogSetDramAddress 0x3E +#define PPSMC_MSG_DummyUndefined 0x3F #define PPSMC_MSG_STBtoDramLogSetDramSize 0x40 #define PPSMC_MSG_SetOBMTraceBufferLogging 0x41 +#define PPSMC_MSG_UseProfilingMode 0x42 #define PPSMC_MSG_AllowGfxDcs 0x43 #define PPSMC_MSG_DisallowGfxDcs 0x44 #define PPSMC_MSG_EnableAudioStutterWA 0x45 @@ -135,6 +135,16 @@ #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4B #define PPSMC_MSG_SetPriorityDeltaGain 0x4C #define PPSMC_MSG_AllowIHHostInterrupt 0x4D +#define PPSMC_MSG_EnableShadowDpm 0x4E #define PPSMC_MSG_Mode3Reset 0x4F -#define PPSMC_Message_Count 0x50 +#define PPSMC_MSG_SetDriverDramAddr 0x50 +#define PPSMC_MSG_SetToolsDramAddr 0x51 +#define PPSMC_MSG_TransferTableSmu2DramWithAddr 0x52 +#define PPSMC_MSG_TransferTableDram2SmuWithAddr 0x53 +#define PPSMC_MSG_GetAllRunningSmuFeatures 0x54 +#define PPSMC_MSG_GetSvi3Voltage 0x55 +#define PPSMC_MSG_UpdatePolicy 0x56 +#define PPSMC_MSG_ExtPwrConnSupport 0x57 +#define PPSMC_MSG_PreloadSwPstateForUclkOverDrive 0x58 +#define PPSMC_Message_Count 0x59 #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 076620fa3ef5..16af1a329621 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -1989,7 +1989,7 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 2, - "MEMLK", + "MEMCLK", activity_monitor.Mem_FPS, activity_monitor.Mem_MinFreqStep, activity_monitor.Mem_MinActiveFreqType, @@ -2051,7 +2051,7 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u activity_monitor.Soc_PD_Data_error_coeff = input[8]; activity_monitor.Soc_PD_Data_error_rate_coeff = input[9]; break; - case 2: /* Memlk */ + case 2: /* Memclk */ activity_monitor.Mem_FPS = input[1]; activity_monitor.Mem_MinFreqStep = input[2]; activity_monitor.Mem_MinActiveFreqType = input[3]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 0d3e1a121b67..9c3c48297cba 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1691,7 +1691,7 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 2, - "MEMLK", + "MEMCLK", activity_monitor->Mem_FPS, activity_monitor->Mem_MinFreqStep, activity_monitor->Mem_MinActiveFreqType, @@ -1756,7 +1756,7 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long * activity_monitor->Fclk_PD_Data_error_coeff = input[8]; activity_monitor->Fclk_PD_Data_error_rate_coeff = input[9]; break; - case 2: /* Memlk */ + case 2: /* Memclk */ activity_monitor->Mem_FPS = input[1]; activity_monitor->Mem_MinFreqStep = input[2]; activity_monitor->Mem_MinActiveFreqType = input[3]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 78c3f94bb3ff..9974c9f8135e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -121,6 +121,7 @@ struct mca_ras_info { #define P2S_TABLE_ID_A 0x50325341 #define P2S_TABLE_ID_X 0x50325358 +#define P2S_TABLE_ID_3 0x50325303 // clang-format off static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = { @@ -271,14 +272,18 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t p2s_table_id = P2S_TABLE_ID_A; int ret = 0, i, p2stable_count; + int var = (adev->pdev->device & 0xF); char ucode_prefix[15]; /* No need to load P2S tables in IOV mode */ if (amdgpu_sriov_vf(adev)) return 0; - if (!(adev->flags & AMD_IS_APU)) + if (!(adev->flags & AMD_IS_APU)) { p2s_table_id = P2S_TABLE_ID_X; + if (var == 0x5) + p2s_table_id = P2S_TABLE_ID_3; + } amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 09973615f210..865e916fc425 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -452,17 +452,26 @@ int smu_v14_0_init_smc_tables(struct smu_context *smu) ret = -ENOMEM; goto err3_out; } + + smu_table->user_overdrive_table = + kzalloc(tables[SMU_TABLE_OVERDRIVE].size, GFP_KERNEL); + if (!smu_table->user_overdrive_table) { + ret = -ENOMEM; + goto err4_out; + } } smu_table->combo_pptable = kzalloc(tables[SMU_TABLE_COMBO_PPTABLE].size, GFP_KERNEL); if (!smu_table->combo_pptable) { ret = -ENOMEM; - goto err4_out; + goto err5_out; } return 0; +err5_out: + kfree(smu_table->user_overdrive_table); err4_out: kfree(smu_table->boot_overdrive_table); err3_out: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index e1a27903c80a..a31fae5feedf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -68,6 +68,18 @@ #define DEBUGSMC_MSG_Mode1Reset 2 #define LINK_SPEED_MAX 3 +#define PP_OD_FEATURE_GFXCLK_FMIN 0 +#define PP_OD_FEATURE_GFXCLK_FMAX 1 +#define PP_OD_FEATURE_UCLK_FMIN 2 +#define PP_OD_FEATURE_UCLK_FMAX 3 +#define PP_OD_FEATURE_GFX_VF_CURVE 4 +#define PP_OD_FEATURE_FAN_CURVE_TEMP 5 +#define PP_OD_FEATURE_FAN_CURVE_PWM 6 +#define PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT 7 +#define PP_OD_FEATURE_FAN_ACOUSTIC_TARGET 8 +#define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 +#define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 + static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -115,7 +127,6 @@ static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0), MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 0), - MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), @@ -204,6 +215,7 @@ static struct cmn2asic_mapping smu_v14_0_2_table_map[SMU_TABLE_COUNT] = { [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE}, TAB_MAP(I2C_COMMANDS), TAB_MAP(ECCINFO), + TAB_MAP(OVERDRIVE), }; static struct cmn2asic_mapping smu_v14_0_2_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -1029,16 +1041,97 @@ static int smu_v14_0_2_get_current_clk_freq_by_table(struct smu_context *smu, value); } +static bool smu_v14_0_2_is_od_feature_supported(struct smu_context *smu, + int od_feature_bit) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + const OverDriveLimits_t * const overdrive_upperlimits = + &pptable->SkuTable.OverDriveLimitsBasicMax; + + return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit); +} + +static void smu_v14_0_2_get_od_setting_limits(struct smu_context *smu, + int od_feature_bit, + int32_t *min, + int32_t *max) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + const OverDriveLimits_t * const overdrive_upperlimits = + &pptable->SkuTable.OverDriveLimitsBasicMax; + const OverDriveLimits_t * const overdrive_lowerlimits = + &pptable->SkuTable.OverDriveLimitsBasicMin; + int32_t od_min_setting, od_max_setting; + + switch (od_feature_bit) { + case PP_OD_FEATURE_GFXCLK_FMIN: + od_min_setting = overdrive_lowerlimits->GfxclkFmin; + od_max_setting = overdrive_upperlimits->GfxclkFmin; + break; + case PP_OD_FEATURE_GFXCLK_FMAX: + od_min_setting = overdrive_lowerlimits->GfxclkFmax; + od_max_setting = overdrive_upperlimits->GfxclkFmax; + break; + case PP_OD_FEATURE_UCLK_FMIN: + od_min_setting = overdrive_lowerlimits->UclkFmin; + od_max_setting = overdrive_upperlimits->UclkFmin; + break; + case PP_OD_FEATURE_UCLK_FMAX: + od_min_setting = overdrive_lowerlimits->UclkFmax; + od_max_setting = overdrive_upperlimits->UclkFmax; + break; + case PP_OD_FEATURE_GFX_VF_CURVE: + od_min_setting = overdrive_lowerlimits->VoltageOffsetPerZoneBoundary[0]; + od_max_setting = overdrive_upperlimits->VoltageOffsetPerZoneBoundary[0]; + break; + case PP_OD_FEATURE_FAN_CURVE_TEMP: + od_min_setting = overdrive_lowerlimits->FanLinearTempPoints[0]; + od_max_setting = overdrive_upperlimits->FanLinearTempPoints[0]; + break; + case PP_OD_FEATURE_FAN_CURVE_PWM: + od_min_setting = overdrive_lowerlimits->FanLinearPwmPoints[0]; + od_max_setting = overdrive_upperlimits->FanLinearPwmPoints[0]; + break; + case PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT: + od_min_setting = overdrive_lowerlimits->AcousticLimitRpmThreshold; + od_max_setting = overdrive_upperlimits->AcousticLimitRpmThreshold; + break; + case PP_OD_FEATURE_FAN_ACOUSTIC_TARGET: + od_min_setting = overdrive_lowerlimits->AcousticTargetRpmThreshold; + od_max_setting = overdrive_upperlimits->AcousticTargetRpmThreshold; + break; + case PP_OD_FEATURE_FAN_TARGET_TEMPERATURE: + od_min_setting = overdrive_lowerlimits->FanTargetTemperature; + od_max_setting = overdrive_upperlimits->FanTargetTemperature; + break; + case PP_OD_FEATURE_FAN_MINIMUM_PWM: + od_min_setting = overdrive_lowerlimits->FanMinimumPwm; + od_max_setting = overdrive_upperlimits->FanMinimumPwm; + break; + default: + od_min_setting = od_max_setting = INT_MAX; + break; + } + + if (min) + *min = od_min_setting; + if (max) + *max = od_max_setting; +} + static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { struct smu_dpm_context *smu_dpm = &smu->smu_dpm; struct smu_14_0_dpm_context *dpm_context = smu_dpm->dpm_context; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)smu->smu_table.overdrive_table; struct smu_14_0_dpm_table *single_dpm_table; struct smu_14_0_pcie_table *pcie_table; uint32_t gen_speed, lane_width; int i, curr_freq, size = 0; + int32_t min_value, max_value; int ret = 0; smu_cmn_get_sysfs_buf(&buf, &size); @@ -1159,6 +1252,183 @@ static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, "*" : ""); break; + case SMU_OD_SCLK: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_GFXCLK_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_SCLK:\n"); + size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", + od_table->OverDriveTable.GfxclkFmin, + od_table->OverDriveTable.GfxclkFmax); + break; + + case SMU_OD_MCLK: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_UCLK_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_MCLK:\n"); + size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMHz\n", + od_table->OverDriveTable.UclkFmin, + od_table->OverDriveTable.UclkFmax); + break; + + case SMU_OD_VDDGFX_OFFSET: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_GFX_VF_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_VDDGFX_OFFSET:\n"); + size += sysfs_emit_at(buf, size, "%dmV\n", + od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[0]); + break; + + case SMU_OD_FAN_CURVE: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_FAN_CURVE:\n"); + for (i = 0; i < NUM_OD_FAN_MAX_POINTS - 1; i++) + size += sysfs_emit_at(buf, size, "%d: %dC %d%%\n", + i, + (int)od_table->OverDriveTable.FanLinearTempPoints[i], + (int)od_table->OverDriveTable.FanLinearPwmPoints[i]); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_TEMP, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "FAN_CURVE(hotspot temp): %uC %uC\n", + min_value, max_value); + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_PWM, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "FAN_CURVE(fan speed): %u%% %u%%\n", + min_value, max_value); + + break; + + case SMU_OD_ACOUSTIC_LIMIT: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_ACOUSTIC_LIMIT:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.AcousticLimitRpmThreshold); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ACOUSTIC_LIMIT: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_ACOUSTIC_TARGET: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_ACOUSTIC_TARGET:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.AcousticTargetRpmThreshold); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_TARGET, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ACOUSTIC_TARGET: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_FAN_TARGET_TEMPERATURE: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_TARGET_TEMPERATURE:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanTargetTemperature); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_TARGET_TEMPERATURE, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "TARGET_TEMPERATURE: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_FAN_MINIMUM_PWM: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_MINIMUM_PWM:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanMinimumPwm); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_MINIMUM_PWM, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "MINIMUM_PWM: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_RANGE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && + !smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && + !smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + + if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT)) { + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMIN, + &min_value, + NULL); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMAX, + NULL, + &max_value); + size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", + min_value, max_value); + } + + if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT)) { + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMIN, + &min_value, + NULL); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMAX, + NULL, + &max_value); + size += sysfs_emit_at(buf, size, "MCLK: %7uMhz %10uMhz\n", + min_value, max_value); + } + + if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) { + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFX_VF_CURVE, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "VDDGFX_OFFSET: %7dmv %10dmv\n", + min_value, max_value); + } + break; + default: break; } @@ -1400,7 +1670,27 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu, uint32_t *max_power_limit, uint32_t *min_power_limit) { - // TODO + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + CustomSkuTable_t *skutable = &pptable->CustomSkuTable; + uint32_t power_limit; + uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + + if (smu_v14_0_get_current_power_limit(smu, &power_limit)) + power_limit = smu->adev->pm.ac_power ? + skutable->SocketPowerLimitAc[PPT_THROTTLER_PPT0] : + skutable->SocketPowerLimitDc[PPT_THROTTLER_PPT0]; + + if (current_power_limit) + *current_power_limit = power_limit; + if (default_power_limit) + *default_power_limit = power_limit; + + if (max_power_limit) + *max_power_limit = msg_limit; + + if (min_power_limit) + *min_power_limit = 0; return 0; } @@ -1824,50 +2114,6 @@ static void smu_v14_0_2_set_smu_mailbox_registers(struct smu_context *smu) smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, regMP1_SMN_C2PMSG_54); } -static int smu_v14_0_2_smu_send_bad_mem_page_num(struct smu_context *smu, - uint32_t size) -{ - int ret = 0; - - /* message SMU to update the bad page number on SMUBUS */ - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetNumBadMemoryPagesRetired, - size, NULL); - if (ret) - dev_err(smu->adev->dev, - "[%s] failed to message SMU to update bad memory pages number\n", - __func__); - - return ret; -} - -static int smu_v14_0_2_send_bad_mem_channel_flag(struct smu_context *smu, - uint32_t size) -{ - int ret = 0; - - /* message SMU to update the bad channel info on SMUBUS */ - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, - size, NULL); - if (ret) - dev_err(smu->adev->dev, - "[%s] failed to message SMU to update bad memory pages channel info\n", - __func__); - - return ret; -} - -static ssize_t smu_v14_0_2_get_ecc_info(struct smu_context *smu, - void *table) -{ - int ret = 0; - - // TODO - - return ret; -} - static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, void **table) { @@ -1950,6 +2196,594 @@ static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v1_3); } +static void smu_v14_0_2_dump_od_table(struct smu_context *smu, + OverDriveTableExternal_t *od_table) +{ + struct amdgpu_device *adev = smu->adev; + + dev_dbg(adev->dev, "OD: Gfxclk: (%d, %d)\n", od_table->OverDriveTable.GfxclkFmin, + od_table->OverDriveTable.GfxclkFmax); + dev_dbg(adev->dev, "OD: Uclk: (%d, %d)\n", od_table->OverDriveTable.UclkFmin, + od_table->OverDriveTable.UclkFmax); +} + +static int smu_v14_0_2_upload_overdrive_table(struct smu_context *smu, + OverDriveTableExternal_t *od_table) +{ + int ret; + ret = smu_cmn_update_table(smu, + SMU_TABLE_OVERDRIVE, + 0, + (void *)od_table, + true); + if (ret) + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + + return ret; +} + +static void smu_v14_0_2_set_supported_od_feature_mask(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + if (smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + adev->pm.od_feature_mask |= OD_OPS_SUPPORT_FAN_CURVE_RETRIEVE | + OD_OPS_SUPPORT_FAN_CURVE_SET | + OD_OPS_SUPPORT_ACOUSTIC_LIMIT_THRESHOLD_RETRIEVE | + OD_OPS_SUPPORT_ACOUSTIC_LIMIT_THRESHOLD_SET | + OD_OPS_SUPPORT_ACOUSTIC_TARGET_THRESHOLD_RETRIEVE | + OD_OPS_SUPPORT_ACOUSTIC_TARGET_THRESHOLD_SET | + OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_RETRIEVE | + OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET | + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET; +} + +static int smu_v14_0_2_get_overdrive_table(struct smu_context *smu, + OverDriveTableExternal_t *od_table) +{ + int ret; + ret = smu_cmn_update_table(smu, + SMU_TABLE_OVERDRIVE, + 0, + (void *)od_table, + false); + if (ret) + dev_err(smu->adev->dev, "Failed to get overdrive table!\n"); + + return ret; +} + +static int smu_v14_0_2_set_default_od_settings(struct smu_context *smu) +{ + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)smu->smu_table.overdrive_table; + OverDriveTableExternal_t *boot_od_table = + (OverDriveTableExternal_t *)smu->smu_table.boot_overdrive_table; + OverDriveTableExternal_t *user_od_table = + (OverDriveTableExternal_t *)smu->smu_table.user_overdrive_table; + OverDriveTableExternal_t user_od_table_bak; + int ret; + int i; + + ret = smu_v14_0_2_get_overdrive_table(smu, boot_od_table); + if (ret) + return ret; + + smu_v14_0_2_dump_od_table(smu, boot_od_table); + + memcpy(od_table, + boot_od_table, + sizeof(OverDriveTableExternal_t)); + + /* + * For S3/S4/Runpm resume, we need to setup those overdrive tables again, + * but we have to preserve user defined values in "user_od_table". + */ + if (!smu->adev->in_suspend) { + memcpy(user_od_table, + boot_od_table, + sizeof(OverDriveTableExternal_t)); + smu->user_dpm_profile.user_od = false; + } else if (smu->user_dpm_profile.user_od) { + memcpy(&user_od_table_bak, + user_od_table, + sizeof(OverDriveTableExternal_t)); + memcpy(user_od_table, + boot_od_table, + sizeof(OverDriveTableExternal_t)); + user_od_table->OverDriveTable.GfxclkFmin = + user_od_table_bak.OverDriveTable.GfxclkFmin; + user_od_table->OverDriveTable.GfxclkFmax = + user_od_table_bak.OverDriveTable.GfxclkFmax; + user_od_table->OverDriveTable.UclkFmin = + user_od_table_bak.OverDriveTable.UclkFmin; + user_od_table->OverDriveTable.UclkFmax = + user_od_table_bak.OverDriveTable.UclkFmax; + for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++) + user_od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i] = + user_od_table_bak.OverDriveTable.VoltageOffsetPerZoneBoundary[i]; + for (i = 0; i < NUM_OD_FAN_MAX_POINTS - 1; i++) { + user_od_table->OverDriveTable.FanLinearTempPoints[i] = + user_od_table_bak.OverDriveTable.FanLinearTempPoints[i]; + user_od_table->OverDriveTable.FanLinearPwmPoints[i] = + user_od_table_bak.OverDriveTable.FanLinearPwmPoints[i]; + } + user_od_table->OverDriveTable.AcousticLimitRpmThreshold = + user_od_table_bak.OverDriveTable.AcousticLimitRpmThreshold; + user_od_table->OverDriveTable.AcousticTargetRpmThreshold = + user_od_table_bak.OverDriveTable.AcousticTargetRpmThreshold; + user_od_table->OverDriveTable.FanTargetTemperature = + user_od_table_bak.OverDriveTable.FanTargetTemperature; + user_od_table->OverDriveTable.FanMinimumPwm = + user_od_table_bak.OverDriveTable.FanMinimumPwm; + } + + smu_v14_0_2_set_supported_od_feature_mask(smu); + + return 0; +} + +static int smu_v14_0_2_restore_user_od_settings(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = table_context->overdrive_table; + OverDriveTableExternal_t *user_od_table = table_context->user_overdrive_table; + int res; + + user_od_table->OverDriveTable.FeatureCtrlMask = BIT(PP_OD_FEATURE_GFXCLK_BIT) | + BIT(PP_OD_FEATURE_UCLK_BIT) | + BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT) | + BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + res = smu_v14_0_2_upload_overdrive_table(smu, user_od_table); + user_od_table->OverDriveTable.FeatureCtrlMask = 0; + if (res == 0) + memcpy(od_table, user_od_table, sizeof(OverDriveTableExternal_t)); + + return res; +} + +static int smu_v14_0_2_od_restore_table_single(struct smu_context *smu, long input) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *boot_overdrive_table = + (OverDriveTableExternal_t *)table_context->boot_overdrive_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + struct amdgpu_device *adev = smu->adev; + int i; + + switch (input) { + case PP_OD_EDIT_FAN_CURVE: + for (i = 0; i < NUM_OD_FAN_MAX_POINTS; i++) { + od_table->OverDriveTable.FanLinearTempPoints[i] = + boot_overdrive_table->OverDriveTable.FanLinearTempPoints[i]; + od_table->OverDriveTable.FanLinearPwmPoints[i] = + boot_overdrive_table->OverDriveTable.FanLinearPwmPoints[i]; + } + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_ACOUSTIC_LIMIT: + od_table->OverDriveTable.AcousticLimitRpmThreshold = + boot_overdrive_table->OverDriveTable.AcousticLimitRpmThreshold; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_ACOUSTIC_TARGET: + od_table->OverDriveTable.AcousticTargetRpmThreshold = + boot_overdrive_table->OverDriveTable.AcousticTargetRpmThreshold; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_FAN_TARGET_TEMPERATURE: + od_table->OverDriveTable.FanTargetTemperature = + boot_overdrive_table->OverDriveTable.FanTargetTemperature; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_FAN_MINIMUM_PWM: + od_table->OverDriveTable.FanMinimumPwm = + boot_overdrive_table->OverDriveTable.FanMinimumPwm; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + default: + dev_info(adev->dev, "Invalid table index: %ld\n", input); + return -EINVAL; + } + + return 0; +} + +static int smu_v14_0_2_od_edit_dpm_table(struct smu_context *smu, + enum PP_OD_DPM_TABLE_COMMAND type, + long input[], + uint32_t size) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + struct amdgpu_device *adev = smu->adev; + uint32_t offset_of_voltageoffset; + int32_t minimum, maximum; + uint32_t feature_ctrlmask; + int i, ret = 0; + + switch (type) { + case PP_OD_EDIT_SCLK_VDDC_TABLE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT)) { + dev_warn(adev->dev, "GFXCLK_LIMITS setting not supported!\n"); + return -ENOTSUPP; + } + + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + dev_info(adev->dev, "invalid number of input parameters %d\n", size); + return -EINVAL; + } + + switch (input[i]) { + case 0: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMIN, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "GfxclkFmin (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.GfxclkFmin = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; + break; + + case 1: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMAX, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "GfxclkFmax (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.GfxclkFmax = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; + break; + + default: + dev_info(adev->dev, "Invalid SCLK_VDDC_TABLE index: %ld\n", input[i]); + dev_info(adev->dev, "Supported indices: [0:min,1:max]\n"); + return -EINVAL; + } + } + + if (od_table->OverDriveTable.GfxclkFmin > od_table->OverDriveTable.GfxclkFmax) { + dev_err(adev->dev, + "Invalid setting: GfxclkFmin(%u) is bigger than GfxclkFmax(%u)\n", + (uint32_t)od_table->OverDriveTable.GfxclkFmin, + (uint32_t)od_table->OverDriveTable.GfxclkFmax); + return -EINVAL; + } + break; + + case PP_OD_EDIT_MCLK_VDDC_TABLE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT)) { + dev_warn(adev->dev, "UCLK_LIMITS setting not supported!\n"); + return -ENOTSUPP; + } + + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + dev_info(adev->dev, "invalid number of input parameters %d\n", size); + return -EINVAL; + } + + switch (input[i]) { + case 0: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMIN, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "UclkFmin (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.UclkFmin = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_UCLK_BIT; + break; + + case 1: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMAX, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "UclkFmax (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.UclkFmax = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_UCLK_BIT; + break; + + default: + dev_info(adev->dev, "Invalid MCLK_VDDC_TABLE index: %ld\n", input[i]); + dev_info(adev->dev, "Supported indices: [0:min,1:max]\n"); + return -EINVAL; + } + } + + if (od_table->OverDriveTable.UclkFmin > od_table->OverDriveTable.UclkFmax) { + dev_err(adev->dev, + "Invalid setting: UclkFmin(%u) is bigger than UclkFmax(%u)\n", + (uint32_t)od_table->OverDriveTable.UclkFmin, + (uint32_t)od_table->OverDriveTable.UclkFmax); + return -EINVAL; + } + break; + + case PP_OD_EDIT_VDDGFX_OFFSET: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) { + dev_warn(adev->dev, "Gfx offset setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFX_VF_CURVE, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "Voltage offset (%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++) + od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i] = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT); + break; + + case PP_OD_EDIT_FAN_CURVE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + if (input[0] >= NUM_OD_FAN_MAX_POINTS - 1 || + input[0] < 0) + return -EINVAL; + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_TEMP, + &minimum, + &maximum); + if (input[1] < minimum || + input[1] > maximum) { + dev_info(adev->dev, "Fan curve temp setting(%ld) must be within [%d, %d]!\n", + input[1], minimum, maximum); + return -EINVAL; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_PWM, + &minimum, + &maximum); + if (input[2] < minimum || + input[2] > maximum) { + dev_info(adev->dev, "Fan curve pwm setting(%ld) must be within [%d, %d]!\n", + input[2], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanLinearTempPoints[input[0]] = input[1]; + od_table->OverDriveTable.FanLinearPwmPoints[input[0]] = input[2]; + od_table->OverDriveTable.FanMode = FAN_MODE_MANUAL_LINEAR; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_ACOUSTIC_LIMIT: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "acoustic limit threshold setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.AcousticLimitRpmThreshold = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_ACOUSTIC_TARGET: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_TARGET, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "acoustic target threshold setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.AcousticTargetRpmThreshold = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_FAN_TARGET_TEMPERATURE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_TARGET_TEMPERATURE, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "fan target temperature setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanTargetTemperature = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_FAN_MINIMUM_PWM: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_MINIMUM_PWM, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "fan minimum pwm setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanMinimumPwm = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_RESTORE_DEFAULT_TABLE: + if (size == 1) { + ret = smu_v14_0_2_od_restore_table_single(smu, input[0]); + if (ret) + return ret; + } else { + feature_ctrlmask = od_table->OverDriveTable.FeatureCtrlMask; + memcpy(od_table, + table_context->boot_overdrive_table, + sizeof(OverDriveTableExternal_t)); + od_table->OverDriveTable.FeatureCtrlMask = feature_ctrlmask; + } + fallthrough; + case PP_OD_COMMIT_DPM_TABLE: + /* + * The member below instructs PMFW the settings focused in + * this single operation. + * `uint32_t FeatureCtrlMask;` + * It does not contain actual informations about user's custom + * settings. Thus we do not cache it. + */ + offset_of_voltageoffset = offsetof(OverDriveTable_t, VoltageOffsetPerZoneBoundary); + if (memcmp((u8 *)od_table + offset_of_voltageoffset, + table_context->user_overdrive_table + offset_of_voltageoffset, + sizeof(OverDriveTableExternal_t) - offset_of_voltageoffset)) { + smu_v14_0_2_dump_od_table(smu, od_table); + + ret = smu_v14_0_2_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + + od_table->OverDriveTable.FeatureCtrlMask = 0; + memcpy(table_context->user_overdrive_table + offset_of_voltageoffset, + (u8 *)od_table + offset_of_voltageoffset, + sizeof(OverDriveTableExternal_t) - offset_of_voltageoffset); + + if (!memcmp(table_context->user_overdrive_table, + table_context->boot_overdrive_table, + sizeof(OverDriveTableExternal_t))) + smu->user_dpm_profile.user_od = false; + else + smu->user_dpm_profile.user_od = true; + } + break; + + default: + return -ENOSYS; + } + + return ret; +} + +static int smu_v14_0_2_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + int ret = 0; + + if (limit_type != SMU_DEFAULT_PPT_LIMIT) + return -EINVAL; + + if (limit <= msg_limit) { + if (smu->current_power_limit > msg_limit) { + od_table->OverDriveTable.Ppt = 0; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v14_0_2_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + } + return smu_v14_0_set_power_limit(smu, limit_type, limit); + } else if (smu->od_enabled) { + ret = smu_v14_0_set_power_limit(smu, limit_type, msg_limit); + if (ret) + return ret; + + od_table->OverDriveTable.Ppt = (limit * 100) / msg_limit - 100; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v14_0_2_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + + smu->current_power_limit = limit; + } else { + return -EINVAL; + } + + return 0; +} + static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .get_allowed_feature_mask = smu_v14_0_2_get_allowed_feature_mask, .set_default_dpm_table = smu_v14_0_2_set_default_dpm_table, @@ -1988,13 +2822,16 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .notify_memory_pool_location = smu_v14_0_notify_memory_pool_location, .get_gpu_metrics = smu_v14_0_2_get_gpu_metrics, .set_soft_freq_limited_range = smu_v14_0_set_soft_freq_limited_range, + .set_default_od_settings = smu_v14_0_2_set_default_od_settings, + .restore_user_od_settings = smu_v14_0_2_restore_user_od_settings, + .od_edit_dpm_table = smu_v14_0_2_od_edit_dpm_table, .init_pptable_microcode = smu_v14_0_init_pptable_microcode, .populate_umd_state_clk = smu_v14_0_2_populate_umd_state_clk, .set_performance_level = smu_v14_0_set_performance_level, .gfx_off_control = smu_v14_0_gfx_off_control, .get_unique_id = smu_v14_0_2_get_unique_id, .get_power_limit = smu_v14_0_2_get_power_limit, - .set_power_limit = smu_v14_0_set_power_limit, + .set_power_limit = smu_v14_0_2_set_power_limit, .set_power_source = smu_v14_0_set_power_source, .get_power_profile_mode = smu_v14_0_2_get_power_profile_mode, .set_power_profile_mode = smu_v14_0_2_set_power_profile_mode, @@ -2015,12 +2852,9 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .enable_gfx_features = smu_v14_0_2_enable_gfx_features, .set_mp1_state = smu_v14_0_2_set_mp1_state, .set_df_cstate = smu_v14_0_2_set_df_cstate, - .send_hbm_bad_pages_num = smu_v14_0_2_smu_send_bad_mem_page_num, - .send_hbm_bad_channel_flag = smu_v14_0_2_send_bad_mem_channel_flag, #if 0 .gpo_control = smu_v14_0_gpo_control, #endif - .get_ecc_info = smu_v14_0_2_get_ecc_info, }; void smu_v14_0_2_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index ef8d7f147465..d1c5e471bdca 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -2179,7 +2179,7 @@ assigned: void radeon_atom_encoder_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_encoder *encoder; list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index b5e96a8fc2c1..11a492f21157 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7585,7 +7585,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7615,7 +7615,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7645,7 +7645,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); + drm_handle_vblank(rdev_to_drm(rdev), 2); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7675,7 +7675,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); + drm_handle_vblank(rdev_to_drm(rdev), 3); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7705,7 +7705,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); + drm_handle_vblank(rdev_to_drm(rdev), 4); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7735,7 +7735,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); + drm_handle_vblank(rdev_to_drm(rdev), 5); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -8581,7 +8581,7 @@ int cik_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 4c06f47453fd..d6ab93ed9ec4 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -91,7 +91,7 @@ struct r600_audio_pin *dce6_audio_get_pin(struct radeon_device *rdev) pin = &rdev->audio.pin[i]; pin_count = 0; - list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + list_for_each_entry(encoder, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { if (radeon_encoder_is_digital(encoder)) { radeon_encoder = to_radeon_encoder(encoder); dig = radeon_encoder->enc_priv; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index c634dc28e6c3..bc4ab71613a5 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -1673,7 +1673,7 @@ void evergreen_pm_misc(struct radeon_device *rdev) */ void evergreen_pm_prepare(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -1698,7 +1698,7 @@ void evergreen_pm_prepare(struct radeon_device *rdev) */ void evergreen_pm_finish(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -1763,7 +1763,7 @@ void evergreen_hpd_set_polarity(struct radeon_device *rdev, */ void evergreen_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enabled = 0; u32 tmp = DC_HPDx_CONNECTION_TIMER(0x9c4) | @@ -1804,7 +1804,7 @@ void evergreen_hpd_init(struct radeon_device *rdev) */ void evergreen_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disabled = 0; @@ -4753,7 +4753,7 @@ restart_ih: event_name = "vblank"; if (rdev->irq.crtc_vblank_int[crtc_idx]) { - drm_handle_vblank(rdev->ddev, crtc_idx); + drm_handle_vblank(rdev_to_drm(rdev), crtc_idx); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -5211,7 +5211,7 @@ int evergreen_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); /* initialize AGP */ diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index e5577d2a19ef..a46613283393 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -397,7 +397,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1; @@ -435,14 +435,14 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i return r; } - offset = track->cb_color_bo_offset[id] << 8; + offset = (u64)track->cb_color_bo_offset[id] << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d cb[%d] bo base %llu not aligned with %ld\n", __func__, __LINE__, id, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->cb_color_bo[id])) { /* old ddx are broken they allocate bo with w*h*bpp but * program slice with ALIGN(h, 8), catch this and patch @@ -450,14 +450,14 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i */ if (!surf.mode) { uint32_t *ib = p->ib.ptr; - unsigned long tmp, nby, bsize, size, min = 0; + u64 tmp, nby, bsize, size, min = 0; /* find the height the ddx wants */ if (surf.nby > 8) { min = surf.nby - 8; } bsize = radeon_bo_size(track->cb_color_bo[id]); - tmp = track->cb_color_bo_offset[id] << 8; + tmp = (u64)track->cb_color_bo_offset[id] << 8; for (nby = surf.nby; nby > min; nby--) { size = nby * surf.nbx * surf.bpe * surf.nsamples; if ((tmp + size * mslice) <= bsize) { @@ -469,7 +469,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i slice = ((nby * surf.nbx) / 64) - 1; if (!evergreen_surface_check(p, &surf, "cb")) { /* check if this one works */ - tmp += surf.layer_size * mslice; + tmp += (u64)surf.layer_size * mslice; if (tmp <= bsize) { ib[track->cb_color_slice_idx[id]] = slice; goto old_ddx_ok; @@ -478,9 +478,9 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i } } dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, " - "offset %d, max layer %d, bo size %ld, slice %d)\n", + "offset %llu, max layer %d, bo size %ld, slice %d)\n", __func__, __LINE__, id, surf.layer_size, - track->cb_color_bo_offset[id] << 8, mslice, + (u64)track->cb_color_bo_offset[id] << 8, mslice, radeon_bo_size(track->cb_color_bo[id]), slice); dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n", __func__, __LINE__, surf.nbx, surf.nby, @@ -564,7 +564,7 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; @@ -610,18 +610,18 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) return r; } - offset = track->db_s_read_offset << 8; + offset = (u64)track->db_s_read_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil read bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_s_read_bo)) { dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_s_read_offset << 8, mslice, + (u64)track->db_s_read_offset << 8, mslice, radeon_bo_size(track->db_s_read_bo)); dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", __func__, __LINE__, track->db_depth_size, @@ -629,18 +629,18 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) return -EINVAL; } - offset = track->db_s_write_offset << 8; + offset = (u64)track->db_s_write_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil write bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_s_write_bo)) { dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_s_write_offset << 8, mslice, + (u64)track->db_s_write_offset << 8, mslice, radeon_bo_size(track->db_s_write_bo)); return -EINVAL; } @@ -661,7 +661,7 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; @@ -708,34 +708,34 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) return r; } - offset = track->db_z_read_offset << 8; + offset = (u64)track->db_z_read_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil read bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_z_read_bo)) { dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_z_read_offset << 8, mslice, + (u64)track->db_z_read_offset << 8, mslice, radeon_bo_size(track->db_z_read_bo)); return -EINVAL; } - offset = track->db_z_write_offset << 8; + offset = (u64)track->db_z_write_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil write bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_z_write_bo)) { dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_z_write_offset << 8, mslice, + (u64)track->db_z_write_offset << 8, mslice, radeon_bo_size(track->db_z_write_bo)); return -EINVAL; } diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 77aee99e473a..3890911fe693 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2360,7 +2360,7 @@ int cayman_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); /* initialize memory controller */ diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 0b1e19345f43..80703417d8a1 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -459,7 +459,7 @@ void r100_pm_misc(struct radeon_device *rdev) */ void r100_pm_prepare(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -490,7 +490,7 @@ void r100_pm_prepare(struct radeon_device *rdev) */ void r100_pm_finish(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -603,7 +603,7 @@ void r100_hpd_set_polarity(struct radeon_device *rdev, */ void r100_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enable = 0; @@ -626,7 +626,7 @@ void r100_hpd_init(struct radeon_device *rdev) */ void r100_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disable = 0; @@ -798,7 +798,7 @@ int r100_irq_process(struct radeon_device *rdev) /* Vertical blank interrupts */ if (status & RADEON_CRTC_VBLANK_STAT) { if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -807,7 +807,7 @@ int r100_irq_process(struct radeon_device *rdev) } if (status & RADEON_CRTC2_VBLANK_STAT) { if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -1016,45 +1016,65 @@ static int r100_cp_init_microcode(struct radeon_device *rdev) DRM_DEBUG_KMS("\n"); - if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) || - (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) || - (rdev->family == CHIP_RS200)) { + switch (rdev->family) { + case CHIP_R100: + case CHIP_RV100: + case CHIP_RV200: + case CHIP_RS100: + case CHIP_RS200: DRM_INFO("Loading R100 Microcode\n"); fw_name = FIRMWARE_R100; - } else if ((rdev->family == CHIP_R200) || - (rdev->family == CHIP_RV250) || - (rdev->family == CHIP_RV280) || - (rdev->family == CHIP_RS300)) { + break; + + case CHIP_R200: + case CHIP_RV250: + case CHIP_RV280: + case CHIP_RS300: DRM_INFO("Loading R200 Microcode\n"); fw_name = FIRMWARE_R200; - } else if ((rdev->family == CHIP_R300) || - (rdev->family == CHIP_R350) || - (rdev->family == CHIP_RV350) || - (rdev->family == CHIP_RV380) || - (rdev->family == CHIP_RS400) || - (rdev->family == CHIP_RS480)) { + break; + + case CHIP_R300: + case CHIP_R350: + case CHIP_RV350: + case CHIP_RV380: + case CHIP_RS400: + case CHIP_RS480: DRM_INFO("Loading R300 Microcode\n"); fw_name = FIRMWARE_R300; - } else if ((rdev->family == CHIP_R420) || - (rdev->family == CHIP_R423) || - (rdev->family == CHIP_RV410)) { + break; + + case CHIP_R420: + case CHIP_R423: + case CHIP_RV410: DRM_INFO("Loading R400 Microcode\n"); fw_name = FIRMWARE_R420; - } else if ((rdev->family == CHIP_RS690) || - (rdev->family == CHIP_RS740)) { + break; + + case CHIP_RS690: + case CHIP_RS740: DRM_INFO("Loading RS690/RS740 Microcode\n"); fw_name = FIRMWARE_RS690; - } else if (rdev->family == CHIP_RS600) { + break; + + case CHIP_RS600: DRM_INFO("Loading RS600 Microcode\n"); fw_name = FIRMWARE_RS600; - } else if ((rdev->family == CHIP_RV515) || - (rdev->family == CHIP_R520) || - (rdev->family == CHIP_RV530) || - (rdev->family == CHIP_R580) || - (rdev->family == CHIP_RV560) || - (rdev->family == CHIP_RV570)) { + break; + + case CHIP_RV515: + case CHIP_R520: + case CHIP_RV530: + case CHIP_R580: + case CHIP_RV560: + case CHIP_RV570: DRM_INFO("Loading R500 Microcode\n"); fw_name = FIRMWARE_R520; + break; + + default: + DRM_ERROR("Unsupported Radeon family %u\n", rdev->family); + return -EINVAL; } err = request_firmware(&rdev->me_fw, fw_name, rdev->dev); @@ -1471,7 +1491,7 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) header = radeon_get_ib_value(p, h_idx); crtc_id = radeon_get_ib_value(p, h_idx + 5); reg = R100_CP_PACKET0_GET_REG(header); - crtc = drm_crtc_find(p->rdev->ddev, p->filp, crtc_id); + crtc = drm_crtc_find(rdev_to_drm(p->rdev), p->filp, crtc_id); if (!crtc) { DRM_ERROR("cannot find crtc %d\n", crtc_id); return -ENOENT; @@ -3059,7 +3079,7 @@ DEFINE_SHOW_ATTRIBUTE(r100_debugfs_mc_info); void r100_debugfs_rbbm_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r100_rbbm_info", 0444, root, rdev, &r100_debugfs_rbbm_info_fops); @@ -3069,7 +3089,7 @@ void r100_debugfs_rbbm_init(struct radeon_device *rdev) void r100_debugfs_cp_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r100_cp_ring_info", 0444, root, rdev, &r100_debugfs_cp_ring_info_fops); @@ -3081,7 +3101,7 @@ void r100_debugfs_cp_init(struct radeon_device *rdev) void r100_debugfs_mc_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r100_mc_info", 0444, root, rdev, &r100_debugfs_mc_info_fops); @@ -3947,7 +3967,7 @@ int r100_resume(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* post */ - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); /* Resume clock after posting */ r100_clock_startup(rdev); /* Initialize surface registers */ @@ -4056,7 +4076,7 @@ int r100_init(struct radeon_device *rdev) /* Set asic errata */ r100_errata(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 1620f534f55f..05c13102a8cb 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -616,7 +616,7 @@ DEFINE_SHOW_ATTRIBUTE(rv370_debugfs_pcie_gart_info); static void rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("rv370_pcie_gart_info", 0444, root, rdev, &rv370_debugfs_pcie_gart_info_fops); @@ -1452,7 +1452,7 @@ int r300_resume(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* post */ - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); /* Resume clock after posting */ r300_clock_startup(rdev); /* Initialize surface registers */ @@ -1538,7 +1538,7 @@ int r300_init(struct radeon_device *rdev) /* Set asic errata */ r300_errata(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index a979662eaa73..9a31cdec6415 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -322,7 +322,7 @@ int r420_resume(struct radeon_device *rdev) if (rdev->is_atom_bios) { atom_asic_init(rdev->mode_info.atom_context); } else { - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); } /* Resume clock after posting */ r420_clock_resume(rdev); @@ -414,7 +414,7 @@ int r420_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); @@ -493,7 +493,7 @@ DEFINE_SHOW_ATTRIBUTE(r420_debugfs_pipes_info); void r420_debugfs_pipes_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r420_pipes_info", 0444, root, rdev, &r420_debugfs_pipes_info_fops); diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index 6cbcaa845192..08e127b3249a 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -287,7 +287,7 @@ int r520_init(struct radeon_device *rdev) atom_asic_init(rdev->mode_info.atom_context); } /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 087d41e370fd..8b62f7faa5b9 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -950,7 +950,7 @@ void r600_hpd_set_polarity(struct radeon_device *rdev, void r600_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enable = 0; @@ -1017,7 +1017,7 @@ void r600_hpd_init(struct radeon_device *rdev) void r600_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disable = 0; @@ -3280,7 +3280,7 @@ int r600_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); if (rdev->flags & RADEON_IS_AGP) { @@ -4136,7 +4136,7 @@ restart_ih: DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -4166,7 +4166,7 @@ restart_ih: DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -4358,7 +4358,7 @@ DEFINE_SHOW_ATTRIBUTE(r600_debugfs_mc_info); static void r600_debugfs_mc_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r600_mc_info", 0444, root, rdev, &r600_debugfs_mc_info_fops); diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 6cf54a747749..1b2d31c4d77c 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -884,7 +884,7 @@ int r600_cs_common_vline_parse(struct radeon_cs_parser *p, crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1); reg = R600_CP_PACKET0_GET_REG(header); - crtc = drm_crtc_find(p->rdev->ddev, p->filp, crtc_id); + crtc = drm_crtc_find(rdev_to_drm(p->rdev), p->filp, crtc_id); if (!crtc) { DRM_ERROR("cannot find crtc %d\n", crtc_id); return -ENOENT; diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index 64980a61d38a..81d58ef667dd 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -153,7 +153,7 @@ void r600_dpm_print_ps_status(struct radeon_device *rdev, u32 r600_dpm_get_vblank_time(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 vblank_in_pixels; @@ -180,7 +180,7 @@ u32 r600_dpm_get_vblank_time(struct radeon_device *rdev) u32 r600_dpm_get_vrefresh(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 vrefresh = 0; diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index f3551ebaa2f0..661f374f5f27 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -116,7 +116,7 @@ void r600_audio_update_hdmi(struct work_struct *work) { struct radeon_device *rdev = container_of(work, struct radeon_device, audio_work); - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct r600_audio_pin audio_status = r600_audio_status(rdev); struct drm_encoder *encoder; bool changed = false; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 0999c8eaae94..fd8a4513025f 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2297,7 +2297,7 @@ typedef void (*radeon_wreg_t)(struct radeon_device*, uint32_t, uint32_t); struct radeon_device { struct device *dev; - struct drm_device *ddev; + struct drm_device ddev; struct pci_dev *pdev; #ifdef __alpha__ struct pci_controller *hose; @@ -2476,6 +2476,11 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index); void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v); +static inline struct drm_device *rdev_to_drm(struct radeon_device *rdev) +{ + return &rdev->ddev; +} + /* * Cast helper */ diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c index 603a78e41ba5..22ce61bdfc06 100644 --- a/drivers/gpu/drm/radeon/radeon_acpi.c +++ b/drivers/gpu/drm/radeon/radeon_acpi.c @@ -405,11 +405,11 @@ static int radeon_atif_handler(struct radeon_device *rdev, if (req.pending & ATIF_DGPU_DISPLAY_EVENT) { if ((rdev->flags & RADEON_IS_PX) && radeon_atpx_dgpu_req_power_for_displays()) { - pm_runtime_get_sync(rdev->ddev->dev); + pm_runtime_get_sync(rdev_to_drm(rdev)->dev); /* Just fire off a uevent and let userspace tell us what to do */ - drm_helper_hpd_irq_event(rdev->ddev); - pm_runtime_mark_last_busy(rdev->ddev->dev); - pm_runtime_put_autosuspend(rdev->ddev->dev); + drm_helper_hpd_irq_event(rdev_to_drm(rdev)); + pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev); + pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev); } } /* TODO: check other events */ @@ -736,7 +736,7 @@ int radeon_acpi_init(struct radeon_device *rdev) struct radeon_encoder *target = NULL; /* Find the encoder controlling the brightness */ - list_for_each_entry(tmp, &rdev->ddev->mode_config.encoder_list, + list_for_each_entry(tmp, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { struct radeon_encoder *enc = to_radeon_encoder(tmp); diff --git a/drivers/gpu/drm/radeon/radeon_agp.c b/drivers/gpu/drm/radeon/radeon_agp.c index a3d749e350f9..89d7b0e9e79f 100644 --- a/drivers/gpu/drm/radeon/radeon_agp.c +++ b/drivers/gpu/drm/radeon/radeon_agp.c @@ -161,7 +161,7 @@ struct radeon_agp_head *radeon_agp_head_init(struct drm_device *dev) static int radeon_agp_head_acquire(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct pci_dev *pdev = to_pci_dev(dev->dev); if (!rdev->agp) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 10793a433bf5..81a0a91921b9 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -187,7 +187,7 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev) if (i2c.valid) { sprintf(stmp, "0x%x", i2c.i2c_id); - rdev->i2c_bus[i] = radeon_i2c_create(rdev->ddev, &i2c, stmp); + rdev->i2c_bus[i] = radeon_i2c_create(rdev_to_drm(rdev), &i2c, stmp); } gpio = (ATOM_GPIO_I2C_ASSIGMENT *) ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT)); @@ -1716,27 +1716,25 @@ struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct case LCD_FAKE_EDID_PATCH_RECORD_TYPE: fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; if (fake_edid_record->ucFakeEDIDLength) { - struct edid *edid; - int edid_size = - max((int)EDID_LENGTH, (int)fake_edid_record->ucFakeEDIDLength); - edid = kmalloc(edid_size, GFP_KERNEL); - if (edid) { - memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0], - fake_edid_record->ucFakeEDIDLength); + const struct drm_edid *edid; + int edid_size; - if (drm_edid_is_valid(edid)) { - rdev->mode_info.bios_hardcoded_edid = edid; - rdev->mode_info.bios_hardcoded_edid_size = edid_size; - } else - kfree(edid); - } + if (fake_edid_record->ucFakeEDIDLength == 128) + edid_size = fake_edid_record->ucFakeEDIDLength; + else + edid_size = fake_edid_record->ucFakeEDIDLength * 128; + edid = drm_edid_alloc(fake_edid_record->ucFakeEDIDString, edid_size); + if (drm_edid_valid(edid)) + rdev->mode_info.bios_hardcoded_edid = edid; + else + drm_edid_free(edid); + record += struct_size(fake_edid_record, + ucFakeEDIDString, + edid_size); + } else { + /* empty fake edid record must be 3 bytes long */ + record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; } - record += fake_edid_record->ucFakeEDIDLength ? - struct_size(fake_edid_record, - ucFakeEDIDString, - fake_edid_record->ucFakeEDIDLength) : - /* empty fake edid record must be 3 bytes long */ - sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; break; case LCD_PANEL_RESOLUTION_RECORD_TYPE: panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 0bcd767b9f47..47aa06a9a942 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -196,7 +196,7 @@ static void radeon_audio_enable(struct radeon_device *rdev, return; if (rdev->mode_info.mode_config_initialized) { - list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + list_for_each_entry(encoder, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { if (radeon_encoder_is_digital(encoder)) { radeon_encoder = to_radeon_encoder(encoder); dig = radeon_encoder->enc_priv; @@ -760,7 +760,7 @@ static int radeon_audio_component_get_eld(struct device *kdev, int port, if (!rdev->audio.enabled || !rdev->mode_info.mode_config_initialized) return 0; - list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + list_for_each_entry(encoder, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { if (!radeon_encoder_is_digital(encoder)) continue; radeon_encoder = to_radeon_encoder(encoder); diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 6952b1273b0f..df8d7f56b028 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -370,27 +370,22 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) { int edid_info, size; - struct edid *edid; + const struct drm_edid *edid; unsigned char *raw; - edid_info = combios_get_table_offset(rdev->ddev, COMBIOS_HARDCODED_EDID_TABLE); + edid_info = combios_get_table_offset(rdev_to_drm(rdev), COMBIOS_HARDCODED_EDID_TABLE); if (!edid_info) return false; raw = rdev->bios + edid_info; size = EDID_LENGTH * (raw[0x7e] + 1); - edid = kmalloc(size, GFP_KERNEL); - if (edid == NULL) - return false; + edid = drm_edid_alloc(raw, size); - memcpy((unsigned char *)edid, raw, size); - - if (!drm_edid_is_valid(edid)) { - kfree(edid); + if (!drm_edid_valid(edid)) { + drm_edid_free(edid); return false; } rdev->mode_info.bios_hardcoded_edid = edid; - rdev->mode_info.bios_hardcoded_edid_size = size; return true; } @@ -398,18 +393,7 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) struct edid * radeon_bios_get_hardcoded_edid(struct radeon_device *rdev) { - struct edid *edid; - - if (rdev->mode_info.bios_hardcoded_edid) { - edid = kmalloc(rdev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL); - if (edid) { - memcpy((unsigned char *)edid, - (unsigned char *)rdev->mode_info.bios_hardcoded_edid, - rdev->mode_info.bios_hardcoded_edid_size); - return edid; - } - } - return NULL; + return drm_edid_duplicate(drm_edid_raw(rdev->mode_info.bios_hardcoded_edid)); } static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rdev, @@ -642,7 +626,7 @@ static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rde static struct radeon_i2c_bus_rec radeon_combios_get_i2c_info_from_table(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct radeon_i2c_bus_rec i2c; u16 offset; u8 id, blocks, clk, data; @@ -670,7 +654,7 @@ static struct radeon_i2c_bus_rec radeon_combios_get_i2c_info_from_table(struct r void radeon_combios_i2c_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct radeon_i2c_bus_rec i2c; /* actual hw pads @@ -812,7 +796,7 @@ bool radeon_combios_get_clock_info(struct drm_device *dev) bool radeon_combios_sideport_present(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); u16 igp_info; /* sideport is AMD only */ @@ -915,7 +899,7 @@ struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct enum radeon_tv_std radeon_combios_get_tv_info(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); uint16_t tv_info; enum radeon_tv_std tv_std = TV_STD_NTSC; @@ -2637,7 +2621,7 @@ static const char *thermal_controller_names[] = { void radeon_combios_get_power_modes(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); u16 offset, misc, misc2 = 0; u8 rev, tmp; int state_index = 0; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 69693ba5949e..528a8f3677c2 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -505,6 +505,9 @@ static void radeon_add_common_modes(struct drm_encoder *encoder, struct drm_conn continue; mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); + if (!mode) + continue; + drm_mode_probed_add(connector, mode); } } @@ -1056,7 +1059,7 @@ radeon_vga_detect(struct drm_connector *connector, bool force) */ if ((!rdev->is_atom_bios) && (ret == connector_status_disconnected) && - rdev->mode_info.bios_hardcoded_edid_size) { + rdev->mode_info.bios_hardcoded_edid) { ret = connector_status_connected; } @@ -1389,7 +1392,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) out: if ((!rdev->is_atom_bios) && (ret == connector_status_disconnected) && - rdev->mode_info.bios_hardcoded_edid_size) { + rdev->mode_info.bios_hardcoded_edid) { radeon_connector->use_digital = true; ret = connector_status_connected; } diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index afbb3a80c0c6..554b236c2328 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -760,7 +760,7 @@ bool radeon_boot_test_post_card(struct radeon_device *rdev) if (rdev->is_atom_bios) atom_asic_init(rdev->mode_info.atom_context); else - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); return true; } else { dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); @@ -980,7 +980,7 @@ int radeon_atombios_init(struct radeon_device *rdev) return -ENOMEM; rdev->mode_info.atom_card_info = atom_card_info; - atom_card_info->dev = rdev->ddev; + atom_card_info->dev = rdev_to_drm(rdev); atom_card_info->reg_read = cail_reg_read; atom_card_info->reg_write = cail_reg_write; /* needed for iio ops */ @@ -1005,7 +1005,7 @@ int radeon_atombios_init(struct radeon_device *rdev) mutex_init(&rdev->mode_info.atom_context->mutex); mutex_init(&rdev->mode_info.atom_context->scratch_mutex); - radeon_atom_initialize_bios_scratch_regs(rdev->ddev); + radeon_atom_initialize_bios_scratch_regs(rdev_to_drm(rdev)); atom_allocate_fb_scratch(rdev->mode_info.atom_context); return 0; } @@ -1049,7 +1049,7 @@ void radeon_atombios_fini(struct radeon_device *rdev) */ int radeon_combios_init(struct radeon_device *rdev) { - radeon_combios_initialize_bios_scratch_regs(rdev->ddev); + radeon_combios_initialize_bios_scratch_regs(rdev_to_drm(rdev)); return 0; } @@ -1285,9 +1285,6 @@ int radeon_device_init(struct radeon_device *rdev, bool runtime = false; rdev->shutdown = false; - rdev->dev = &pdev->dev; - rdev->ddev = ddev; - rdev->pdev = pdev; rdev->flags = flags; rdev->family = flags & RADEON_FAMILY_MASK; rdev->is_atom_bios = false; @@ -1847,7 +1844,7 @@ int radeon_gpu_reset(struct radeon_device *rdev) downgrade_write(&rdev->exclusive_lock); - drm_helper_resume_force_mode(rdev->ddev); + drm_helper_resume_force_mode(rdev_to_drm(rdev)); /* set the power state here in case we are a PX system or headless */ if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 843383f7237f..8f5f8abcb1b4 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -302,13 +302,13 @@ void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id) if ((radeon_use_pflipirq == 2) && ASIC_IS_DCE4(rdev)) return; - spin_lock_irqsave(&rdev->ddev->event_lock, flags); + spin_lock_irqsave(&rdev_to_drm(rdev)->event_lock, flags); if (radeon_crtc->flip_status != RADEON_FLIP_SUBMITTED) { DRM_DEBUG_DRIVER("radeon_crtc->flip_status = %d != " "RADEON_FLIP_SUBMITTED(%d)\n", radeon_crtc->flip_status, RADEON_FLIP_SUBMITTED); - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); return; } @@ -334,7 +334,7 @@ void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id) */ if (update_pending && (DRM_SCANOUTPOS_VALID & - radeon_get_crtc_scanoutpos(rdev->ddev, crtc_id, + radeon_get_crtc_scanoutpos(rdev_to_drm(rdev), crtc_id, GET_DISTANCE_TO_VBLANKSTART, &vpos, &hpos, NULL, NULL, &rdev->mode_info.crtcs[crtc_id]->base.hwmode)) && @@ -347,7 +347,7 @@ void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id) */ update_pending = 0; } - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); if (!update_pending) radeon_crtc_handle_flip(rdev, crtc_id); } @@ -370,14 +370,14 @@ void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id) if (radeon_crtc == NULL) return; - spin_lock_irqsave(&rdev->ddev->event_lock, flags); + spin_lock_irqsave(&rdev_to_drm(rdev)->event_lock, flags); work = radeon_crtc->flip_work; if (radeon_crtc->flip_status != RADEON_FLIP_SUBMITTED) { DRM_DEBUG_DRIVER("radeon_crtc->flip_status = %d != " "RADEON_FLIP_SUBMITTED(%d)\n", radeon_crtc->flip_status, RADEON_FLIP_SUBMITTED); - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); return; } @@ -389,7 +389,7 @@ void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id) if (work->event) drm_crtc_send_vblank_event(&radeon_crtc->base, work->event); - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); drm_crtc_vblank_put(&radeon_crtc->base); radeon_irq_kms_pflip_irq_put(rdev, work->crtc_id); @@ -408,7 +408,7 @@ static void radeon_flip_work_func(struct work_struct *__work) struct radeon_flip_work *work = container_of(__work, struct radeon_flip_work, flip_work); struct radeon_device *rdev = work->rdev; - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[work->crtc_id]; struct drm_crtc *crtc = &radeon_crtc->base; @@ -1401,7 +1401,7 @@ static int radeon_modeset_create_props(struct radeon_device *rdev) if (rdev->is_atom_bios) { rdev->mode_info.coherent_mode_property = - drm_property_create_range(rdev->ddev, 0 , "coherent", 0, 1); + drm_property_create_range(rdev_to_drm(rdev), 0, "coherent", 0, 1); if (!rdev->mode_info.coherent_mode_property) return -ENOMEM; } @@ -1409,57 +1409,57 @@ static int radeon_modeset_create_props(struct radeon_device *rdev) if (!ASIC_IS_AVIVO(rdev)) { sz = ARRAY_SIZE(radeon_tmds_pll_enum_list); rdev->mode_info.tmds_pll_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "tmds_pll", radeon_tmds_pll_enum_list, sz); } rdev->mode_info.load_detect_property = - drm_property_create_range(rdev->ddev, 0, "load detection", 0, 1); + drm_property_create_range(rdev_to_drm(rdev), 0, "load detection", 0, 1); if (!rdev->mode_info.load_detect_property) return -ENOMEM; - drm_mode_create_scaling_mode_property(rdev->ddev); + drm_mode_create_scaling_mode_property(rdev_to_drm(rdev)); sz = ARRAY_SIZE(radeon_tv_std_enum_list); rdev->mode_info.tv_std_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "tv standard", radeon_tv_std_enum_list, sz); sz = ARRAY_SIZE(radeon_underscan_enum_list); rdev->mode_info.underscan_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "underscan", radeon_underscan_enum_list, sz); rdev->mode_info.underscan_hborder_property = - drm_property_create_range(rdev->ddev, 0, + drm_property_create_range(rdev_to_drm(rdev), 0, "underscan hborder", 0, 128); if (!rdev->mode_info.underscan_hborder_property) return -ENOMEM; rdev->mode_info.underscan_vborder_property = - drm_property_create_range(rdev->ddev, 0, + drm_property_create_range(rdev_to_drm(rdev), 0, "underscan vborder", 0, 128); if (!rdev->mode_info.underscan_vborder_property) return -ENOMEM; sz = ARRAY_SIZE(radeon_audio_enum_list); rdev->mode_info.audio_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "audio", radeon_audio_enum_list, sz); sz = ARRAY_SIZE(radeon_dither_enum_list); rdev->mode_info.dither_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "dither", radeon_dither_enum_list, sz); sz = ARRAY_SIZE(radeon_output_csc_enum_list); rdev->mode_info.output_csc_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "output_csc", radeon_output_csc_enum_list, sz); @@ -1578,29 +1578,29 @@ int radeon_modeset_init(struct radeon_device *rdev) int i; int ret; - drm_mode_config_init(rdev->ddev); + drm_mode_config_init(rdev_to_drm(rdev)); rdev->mode_info.mode_config_initialized = true; - rdev->ddev->mode_config.funcs = &radeon_mode_funcs; + rdev_to_drm(rdev)->mode_config.funcs = &radeon_mode_funcs; if (radeon_use_pflipirq == 2 && rdev->family >= CHIP_R600) - rdev->ddev->mode_config.async_page_flip = true; + rdev_to_drm(rdev)->mode_config.async_page_flip = true; if (ASIC_IS_DCE5(rdev)) { - rdev->ddev->mode_config.max_width = 16384; - rdev->ddev->mode_config.max_height = 16384; + rdev_to_drm(rdev)->mode_config.max_width = 16384; + rdev_to_drm(rdev)->mode_config.max_height = 16384; } else if (ASIC_IS_AVIVO(rdev)) { - rdev->ddev->mode_config.max_width = 8192; - rdev->ddev->mode_config.max_height = 8192; + rdev_to_drm(rdev)->mode_config.max_width = 8192; + rdev_to_drm(rdev)->mode_config.max_height = 8192; } else { - rdev->ddev->mode_config.max_width = 4096; - rdev->ddev->mode_config.max_height = 4096; + rdev_to_drm(rdev)->mode_config.max_width = 4096; + rdev_to_drm(rdev)->mode_config.max_height = 4096; } - rdev->ddev->mode_config.preferred_depth = 24; - rdev->ddev->mode_config.prefer_shadow = 1; + rdev_to_drm(rdev)->mode_config.preferred_depth = 24; + rdev_to_drm(rdev)->mode_config.prefer_shadow = 1; - rdev->ddev->mode_config.fb_modifiers_not_supported = true; + rdev_to_drm(rdev)->mode_config.fb_modifiers_not_supported = true; ret = radeon_modeset_create_props(rdev); if (ret) { @@ -1618,11 +1618,11 @@ int radeon_modeset_init(struct radeon_device *rdev) /* allocate crtcs */ for (i = 0; i < rdev->num_crtc; i++) { - radeon_crtc_init(rdev->ddev, i); + radeon_crtc_init(rdev_to_drm(rdev), i); } /* okay we should have all the bios connectors */ - ret = radeon_setup_enc_conn(rdev->ddev); + ret = radeon_setup_enc_conn(rdev_to_drm(rdev)); if (!ret) { return ret; } @@ -1639,7 +1639,7 @@ int radeon_modeset_init(struct radeon_device *rdev) /* setup afmt */ radeon_afmt_init(rdev); - drm_kms_helper_poll_init(rdev->ddev); + drm_kms_helper_poll_init(rdev_to_drm(rdev)); /* do pm late init */ ret = radeon_pm_late_init(rdev); @@ -1650,15 +1650,15 @@ int radeon_modeset_init(struct radeon_device *rdev) void radeon_modeset_fini(struct radeon_device *rdev) { if (rdev->mode_info.mode_config_initialized) { - drm_kms_helper_poll_fini(rdev->ddev); + drm_kms_helper_poll_fini(rdev_to_drm(rdev)); radeon_hpd_fini(rdev); - drm_helper_force_disable_all(rdev->ddev); + drm_helper_force_disable_all(rdev_to_drm(rdev)); radeon_afmt_fini(rdev); - drm_mode_config_cleanup(rdev->ddev); + drm_mode_config_cleanup(rdev_to_drm(rdev)); rdev->mode_info.mode_config_initialized = false; } - kfree(rdev->mode_info.bios_hardcoded_edid); + drm_edid_free(rdev->mode_info.bios_hardcoded_edid); /* free i2c buses */ radeon_i2c_fini(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 7bf08164140e..f36aa71c57c7 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -259,7 +259,8 @@ static int radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { unsigned long flags = 0; - struct drm_device *dev; + struct drm_device *ddev; + struct radeon_device *rdev; int ret; if (!ent) @@ -300,28 +301,37 @@ static int radeon_pci_probe(struct pci_dev *pdev, if (ret) return ret; - dev = drm_dev_alloc(&kms_driver, &pdev->dev); - if (IS_ERR(dev)) - return PTR_ERR(dev); + rdev = devm_drm_dev_alloc(&pdev->dev, &kms_driver, typeof(*rdev), ddev); + if (IS_ERR(rdev)) + return PTR_ERR(rdev); + + rdev->dev = &pdev->dev; + rdev->pdev = pdev; + ddev = rdev_to_drm(rdev); + ddev->dev_private = rdev; ret = pci_enable_device(pdev); if (ret) goto err_free; - pci_set_drvdata(pdev, dev); + pci_set_drvdata(pdev, ddev); - ret = drm_dev_register(dev, ent->driver_data); + ret = radeon_driver_load_kms(ddev, flags); if (ret) goto err_agp; - radeon_fbdev_setup(dev->dev_private); + ret = drm_dev_register(ddev, flags); + if (ret) + goto err_agp; + + radeon_fbdev_setup(ddev->dev_private); return 0; err_agp: pci_disable_device(pdev); err_free: - drm_dev_put(dev); + drm_dev_put(ddev); return ret; } @@ -569,7 +579,6 @@ static const struct drm_ioctl_desc radeon_ioctls_kms[] = { static const struct drm_driver kms_driver = { .driver_features = DRIVER_GEM | DRIVER_RENDER | DRIVER_MODESET, - .load = radeon_driver_load_kms, .open = radeon_driver_open_kms, .postclose = radeon_driver_postclose_kms, .unload = radeon_driver_unload_kms, diff --git a/drivers/gpu/drm/radeon/radeon_fbdev.c b/drivers/gpu/drm/radeon/radeon_fbdev.c index 02bf25759059..fb70de29545c 100644 --- a/drivers/gpu/drm/radeon/radeon_fbdev.c +++ b/drivers/gpu/drm/radeon/radeon_fbdev.c @@ -67,7 +67,7 @@ static int radeon_fbdev_create_pinned_object(struct drm_fb_helper *fb_helper, int height = mode_cmd->height; u32 cpp; - info = drm_get_format_info(rdev->ddev, mode_cmd); + info = drm_get_format_info(rdev_to_drm(rdev), mode_cmd); cpp = info->cpp[0]; /* need to align pitch with crtc limits */ @@ -148,15 +148,15 @@ static int radeon_fbdev_fb_open(struct fb_info *info, int user) struct radeon_device *rdev = fb_helper->dev->dev_private; int ret; - ret = pm_runtime_get_sync(rdev->ddev->dev); + ret = pm_runtime_get_sync(rdev_to_drm(rdev)->dev); if (ret < 0 && ret != -EACCES) goto err_pm_runtime_mark_last_busy; return 0; err_pm_runtime_mark_last_busy: - pm_runtime_mark_last_busy(rdev->ddev->dev); - pm_runtime_put_autosuspend(rdev->ddev->dev); + pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev); + pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev); return ret; } @@ -165,8 +165,8 @@ static int radeon_fbdev_fb_release(struct fb_info *info, int user) struct drm_fb_helper *fb_helper = info->par; struct radeon_device *rdev = fb_helper->dev->dev_private; - pm_runtime_mark_last_busy(rdev->ddev->dev); - pm_runtime_put_autosuspend(rdev->ddev->dev); + pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev); + pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev); return 0; } @@ -236,7 +236,7 @@ static int radeon_fbdev_fb_helper_fb_probe(struct drm_fb_helper *fb_helper, ret = -ENOMEM; goto err_radeon_fbdev_destroy_pinned_object; } - ret = radeon_framebuffer_init(rdev->ddev, fb, &mode_cmd, gobj); + ret = radeon_framebuffer_init(rdev_to_drm(rdev), fb, &mode_cmd, gobj); if (ret) { DRM_ERROR("failed to initialize framebuffer %d\n", ret); goto err_kfree; @@ -374,12 +374,12 @@ void radeon_fbdev_setup(struct radeon_device *rdev) fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL); if (!fb_helper) return; - drm_fb_helper_prepare(rdev->ddev, fb_helper, bpp_sel, &radeon_fbdev_fb_helper_funcs); + drm_fb_helper_prepare(rdev_to_drm(rdev), fb_helper, bpp_sel, &radeon_fbdev_fb_helper_funcs); - ret = drm_client_init(rdev->ddev, &fb_helper->client, "radeon-fbdev", + ret = drm_client_init(rdev_to_drm(rdev), &fb_helper->client, "radeon-fbdev", &radeon_fbdev_client_funcs); if (ret) { - drm_err(rdev->ddev, "Failed to register client: %d\n", ret); + drm_err(rdev_to_drm(rdev), "Failed to register client: %d\n", ret); goto err_drm_client_init; } @@ -394,13 +394,13 @@ err_drm_client_init: void radeon_fbdev_set_suspend(struct radeon_device *rdev, int state) { - if (rdev->ddev->fb_helper) - drm_fb_helper_set_suspend(rdev->ddev->fb_helper, state); + if (rdev_to_drm(rdev)->fb_helper) + drm_fb_helper_set_suspend(rdev_to_drm(rdev)->fb_helper, state); } bool radeon_fbdev_robj_is_fb(struct radeon_device *rdev, struct radeon_bo *robj) { - struct drm_fb_helper *fb_helper = rdev->ddev->fb_helper; + struct drm_fb_helper *fb_helper = rdev_to_drm(rdev)->fb_helper; struct drm_gem_object *gobj; if (!fb_helper) diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 4fb780d96f32..daff61586be5 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -150,7 +150,7 @@ int radeon_fence_emit(struct radeon_device *rdev, rdev->fence_context + ring, seq); radeon_fence_ring_emit(rdev, ring, *fence); - trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq); + trace_radeon_fence_emit(rdev_to_drm(rdev), ring, (*fence)->seq); radeon_fence_schedule_check(rdev, ring); return 0; } @@ -489,7 +489,7 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, if (!target_seq[i]) continue; - trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]); + trace_radeon_fence_wait_begin(rdev_to_drm(rdev), i, target_seq[i]); radeon_irq_kms_sw_irq_get(rdev, i); } @@ -511,7 +511,7 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, continue; radeon_irq_kms_sw_irq_put(rdev, i); - trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]); + trace_radeon_fence_wait_end(rdev_to_drm(rdev), i, target_seq[i]); } return r; @@ -995,7 +995,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(radeon_debugfs_gpu_reset_fops, void radeon_debugfs_fence_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_gpu_reset", 0444, root, rdev, &radeon_debugfs_gpu_reset_fops); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index e66a230331ee..9735f4968b86 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -88,7 +88,7 @@ static void radeon_gem_object_free(struct drm_gem_object *gobj) if (robj) { radeon_mn_unregister(robj); - radeon_bo_unref(&robj); + ttm_bo_put(&robj->tbo); } } @@ -899,7 +899,7 @@ DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_gem_info); void radeon_gem_debugfs_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_gem_info", 0444, root, rdev, &radeon_debugfs_gem_info_fops); diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c index 3d174390a8af..1f16619ed06e 100644 --- a/drivers/gpu/drm/radeon/radeon_i2c.c +++ b/drivers/gpu/drm/radeon/radeon_i2c.c @@ -1011,7 +1011,7 @@ void radeon_i2c_add(struct radeon_device *rdev, struct radeon_i2c_bus_rec *rec, const char *name) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); int i; for (i = 0; i < RADEON_MAX_I2C_BUS; i++) { diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 63d914f3414d..1aa41cc3f991 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -309,7 +309,7 @@ DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_sa_info); static void radeon_debugfs_sa_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_sa_info", 0444, root, rdev, &radeon_debugfs_sa_info_fops); diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index c4dda908666c..9961251b44ba 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -80,7 +80,7 @@ static void radeon_hotplug_work_func(struct work_struct *work) { struct radeon_device *rdev = container_of(work, struct radeon_device, hotplug_work.work); - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; @@ -101,7 +101,7 @@ static void radeon_dp_work_func(struct work_struct *work) { struct radeon_device *rdev = container_of(work, struct radeon_device, dp_work); - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; @@ -197,7 +197,7 @@ static void radeon_driver_irq_uninstall_kms(struct drm_device *dev) static int radeon_irq_install(struct radeon_device *rdev, int irq) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); int ret; if (irq == IRQ_NOTCONNECTED) @@ -218,7 +218,7 @@ static int radeon_irq_install(struct radeon_device *rdev, int irq) static void radeon_irq_uninstall(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct pci_dev *pdev = to_pci_dev(dev->dev); radeon_driver_irq_uninstall_kms(dev); @@ -322,9 +322,9 @@ int radeon_irq_kms_init(struct radeon_device *rdev) spin_lock_init(&rdev->irq.lock); /* Disable vblank irqs aggressively for power-saving */ - rdev->ddev->vblank_disable_immediate = true; + rdev_to_drm(rdev)->vblank_disable_immediate = true; - r = drm_vblank_init(rdev->ddev, rdev->num_crtc); + r = drm_vblank_init(rdev_to_drm(rdev), rdev->num_crtc); if (r) { return r; } diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index a16590c6247f..645e33bf7947 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -104,15 +104,9 @@ done_free: int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) { struct pci_dev *pdev = to_pci_dev(dev->dev); - struct radeon_device *rdev; + struct radeon_device *rdev = dev->dev_private; int r, acpi_status; - rdev = kzalloc(sizeof(struct radeon_device), GFP_KERNEL); - if (rdev == NULL) { - return -ENOMEM; - } - dev->dev_private = (void *)rdev; - #ifdef __alpha__ rdev->hose = pdev->sysdata; #endif diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index e0a5af180801..421c83fc70dc 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -39,6 +39,7 @@ #include struct edid; +struct drm_edid; struct radeon_bo; struct radeon_device; @@ -262,8 +263,7 @@ struct radeon_mode_info { /* Output CSC */ struct drm_property *output_csc_property; /* hardcoded DFP edid from BIOS */ - struct edid *bios_hardcoded_edid; - int bios_hardcoded_edid_size; + const struct drm_edid *bios_hardcoded_edid; /* firmware flags */ u16 firmware_flags; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index a955f8a2f7fe..d0e4b43d155c 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -150,7 +150,7 @@ int radeon_bo_create(struct radeon_device *rdev, bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL); if (bo == NULL) return -ENOMEM; - drm_gem_private_object_init(rdev->ddev, &bo->tbo.base, size); + drm_gem_private_object_init(rdev_to_drm(rdev), &bo->tbo.base, size); bo->rdev = rdev; bo->surface_reg = -1; INIT_LIST_HEAD(&bo->list); @@ -256,18 +256,15 @@ struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo) if (bo == NULL) return NULL; - ttm_bo_get(&bo->tbo); + drm_gem_object_get(&bo->tbo.base); return bo; } void radeon_bo_unref(struct radeon_bo **bo) { - struct ttm_buffer_object *tbo; - if ((*bo) == NULL) return; - tbo = &((*bo)->tbo); - ttm_bo_put(tbo); + drm_gem_object_put(&(*bo)->tbo.base); *bo = NULL; } diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 2d9d9f46f243..b4fb7e70320b 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -282,7 +282,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) if (rdev->irq.installed) { i = 0; - drm_for_each_crtc(crtc, rdev->ddev) { + drm_for_each_crtc(crtc, rdev_to_drm(rdev)) { if (rdev->pm.active_crtcs & (1 << i)) { /* This can fail if a modeset is in progress */ if (drm_crtc_vblank_get(crtc) == 0) @@ -299,7 +299,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) if (rdev->irq.installed) { i = 0; - drm_for_each_crtc(crtc, rdev->ddev) { + drm_for_each_crtc(crtc, rdev_to_drm(rdev)) { if (rdev->pm.req_vblank & (1 << i)) { rdev->pm.req_vblank &= ~(1 << i); drm_crtc_vblank_put(crtc); @@ -671,7 +671,7 @@ static ssize_t radeon_hwmon_show_temp(struct device *dev, char *buf) { struct radeon_device *rdev = dev_get_drvdata(dev); - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); int temp; /* Can't get temperature when the card is off */ @@ -715,7 +715,7 @@ static ssize_t radeon_hwmon_show_sclk(struct device *dev, struct device_attribute *attr, char *buf) { struct radeon_device *rdev = dev_get_drvdata(dev); - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); u32 sclk = 0; /* Can't get clock frequency when the card is off */ @@ -740,7 +740,7 @@ static ssize_t radeon_hwmon_show_vddc(struct device *dev, struct device_attribute *attr, char *buf) { struct radeon_device *rdev = dev_get_drvdata(dev); - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); u16 vddc = 0; /* Can't get vddc when the card is off */ @@ -1692,7 +1692,7 @@ void radeon_pm_fini(struct radeon_device *rdev) static void radeon_pm_compute_clocks_old(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; @@ -1765,7 +1765,7 @@ static void radeon_pm_compute_clocks_old(struct radeon_device *rdev) static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; struct radeon_connector *radeon_connector; @@ -1826,7 +1826,7 @@ static bool radeon_pm_in_vbl(struct radeon_device *rdev) */ for (crtc = 0; (crtc < rdev->num_crtc) && in_vbl; crtc++) { if (rdev->pm.active_crtcs & (1 << crtc)) { - vbl_status = radeon_get_crtc_scanoutpos(rdev->ddev, + vbl_status = radeon_get_crtc_scanoutpos(rdev_to_drm(rdev), crtc, USE_REAL_VBLANKSTART, &vpos, &hpos, NULL, NULL, @@ -1918,7 +1918,7 @@ static void radeon_dynpm_idle_work_handler(struct work_struct *work) static int radeon_debugfs_pm_info_show(struct seq_file *m, void *unused) { struct radeon_device *rdev = m->private; - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); if ((rdev->flags & RADEON_IS_PX) && (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) { @@ -1955,7 +1955,7 @@ DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_pm_info); static void radeon_debugfs_pm_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_pm_info", 0444, root, rdev, &radeon_debugfs_pm_info_fops); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 8d1d458286a8..581ae20c46e4 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -550,7 +550,7 @@ static void radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_r { #if defined(CONFIG_DEBUG_FS) const char *ring_name = radeon_debugfs_ring_idx_to_name(ring->idx); - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; if (ring_name) debugfs_create_file(ring_name, 0444, root, ring, diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 5c65b6dfb99a..69d0c12fa419 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -682,8 +682,8 @@ int radeon_ttm_init(struct radeon_device *rdev) /* No others user of address space so set it to 0 */ r = ttm_device_init(&rdev->mman.bdev, &radeon_bo_driver, rdev->dev, - rdev->ddev->anon_inode->i_mapping, - rdev->ddev->vma_offset_manager, + rdev_to_drm(rdev)->anon_inode->i_mapping, + rdev_to_drm(rdev)->vma_offset_manager, rdev->need_swiotlb, dma_addressing_limited(&rdev->pdev->dev)); if (r) { @@ -890,7 +890,7 @@ static const struct file_operations radeon_ttm_gtt_fops = { static void radeon_ttm_debugfs_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct drm_minor *minor = rdev->ddev->primary; + struct drm_minor *minor = rdev_to_drm(rdev)->primary; struct dentry *root = minor->debugfs_root; debugfs_create_file("radeon_vram", 0444, root, rdev, diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index d4d1501e6576..d6c18fd740ec 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -379,7 +379,7 @@ DEFINE_SHOW_ATTRIBUTE(rs400_debugfs_gart_info); static void rs400_debugfs_pcie_gart_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("rs400_gart_info", 0444, root, rdev, &rs400_debugfs_gart_info_fops); @@ -474,7 +474,7 @@ int rs400_resume(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* post */ - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); /* Resume clock after posting */ r300_clock_startup(rdev); /* Initialize surface registers */ @@ -552,7 +552,7 @@ int rs400_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize memory controller */ rs400_mc_init(rdev); /* Fence driver */ diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 5c162778899b..88c8e91ea651 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -321,7 +321,7 @@ void rs600_pm_misc(struct radeon_device *rdev) void rs600_pm_prepare(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -339,7 +339,7 @@ void rs600_pm_prepare(struct radeon_device *rdev) void rs600_pm_finish(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -408,7 +408,7 @@ void rs600_hpd_set_polarity(struct radeon_device *rdev, void rs600_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enable = 0; @@ -435,7 +435,7 @@ void rs600_hpd_init(struct radeon_device *rdev) void rs600_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disable = 0; @@ -797,7 +797,7 @@ int rs600_irq_process(struct radeon_device *rdev) /* Vertical blank interrupts */ if (G_007EDC_LB_D1_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -806,7 +806,7 @@ int rs600_irq_process(struct radeon_device *rdev) } if (G_007EDC_LB_D2_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -1133,7 +1133,7 @@ int rs600_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize memory controller */ rs600_mc_init(rdev); r100_debugfs_rbbm_init(rdev); diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index 14fb0819b8c1..016eb4992803 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -845,7 +845,7 @@ int rs690_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize memory controller */ rs690_mc_init(rdev); rv515_debugfs(rdev); diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index bbc6ccabf788..1b4dfb645585 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -255,7 +255,7 @@ DEFINE_SHOW_ATTRIBUTE(rv515_debugfs_ga_info); void rv515_debugfs(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("rv515_pipes_info", 0444, root, rdev, &rv515_debugfs_pipes_info_fops); @@ -636,7 +636,7 @@ int rv515_init(struct radeon_device *rdev) if (radeon_boot_test_post_card(rdev) == false) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 9ce12fa3c356..7d4b0bf59109 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1935,7 +1935,7 @@ int rv770_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); /* initialize AGP */ diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 15759c8ca5b7..6c95575ce109 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6277,7 +6277,7 @@ restart_ih: event_name = "vblank"; if (rdev->irq.crtc_vblank_int[crtc_idx]) { - drm_handle_vblank(rdev->ddev, crtc_idx); + drm_handle_vblank(rdev_to_drm(rdev), crtc_idx); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -6839,7 +6839,7 @@ int si_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 285a36601dc9..71a7ce5f2d4c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -42,9 +42,10 @@ * - 1.14 - Update kfd_event_data * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl * - 1.16 - Add contiguous VRAM allocation flag + * - 1.17 - Add SDMA queue creation with target SDMA engine ID */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 16 +#define KFD_IOCTL_MINOR_VERSION 17 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -56,6 +57,7 @@ struct kfd_ioctl_get_version_args { #define KFD_IOC_QUEUE_TYPE_SDMA 0x1 #define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 #define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 +#define KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID 0x4 #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 @@ -78,6 +80,8 @@ struct kfd_ioctl_create_queue_args { __u64 ctx_save_restore_address; /* to KFD */ __u32 ctx_save_restore_size; /* to KFD */ __u32 ctl_stack_size; /* to KFD */ + __u32 sdma_engine_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_destroy_queue_args {