drm/amdkfd: Update SMI throttle event bitmask

Update Arcturus/Aldebaran thermal throttle SMI event path to use
ASIC-independent throttler bits when logging.

Signed-off-by: Graham Sider <Graham.Sider@amd.com>
Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Graham Sider 2021-07-06 17:46:37 -04:00 committed by Alex Deucher
parent e25515e22b
commit 410e302ea5
6 changed files with 15 additions and 11 deletions

View File

@ -332,7 +332,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd);
int kgd2kfd_post_reset(struct kfd_dev *kfd);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask);
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
#else
static inline int kgd2kfd_init(void)
{
@ -391,7 +391,7 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
}
static inline
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
}
#endif

View File

@ -1369,7 +1369,7 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)
WARN_ONCE(count < 0, "Compute profile ref. count error");
}
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
if (kfd && kfd->init_complete)
kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);

View File

@ -205,23 +205,23 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
}
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint32_t throttle_bitmask)
uint64_t throttle_bitmask)
{
struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
/*
* ThermalThrottle msg = throttle_bitmask(8):
* thermal_interrupt_count(16):
* 1 byte event + 1 byte space + 8 byte throttle_bitmask +
* 1 byte event + 1 byte space + 16 byte throttle_bitmask +
* 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
* 1 byte \0 = 29
* 1 byte \0 = 37
*/
char fifo_in[29];
char fifo_in[37];
int len;
if (list_empty(&dev->smi_clients))
return;
len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%llx\n",
len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
atomic64_read(&adev->smu.throttle_int_counter));

View File

@ -26,7 +26,7 @@
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint32_t throttle_bitmask);
uint64_t throttle_bitmask);
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
#endif

View File

@ -2178,7 +2178,9 @@ static void arcturus_log_thermal_throttling_event(struct smu_context *smu)
dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n",
log_buf);
kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status);
kgd2kfd_smi_event_throttle(smu->adev->kfd.dev,
smu_cmn_get_indep_throttler_status(throttler_status,
arcturus_throttler_map));
}
static uint16_t arcturus_get_current_pcie_link_speed(struct smu_context *smu)

View File

@ -1653,7 +1653,9 @@ static void aldebaran_log_thermal_throttling_event(struct smu_context *smu)
dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n",
log_buf);
kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status);
kgd2kfd_smi_event_throttle(smu->adev->kfd.dev,
smu_cmn_get_indep_throttler_status(throttler_status,
aldebaran_throttler_map));
}
static int aldebaran_get_current_pcie_link_speed(struct smu_context *smu)