mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-23 14:24:25 +08:00
accel/habanalabs: fix EQ heartbeat mechanism
Stop rescheduling another heartbeat check when the EQ heartbeat check fails, as doing so generates confusing logs in dmesg stating that the heartbeat failed. Signed-off-by: Farah Kassabri <fkassabri@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
42422993cf
commit
d1958dce5a
@ -1044,20 +1044,21 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
|
||||
return (vendor_id == PCI_VENDOR_ID_HABANALABS);
|
||||
}
|
||||
|
||||
static void hl_device_eq_heartbeat(struct hl_device *hdev)
|
||||
static int hl_device_eq_heartbeat_check(struct hl_device *hdev)
|
||||
{
|
||||
u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
|
||||
if (!prop->cpucp_info.eq_health_check_supported)
|
||||
return;
|
||||
return 0;
|
||||
|
||||
if (hdev->eq_heartbeat_received) {
|
||||
hdev->eq_heartbeat_received = false;
|
||||
} else {
|
||||
dev_err(hdev->dev, "EQ heartbeat event was not received!\n");
|
||||
hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void hl_device_heartbeat(struct work_struct *work)
|
||||
@ -1074,10 +1075,9 @@ static void hl_device_heartbeat(struct work_struct *work)
|
||||
/*
|
||||
* For the EQ health check, we need to check whether the driver received the heartbeat EQ event
|
||||
* in order to validate the eq is working.
|
||||
* Reschedule only if both the EQ is healthy and we managed to send the next heartbeat.
|
||||
*/
|
||||
hl_device_eq_heartbeat(hdev);
|
||||
|
||||
if (!hdev->asic_funcs->send_heartbeat(hdev))
|
||||
if ((!hl_device_eq_heartbeat_check(hdev)) && (!hdev->asic_funcs->send_heartbeat(hdev)))
|
||||
goto reschedule;
|
||||
|
||||
if (hl_device_operational(hdev, NULL))
|
||||
|
Loading…
Reference in New Issue
Block a user