mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-26 05:34:13 +08:00
habanalabs: fail reset if device is not idle
After any reset (soft or hard), the device (i.e. the engines/QMANs) should be idle. If the engines are not idle, fail the reset. If the failed reset was a soft reset, the driver will automatically try a hard reset. If it was a hard reset, the driver will make the device non-operational.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
35862d1c99
commit
23c3efd1fb
@ -71,21 +71,8 @@ static void hpriv_release(struct kref *ref)
|
||||
|
||||
kfree(hpriv);
|
||||
|
||||
if (hdev->reset_upon_device_release) {
|
||||
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
|
||||
|
||||
/* We try soft reset first */
|
||||
if (hdev->reset_upon_device_release)
|
||||
hl_device_reset(hdev, false, false);
|
||||
|
||||
/* If device is not idle perform hard reset */
|
||||
if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
|
||||
HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
|
||||
dev_info(hdev->dev,
|
||||
"device is not idle (mask %#llx %#llx) after soft reset, performing hard reset",
|
||||
idle_mask[0], idle_mask[1]);
|
||||
hl_device_reset(hdev, true, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void hl_hpriv_get(struct hl_fpriv *hpriv)
|
||||
@ -948,6 +935,7 @@ static void device_disable_open_processes(struct hl_device *hdev)
|
||||
int hl_device_reset(struct hl_device *hdev, bool hard_reset,
|
||||
bool from_hard_reset_thread)
|
||||
{
|
||||
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
|
||||
int i, rc;
|
||||
|
||||
if (!hdev->init_done) {
|
||||
@ -1167,6 +1155,16 @@ kill_processes:
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
/* If device is not idle fail the reset process */
|
||||
if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
|
||||
HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
|
||||
dev_err(hdev->dev,
|
||||
"device is not idle (mask %#llx %#llx) after reset\n",
|
||||
idle_mask[0], idle_mask[1]);
|
||||
rc = -EIO;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
/* Check that the communication with the device is working */
|
||||
rc = hdev->asic_funcs->test_queues(hdev);
|
||||
if (rc) {
|
||||
|
Loading…
Reference in New Issue
Block a user