mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-30 07:34:12 +08:00
habanalabs: send device active message to f/w
As part of the RAS that is done by the f/w, we should send a message to the f/w when a user either acquires or releases the device. Signed-off-by: farah kassabri <fkassabri@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
0855bf8b17
commit
4745b2f0d0
@ -470,6 +470,8 @@ static int hl_device_release(struct inode *inode, struct file *filp)
|
|||||||
hdev->last_open_session_duration_jif =
|
hdev->last_open_session_duration_jif =
|
||||||
jiffies - hdev->last_successful_open_jif;
|
jiffies - hdev->last_successful_open_jif;
|
||||||
|
|
||||||
|
hdev->asic_funcs->send_device_activity(hdev, false);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -454,6 +454,21 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
|
|||||||
size);
|
size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int hl_fw_send_device_activity(struct hl_device *hdev, bool open)
|
||||||
|
{
|
||||||
|
struct cpucp_packet pkt;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
memset(&pkt, 0, sizeof(pkt));
|
||||||
|
pkt.ctl = cpu_to_le32(CPUCP_PACKET_ACTIVE_STATUS_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||||
|
pkt.value = cpu_to_le64(open);
|
||||||
|
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
|
||||||
|
if (rc)
|
||||||
|
dev_err(hdev->dev, "failed to send device activity msg(%u)\n", open);
|
||||||
|
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
int hl_fw_send_heartbeat(struct hl_device *hdev)
|
int hl_fw_send_heartbeat(struct hl_device *hdev)
|
||||||
{
|
{
|
||||||
struct cpucp_packet hb_pkt;
|
struct cpucp_packet hb_pkt;
|
||||||
|
@ -1528,6 +1528,7 @@ struct engines_data {
|
|||||||
* @access_dev_mem: access device memory
|
* @access_dev_mem: access device memory
|
||||||
* @set_dram_bar_base: set the base of the DRAM BAR
|
* @set_dram_bar_base: set the base of the DRAM BAR
|
||||||
* @set_engine_cores: set a config command to engine cores
|
* @set_engine_cores: set a config command to engine cores
|
||||||
|
* @send_device_activity: indication to FW about device availability
|
||||||
*/
|
*/
|
||||||
struct hl_asic_funcs {
|
struct hl_asic_funcs {
|
||||||
int (*early_init)(struct hl_device *hdev);
|
int (*early_init)(struct hl_device *hdev);
|
||||||
@ -1664,6 +1665,7 @@ struct hl_asic_funcs {
|
|||||||
u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
|
u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
|
||||||
int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids,
|
int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids,
|
||||||
u32 num_cores, u32 core_command);
|
u32 num_cores, u32 core_command);
|
||||||
|
int (*send_device_activity)(struct hl_device *hdev, bool open);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -3715,6 +3717,7 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev,
|
|||||||
struct cpucp_hbm_row_info *info);
|
struct cpucp_hbm_row_info *info);
|
||||||
int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num);
|
int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num);
|
||||||
int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid);
|
int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid);
|
||||||
|
int hl_fw_send_device_activity(struct hl_device *hdev, bool open);
|
||||||
int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
|
int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
|
||||||
bool is_wc[3]);
|
bool is_wc[3]);
|
||||||
int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
|
int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
|
||||||
|
@ -204,6 +204,8 @@ int hl_device_open(struct inode *inode, struct file *filp)
|
|||||||
goto out_err;
|
goto out_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rc = hdev->asic_funcs->send_device_activity(hdev, true);
|
||||||
|
|
||||||
list_add(&hpriv->dev_node, &hdev->fpriv_list);
|
list_add(&hpriv->dev_node, &hdev->fpriv_list);
|
||||||
mutex_unlock(&hdev->fpriv_list_lock);
|
mutex_unlock(&hdev->fpriv_list_lock);
|
||||||
|
|
||||||
|
@ -9132,6 +9132,11 @@ static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group
|
|||||||
dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
|
dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * gaudi_send_device_activity - Gaudi handler for the send_device_activity
 * ASIC callback.
 *
 * @hdev: pointer to the habanalabs device structure (unused).
 * @open: device acquired/released indication (unused).
 *
 * No message is sent to the firmware here; the function always returns 0.
 */
static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
|
||||||
|
|
||||||
static const struct hl_asic_funcs gaudi_funcs = {
|
static const struct hl_asic_funcs gaudi_funcs = {
|
||||||
.early_init = gaudi_early_init,
|
.early_init = gaudi_early_init,
|
||||||
.early_fini = gaudi_early_fini,
|
.early_fini = gaudi_early_fini,
|
||||||
@ -9224,6 +9229,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
|
|||||||
.mmu_get_real_page_size = hl_mmu_get_real_page_size,
|
.mmu_get_real_page_size = hl_mmu_get_real_page_size,
|
||||||
.access_dev_mem = hl_access_dev_mem,
|
.access_dev_mem = hl_access_dev_mem,
|
||||||
.set_dram_bar_base = gaudi_set_hbm_bar_base,
|
.set_dram_bar_base = gaudi_set_hbm_bar_base,
|
||||||
|
.send_device_activity = gaudi_send_device_activity,
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -10031,6 +10031,17 @@ static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
|
|||||||
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
|
||||||
|
{
|
||||||
|
struct gaudi2_device *gaudi2 = hdev->asic_specific;
|
||||||
|
|
||||||
|
if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) || hdev->fw_major_version < 37)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* TODO: add check for FW version using minor ver once it's known */
|
||||||
|
return hl_fw_send_device_activity(hdev, open);
|
||||||
|
}
|
||||||
|
|
||||||
static const struct hl_asic_funcs gaudi2_funcs = {
|
static const struct hl_asic_funcs gaudi2_funcs = {
|
||||||
.early_init = gaudi2_early_init,
|
.early_init = gaudi2_early_init,
|
||||||
.early_fini = gaudi2_early_fini,
|
.early_fini = gaudi2_early_fini,
|
||||||
@ -10127,6 +10138,7 @@ static const struct hl_asic_funcs gaudi2_funcs = {
|
|||||||
.access_dev_mem = hl_access_dev_mem,
|
.access_dev_mem = hl_access_dev_mem,
|
||||||
.set_dram_bar_base = gaudi2_set_hbm_bar_base,
|
.set_dram_bar_base = gaudi2_set_hbm_bar_base,
|
||||||
.set_engine_cores = gaudi2_set_engine_cores,
|
.set_engine_cores = gaudi2_set_engine_cores,
|
||||||
|
.send_device_activity = gaudi2_send_device_activity,
|
||||||
};
|
};
|
||||||
|
|
||||||
void gaudi2_set_asic_funcs(struct hl_device *hdev)
|
void gaudi2_set_asic_funcs(struct hl_device *hdev)
|
||||||
|
@ -553,5 +553,6 @@ void gaudi2_pb_print_security_errors(struct hl_device *hdev, u32 block_addr, u32
|
|||||||
u32 offended_addr);
|
u32 offended_addr);
|
||||||
int gaudi2_init_security(struct hl_device *hdev);
|
int gaudi2_init_security(struct hl_device *hdev);
|
||||||
void gaudi2_ack_protection_bits_errors(struct hl_device *hdev);
|
void gaudi2_ack_protection_bits_errors(struct hl_device *hdev);
|
||||||
|
int gaudi2_send_device_activity(struct hl_device *hdev, bool open);
|
||||||
|
|
||||||
#endif /* GAUDI2P_H_ */
|
#endif /* GAUDI2P_H_ */
|
||||||
|
@ -5420,6 +5420,11 @@ static int goya_scrub_device_dram(struct hl_device *hdev, u64 val)
|
|||||||
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * goya_send_device_activity - Goya handler for the send_device_activity
 * ASIC callback.
 *
 * @hdev: pointer to the habanalabs device structure (unused).
 * @open: device acquired/released indication (unused).
 *
 * No message is sent to the firmware here; the function always returns 0.
 */
static int goya_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
|
||||||
|
|
||||||
static const struct hl_asic_funcs goya_funcs = {
|
static const struct hl_asic_funcs goya_funcs = {
|
||||||
.early_init = goya_early_init,
|
.early_init = goya_early_init,
|
||||||
.early_fini = goya_early_fini,
|
.early_fini = goya_early_fini,
|
||||||
@ -5512,6 +5517,7 @@ static const struct hl_asic_funcs goya_funcs = {
|
|||||||
.mmu_get_real_page_size = hl_mmu_get_real_page_size,
|
.mmu_get_real_page_size = hl_mmu_get_real_page_size,
|
||||||
.access_dev_mem = hl_access_dev_mem,
|
.access_dev_mem = hl_access_dev_mem,
|
||||||
.set_dram_bar_base = goya_set_ddr_bar_base,
|
.set_dram_bar_base = goya_set_ddr_bar_base,
|
||||||
|
.send_device_activity = goya_send_device_activity,
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -636,6 +636,10 @@ enum pq_init_status {
|
|||||||
* passes the max size it allows the CpuCP to write to the structure, to prevent
|
* passes the max size it allows the CpuCP to write to the structure, to prevent
|
||||||
* data corruption in case of mismatched driver/FW versions.
|
* data corruption in case of mismatched driver/FW versions.
|
||||||
* Relevant only to Gaudi.
|
* Relevant only to Gaudi.
|
||||||
|
*
|
||||||
|
* CPUCP_PACKET_ACTIVE_STATUS_SET -
|
||||||
|
* LKD sends FW indication whether device is free or in use, this indication is reported
|
||||||
|
* also to the BMC.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
enum cpucp_packet_id {
|
enum cpucp_packet_id {
|
||||||
@ -691,6 +695,13 @@ enum cpucp_packet_id {
|
|||||||
CPUCP_PACKET_RESERVED4, /* not used */
|
CPUCP_PACKET_RESERVED4, /* not used */
|
||||||
CPUCP_PACKET_RESERVED5, /* not used */
|
CPUCP_PACKET_RESERVED5, /* not used */
|
||||||
CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */
|
CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */
|
||||||
|
CPUCP_PACKET_RESERVED6, /* not used */
|
||||||
|
CPUCP_PACKET_RESERVED7, /* not used */
|
||||||
|
CPUCP_PACKET_RESERVED8, /* not used */
|
||||||
|
CPUCP_PACKET_RESERVED9, /* not used */
|
||||||
|
CPUCP_PACKET_RESERVED10, /* not used */
|
||||||
|
CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */
|
||||||
|
CPUCP_PACKET_ID_MAX /* must be last */
|
||||||
};
|
};
|
||||||
|
|
||||||
#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5
|
#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5
|
||||||
|
Loading…
Reference in New Issue
Block a user