This tag contains the following fixes for 5.15-rc3:

- Fix potential race when user waiting for interrupt ioctl
 - Prevent possible kernel oops in staged CS ioctl
 - Use direct MSI mechanism in Gaudi as a WA for a H/W issue
   regarding FLR
 - Don't support collective wait ioctl operation when it
   is not supported. e.g. when the NIC ports are disabled
 - Fix configuration of one of the security mechanism.
 - Change error print to be rate-limited as it can be initiated
   by the user and spam the kernel log
 - Fix return value of CS ioctl when doing staged CS
 - Fix CS ioctl code when user doesn't supply an offset for
   the memory area that we use as fence.
 - Spelling mistake fix
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCgAdFiEE7TEboABC71LctBLFZR1NuKta54AFAmFG7bAACgkQZR1NuKta
 54D4vQf/T477hf2MKKIHdEyCUs601smNqK9VcEm5SNRUiqRo3cEyjMPY/OUqINga
 6jcvWGsnxv1COL0EF3TxHfTN12Lzdli2emCk7h2ErADVB5UqZmS7uYEh/20M67nX
 KPDa5fm9Ah0SjKmocq114xqjJvUkOryB8+x2T8BCQXmuS5jBGoe0vuLKtZKcPtCp
 y+ErR6X8srBv+U1Lb2b1u3JAAsgOg2L7BixaBPnWp/hjqNe+RW+WAEqfWUj2AUfl
 yZNKuI9DRcKOjqoSqwCG8CxpwsZthiYs3a1wYTE8GpNXGeXxDnS02sbvPcudEQAc
 seOkIAbsBNgNSYc110BQSVzZErcdtw==
 =Rzaf
 -----END PGP SIGNATURE-----

Merge tag 'misc-habanalabs-fixes-2021-09-19' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-linus

Oded writes:

This tag contains the following fixes for 5.15-rc3:

- Fix potential race when user waiting for interrupt ioctl
- Prevent possible kernel oops in staged CS ioctl
- Use direct MSI mechanism in Gaudi as a WA for a H/W issue
  regarding FLR
- Don't support collective wait ioctl operation when it
  is not supported. e.g. when the NIC ports are disabled
- Fix configuration of one of the security mechanism.
- Change error print to be rate-limited as it can be initiated
  by the user and spam the kernel log
- Fix return value of CS ioctl when doing staged CS
- Fix CS ioctl code when user doesn't supply an offset for
  the memory area that we use as fence.
- Spelling mistake fix

* tag 'misc-habanalabs-fixes-2021-09-19' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux:
  habanalabs: expose a single cs seq in staged submissions
  habanalabs: fix wait offset handling
  habanalabs: rate limit multi CS completion errors
  habanalabs/gaudi: fix LBW RR configuration
  habanalabs: Fix spelling mistake "FEADBACK" -> "FEEDBACK"
  habanalabs: fail collective wait when not supported
  habanalabs/gaudi: use direct MSI in single mode
  habanalabs: fix kernel OOPs related to staged cs
  habanalabs: fix potential race in interrupt wait ioctl
This commit is contained in:
Greg Kroah-Hartman 2021-09-20 09:02:36 +02:00
commit 3e1d5b0f58
5 changed files with 134 additions and 74 deletions

View File

@ -405,7 +405,7 @@ static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
{
bool next_entry_found = false;
struct hl_cs *next;
struct hl_cs *next, *first_cs;
if (!cs_needs_timeout(cs))
return;
@ -415,9 +415,16 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
/* We need to handle tdr only once for the complete staged submission.
* Hence, we choose the CS that reaches this function first which is
* the CS marked as 'staged_last'.
* In case single staged cs was submitted which has both first and last
* indications, then "cs_find_first" below will return NULL, since we
* removed the cs node from the list before getting here,
* in such cases just continue with the cs to cancel it's TDR work.
*/
if (cs->staged_cs && cs->staged_last)
cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
if (cs->staged_cs && cs->staged_last) {
first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
if (first_cs)
cs = first_cs;
}
spin_unlock(&hdev->cs_mirror_lock);
@ -1288,6 +1295,12 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
if (rc)
goto free_cs_object;
/* If this is a staged submission we must return the staged sequence
* rather than the internal CS sequence
*/
if (cs->staged_cs)
*cs_seq = cs->staged_sequence;
/* Validate ALL the CS chunks before submitting the CS */
for (i = 0 ; i < num_chunks ; i++) {
struct hl_cs_chunk *chunk = &cs_chunk_array[i];
@ -1988,6 +2001,15 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
goto free_cs_chunk_array;
}
if (!hdev->nic_ports_mask) {
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev,
"Collective operations not supported when NIC ports are disabled");
rc = -EINVAL;
goto free_cs_chunk_array;
}
collective_engine_id = chunk->collective_engine_id;
}
@ -2026,9 +2048,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
spin_unlock(&ctx->sig_mgr.lock);
if (!handle_found) {
dev_err(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
/* treat as signal CS already finished */
dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
signal_seq);
rc = -EINVAL;
rc = 0;
goto free_cs_chunk_array;
}
@ -2613,7 +2636,8 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
* completed after the poll function.
*/
if (!mcs_data.completion_bitmap) {
dev_err(hdev->dev, "Multi-CS got completion on wait but no CS completed\n");
dev_warn_ratelimited(hdev->dev,
"Multi-CS got completion on wait but no CS completed\n");
rc = -EFAULT;
}
}
@ -2740,10 +2764,20 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
else
interrupt = &hdev->user_interrupt[interrupt_offset];
/* Add pending user interrupt to relevant list for the interrupt
* handler to monitor
*/
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
/* We check for completion value as interrupt could have been received
* before we added the node to the wait list
*/
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
rc = -EFAULT;
goto free_fence;
goto remove_pending_user_interrupt;
}
if (completion_value >= target_value)
@ -2752,14 +2786,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
*status = CS_WAIT_STATUS_BUSY;
if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
goto free_fence;
/* Add pending user interrupt to relevant list for the interrupt
* handler to monitor
*/
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
goto remove_pending_user_interrupt;
wait_again:
/* Wait for interrupt handler to signal completion */
@ -2770,6 +2797,15 @@ wait_again:
* If comparison fails, keep waiting until timeout expires
*/
if (completion_rc > 0) {
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
/* reinit_completion must be called before we check for user
* completion value, otherwise, if interrupt is received after
* the comparison and before the next wait_for_completion,
* we will reach timeout and fail
*/
reinit_completion(&pend->fence.completion);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
rc = -EFAULT;
@ -2780,11 +2816,7 @@ wait_again:
if (completion_value >= target_value) {
*status = CS_WAIT_STATUS_COMPLETED;
} else {
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
reinit_completion(&pend->fence.completion);
timeout = completion_rc;
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
goto wait_again;
}
} else if (completion_rc == -ERESTARTSYS) {
@ -2802,7 +2834,6 @@ remove_pending_user_interrupt:
list_del(&pend->wait_list_node);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
free_fence:
kfree(pend);
hl_ctx_put(ctx);

View File

@ -437,6 +437,7 @@ void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
struct hl_cs_compl *cs_cmpl)
{
struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
u32 offset = 0;
cs_cmpl->hw_sob = handle->hw_sob;
@ -446,9 +447,13 @@ void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
* set offset 1 for example he mean to wait only for the first
* signal only, which will be pre_sob_val, and if he set offset 2
* then the value required is (pre_sob_val + 1) and so on...
* if user set wait offset to 0, then treat it as legacy wait cs,
* wait for the next signal.
*/
cs_cmpl->sob_val = handle->pre_sob_val +
(job->encaps_sig_wait_offset - 1);
if (job->encaps_sig_wait_offset)
offset = job->encaps_sig_wait_offset - 1;
cs_cmpl->sob_val = handle->pre_sob_val + offset;
}
static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,

View File

@ -395,7 +395,7 @@ static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
{ .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
@ -5802,6 +5802,7 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
{
struct gaudi_device *gaudi = hdev->asic_specific;
struct packet_msg_prot *cq_pkt;
u64 msi_addr;
u32 tmp;
cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
@ -5823,10 +5824,12 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
cq_pkt->ctl = cpu_to_le32(tmp);
cq_pkt->value = cpu_to_le32(1);
if (!gaudi->multi_msi_mode)
msi_vec = 0;
if (gaudi->multi_msi_mode)
msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
else
msi_addr = mmPCIE_CORE_MSI_REQ;
cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
}
static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)

View File

@ -8,16 +8,21 @@
#include "gaudiP.h"
#include "../include/gaudi/asic_reg/gaudi_regs.h"
#define GAUDI_NUMBER_OF_RR_REGS 24
#define GAUDI_NUMBER_OF_LBW_RANGES 12
#define GAUDI_NUMBER_OF_LBW_RR_REGS 28
#define GAUDI_NUMBER_OF_HBW_RR_REGS 24
#define GAUDI_NUMBER_OF_LBW_RANGES 10
static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
mmDMA_IF_W_S_SOB_HIT_WPROT,
mmDMA_IF_W_S_DMA0_HIT_WPROT,
mmDMA_IF_W_S_DMA1_HIT_WPROT,
mmDMA_IF_E_S_SOB_HIT_WPROT,
mmDMA_IF_E_S_DMA0_HIT_WPROT,
mmDMA_IF_E_S_DMA1_HIT_WPROT,
mmDMA_IF_W_N_SOB_HIT_WPROT,
mmDMA_IF_W_N_DMA0_HIT_WPROT,
mmDMA_IF_W_N_DMA1_HIT_WPROT,
mmDMA_IF_E_N_SOB_HIT_WPROT,
mmDMA_IF_E_N_DMA0_HIT_WPROT,
mmDMA_IF_E_N_DMA1_HIT_WPROT,
mmSIF_RTR_0_LBW_RANGE_PROT_HIT_AW,
@ -38,13 +43,17 @@ static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_7_LBW_RANGE_PROT_HIT_AW,
};
static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
mmDMA_IF_W_S_SOB_HIT_RPROT,
mmDMA_IF_W_S_DMA0_HIT_RPROT,
mmDMA_IF_W_S_DMA1_HIT_RPROT,
mmDMA_IF_E_S_SOB_HIT_RPROT,
mmDMA_IF_E_S_DMA0_HIT_RPROT,
mmDMA_IF_E_S_DMA1_HIT_RPROT,
mmDMA_IF_W_N_SOB_HIT_RPROT,
mmDMA_IF_W_N_DMA0_HIT_RPROT,
mmDMA_IF_W_N_DMA1_HIT_RPROT,
mmDMA_IF_E_N_SOB_HIT_RPROT,
mmDMA_IF_E_N_DMA0_HIT_RPROT,
mmDMA_IF_E_N_DMA1_HIT_RPROT,
mmSIF_RTR_0_LBW_RANGE_PROT_HIT_AR,
@ -65,13 +74,17 @@ static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_7_LBW_RANGE_PROT_HIT_AR,
};
static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
mmDMA_IF_W_S_SOB_MIN_WPROT_0,
mmDMA_IF_W_S_DMA0_MIN_WPROT_0,
mmDMA_IF_W_S_DMA1_MIN_WPROT_0,
mmDMA_IF_E_S_SOB_MIN_WPROT_0,
mmDMA_IF_E_S_DMA0_MIN_WPROT_0,
mmDMA_IF_E_S_DMA1_MIN_WPROT_0,
mmDMA_IF_W_N_SOB_MIN_WPROT_0,
mmDMA_IF_W_N_DMA0_MIN_WPROT_0,
mmDMA_IF_W_N_DMA1_MIN_WPROT_0,
mmDMA_IF_E_N_SOB_MIN_WPROT_0,
mmDMA_IF_E_N_DMA0_MIN_WPROT_0,
mmDMA_IF_E_N_DMA1_MIN_WPROT_0,
mmSIF_RTR_0_LBW_RANGE_PROT_MIN_AW_0,
@ -92,13 +105,17 @@ static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_7_LBW_RANGE_PROT_MIN_AW_0,
};
static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
mmDMA_IF_W_S_SOB_MAX_WPROT_0,
mmDMA_IF_W_S_DMA0_MAX_WPROT_0,
mmDMA_IF_W_S_DMA1_MAX_WPROT_0,
mmDMA_IF_E_S_SOB_MAX_WPROT_0,
mmDMA_IF_E_S_DMA0_MAX_WPROT_0,
mmDMA_IF_E_S_DMA1_MAX_WPROT_0,
mmDMA_IF_W_N_SOB_MAX_WPROT_0,
mmDMA_IF_W_N_DMA0_MAX_WPROT_0,
mmDMA_IF_W_N_DMA1_MAX_WPROT_0,
mmDMA_IF_E_N_SOB_MAX_WPROT_0,
mmDMA_IF_E_N_DMA0_MAX_WPROT_0,
mmDMA_IF_E_N_DMA1_MAX_WPROT_0,
mmSIF_RTR_0_LBW_RANGE_PROT_MAX_AW_0,
@ -119,13 +136,17 @@ static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_7_LBW_RANGE_PROT_MAX_AW_0,
};
static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
mmDMA_IF_W_S_SOB_MIN_RPROT_0,
mmDMA_IF_W_S_DMA0_MIN_RPROT_0,
mmDMA_IF_W_S_DMA1_MIN_RPROT_0,
mmDMA_IF_E_S_SOB_MIN_RPROT_0,
mmDMA_IF_E_S_DMA0_MIN_RPROT_0,
mmDMA_IF_E_S_DMA1_MIN_RPROT_0,
mmDMA_IF_W_N_SOB_MIN_RPROT_0,
mmDMA_IF_W_N_DMA0_MIN_RPROT_0,
mmDMA_IF_W_N_DMA1_MIN_RPROT_0,
mmDMA_IF_E_N_SOB_MIN_RPROT_0,
mmDMA_IF_E_N_DMA0_MIN_RPROT_0,
mmDMA_IF_E_N_DMA1_MIN_RPROT_0,
mmSIF_RTR_0_LBW_RANGE_PROT_MIN_AR_0,
@ -146,13 +167,17 @@ static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_7_LBW_RANGE_PROT_MIN_AR_0,
};
static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
mmDMA_IF_W_S_SOB_MAX_RPROT_0,
mmDMA_IF_W_S_DMA0_MAX_RPROT_0,
mmDMA_IF_W_S_DMA1_MAX_RPROT_0,
mmDMA_IF_E_S_SOB_MAX_RPROT_0,
mmDMA_IF_E_S_DMA0_MAX_RPROT_0,
mmDMA_IF_E_S_DMA1_MAX_RPROT_0,
mmDMA_IF_W_N_SOB_MAX_RPROT_0,
mmDMA_IF_W_N_DMA0_MAX_RPROT_0,
mmDMA_IF_W_N_DMA1_MAX_RPROT_0,
mmDMA_IF_E_N_SOB_MAX_RPROT_0,
mmDMA_IF_E_N_DMA0_MAX_RPROT_0,
mmDMA_IF_E_N_DMA1_MAX_RPROT_0,
mmSIF_RTR_0_LBW_RANGE_PROT_MAX_AR_0,
@ -173,7 +198,7 @@ static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_7_LBW_RANGE_PROT_MAX_AR_0,
};
static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_HIT_AW,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_HIT_AW,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_HIT_AW,
@ -200,7 +225,7 @@ static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_HIT_AW
};
static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_HIT_AR,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_HIT_AR,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_HIT_AR,
@ -227,7 +252,7 @@ static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_HIT_AR
};
static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AW_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_LOW_AW_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AW_0,
@ -254,7 +279,7 @@ static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_LOW_AW_0
};
static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AW_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_HIGH_AW_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AW_0,
@ -281,7 +306,7 @@ static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_HIGH_AW_0
};
static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AW_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_LOW_AW_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AW_0,
@ -308,7 +333,7 @@ static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_LOW_AW_0
};
static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AW_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_HIGH_AW_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AW_0,
@ -335,7 +360,7 @@ static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_HIGH_AW_0
};
static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AR_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_LOW_AR_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AR_0,
@ -362,7 +387,7 @@ static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_LOW_AR_0
};
static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AR_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_HIGH_AR_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AR_0,
@ -389,7 +414,7 @@ static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_HIGH_AR_0
};
static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AR_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_LOW_AR_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AR_0,
@ -416,7 +441,7 @@ static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_LOW_AR_0
};
static u64 gaudi_rr_hbw_mask_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_mask_high_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AR_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_HIGH_AR_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AR_0,
@ -12849,50 +12874,44 @@ static void gaudi_init_range_registers_lbw(struct hl_device *hdev)
u32 lbw_rng_end[GAUDI_NUMBER_OF_LBW_RANGES];
int i, j;
lbw_rng_start[0] = (0xFBFE0000 & 0x3FFFFFF) - 1;
lbw_rng_end[0] = (0xFBFFF000 & 0x3FFFFFF) + 1;
lbw_rng_start[0] = (0xFC0E8000 & 0x3FFFFFF) - 1; /* 0x000E7FFF */
lbw_rng_end[0] = (0xFC11FFFF & 0x3FFFFFF) + 1; /* 0x00120000 */
lbw_rng_start[1] = (0xFC0E8000 & 0x3FFFFFF) - 1;
lbw_rng_end[1] = (0xFC120000 & 0x3FFFFFF) + 1;
lbw_rng_start[1] = (0xFC1E8000 & 0x3FFFFFF) - 1; /* 0x001E7FFF */
lbw_rng_end[1] = (0xFC48FFFF & 0x3FFFFFF) + 1; /* 0x00490000 */
lbw_rng_start[2] = (0xFC1E8000 & 0x3FFFFFF) - 1;
lbw_rng_end[2] = (0xFC48FFFF & 0x3FFFFFF) + 1;
lbw_rng_start[2] = (0xFC600000 & 0x3FFFFFF) - 1; /* 0x005FFFFF */
lbw_rng_end[2] = (0xFCC48FFF & 0x3FFFFFF) + 1; /* 0x00C49000 */
lbw_rng_start[3] = (0xFC600000 & 0x3FFFFFF) - 1;
lbw_rng_end[3] = (0xFCC48FFF & 0x3FFFFFF) + 1;
lbw_rng_start[3] = (0xFCC4A000 & 0x3FFFFFF) - 1; /* 0x00C49FFF */
lbw_rng_end[3] = (0xFCCDFFFF & 0x3FFFFFF) + 1; /* 0x00CE0000 */
lbw_rng_start[4] = (0xFCC4A000 & 0x3FFFFFF) - 1;
lbw_rng_end[4] = (0xFCCDFFFF & 0x3FFFFFF) + 1;
lbw_rng_start[4] = (0xFCCE4000 & 0x3FFFFFF) - 1; /* 0x00CE3FFF */
lbw_rng_end[4] = (0xFCD1FFFF & 0x3FFFFFF) + 1; /* 0x00D20000 */
lbw_rng_start[5] = (0xFCCE4000 & 0x3FFFFFF) - 1;
lbw_rng_end[5] = (0xFCD1FFFF & 0x3FFFFFF) + 1;
lbw_rng_start[5] = (0xFCD24000 & 0x3FFFFFF) - 1; /* 0x00D23FFF */
lbw_rng_end[5] = (0xFCD5FFFF & 0x3FFFFFF) + 1; /* 0x00D60000 */
lbw_rng_start[6] = (0xFCD24000 & 0x3FFFFFF) - 1;
lbw_rng_end[6] = (0xFCD5FFFF & 0x3FFFFFF) + 1;
lbw_rng_start[6] = (0xFCD64000 & 0x3FFFFFF) - 1; /* 0x00D63FFF */
lbw_rng_end[6] = (0xFCD9FFFF & 0x3FFFFFF) + 1; /* 0x00DA0000 */
lbw_rng_start[7] = (0xFCD64000 & 0x3FFFFFF) - 1;
lbw_rng_end[7] = (0xFCD9FFFF & 0x3FFFFFF) + 1;
lbw_rng_start[7] = (0xFCDA4000 & 0x3FFFFFF) - 1; /* 0x00DA3FFF */
lbw_rng_end[7] = (0xFCDDFFFF & 0x3FFFFFF) + 1; /* 0x00DE0000 */
lbw_rng_start[8] = (0xFCDA4000 & 0x3FFFFFF) - 1;
lbw_rng_end[8] = (0xFCDDFFFF & 0x3FFFFFF) + 1;
lbw_rng_start[8] = (0xFCDE4000 & 0x3FFFFFF) - 1; /* 0x00DE3FFF */
lbw_rng_end[8] = (0xFCE05FFF & 0x3FFFFFF) + 1; /* 0x00E06000 */
lbw_rng_start[9] = (0xFCDE4000 & 0x3FFFFFF) - 1;
lbw_rng_end[9] = (0xFCE05FFF & 0x3FFFFFF) + 1;
lbw_rng_start[9] = (0xFCFC9000 & 0x3FFFFFF) - 1; /* 0x00FC8FFF */
lbw_rng_end[9] = (0xFFFFFFFE & 0x3FFFFFF) + 1; /* 0x03FFFFFF */
lbw_rng_start[10] = (0xFEC43000 & 0x3FFFFFF) - 1;
lbw_rng_end[10] = (0xFEC43FFF & 0x3FFFFFF) + 1;
lbw_rng_start[11] = (0xFE484000 & 0x3FFFFFF) - 1;
lbw_rng_end[11] = (0xFE484FFF & 0x3FFFFFF) + 1;
for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) {
for (i = 0 ; i < GAUDI_NUMBER_OF_LBW_RR_REGS ; i++) {
WREG32(gaudi_rr_lbw_hit_aw_regs[i],
(1 << GAUDI_NUMBER_OF_LBW_RANGES) - 1);
WREG32(gaudi_rr_lbw_hit_ar_regs[i],
(1 << GAUDI_NUMBER_OF_LBW_RANGES) - 1);
}
for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++)
for (i = 0 ; i < GAUDI_NUMBER_OF_LBW_RR_REGS ; i++)
for (j = 0 ; j < GAUDI_NUMBER_OF_LBW_RANGES ; j++) {
WREG32(gaudi_rr_lbw_min_aw_regs[i] + (j << 2),
lbw_rng_start[j]);
@ -12939,12 +12958,12 @@ static void gaudi_init_range_registers_hbw(struct hl_device *hdev)
* 6th range is the host
*/
for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) {
for (i = 0 ; i < GAUDI_NUMBER_OF_HBW_RR_REGS ; i++) {
WREG32(gaudi_rr_hbw_hit_aw_regs[i], 0x1F);
WREG32(gaudi_rr_hbw_hit_ar_regs[i], 0x1D);
}
for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) {
for (i = 0 ; i < GAUDI_NUMBER_OF_HBW_RR_REGS ; i++) {
WREG32(gaudi_rr_hbw_base_low_aw_regs[i], dram_addr_lo);
WREG32(gaudi_rr_hbw_base_low_ar_regs[i], dram_addr_lo);

View File

@ -308,6 +308,8 @@
#define mmPCIE_AUX_FLR_CTRL 0xC07394
#define mmPCIE_AUX_DBI 0xC07490
#define mmPCIE_CORE_MSI_REQ 0xC04100
#define mmPSOC_PCI_PLL_NR 0xC72100
#define mmSRAM_W_PLL_NR 0x4C8100
#define mmPSOC_HBM_PLL_NR 0xC74100