

Merge tag 'misc-habanalabs-next-2021-06-22' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

This tag contains habanalabs driver changes for v5.14:

- Change communication protocol with f/w. The new protocol allows better
  backward compatibility between different f/w versions and is more
  stable.
- Send hard-reset cause to f/w after a hard-reset has happened.
- Move to indirection when generating interrupts to f/w.
- Better progress and error messages during the f/w load stage.
- Recognize whether the f/w has security enabled, according to the device ID.
- Add validity check to event queue mechanism.
- Add new event from f/w that will indicate a daemon has been terminated
  inside the f/w.

- Move to TLB cache range invalidation in the device's MMU.
- Disable memory scrubbing by default for performance.

- Many fixes for sparse/smatch reported errors.
- Enable by default stop-on-err in the ASIC.
- Move to ASYNC device probing to speed up driver loading on servers
  with multiple devices.
- Fix to stop using disabled NIC ports when doing a collective operation.
- Use standard error codes instead of positive values.
- Add support for resetting device after user has finished using it.
- Add debugfs option to avoid reset when a CS gets stuck (see the usage
  sketch after this list).
- Add print of the last 8 CS pointers in case of error in QMANs.
- Add statistics on opening of the FD of a device.
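
The skip_reset_on_timeout knob mentioned above is exposed through debugfs
(see the ABI entry further below). A minimal user-space sketch of toggling
it; the "hl0" device index and the bare-bones error handling are
illustrative assumptions, not part of this merge:

/* Hedged sketch: enable skip-reset-on-timeout for an assumed device hl0.
 * The path comes from the new ABI entry; the attribute is created with
 * debugfs_create_x8(), so writing "1" enables it and "0" disables it.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/habanalabs/hl0/skip_reset_on_timeout";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "1", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}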

* tag 'misc-habanalabs-next-2021-06-22' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (72 commits)
  habanalabs/gaudi: refactor hard-reset related code
  habanalabs/gaudi: add support for NIC DERR
  habanalabs: add validity check for signal cs
  habanalabs: get lower/upper 32 bits via masking
  habanalabs: allow reset upon device release
  debugfs: add skip_reset_on_timeout option
  habanalabs: fix typo
  habanalabs/gaudi: correct driver events numbering
  habanalabs: remove a rogue #ifdef
  habanalabs/gaudi: print last QM PQEs on error
  habanalabs/goya: add '__force' attribute to suppress false alarm
  habanalabs: added open_stats info ioctl
  habanalabs/gaudi: set the correct rc in case of err
  habanalabs/gaudi: update coresight configuration
  habanalabs: remove node from list before freeing the node
  habanalabs: set rc as 'valid' in case of intentional func exit
  habanalabs: zero complex structures using memset
  habanalabs: print more info when failing to pin user memory
  habanalabs: Fix an error handling path in 'hl_pci_probe()'
  habanalabs: print firmware versions
  ...
Commit 8254ee0e0a by Greg Kroah-Hartman, 2021-06-22 12:37:19 +02:00
30 changed files with 3226 additions and 895 deletions


@ -207,6 +207,14 @@ Contact: ogabbay@kernel.org
Description: Sets the PCI power state. Valid values are "1" for D0 and "2"
for D3Hot
What: /sys/kernel/debug/habanalabs/hl<n>/skip_reset_on_timeout
Date: Jun 2021
KernelVersion: 5.13
Contact: ynudelman@habana.ai
Description: Sets the skip reset on timeout option for the device. Value of
"0" means device will be reset in case some CS has timed out,
otherwise it will not be reset.
What: /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date: Mar 2020
KernelVersion: 5.6


@ -556,6 +556,13 @@ out:
else if (!cs->submitted)
cs->fence->error = -EBUSY;
if (unlikely(cs->skip_reset_on_timeout)) {
dev_err(hdev->dev,
"Command submission %llu completed after %llu (s)\n",
cs->sequence,
div_u64(jiffies - cs->submission_time_jiffies, HZ));
}
if (cs->timestamp)
cs->fence->timestamp = ktime_get();
complete_all(&cs->fence->completion);
@ -571,6 +578,8 @@ static void cs_timedout(struct work_struct *work)
int rc;
struct hl_cs *cs = container_of(work, struct hl_cs,
work_tdr.work);
bool skip_reset_on_timeout = cs->skip_reset_on_timeout;
rc = cs_get_unless_zero(cs);
if (!rc)
return;
@ -581,7 +590,8 @@ static void cs_timedout(struct work_struct *work)
}
/* Mark the CS as timed out so we won't try to cancel its TDR */
cs->timedout = true;
if (likely(!skip_reset_on_timeout))
cs->timedout = true;
hdev = cs->ctx->hdev;
@ -613,10 +623,12 @@ static void cs_timedout(struct work_struct *work)
cs_put(cs);
if (hdev->reset_on_lockup)
hl_device_reset(hdev, 0);
else
hdev->needs_reset = true;
if (likely(!skip_reset_on_timeout)) {
if (hdev->reset_on_lockup)
hl_device_reset(hdev, HL_RESET_TDR);
else
hdev->needs_reset = true;
}
}
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
@ -650,6 +662,10 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cs->type = cs_type;
cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
cs->timeout_jiffies = timeout;
cs->skip_reset_on_timeout =
hdev->skip_reset_on_timeout ||
!!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
cs->submission_time_jiffies = jiffies;
INIT_LIST_HEAD(&cs->job_list);
INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
kref_init(&cs->refcount);
@ -1481,6 +1497,61 @@ out:
return rc;
}
/*
* hl_cs_signal_sob_wraparound_handler: handle SOB value wraparound case.
* if the SOB value reaches the max value move to the other SOB reserved
* to the queue.
* Note that this function must be called while hw_queues_lock is taken.
*/
int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
struct hl_hw_sob **hw_sob, u32 count)
{
struct hl_sync_stream_properties *prop;
struct hl_hw_sob *sob = *hw_sob, *other_sob;
u8 other_sob_offset;
prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
kref_get(&sob->kref);
/* check for wraparound */
if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
/*
* Decrement as we reached the max value.
* The release function won't be called here as we've
* just incremented the refcount right before calling this
* function.
*/
kref_put(&sob->kref, hl_sob_reset_error);
/*
* check the other sob value; if it is still in use then fail,
* otherwise make the switch
*/
other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
other_sob = &prop->hw_sob[other_sob_offset];
if (kref_read(&other_sob->kref) != 1) {
dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
q_idx);
return -EINVAL;
}
prop->next_sob_val = 1;
/* only two SOBs are currently in use */
prop->curr_sob_offset = other_sob_offset;
*hw_sob = other_sob;
dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
prop->curr_sob_offset, q_idx);
} else {
prop->next_sob_val += count;
}
return 0;
}
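
A hedged caller sketch (not part of this diff): the handler is meant to run
with the queues lock held, using the per-ASIC lock helpers this driver
dispatches through asic_funcs:

/* fragment only: hdev, q_idx, hw_sob and rc are assumed to be in scope */
hdev->asic_funcs->hw_queues_lock(hdev);
rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1);
hdev->asic_funcs->hw_queues_unlock(hdev);
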
static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
{


@ -12,7 +12,6 @@
static void hl_ctx_fini(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
int i;
/* Release all allocated pending cb's, those cb's were never
@ -57,14 +56,6 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
/* Scrub both SRAM and DRAM */
hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
if ((!hdev->pldm) && (hdev->pdev) &&
(!hdev->asic_funcs->is_device_idle(hdev,
idle_mask,
HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)))
dev_notice(hdev->dev,
"device not idle after user context is closed (0x%llx, 0x%llx)\n",
idle_mask[0], idle_mask[1]);
} else {
dev_dbg(hdev->dev, "closing kernel context\n");
hdev->asic_funcs->ctx_fini(ctx);


@ -1278,6 +1278,11 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry->root,
&dev_entry->blob_desc);
debugfs_create_x8("skip_reset_on_timeout",
0644,
dev_entry->root,
&hdev->skip_reset_on_timeout);
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
debugfs_create_file(hl_debugfs_list[i].name,
0444,


@ -51,6 +51,8 @@ bool hl_device_operational(struct hl_device *hdev,
static void hpriv_release(struct kref *ref)
{
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
bool device_is_idle = true;
struct hl_fpriv *hpriv;
struct hl_device *hdev;
@ -71,8 +73,20 @@ static void hpriv_release(struct kref *ref)
kfree(hpriv);
if (hdev->reset_upon_device_release)
hl_device_reset(hdev, 0);
if ((!hdev->pldm) && (hdev->pdev) &&
(!hdev->asic_funcs->is_device_idle(hdev,
idle_mask,
HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL))) {
dev_err(hdev->dev,
"device not idle after user context is closed (0x%llx_%llx)\n",
idle_mask[1], idle_mask[0]);
device_is_idle = false;
}
if ((hdev->reset_if_device_not_idle && !device_is_idle)
|| hdev->reset_upon_device_release)
hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE);
}
void hl_hpriv_get(struct hl_fpriv *hpriv)
@ -118,6 +132,9 @@ static int hl_device_release(struct inode *inode, struct file *filp)
dev_warn(hdev->dev,
"Device is still in use because there are live CS and/or memory mappings\n");
hdev->last_open_session_duration_jif =
jiffies - hdev->last_successful_open_jif;
return 0;
}
@ -868,7 +885,7 @@ static void device_disable_open_processes(struct hl_device *hdev)
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
bool hard_reset, from_hard_reset_thread;
bool hard_reset, from_hard_reset_thread, hard_instead_soft = false;
int i, rc;
if (!hdev->init_done) {
@ -880,11 +897,28 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
hard_reset = (flags & HL_RESET_HARD) != 0;
from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0;
if ((!hard_reset) && (!hdev->supports_soft_reset)) {
dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
if (!hard_reset && !hdev->supports_soft_reset) {
hard_instead_soft = true;
hard_reset = true;
}
if (hdev->reset_upon_device_release &&
(flags & HL_RESET_DEVICE_RELEASE)) {
dev_dbg(hdev->dev,
"Perform %s-reset upon device release\n",
hard_reset ? "hard" : "soft");
goto do_reset;
}
if (!hard_reset && !hdev->allow_external_soft_reset) {
hard_instead_soft = true;
hard_reset = true;
}
if (hard_instead_soft)
dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
do_reset:
/* Re-entry of reset thread */
if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
goto kill_processes;
@ -900,6 +934,19 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
if (rc)
return 0;
/*
* 'reset cause' is being updated here, because getting here
* means that it's the 1st time and the last time we're here
* ('in_reset' makes sure of it). This makes sure that
* 'reset_cause' will continue holding its 1st recorded reason!
*/
if (flags & HL_RESET_HEARTBEAT)
hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
else if (flags & HL_RESET_TDR)
hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
else
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
/*
* if reset is due to heartbeat, device CPU is not responsive,
* in which case there is no point in sending a PCI disable message to it
@ -943,9 +990,8 @@ again:
hdev->process_kill_trial_cnt = 0;
/*
* Because the reset function can't run from interrupt or
* from heartbeat work, we need to call the reset function
* from a dedicated work
* Because the reset function can't run from heartbeat work,
* we need to call the reset function from a dedicated work.
*/
queue_delayed_work(hdev->device_reset_work.wq,
&hdev->device_reset_work.reset_work, 0);
@ -1096,8 +1142,8 @@ kill_processes:
if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
dev_err(hdev->dev,
"device is not idle (mask %#llx %#llx) after reset\n",
idle_mask[0], idle_mask[1]);
"device is not idle (mask 0x%llx_%llx) after reset\n",
idle_mask[1], idle_mask[0]);
rc = -EIO;
goto out_err;
}
@ -1334,8 +1380,9 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
}
/*
* From this point, in case of an error, add char devices and create
* sysfs nodes as part of the error flow, to allow debugging.
* From this point, override rc (=0) in case of an error to allow
* debugging (by adding char devices and create sysfs nodes as part of
* the error flow).
*/
add_cdev_sysfs_on_err = true;
@ -1369,7 +1416,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
hdev->asic_name,
hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
hdev->asic_prop.dram_size / SZ_1G);
rc = hl_vm_init(hdev);
if (rc) {
@ -1475,6 +1522,7 @@ out_disabled:
void hl_device_fini(struct hl_device *hdev)
{
ktime_t timeout;
u64 reset_sec;
int i, rc;
dev_info(hdev->dev, "Removing device\n");
@ -1482,6 +1530,11 @@ void hl_device_fini(struct hl_device *hdev)
hdev->device_fini_pending = 1;
flush_delayed_work(&hdev->device_reset_work.reset_work);
if (hdev->pldm)
reset_sec = HL_PLDM_HARD_RESET_MAX_TIMEOUT;
else
reset_sec = HL_HARD_RESET_MAX_TIMEOUT;
/*
* This function is competing with the reset function, so try to
* take the reset atomic and if we are already in middle of reset,
@ -1490,8 +1543,7 @@ void hl_device_fini(struct hl_device *hdev)
* ports, the hard reset could take between 10-30 seconds
*/
timeout = ktime_add_us(ktime_get(),
HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000);
timeout = ktime_add_us(ktime_get(), reset_sec * 1000 * 1000);
rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
while (rc) {
usleep_range(50, 200);

File diff suppressed because it is too large


@ -48,6 +48,7 @@
#define HL_PENDING_RESET_LONG_SEC 60
#define HL_HARD_RESET_MAX_TIMEOUT 120
#define HL_PLDM_HARD_RESET_MAX_TIMEOUT (HL_HARD_RESET_MAX_TIMEOUT * 3)
#define HL_DEVICE_TIMEOUT_USEC 1000000 /* 1 s */
@ -115,10 +116,18 @@ enum hl_mmu_page_table_location {
*
* - HL_RESET_HEARTBEAT
* Set if reset is due to heartbeat
*
* - HL_RESET_TDR
* Set if reset is due to TDR
*
* - HL_RESET_DEVICE_RELEASE
* Set if reset is due to device release
*/
#define HL_RESET_HARD (1 << 0)
#define HL_RESET_FROM_RESET_THREAD (1 << 1)
#define HL_RESET_HEARTBEAT (1 << 2)
#define HL_RESET_TDR (1 << 3)
#define HL_RESET_DEVICE_RELEASE (1 << 4)
#define HL_MAX_SOBS_PER_MONITOR 8
@ -178,12 +187,14 @@ enum hl_pci_match_mode {
/**
* enum hl_fw_component - F/W components to read version through registers.
* @FW_COMP_UBOOT: u-boot.
* @FW_COMP_BOOT_FIT: boot fit.
* @FW_COMP_PREBOOT: preboot.
* @FW_COMP_LINUX: linux.
*/
enum hl_fw_component {
FW_COMP_UBOOT,
FW_COMP_PREBOOT
FW_COMP_BOOT_FIT,
FW_COMP_PREBOOT,
FW_COMP_LINUX,
};
/**
@ -420,12 +431,24 @@ struct hl_mmu_properties {
* @cb_pool_cb_size: size of each CB in the CB pool.
* @max_pending_cs: maximum of concurrent pending command submissions
* @max_queues: maximum amount of queues in the system
* @fw_boot_cpu_security_map: bitmap representation of boot cpu security status
* reported by FW, bit description can be found in
* CPU_BOOT_DEV_STS*
* @fw_app_security_map: bitmap representation of application security status
* reported by FW, bit description can be found in
* CPU_BOOT_DEV_STS*
* @fw_preboot_cpu_boot_dev_sts0: bitmap representation of preboot cpu
* capabilities reported by FW, bit description
* can be found in CPU_BOOT_DEV_STS0
* @fw_preboot_cpu_boot_dev_sts1: bitmap representation of preboot cpu
* capabilities reported by FW, bit description
* can be found in CPU_BOOT_DEV_STS1
* @fw_bootfit_cpu_boot_dev_sts0: bitmap representation of boot cpu security
* status reported by FW, bit description can be
* found in CPU_BOOT_DEV_STS0
* @fw_bootfit_cpu_boot_dev_sts1: bitmap representation of boot cpu security
* status reported by FW, bit description can be
* found in CPU_BOOT_DEV_STS1
* @fw_app_cpu_boot_dev_sts0: bitmap representation of application security
* status reported by FW, bit description can be
* found in CPU_BOOT_DEV_STS0
* @fw_app_cpu_boot_dev_sts1: bitmap representation of application security
* status reported by FW, bit description can be
* found in CPU_BOOT_DEV_STS1
* @collective_first_sob: first sync object available for collective use
* @collective_first_mon: first monitor available for collective use
* @sync_stream_first_sob: first sync object available for sync stream use
@ -438,14 +461,19 @@ struct hl_mmu_properties {
* @user_interrupt_count: number of user interrupts.
* @tpc_enabled_mask: which TPCs are enabled.
* @completion_queues_count: number of completion queues.
* @fw_security_disabled: true if security measures are disabled in firmware,
* false otherwise
* @fw_security_status_valid: security status bits are valid and can be fetched
* from BOOT_DEV_STS0
* @fw_security_enabled: true if security measures are enabled in firmware,
* false otherwise
* @fw_cpu_boot_dev_sts0_valid: status bits are valid and can be fetched from
* BOOT_DEV_STS0
* @fw_cpu_boot_dev_sts1_valid: status bits are valid and can be fetched from
* BOOT_DEV_STS1
* @dram_supports_virtual_memory: is there an MMU towards the DRAM
* @hard_reset_done_by_fw: true if firmware is handling hard reset flow
* @num_functional_hbms: number of functional HBMs in each DCORE.
* @iatu_done_by_fw: true if iATU configuration is being done by FW.
* @dynamic_fw_load: is dynamic FW load supported.
* @gic_interrupts_enable: true if FW is not blocking GIC controller,
* false otherwise.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@ -491,8 +519,12 @@ struct asic_fixed_properties {
u32 cb_pool_cb_size;
u32 max_pending_cs;
u32 max_queues;
u32 fw_boot_cpu_security_map;
u32 fw_app_security_map;
u32 fw_preboot_cpu_boot_dev_sts0;
u32 fw_preboot_cpu_boot_dev_sts1;
u32 fw_bootfit_cpu_boot_dev_sts0;
u32 fw_bootfit_cpu_boot_dev_sts1;
u32 fw_app_cpu_boot_dev_sts0;
u32 fw_app_cpu_boot_dev_sts1;
u16 collective_first_sob;
u16 collective_first_mon;
u16 sync_stream_first_sob;
@ -504,12 +536,15 @@ struct asic_fixed_properties {
u16 user_interrupt_count;
u8 tpc_enabled_mask;
u8 completion_queues_count;
u8 fw_security_disabled;
u8 fw_security_status_valid;
u8 fw_security_enabled;
u8 fw_cpu_boot_dev_sts0_valid;
u8 fw_cpu_boot_dev_sts1_valid;
u8 dram_supports_virtual_memory;
u8 hard_reset_done_by_fw;
u8 num_functional_hbms;
u8 iatu_done_by_fw;
u8 dynamic_fw_load;
u8 gic_interrupts_enable;
};
/**
@ -750,12 +785,19 @@ struct hl_user_pending_interrupt {
* @kernel_address: holds the queue's kernel virtual address
* @bus_address: holds the queue's DMA address
* @ci: ci inside the queue
* @prev_eqe_index: the index of the previous event queue entry. The index of
the current entry must be +1 of the previous one.
* @check_eqe_index: do we need to check the index of the current entry vs. the
* previous one. This is for backward compatibility with older
* firmwares
*/
struct hl_eq {
struct hl_device *hdev;
void *kernel_address;
dma_addr_t bus_address;
u32 ci;
u32 prev_eqe_index;
bool check_eqe_index;
};
@ -812,6 +854,132 @@ enum div_select_defs {
DIV_SEL_DIVIDED_PLL = 3,
};
enum pci_region {
PCI_REGION_CFG,
PCI_REGION_SRAM,
PCI_REGION_DRAM,
PCI_REGION_SP_SRAM,
PCI_REGION_NUMBER,
};
/**
* struct pci_mem_region - describe memory region in a PCI bar
* @region_base: region base address
* @region_size: region size
* @bar_size: size of the BAR
* @offset_in_bar: region offset into the bar
* @bar_id: bar ID of the region
* @used: if used 1, otherwise 0
*/
struct pci_mem_region {
u64 region_base;
u64 region_size;
u64 bar_size;
u32 offset_in_bar;
u8 bar_id;
u8 used;
};
/**
* struct static_fw_load_mgr - static FW load manager
* @preboot_version_max_off: max offset to preboot version
* @boot_fit_version_max_off: max offset to boot fit version
* @kmd_msg_to_cpu_reg: register address for KMD->CPU messages
* @cpu_cmd_status_to_host_reg: register address for CPU command status response
* @cpu_boot_status_reg: boot status register
* @cpu_boot_dev_status0_reg: boot device status register 0
* @cpu_boot_dev_status1_reg: boot device status register 1
* @boot_err0_reg: boot error register 0
* @boot_err1_reg: boot error register 1
* @preboot_version_offset_reg: SRAM offset to preboot version register
* @boot_fit_version_offset_reg: SRAM offset to boot fit version register
* @sram_offset_mask: mask for getting offset into the SRAM
* @cpu_reset_wait_msec: used when setting WFE via kmd_msg_to_cpu_reg
*/
struct static_fw_load_mgr {
u64 preboot_version_max_off;
u64 boot_fit_version_max_off;
u32 kmd_msg_to_cpu_reg;
u32 cpu_cmd_status_to_host_reg;
u32 cpu_boot_status_reg;
u32 cpu_boot_dev_status0_reg;
u32 cpu_boot_dev_status1_reg;
u32 boot_err0_reg;
u32 boot_err1_reg;
u32 preboot_version_offset_reg;
u32 boot_fit_version_offset_reg;
u32 sram_offset_mask;
u32 cpu_reset_wait_msec;
};
/**
* struct fw_response - FW response to LKD command
* @ram_offset: descriptor offset into the RAM
* @ram_type: RAM type containing the descriptor (SRAM/DRAM)
* @status: command status
*/
struct fw_response {
u32 ram_offset;
u8 ram_type;
u8 status;
};
/**
* struct dynamic_fw_load_mgr - dynamic FW load manager
* @response: FW to LKD response
* @comm_desc: the communication descriptor with FW
* @image_region: region to copy the FW image to
* @fw_image_size: size of FW image to load
* @wait_for_bl_timeout: timeout for waiting for boot loader to respond
*/
struct dynamic_fw_load_mgr {
struct fw_response response;
struct lkd_fw_comms_desc comm_desc;
struct pci_mem_region *image_region;
size_t fw_image_size;
u32 wait_for_bl_timeout;
};
/**
* struct fw_image_props - properties of FW image
* @image_name: name of the image
* @src_off: offset in src FW to copy from
* @copy_size: amount of bytes to copy (0 to copy the whole binary)
*/
struct fw_image_props {
char *image_name;
u32 src_off;
u32 copy_size;
};
/**
* struct fw_load_mgr - manages the FW loading process
* @dynamic_loader: specific structure for dynamic load
* @static_loader: specific structure for static load
* @boot_fit_img: boot fit image properties
* @linux_img: linux image properties
* @cpu_timeout: CPU response timeout in usec
* @boot_fit_timeout: Boot fit load timeout in usec
* @skip_bmc: should BMC be skipped
* @sram_bar_id: SRAM bar ID
* @dram_bar_id: DRAM bar ID
* @linux_loaded: true if linux was loaded so far
*/
struct fw_load_mgr {
union {
struct dynamic_fw_load_mgr dynamic_loader;
struct static_fw_load_mgr static_loader;
};
struct fw_image_props boot_fit_img;
struct fw_image_props linux_img;
u32 cpu_timeout;
u32 boot_fit_timeout;
u8 skip_bmc;
u8 sram_bar_id;
u8 dram_bar_id;
u8 linux_loaded;
};
/**
* struct hl_asic_funcs - ASIC specific functions that can be called from
* common code.
@ -901,8 +1069,6 @@ enum div_select_defs {
* @ctx_fini: context dependent cleanup.
* @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
* @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
* @read_device_fw_version: read the device's firmware versions that are
* contained in registers
* @load_firmware_to_device: load the firmware to the device's memory
* @load_boot_fit_to_device: load boot fit to device's memory
* @get_signal_cb_size: Get signal CB size.
@ -933,6 +1099,8 @@ enum div_select_defs {
* @get_msi_info: Retrieve asic-specific MSI ID of the f/w async event
* @map_pll_idx_to_fw_idx: convert driver specific per asic PLL index to
* generic f/w compatible PLL Indexes
* @init_firmware_loader: initialize data for FW loader.
* @init_cpu_scrambler_dram: Enable CPU specific DRAM scrambling
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@ -1006,7 +1174,7 @@ struct hl_asic_funcs {
int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
u32 flags);
int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
u32 asid, u64 va, u64 size);
u32 flags, u32 asid, u64 va, u64 size);
int (*send_heartbeat)(struct hl_device *hdev);
void (*set_clock_gating)(struct hl_device *hdev);
void (*disable_clock_gating)(struct hl_device *hdev);
@ -1030,8 +1198,6 @@ struct hl_asic_funcs {
void (*ctx_fini)(struct hl_ctx *ctx);
int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
int (*read_device_fw_version)(struct hl_device *hdev,
enum hl_fw_component fwc);
int (*load_firmware_to_device)(struct hl_device *hdev);
int (*load_boot_fit_to_device)(struct hl_device *hdev);
u32 (*get_signal_cb_size)(struct hl_device *hdev);
@ -1056,8 +1222,10 @@ struct hl_asic_funcs {
int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
u32 block_id, u32 block_size);
void (*enable_events_from_fw)(struct hl_device *hdev);
void (*get_msi_info)(u32 *table);
void (*get_msi_info)(__le32 *table);
int (*map_pll_idx_to_fw_idx)(u32 pll_idx);
void (*init_firmware_loader)(struct hl_device *hdev);
void (*init_cpu_scrambler_dram)(struct hl_device *hdev);
};
@ -1262,6 +1430,7 @@ struct hl_userptr {
* @staged_sequence: the sequence of the staged submission this CS is part of,
* relevant only if staged_cs is set.
* @timeout_jiffies: cs timeout in jiffies.
* @submission_time_jiffies: submission time of the cs
* @type: CS_TYPE_*.
* @submitted: true if CS was submitted to H/W.
* @completed: true if CS was completed by device.
@ -1274,6 +1443,8 @@ struct hl_userptr {
* @staged_first: true if this is the first staged CS and we need to receive
* timeout for this CS.
* @staged_cs: true if this CS is part of a staged submission.
* @skip_reset_on_timeout: true if we shall not reset the device in case
* timeout occurs (debug scenario).
*/
struct hl_cs {
u16 *jobs_in_queue_cnt;
@ -1291,6 +1462,7 @@ struct hl_cs {
u64 sequence;
u64 staged_sequence;
u64 timeout_jiffies;
u64 submission_time_jiffies;
enum hl_cs_type type;
u8 submitted;
u8 completed;
@ -1301,6 +1473,7 @@ struct hl_cs {
u8 staged_last;
u8 staged_first;
u8 staged_cs;
u8 skip_reset_on_timeout;
};
/**
@ -1922,7 +2095,7 @@ struct hl_mmu_funcs {
* @kernel_queues: array of hl_hw_queue.
* @cs_mirror_list: CS mirror list for TDR.
* @cs_mirror_lock: protects cs_mirror_list.
* @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs.
* @kernel_cb_mgr: command buffer manager for creating/destroying/handling CBs.
* @event_queue: event queue for IRQ from CPU-CP.
* @dma_pool: DMA pool for small allocations.
* @cpu_accessible_dma_mem: Host <-> CPU-CP shared memory CPU address.
@ -1954,6 +2127,8 @@ struct hl_mmu_funcs {
* @aggregated_cs_counters: aggregated cs counters among all contexts
* @mmu_priv: device-specific MMU data.
* @mmu_func: device-related MMU functions.
* @fw_loader: FW loader manager.
* @pci_mem_region: array of memory regions in the PCI
* @dram_used_mem: current DRAM memory consumption.
* @timeout_jiffies: device CS timeout value.
* @max_power: the max power of the device, as configured by the sysadmin. This
@ -1968,6 +2143,11 @@ struct hl_mmu_funcs {
* the error will be ignored by the driver during
* device initialization. Mainly used to debug and
* workaround firmware bugs
* @last_successful_open_jif: timestamp (jiffies) of the last successful
* device open.
* @last_open_session_duration_jif: duration (jiffies) of the last device open
* session.
* @open_counter: number of successful device open operations.
* @in_reset: is device in reset flow.
* @curr_pll_profile: current PLL profile.
* @card_type: Various ASICs have several card types. This indicates the card
@ -2007,6 +2187,8 @@ struct hl_mmu_funcs {
* @collective_mon_idx: helper index for collective initialization
* @supports_coresight: is CoreSight supported.
* @supports_soft_reset: is soft reset supported.
* @allow_external_soft_reset: true if soft reset initiated by user or TDR is
* allowed.
* @supports_cb_mapping: is mapping a CB to the device's MMU supported.
* @needs_reset: true if reset_on_lockup is false and device should be reset
* due to lockup.
@ -2015,6 +2197,14 @@ struct hl_mmu_funcs {
* @device_fini_pending: true if device_fini was called and might be
* waiting for the reset thread to finish
* @supports_staged_submission: true if staged submissions are supported
* @curr_reset_cause: saves an enumerated reset cause when a hard reset is
* triggered, and cleared after it is shared with preboot.
* @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to
* complete instead.
* @device_cpu_is_halted: Flag to indicate whether the device CPU was already
* halted. We can't halt it again because the COMMS
* protocol will throw an error. Relevant only for
* cases where Linux was not loaded to device CPU
*/
struct hl_device {
struct pci_dev *pdev;
@ -2079,11 +2269,18 @@ struct hl_device {
struct hl_mmu_priv mmu_priv;
struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS];
struct fw_load_mgr fw_loader;
struct pci_mem_region pci_mem_region[PCI_REGION_NUMBER];
atomic64_t dram_used_mem;
u64 timeout_jiffies;
u64 max_power;
u64 clock_gating_mask;
u64 boot_error_status_mask;
u64 last_successful_open_jif;
u64 last_open_session_duration_jif;
u64 open_counter;
atomic_t in_reset;
enum hl_pll_frequency curr_pll_profile;
enum cpucp_card_types card_type;
@ -2116,11 +2313,15 @@ struct hl_device {
u8 collective_mon_idx;
u8 supports_coresight;
u8 supports_soft_reset;
u8 allow_external_soft_reset;
u8 supports_cb_mapping;
u8 needs_reset;
u8 process_kill_trial_cnt;
u8 device_fini_pending;
u8 supports_staged_submission;
u8 curr_reset_cause;
u8 skip_reset_on_timeout;
u8 device_cpu_is_halted;
/* Parameters for bring-up */
u64 nic_ports_mask;
@ -2138,6 +2339,7 @@ struct hl_device {
u8 rl_enable;
u8 reset_on_preboot_fail;
u8 reset_upon_device_release;
u8 reset_if_device_not_idle;
};
@ -2384,11 +2586,13 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
void *vaddr);
int hl_fw_send_heartbeat(struct hl_device *hdev);
int hl_fw_cpucp_info_get(struct hl_device *hdev,
u32 cpu_security_boot_status_reg,
u32 boot_err0_reg);
u32 sts_boot_dev_sts0_reg,
u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
u32 boot_err1_reg);
int hl_fw_cpucp_handshake(struct hl_device *hdev,
u32 cpu_security_boot_status_reg,
u32 boot_err0_reg);
u32 sts_boot_dev_sts0_reg,
u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
u32 boot_err1_reg);
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
struct hl_info_pci_counters *counters);
@ -2399,14 +2603,17 @@ int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index,
int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
u16 *pll_freq_arr);
int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power);
int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout);
void hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev);
void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev);
int hl_fw_init_cpu(struct hl_device *hdev);
int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
u32 timeout);
u32 sts_boot_dev_sts0_reg,
u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
u32 boot_err1_reg, u32 timeout);
int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
struct fw_load_mgr *fw_loader,
enum comms_cmd cmd, unsigned int size,
bool wait_ok, u32 timeout);
int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
bool is_wc[3]);
int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
@ -2415,6 +2622,7 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
struct hl_inbound_pci_region *pci_region);
int hl_pci_set_outbound_region(struct hl_device *hdev,
struct hl_outbound_pci_region *pci_region);
enum pci_region hl_get_pci_memory_region(struct hl_device *hdev, u64 addr);
int hl_pci_init(struct hl_device *hdev);
void hl_pci_fini(struct hl_device *hdev);
@ -2443,6 +2651,8 @@ int hl_set_voltage(struct hl_device *hdev,
int hl_set_current(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
void hl_release_pending_user_interrupts(struct hl_device *hdev);
int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
struct hl_hw_sob **hw_sob, u32 count);
#ifdef CONFIG_DEBUG_FS


@ -29,7 +29,7 @@ static DEFINE_MUTEX(hl_devs_idr_lock);
static int timeout_locked = 30;
static int reset_on_lockup = 1;
static int memory_scrub = 1;
static int memory_scrub;
static ulong boot_error_status_mask = ULONG_MAX;
module_param(timeout_locked, int, 0444);
@ -42,7 +42,7 @@ MODULE_PARM_DESC(reset_on_lockup,
module_param(memory_scrub, int, 0444);
MODULE_PARM_DESC(memory_scrub,
"Scrub device memory in various states (0 = no, 1 = yes, default yes)");
"Scrub device memory in various states (0 = no, 1 = yes, default no)");
module_param(boot_error_status_mask, ulong, 0444);
MODULE_PARM_DESC(boot_error_status_mask,
@ -187,6 +187,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_debugfs_add_file(hpriv);
hdev->open_counter++;
hdev->last_successful_open_jif = jiffies;
return 0;
out_err:
@ -264,6 +267,7 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)
hdev->bmc_enable = 1;
hdev->hard_reset_on_fw_events = 1;
hdev->reset_on_preboot_fail = 1;
hdev->reset_if_device_not_idle = 1;
hdev->reset_pcilink = 0;
hdev->axi_drain = 0;
@ -308,10 +312,10 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
}
if (pdev)
hdev->asic_prop.fw_security_disabled =
!is_asic_secured(pdev->device);
hdev->asic_prop.fw_security_enabled =
is_asic_secured(hdev->asic_type);
else
hdev->asic_prop.fw_security_disabled = true;
hdev->asic_prop.fw_security_enabled = false;
/* Assign status description string */
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
@ -325,11 +329,14 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
hdev->reset_on_lockup = reset_on_lockup;
hdev->memory_scrub = memory_scrub;
hdev->boot_error_status_mask = boot_error_status_mask;
hdev->stop_on_err = true;
hdev->pldm = 0;
set_driver_behavior_per_device(hdev);
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
if (timeout_locked)
hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
else
@ -464,6 +471,7 @@ static int hl_pci_probe(struct pci_dev *pdev,
return 0;
disable_device:
pci_disable_pcie_error_reporting(pdev);
pci_set_drvdata(pdev, NULL);
destroy_hdev(hdev);
@ -572,7 +580,11 @@ static struct pci_driver hl_pci_driver = {
.probe = hl_pci_probe,
.remove = hl_pci_remove,
.shutdown = hl_pci_remove,
.driver.pm = &hl_pm_ops,
.driver = {
.name = HL_NAME,
.pm = &hl_pm_ops,
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
},
.err_handler = &hl_pci_err_handler,
};


@ -95,7 +95,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.first_available_interrupt_id =
prop->first_available_user_msix_interrupt;
return copy_to_user(out, &hw_ip,
min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0;
}
static int hw_events_info(struct hl_device *hdev, bool aggregate,
@ -460,6 +460,24 @@ static int power_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
min((size_t) max_size, sizeof(power_info))) ? -EFAULT : 0;
}
static int open_stats_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
struct hl_device *hdev = hpriv->hdev;
u32 max_size = args->return_size;
struct hl_open_stats_info open_stats_info = {0};
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
if ((!max_size) || (!out))
return -EINVAL;
open_stats_info.last_open_period_ms = jiffies64_to_msecs(
hdev->last_open_session_duration_jif);
open_stats_info.open_counter = hdev->open_counter;
return copy_to_user(out, &open_stats_info,
min((size_t) max_size, sizeof(open_stats_info))) ? -EFAULT : 0;
}
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
@ -543,6 +561,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_POWER:
return power_info(hpriv, args);
case HL_INFO_OPEN_STATS:
return open_stats_info(hpriv, args);
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;
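
For context, a hedged user-space sketch of querying the new statistics. It
assumes the driver's uapi header (<misc/habanalabs.h>) provides
HL_IOCTL_INFO, HL_INFO_OPEN_STATS and struct hl_open_stats_info, and that
the device file descriptor is already open; field names match the kernel
code above:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* assumed uapi header location */

static int query_open_stats(int dev_fd)
{
	struct hl_open_stats_info stats;
	struct hl_info_args args;

	memset(&args, 0, sizeof(args));
	memset(&stats, 0, sizeof(stats));
	args.op = HL_INFO_OPEN_STATS;
	args.return_pointer = (__u64) (uintptr_t) &stats;
	args.return_size = sizeof(stats);

	/* HL_IOCTL_INFO is the driver's generic info ioctl */
	if (ioctl(dev_fd, HL_IOCTL_INFO, &args))
		return -1;

	printf("opens: %llu, last session: %llu ms\n",
	       (unsigned long long) stats.open_counter,
	       (unsigned long long) stats.last_open_period_ms);
	return 0;
}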


@ -410,19 +410,20 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}
static void init_signal_cs(struct hl_device *hdev,
static int init_signal_cs(struct hl_device *hdev,
struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
struct hl_sync_stream_properties *prop;
struct hl_hw_sob *hw_sob;
u32 q_idx;
int rc = 0;
q_idx = job->hw_queue_id;
prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
hw_sob = &prop->hw_sob[prop->curr_sob_offset];
cs_cmpl->hw_sob = hw_sob;
cs_cmpl->sob_val = prop->next_sob_val++;
cs_cmpl->sob_val = prop->next_sob_val;
dev_dbg(hdev->dev,
"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
@ -434,24 +435,9 @@ static void init_signal_cs(struct hl_device *hdev,
hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
cs_cmpl->hw_sob->sob_id, 0, true);
kref_get(&hw_sob->kref);
rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1);
/* check for wraparound */
if (prop->next_sob_val == HL_MAX_SOB_VAL) {
/*
* Decrement as we reached the max value.
* The release function won't be called here as we've
* just incremented the refcount.
*/
kref_put(&hw_sob->kref, hl_sob_reset_error);
prop->next_sob_val = 1;
/* only two SOBs are currently in use */
prop->curr_sob_offset =
(prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
prop->curr_sob_offset, q_idx);
}
return rc;
}
static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
@ -504,22 +490,25 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
*
* H/W queues spinlock should be taken before calling this function
*/
static void init_signal_wait_cs(struct hl_cs *cs)
static int init_signal_wait_cs(struct hl_cs *cs)
{
struct hl_ctx *ctx = cs->ctx;
struct hl_device *hdev = ctx->hdev;
struct hl_cs_job *job;
struct hl_cs_compl *cs_cmpl =
container_of(cs->fence, struct hl_cs_compl, base_fence);
int rc = 0;
/* There is only one job in a signal/wait CS */
job = list_first_entry(&cs->job_list, struct hl_cs_job,
cs_node);
if (cs->type & CS_TYPE_SIGNAL)
init_signal_cs(hdev, job, cs_cmpl);
rc = init_signal_cs(hdev, job, cs_cmpl);
else if (cs->type & CS_TYPE_WAIT)
init_wait_cs(hdev, cs, job, cs_cmpl);
return rc;
}
/*
@ -590,11 +579,16 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
}
}
if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT))
init_signal_wait_cs(cs);
else if (cs->type == CS_TYPE_COLLECTIVE_WAIT)
if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) {
rc = init_signal_wait_cs(cs);
if (rc) {
dev_err(hdev->dev, "Failed to submit signal cs\n");
goto unroll_cq_resv;
}
} else if (cs->type == CS_TYPE_COLLECTIVE_WAIT)
hdev->asic_funcs->collective_wait_init_cs(cs);
spin_lock(&hdev->cs_mirror_lock);
/* Verify staged CS exists and add to the staged list */


@ -207,17 +207,33 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
struct hl_eq_entry *eq_entry;
struct hl_eq_entry *eq_base;
struct hl_eqe_work *handle_eqe_work;
bool entry_ready;
u32 cur_eqe;
u16 cur_eqe_index;
eq_base = eq->kernel_address;
while (1) {
bool entry_ready =
((le32_to_cpu(eq_base[eq->ci].hdr.ctl) &
EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT);
cur_eqe = le32_to_cpu(eq_base[eq->ci].hdr.ctl);
entry_ready = !!FIELD_GET(EQ_CTL_READY_MASK, cur_eqe);
if (!entry_ready)
break;
cur_eqe_index = FIELD_GET(EQ_CTL_INDEX_MASK, cur_eqe);
if ((hdev->event_queue.check_eqe_index) &&
(((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK)
!= cur_eqe_index)) {
dev_dbg(hdev->dev,
"EQE 0x%x in queue is ready but index does not match %d!=%d",
eq_base[eq->ci].hdr.ctl,
((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK),
cur_eqe_index);
break;
}
eq->prev_eqe_index++;
eq_entry = &eq_base[eq->ci];
/*
@ -341,6 +357,7 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
q->hdev = hdev;
q->kernel_address = p;
q->ci = 0;
q->prev_eqe_index = 0;
return 0;
}
@ -365,6 +382,7 @@ void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
{
q->ci = 0;
q->prev_eqe_index = 0;
/*
* It's not enough to just reset the PI/CI because the H/W may have


@ -570,8 +570,10 @@ static u64 get_va_block(struct hl_device *hdev,
if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) ||
(!is_align_pow_2 &&
do_div(tmp_hint_addr, va_range->page_size))) {
dev_info(hdev->dev, "Hint address 0x%llx will be ignored\n",
hint_addr);
dev_dbg(hdev->dev,
"Hint address 0x%llx will be ignored because it is not aligned\n",
hint_addr);
hint_addr = 0;
}
@ -1117,7 +1119,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
goto map_err;
}
rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type);
rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, false,
*vm_type, ctx->asid, ret_vaddr, phys_pg_pack->total_size);
mutex_unlock(&ctx->mmu_lock);
@ -1261,8 +1264,9 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
* at the loop end rather than for each iteration
*/
if (!ctx_free)
rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
*vm_type);
rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, true,
*vm_type, ctx->asid, vaddr,
phys_pg_pack->total_size);
mutex_unlock(&ctx->mmu_lock);
@ -1369,12 +1373,7 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
/* Driver only allows mapping of a complete HW block */
block_size = vma->vm_end - vma->vm_start;
#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
if (!access_ok(VERIFY_WRITE,
(void __user *) (uintptr_t) vma->vm_start, block_size)) {
#else
if (!access_ok((void __user *) (uintptr_t) vma->vm_start, block_size)) {
#endif
dev_err(hdev->dev,
"user pointer is invalid - 0x%lx\n",
vma->vm_start);
@ -1608,7 +1607,8 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
if (rc != npages) {
dev_err(hdev->dev,
"Failed to map host memory, user ptr probably wrong\n");
"Failed (%d) to pin host memory with user ptr 0x%llx, size 0x%llx, npages %d\n",
rc, addr, size, npages);
if (rc < 0)
goto destroy_pages;
npages = rc;


@ -501,12 +501,20 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) &&
!is_power_of_2(prop->dram_page_size)) {
u32 bit;
unsigned long dram_page_size = prop->dram_page_size;
u64 page_offset_mask;
u64 phys_addr_mask;
u32 bit;
bit = __ffs64((u64)prop->dram_page_size);
page_offset_mask = ((1ull << bit) - 1);
/*
* find last set bit in page_size to cover all bits of page
* offset. note that 1 has to be added to bit index.
* note that the internal ulong variable is used to avoid
* an alignment issue.
*/
bit = find_last_bit(&dram_page_size,
sizeof(dram_page_size) * BITS_PER_BYTE) + 1;
page_offset_mask = (BIT_ULL(bit) - 1);
phys_addr_mask = ~page_offset_mask;
*phys_addr = (tmp_phys_addr & phys_addr_mask) |
(virt_addr & page_offset_mask);
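
To make the mask arithmetic concrete, a standalone (non-kernel) sketch
computing the same page-offset mask for a hypothetical 48 KB,
non-power-of-2 DRAM page size:

/* Standalone illustration of the mask computed above: for a 48 KB page
 * (0xC000), the last set bit is 15, so bit = 16 and the mask is 0xffff,
 * covering all possible page-offset bits up to the next power of 2.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t dram_page_size = 48 * 1024;	/* not a power of 2 */
	/* 64 - clz mirrors find_last_bit() + 1 (GCC/Clang builtin) */
	unsigned int bit = 64 - __builtin_clzll(dram_page_size);
	uint64_t page_offset_mask = (1ULL << bit) - 1;

	printf("mask = 0x%llx\n", (unsigned long long) page_offset_mask);
	return 0;
}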


@ -10,7 +10,7 @@
#include <linux/pci.h>
#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC (HL_PCI_ELBI_TIMEOUT_MSEC * 10)
#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC (HL_PCI_ELBI_TIMEOUT_MSEC * 100)
#define IATU_REGION_CTRL_REGION_EN_MASK BIT(31)
#define IATU_REGION_CTRL_MATCH_MODE_MASK BIT(30)
@ -359,6 +359,32 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
return rc;
}
/**
* hl_get_pci_memory_region() - get PCI region for given address
* @hdev: Pointer to hl_device structure.
* @addr: device address
*
* @return region index on success, otherwise PCI_REGION_NUMBER (invalid
* region index)
*/
enum pci_region hl_get_pci_memory_region(struct hl_device *hdev, u64 addr)
{
int i;
for (i = 0 ; i < PCI_REGION_NUMBER ; i++) {
struct pci_mem_region *region = &hdev->pci_mem_region[i];
if (!region->used)
continue;
if ((addr >= region->region_base) &&
(addr < region->region_base + region->region_size))
return i;
}
return PCI_REGION_NUMBER;
}
/**
* hl_pci_init() - PCI initialization code.
* @hdev: Pointer to hl_device structure.
@ -395,6 +421,12 @@ int hl_pci_init(struct hl_device *hdev)
goto unmap_pci_bars;
}
/* Driver must sleep in order for FW to finish the iATU configuration */
if (hdev->asic_prop.iatu_done_by_fw) {
usleep_range(2000, 3000);
hdev->asic_funcs->set_dma_mask_from_fw(hdev);
}
rc = dma_set_mask_and_coherent(&pdev->dev,
DMA_BIT_MASK(hdev->dma_mask));
if (rc) {


@ -208,7 +208,7 @@ static ssize_t soft_reset_store(struct device *dev,
goto out;
}
if (!hdev->supports_soft_reset) {
if (!hdev->allow_external_soft_reset) {
dev_err(hdev->dev, "Device does not support soft-reset\n");
goto out;
}

File diff suppressed because it is too large


@ -82,6 +82,7 @@
QMAN_STREAMS)
#define QMAN_STREAMS 4
#define PQ_FETCHER_CACHE_SIZE 8
#define DMA_QMAN_OFFSET (mmDMA1_QM_BASE - mmDMA0_QM_BASE)
#define TPC_QMAN_OFFSET (mmTPC1_QM_BASE - mmTPC0_QM_BASE)


@ -424,7 +424,7 @@ static int gaudi_config_stm(struct hl_device *hdev,
if (frequency == 0)
frequency = input->frequency;
WREG32(base_reg + 0xE8C, frequency);
WREG32(base_reg + 0xE90, 0x7FF);
WREG32(base_reg + 0xE90, 0x1F00);
/* SW-2176 - SW WA for HW bug */
if ((CFG_BASE + base_reg) >= mmDMA_CH_0_CS_STM_BASE &&
@ -434,7 +434,7 @@ static int gaudi_config_stm(struct hl_device *hdev,
WREG32(base_reg + 0xE6C, 0x0);
}
WREG32(base_reg + 0xE80, 0x27 | (input->id << 16));
WREG32(base_reg + 0xE80, 0x23 | (input->id << 16));
} else {
WREG32(base_reg + 0xE80, 4);
WREG32(base_reg + 0xD64, 0);
@ -634,7 +634,7 @@ static int gaudi_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
WREG32(mmPSOC_ETR_MODE, input->sink_mode);
if (hdev->asic_prop.fw_security_disabled) {
if (!hdev->asic_prop.fw_security_enabled) {
/* make ETR not privileged */
val = FIELD_PREP(
PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);


@ -1448,7 +1448,7 @@ static void gaudi_init_dma_protection_bits(struct hl_device *hdev)
u32 pb_addr, mask;
u8 word_offset;
if (hdev->asic_prop.fw_security_disabled) {
if (!hdev->asic_prop.fw_security_enabled) {
gaudi_pb_set_block(hdev, mmDMA_IF_E_S_BASE);
gaudi_pb_set_block(hdev, mmDMA_IF_E_S_DOWN_CH0_BASE);
gaudi_pb_set_block(hdev, mmDMA_IF_E_S_DOWN_CH1_BASE);
@ -9135,7 +9135,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
u32 pb_addr, mask;
u8 word_offset;
if (hdev->asic_prop.fw_security_disabled) {
if (!hdev->asic_prop.fw_security_enabled) {
gaudi_pb_set_block(hdev, mmTPC0_E2E_CRED_BASE);
gaudi_pb_set_block(hdev, mmTPC1_E2E_CRED_BASE);
gaudi_pb_set_block(hdev, mmTPC2_E2E_CRED_BASE);
@ -12818,7 +12818,7 @@ static void gaudi_init_protection_bits(struct hl_device *hdev)
* secured
*/
if (hdev->asic_prop.fw_security_disabled) {
if (!hdev->asic_prop.fw_security_enabled) {
gaudi_pb_set_block(hdev, mmIF_E_PLL_BASE);
gaudi_pb_set_block(hdev, mmMESH_W_PLL_BASE);
gaudi_pb_set_block(hdev, mmSRAM_W_PLL_BASE);
@ -13023,7 +13023,7 @@ void gaudi_init_security(struct hl_device *hdev)
* property configuration of MME SBAB and ACC to be non-privileged and
* non-secured
*/
if (hdev->asic_prop.fw_security_disabled) {
if (!hdev->asic_prop.fw_security_enabled) {
WREG32(mmMME0_SBAB_PROT, 0x2);
WREG32(mmMME0_ACC_PROT, 0x2);
WREG32(mmMME1_SBAB_PROT, 0x2);
@ -13032,11 +13032,12 @@ void gaudi_init_security(struct hl_device *hdev)
WREG32(mmMME2_ACC_PROT, 0x2);
WREG32(mmMME3_SBAB_PROT, 0x2);
WREG32(mmMME3_ACC_PROT, 0x2);
}
/* On RAZWI, 0 will be returned from RR and 0xBABA0BAD from PB */
if (hdev->asic_prop.fw_security_disabled)
/*
* On RAZWI, 0 will be returned from RR and 0xBABA0BAD from PB
*/
WREG32(0xC01B28, 0x1);
}
gaudi_init_range_registers_lbw(hdev);


@ -87,6 +87,7 @@
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
#define GOYA_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
#define GOYA_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
#define GOYA_QMAN0_FENCE_VAL 0xD169B243
@ -354,7 +355,7 @@ static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
int goya_get_fixed_properties(struct hl_device *hdev)
int goya_set_fixed_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
int i;
@ -460,8 +461,10 @@ int goya_get_fixed_properties(struct hl_device *hdev)
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
prop->fw_security_status_valid = false;
prop->fw_cpu_boot_dev_sts0_valid = false;
prop->fw_cpu_boot_dev_sts1_valid = false;
prop->hard_reset_done_by_fw = false;
prop->gic_interrupts_enable = true;
return 0;
}
@ -531,10 +534,8 @@ static int goya_init_iatu(struct hl_device *hdev)
struct hl_outbound_pci_region outbound_region;
int rc;
if (hdev->asic_prop.iatu_done_by_fw) {
hdev->asic_funcs->set_dma_mask_from_fw(hdev);
if (hdev->asic_prop.iatu_done_by_fw)
return 0;
}
/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
inbound_region.mode = PCI_BAR_MATCH_MODE;
@ -586,7 +587,7 @@ static int goya_early_init(struct hl_device *hdev)
u32 fw_boot_status, val;
int rc;
rc = goya_get_fixed_properties(hdev);
rc = goya_set_fixed_properties(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to get fixed properties\n");
return rc;
@ -618,7 +619,7 @@ static int goya_early_init(struct hl_device *hdev)
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
/* If FW security is enabled at this point it means no access to ELBI */
if (!hdev->asic_prop.fw_security_disabled) {
if (hdev->asic_prop.fw_security_enabled) {
hdev->asic_prop.iatu_done_by_fw = true;
goto pci_init;
}
@ -642,8 +643,10 @@ pci_init:
* version to determine whether we run with a security-enabled firmware
*/
rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
mmCPU_BOOT_DEV_STS0,
mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
mmCPU_BOOT_ERR1,
GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
if (rc) {
if (hdev->reset_on_preboot_fail)
hdev->asic_funcs->hw_fini(hdev, true);
@ -723,7 +726,15 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
int rc;
if (hdev->asic_prop.fw_security_disabled) {
if (hdev->asic_prop.fw_security_enabled) {
rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
pll_freq_arr);
if (rc)
return;
freq = pll_freq_arr[1];
} else {
div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
nr = RREG32(mmPSOC_PCI_PLL_NR);
@ -750,14 +761,6 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
div_sel);
freq = 0;
}
} else {
rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
pll_freq_arr);
if (rc)
return;
freq = pll_freq_arr[1];
}
prop->psoc_timestamp_frequency = freq;
@ -849,6 +852,39 @@ void goya_late_fini(struct hl_device *hdev)
hdev->hl_chip_info->info = NULL;
}
static void goya_set_pci_memory_regions(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct pci_mem_region *region;
/* CFG */
region = &hdev->pci_mem_region[PCI_REGION_CFG];
region->region_base = CFG_BASE;
region->region_size = CFG_SIZE;
region->offset_in_bar = CFG_BASE - SRAM_BASE_ADDR;
region->bar_size = CFG_BAR_SIZE;
region->bar_id = SRAM_CFG_BAR_ID;
region->used = 1;
/* SRAM */
region = &hdev->pci_mem_region[PCI_REGION_SRAM];
region->region_base = SRAM_BASE_ADDR;
region->region_size = SRAM_SIZE;
region->offset_in_bar = 0;
region->bar_size = CFG_BAR_SIZE;
region->bar_id = SRAM_CFG_BAR_ID;
region->used = 1;
/* DRAM */
region = &hdev->pci_mem_region[PCI_REGION_DRAM];
region->region_base = DRAM_PHYS_BASE;
region->region_size = hdev->asic_prop.dram_size;
region->offset_in_bar = 0;
region->bar_size = prop->dram_pci_bar_size;
region->bar_id = DDR_BAR_ID;
region->used = 1;
}
/*
* goya_sw_init - Goya software initialization code
*
@ -918,6 +954,9 @@ static int goya_sw_init(struct hl_device *hdev)
spin_lock_init(&goya->hw_queues_lock);
hdev->supports_coresight = true;
hdev->supports_soft_reset = true;
hdev->allow_external_soft_reset = true;
goya_set_pci_memory_regions(hdev);
return 0;
@ -1263,8 +1302,11 @@ int goya_init_cpu_queues(struct hl_device *hdev)
}
/* update FW application security bits */
if (prop->fw_security_status_valid)
prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
if (prop->fw_cpu_boot_dev_sts0_valid)
prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
if (prop->fw_cpu_boot_dev_sts1_valid)
prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
goya->hw_cap_initialized |= HW_CAP_CPU_Q;
return 0;
@ -2402,47 +2444,67 @@ static int goya_load_boot_fit_to_device(struct hl_device *hdev)
return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst, 0, 0);
}
/*
* FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
* The version string should be located by that offset.
*/
static int goya_read_device_fw_version(struct hl_device *hdev,
enum hl_fw_component fwc)
static void goya_init_dynamic_firmware_loader(struct hl_device *hdev)
{
const char *name;
u32 ver_off;
char *dest;
struct dynamic_fw_load_mgr *dynamic_loader;
struct cpu_dyn_regs *dyn_regs;
switch (fwc) {
case FW_COMP_UBOOT:
ver_off = RREG32(mmUBOOT_VER_OFFSET);
dest = hdev->asic_prop.uboot_ver;
name = "U-Boot";
break;
case FW_COMP_PREBOOT:
ver_off = RREG32(mmPREBOOT_VER_OFFSET);
dest = hdev->asic_prop.preboot_ver;
name = "Preboot";
break;
default:
dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
return -EIO;
}
dynamic_loader = &hdev->fw_loader.dynamic_loader;
ver_off &= ~((u32)SRAM_BASE_ADDR);
/*
* here we update initial values for a few specific dynamic regs (before
* reading the first descriptor from FW, those values have to be
* hard-coded). In later stages of the protocol those values will be
* updated automatically by reading the FW descriptor, so data there
* will always be up-to-date
*/
dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
dyn_regs->kmd_msg_to_cpu =
cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
dyn_regs->cpu_cmd_status_to_host =
cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
VERSION_MAX_LEN);
} else {
dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
name, ver_off);
strcpy(dest, "unavailable");
dynamic_loader->wait_for_bl_timeout = GOYA_WAIT_FOR_BL_TIMEOUT_USEC;
}
return -EIO;
}
static void goya_init_static_firmware_loader(struct hl_device *hdev)
{
struct static_fw_load_mgr *static_loader;
return 0;
static_loader = &hdev->fw_loader.static_loader;
static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
}
static void goya_init_firmware_loader(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct fw_load_mgr *fw_loader = &hdev->fw_loader;
/* fill common fields */
fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;
fw_loader->boot_fit_timeout = GOYA_BOOT_FIT_REQ_TIMEOUT_USEC;
fw_loader->skip_bmc = false;
fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
fw_loader->dram_bar_id = DDR_BAR_ID;
if (prop->dynamic_fw_load)
goya_init_dynamic_firmware_loader(hdev);
else
goya_init_static_firmware_loader(hdev);
}
static int goya_init_cpu(struct hl_device *hdev)
@@ -2466,12 +2528,7 @@ static int goya_init_cpu(struct hl_device *hdev)
return -EIO;
}
-	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
-			mmPSOC_GLOBAL_CONF_UBOOT_MAGIC,
-			mmCPU_CMD_STATUS_TO_HOST,
-			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
-			false, GOYA_CPU_TIMEOUT_USEC,
-			GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
+	rc = hl_fw_init_cpu(hdev);
if (rc)
return rc;
@@ -2881,7 +2938,7 @@ void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
*dma_handle = hdev->asic_prop.sram_base_address;
-	base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
+	base = (__force void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
switch (queue_id) {
case GOYA_QUEUE_ID_MME:
@@ -3270,6 +3327,7 @@ already_pinned:
return 0;
unpin_memory:
+	list_del(&userptr->job_node);
hl_unpin_host_memory(hdev, userptr);
free_userptr:
kfree(userptr);
@@ -5169,54 +5227,13 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
}
static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
-				bool is_hard, u32 asid, u64 va, u64 size)
+				bool is_hard, u32 flags,
+				u32 asid, u64 va, u64 size)
{
-	struct goya_device *goya = hdev->asic_specific;
-	u32 status, timeout_usec, inv_data, pi;
-	int rc;
-
-	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
-		hdev->hard_reset_pending)
-		return 0;
-
-	/* no need in L1 only invalidation in Goya */
-	if (!is_hard)
-		return 0;
-
-	if (hdev->pldm)
-		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
-	else
-		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
-
-	/*
-	 * TODO: currently invalidate entire L0 & L1 as in regular hard
-	 * invalidation. Need to apply invalidation of specific cache lines with
-	 * mask of ASID & VA & size.
-	 * Note that L1 with be flushed entirely in any case.
-	 */
-
-	/* L0 & L1 invalidation */
-	inv_data = RREG32(mmSTLB_CACHE_INV);
-	/* PI is 8 bit */
-	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
-	WREG32(mmSTLB_CACHE_INV,
-			(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
-
-	rc = hl_poll_timeout(
-		hdev,
-		mmSTLB_INV_CONSUMER_INDEX,
-		status,
-		status == pi,
-		1000,
-		timeout_usec);
-
-	if (rc) {
-		dev_err_ratelimited(hdev->dev,
-					"MMU cache invalidation timeout\n");
-		hl_device_reset(hdev, HL_RESET_HARD);
-	}
-
-	return rc;
+	/* Treat as invalidate all because there is no range invalidation
+	 * in Goya
+	 */
+	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}
int goya_send_heartbeat(struct hl_device *hdev)
@@ -5239,7 +5256,9 @@ int goya_cpucp_info_get(struct hl_device *hdev)
if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
return 0;
-	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
+	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
+					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
+					mmCPU_BOOT_ERR1);
if (rc)
return rc;
@@ -5385,6 +5404,11 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
return hl_fw_get_eeprom_data(hdev, data, max_size);
}
static void goya_cpu_init_scrambler_dram(struct hl_device *hdev)
{
}
static int goya_ctx_init(struct hl_ctx *ctx)
{
if (ctx->asid != HL_KERNEL_ASID_ID)
@@ -5565,7 +5589,6 @@ static const struct hl_asic_funcs goya_funcs = {
.ctx_fini = goya_ctx_fini,
.get_clk_rate = goya_get_clk_rate,
.get_queue_id_for_cq = goya_get_queue_id_for_cq,
-	.read_device_fw_version = goya_read_device_fw_version,
.load_firmware_to_device = goya_load_firmware_to_device,
.load_boot_fit_to_device = goya_load_boot_fit_to_device,
.get_signal_cb_size = goya_get_signal_cb_size,
@@ -5584,7 +5607,9 @@ static const struct hl_asic_funcs goya_funcs = {
.get_hw_block_id = goya_get_hw_block_id,
.hw_block_mmap = goya_block_mmap,
.enable_events_from_fw = goya_enable_events_from_fw,
-	.map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx
+	.map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
+	.init_firmware_loader = goya_init_firmware_loader,
+	.init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram
};
/*


@@ -168,7 +168,7 @@ struct goya_device {
u8 device_cpu_mmu_mappings_done;
};
-int goya_get_fixed_properties(struct hl_device *hdev);
+int goya_set_fixed_properties(struct hl_device *hdev);
int goya_mmu_init(struct hl_device *hdev);
void goya_init_dma_qmans(struct hl_device *hdev);
void goya_init_mme_qmans(struct hl_device *hdev);


@@ -434,7 +434,7 @@ static int goya_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
WREG32(mmPSOC_ETR_MODE, input->sink_mode);
-	if (hdev->asic_prop.fw_security_disabled) {
+	if (!hdev->asic_prop.fw_security_enabled) {
/* make ETR not privileged */
val = FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);
/* make ETR non-secured (inverted logic) */


@@ -84,6 +84,20 @@ struct hl_eq_sm_sei_data {
__u8 pad[3];
};
enum hl_fw_alive_severity {
FW_ALIVE_SEVERITY_MINOR,
FW_ALIVE_SEVERITY_CRITICAL
};
struct hl_eq_fw_alive {
__le64 uptime_seconds;
__le32 process_id;
__le32 thread_id;
/* enum hl_fw_alive_severity */
__u8 severity;
__u8 pad[7];
};
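
To make the new FW-alive event concrete, here is a minimal host-side sketch of decoding this payload once the entry is dequeued; the handler name and the log format are illustrative only, not the driver's actual code:

/* Sketch only: decode an FW-alive EQ payload and log it */
static void handle_fw_alive(struct hl_device *hdev,
				struct hl_eq_fw_alive *fw_alive)
{
	dev_warn(hdev->dev,
		"FW alive: severity=%s pid=%u tid=%u uptime=%llu s\n",
		fw_alive->severity == FW_ALIVE_SEVERITY_CRITICAL ?
				"critical" : "minor",
		le32_to_cpu(fw_alive->process_id),
		le32_to_cpu(fw_alive->thread_id),
		le64_to_cpu(fw_alive->uptime_seconds));
}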
struct hl_eq_entry {
struct hl_eq_header hdr;
union {
@@ -91,6 +105,7 @@ struct hl_eq_entry {
struct hl_eq_hbm_ecc_data hbm_ecc_data;
struct hl_eq_sm_sei_data sm_sei_data;
struct cpucp_pkt_sync_err pkt_sync_err;
struct hl_eq_fw_alive fw_alive;
__le64 data[7];
};
};
@@ -103,11 +118,16 @@ struct hl_eq_entry {
#define EQ_CTL_EVENT_TYPE_SHIFT 16
#define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000
#define EQ_CTL_INDEX_SHIFT 0
#define EQ_CTL_INDEX_MASK 0x0000FFFF
enum pq_init_status {
PQ_INIT_STATUS_NA = 0,
PQ_INIT_STATUS_READY_FOR_CP,
PQ_INIT_STATUS_READY_FOR_HOST,
-	PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI
+	PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI,
+	PQ_INIT_STATUS_LEN_NOT_POWER_OF_TWO_ERR,
+	PQ_INIT_STATUS_ILLEGAL_Q_ADDR_ERR
};
/*
@@ -384,6 +404,20 @@ enum cpucp_packet_id {
#define CPUCP_PKT_RES_PLL_OUT3_SHIFT 48
#define CPUCP_PKT_RES_PLL_OUT3_MASK 0xFFFF000000000000ull
#define CPUCP_PKT_VAL_PFC_IN1_SHIFT 0
#define CPUCP_PKT_VAL_PFC_IN1_MASK 0x0000000000000001ull
#define CPUCP_PKT_VAL_PFC_IN2_SHIFT 1
#define CPUCP_PKT_VAL_PFC_IN2_MASK 0x000000000000001Eull
#define CPUCP_PKT_VAL_LPBK_IN1_SHIFT 0
#define CPUCP_PKT_VAL_LPBK_IN1_MASK 0x0000000000000001ull
#define CPUCP_PKT_VAL_LPBK_IN2_SHIFT 1
#define CPUCP_PKT_VAL_LPBK_IN2_MASK 0x000000000000001Eull
/* heartbeat status bits */
#define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT 0
#define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK 0x00000001
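
A hedged sketch of reacting to this new heartbeat status bit; the local variable name is invented, and the real handling lives in the common heartbeat path:

/* Sketch: "result" holds the le32-converted status_mask of the reply */
if (result & CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK)
	dev_err(hdev->dev, "FW reported an event-queue fault in heartbeat\n");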
struct cpucp_packet {
union {
__le64 value; /* For SET packets */
@@ -425,6 +459,12 @@ struct cpucp_packet {
/* For get CpuCP info/EEPROM data/NIC info */
__le32 data_max_size;
/*
* For any general status bitmask. Shall be used whenever the
* result cannot be used to hold general purpose data.
*/
__le32 status_mask;
};
__le32 reserved;
@@ -629,6 +669,8 @@ struct cpucp_security_info {
* @card_name: card name that will be displayed in HWMON subsystem on the host
* @sec_info: security information
* @pll_map: Bit map of supported PLLs for current ASIC version.
* @mme_binning_mask: MME binning mask,
* (0 = functional, 1 = binned)
*/
struct cpucp_info {
struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
@@ -651,6 +693,7 @@ struct cpucp_info {
struct cpucp_security_info sec_info;
__le32 reserved6;
__u8 pll_map[PLL_MAP_LEN];
__le64 mme_binning_mask;
};
struct cpucp_mac_addr {


@@ -8,7 +8,7 @@
#ifndef HL_BOOT_IF_H
#define HL_BOOT_IF_H
-#define LKD_HARD_RESET_MAGIC		0xED7BD694
+#define LKD_HARD_RESET_MAGIC		0xED7BD694	/* deprecated - do not use */
#define HL_POWER9_HOST_MAGIC 0x1DA30009
#define BOOT_FIT_SRAM_OFFSET 0x200000
@@ -99,6 +99,7 @@
#define CPU_BOOT_ERR0_PLL_FAIL (1 << 12)
#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << 13)
#define CPU_BOOT_ERR0_ENABLED (1 << 31)
#define CPU_BOOT_ERR1_ENABLED (1 << 31)
/*
* BOOT DEVICE STATUS bits in BOOT_DEVICE_STS registers
@@ -190,6 +191,24 @@
* PLLs.
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN GIC access permission only from
privileged entity. FW sets this status
* bit for host. If this bit is set then
* GIC can not be accessed from host.
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_EQ_INDEX_EN Event Queue (EQ) index is a running
* index for each new event sent to host.
* This is used as a method in host to
* identify that the waiting event in
* queue is actually a new event which
* was not served before.
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN Use multiple scratchpad interfaces to
* prevent IRQs overriding each other.
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_ENABLED Device status register enabled.
* This is a main indication that the
* running FW populates the device status
@@ -218,7 +237,11 @@
#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << 16)
#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << 17)
#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << 19)
#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN (1 << 20)
#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN (1 << 21)
#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN (1 << 22)
#define CPU_BOOT_DEV_STS0_ENABLED (1 << 31)
#define CPU_BOOT_DEV_STS1_ENABLED (1 << 31)
enum cpu_boot_status {
CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */
@@ -264,46 +287,98 @@ enum cpu_msg_status {
/* communication registers mapping - consider ABI when changing */
struct cpu_dyn_regs {
-	uint32_t cpu_pq_base_addr_low;
-	uint32_t cpu_pq_base_addr_high;
-	uint32_t cpu_pq_length;
-	uint32_t cpu_pq_init_status;
-	uint32_t cpu_eq_base_addr_low;
-	uint32_t cpu_eq_base_addr_high;
-	uint32_t cpu_eq_length;
-	uint32_t cpu_eq_ci;
-	uint32_t cpu_cq_base_addr_low;
-	uint32_t cpu_cq_base_addr_high;
-	uint32_t cpu_cq_length;
-	uint32_t cpu_pf_pq_pi;
-	uint32_t cpu_boot_dev_sts0;
-	uint32_t cpu_boot_dev_sts1;
-	uint32_t cpu_boot_err0;
-	uint32_t cpu_boot_err1;
-	uint32_t cpu_boot_status;
-	uint32_t fw_upd_sts;
-	uint32_t fw_upd_cmd;
-	uint32_t fw_upd_pending_sts;
-	uint32_t fuse_ver_offset;
-	uint32_t preboot_ver_offset;
-	uint32_t uboot_ver_offset;
-	uint32_t hw_state;
-	uint32_t kmd_msg_to_cpu;
-	uint32_t cpu_cmd_status_to_host;
-	uint32_t reserved1[32];		/* reserve for future use */
+	__le32 cpu_pq_base_addr_low;
+	__le32 cpu_pq_base_addr_high;
+	__le32 cpu_pq_length;
+	__le32 cpu_pq_init_status;
+	__le32 cpu_eq_base_addr_low;
+	__le32 cpu_eq_base_addr_high;
+	__le32 cpu_eq_length;
+	__le32 cpu_eq_ci;
+	__le32 cpu_cq_base_addr_low;
+	__le32 cpu_cq_base_addr_high;
+	__le32 cpu_cq_length;
+	__le32 cpu_pf_pq_pi;
+	__le32 cpu_boot_dev_sts0;
+	__le32 cpu_boot_dev_sts1;
+	__le32 cpu_boot_err0;
+	__le32 cpu_boot_err1;
+	__le32 cpu_boot_status;
+	__le32 fw_upd_sts;
+	__le32 fw_upd_cmd;
+	__le32 fw_upd_pending_sts;
+	__le32 fuse_ver_offset;
+	__le32 preboot_ver_offset;
+	__le32 uboot_ver_offset;
+	__le32 hw_state;
+	__le32 kmd_msg_to_cpu;
+	__le32 cpu_cmd_status_to_host;
+	union {
+		__le32 gic_host_irq_ctrl;
+		__le32 gic_host_pi_upd_irq;
+	};
+	__le32 gic_tpc_qm_irq_ctrl;
+	__le32 gic_mme_qm_irq_ctrl;
+	__le32 gic_dma_qm_irq_ctrl;
+	__le32 gic_nic_qm_irq_ctrl;
+	__le32 gic_dma_core_irq_ctrl;
+	__le32 gic_host_halt_irq;
+	__le32 gic_host_ints_irq;
+	__le32 reserved1[24];		/* reserve for future use */
};
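
The new gic_* fields are the "indirection when generating interrupts" mentioned in the tag: instead of hard-coding a scratchpad register, the driver rings whatever register the FW advertised in the descriptor. A sketch of the resulting write, mirroring how the gaudi code uses it (treat the exact arguments as illustrative):

/* Sketch: trigger the PI-update interrupt via the FW-advertised register */
WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
	gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);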
/* TODO: remove the desc magic after the code is updated to use message */
/* HCDM - Habana Communications Descriptor Magic */
#define HL_COMMS_DESC_MAGIC 0x4843444D
#define HL_COMMS_DESC_VER 1
/* HCMv - Habana Communications Message + header version */
#define HL_COMMS_MSG_MAGIC_VALUE 0x48434D00
#define HL_COMMS_MSG_MAGIC_MASK 0xFFFFFF00
#define HL_COMMS_MSG_MAGIC_VER_MASK 0xFF
#define HL_COMMS_MSG_MAGIC_VER(ver) (HL_COMMS_MSG_MAGIC_VALUE | \
((ver) & HL_COMMS_MSG_MAGIC_VER_MASK))
#define HL_COMMS_MSG_MAGIC_V0 HL_COMMS_DESC_MAGIC
#define HL_COMMS_MSG_MAGIC_V1 HL_COMMS_MSG_MAGIC_VER(1)
#define HL_COMMS_MSG_MAGIC HL_COMMS_MSG_MAGIC_V1
#define HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC(magic) \
(((magic) & HL_COMMS_MSG_MAGIC_MASK) == \
HL_COMMS_MSG_MAGIC_VALUE)
#define HL_COMMS_MSG_MAGIC_VALIDATE_VERSION(magic, ver) \
(((magic) & HL_COMMS_MSG_MAGIC_VER_MASK) >= \
((ver) & HL_COMMS_MSG_MAGIC_VER_MASK))
#define HL_COMMS_MSG_MAGIC_VALIDATE(magic, ver) \
(HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC((magic)) && \
HL_COMMS_MSG_MAGIC_VALIDATE_VERSION((magic), (ver)))
enum comms_msg_type {
HL_COMMS_DESC_TYPE = 0,
HL_COMMS_RESET_CAUSE_TYPE = 1,
};
/* TODO: remove this struct after the code is updated to use message */
/* this is the comms descriptor header - meta data */
struct comms_desc_header {
-	uint32_t magic;		/* magic for validation */
-	uint32_t crc32;		/* CRC32 of the descriptor w/o header */
-	uint16_t size;		/* size of the descriptor w/o header */
-	uint8_t version;	/* descriptor version */
-	uint8_t reserved[5];	/* pad to 64 bit */
+	__le32 magic;		/* magic for validation */
+	__le32 crc32;		/* CRC32 of the descriptor w/o header */
+	__le16 size;		/* size of the descriptor w/o header */
+	__u8 version;		/* descriptor version */
+	__u8 reserved[5];	/* pad to 64 bit */
};
/* this is the comms message header - meta data */
struct comms_msg_header {
__le32 magic; /* magic for validation */
__le32 crc32; /* CRC32 of the message w/o header */
__le16 size; /* size of the message w/o header */
__u8 version; /* message payload version */
__u8 type; /* message type */
__u8 reserved[4]; /* pad to 64 bit */
};
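
A hedged sketch of vetting such a header before trusting the payload; the function name is invented for illustration:

/* Sketch: validate a comms message header copied from device memory */
static int validate_comms_msg_header(struct comms_msg_header *hdr)
{
	u32 magic = le32_to_cpu(hdr->magic);

	if (!HL_COMMS_MSG_MAGIC_VALIDATE(magic, HL_COMMS_MSG_MAGIC_V1))
		return -EIO;	/* bad magic, or version older than v1 */

	return 0;
}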
/* this is the main FW descriptor - consider ABI when changing */
@@ -314,7 +389,36 @@ struct lkd_fw_comms_desc {
char cur_fw_ver[VERSION_MAX_LEN];
/* can be used for 1 more version w/o ABI change */
char reserved0[VERSION_MAX_LEN];
-	uint64_t img_addr;	/* address for next FW component load */
+	__le64 img_addr;	/* address for next FW component load */
};
enum comms_reset_cause {
HL_RESET_CAUSE_UNKNOWN = 0,
HL_RESET_CAUSE_HEARTBEAT = 1,
HL_RESET_CAUSE_TDR = 2,
};
/* TODO: remove define after struct name is aligned on all projects */
#define lkd_msg_comms lkd_fw_comms_msg
/* this is the comms message descriptor */
struct lkd_fw_comms_msg {
struct comms_msg_header header;
/* union for future expansions of new messages */
union {
struct {
struct cpu_dyn_regs cpu_dyn_regs;
char fuse_ver[VERSION_MAX_LEN];
char cur_fw_ver[VERSION_MAX_LEN];
/* can be used for 1 more version w/o ABI change */
char reserved0[VERSION_MAX_LEN];
/* address for next FW component load */
__le64 img_addr;
};
struct {
__u8 reset_cause;
};
};
};
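
To make the reset-cause flow concrete, a sketch of how the driver side might fill such a message before a hard reset; CRC/size bookkeeping is omitted and this is illustrative, not the patch's code:

/* Sketch: build a reset-cause message for the FW */
struct lkd_fw_comms_msg msg = {0};

msg.header.magic = cpu_to_le32(HL_COMMS_MSG_MAGIC);
msg.header.type = HL_COMMS_RESET_CAUSE_TYPE;
msg.reset_cause = HL_RESET_CAUSE_HEARTBEAT;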
/*
@@ -386,11 +490,11 @@ enum comms_cmd {
struct comms_command {
union { /* bit fields are only for FW use */
struct {
-			unsigned int size :25;		/* 32MB max. */
-			unsigned int reserved :2;
+			u32 size :25;			/* 32MB max. */
+			u32 reserved :2;
			enum comms_cmd cmd :5;		/* 32 commands */
		};
-		unsigned int val;
+		__le32 val;
};
};
@@ -449,11 +553,11 @@ enum comms_ram_types {
struct comms_status {
union { /* bit fields are only for FW use */
struct {
-			unsigned int offset :26;
-			unsigned int ram_type :2;
+			u32 offset :26;
+			enum comms_ram_types ram_type :2;
			enum comms_sts status :4;	/* 16 statuses */
		};
-		unsigned int val;
+		__le32 val;
};
};


@@ -252,10 +252,11 @@ enum gaudi_async_event_id {
GAUDI_EVENT_HBM3_SPI_0 = 407,
GAUDI_EVENT_HBM3_SPI_1 = 408,
GAUDI_EVENT_PSOC_GPIO_U16_0 = 421,
-	GAUDI_EVENT_PI_UPDATE = 484,
-	GAUDI_EVENT_HALT_MACHINE = 485,
-	GAUDI_EVENT_INTS_REGISTER = 486,
-	GAUDI_EVENT_SOFT_RESET = 487,
+	GAUDI_EVENT_NIC0_CS_DBG_DERR = 483,
+	GAUDI_EVENT_NIC1_CS_DBG_DERR = 487,
+	GAUDI_EVENT_NIC2_CS_DBG_DERR = 491,
+	GAUDI_EVENT_NIC3_CS_DBG_DERR = 495,
+	GAUDI_EVENT_NIC4_CS_DBG_DERR = 499,
GAUDI_EVENT_RAZWI_OR_ADC = 548,
GAUDI_EVENT_TPC0_QM = 572,
GAUDI_EVENT_TPC1_QM = 573,
@@ -303,6 +304,11 @@ enum gaudi_async_event_id {
GAUDI_EVENT_NIC3_QP1 = 619,
GAUDI_EVENT_NIC4_QP0 = 620,
GAUDI_EVENT_NIC4_QP1 = 621,
GAUDI_EVENT_PI_UPDATE = 635,
GAUDI_EVENT_HALT_MACHINE = 636,
GAUDI_EVENT_INTS_REGISTER = 637,
GAUDI_EVENT_SOFT_RESET = 638,
GAUDI_EVENT_FW_ALIVE_S = 645,
GAUDI_EVENT_DEV_RESET_REQ = 646,
GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647,
GAUDI_EVENT_FIX_POWER_ENV_S = 658,


@@ -507,23 +507,28 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = {
{ .fc_id = 480, .cpu_id = 329, .valid = 0, .name = "" },
{ .fc_id = 481, .cpu_id = 330, .valid = 0, .name = "" },
{ .fc_id = 482, .cpu_id = 331, .valid = 0, .name = "" },
-	{ .fc_id = 483, .cpu_id = 332, .valid = 0, .name = "" },
-	{ .fc_id = 484, .cpu_id = 333, .valid = 1, .name = "PI_UPDATE" },
-	{ .fc_id = 485, .cpu_id = 334, .valid = 1, .name = "HALT_MACHINE" },
-	{ .fc_id = 486, .cpu_id = 335, .valid = 1, .name = "INTS_REGISTER" },
-	{ .fc_id = 487, .cpu_id = 336, .valid = 1, .name = "SOFT_RESET" },
+	{ .fc_id = 483, .cpu_id = 332, .valid = 1,
+		.name = "NIC0_CS_DBG_DERR" },
+	{ .fc_id = 484, .cpu_id = 333, .valid = 0, .name = "" },
+	{ .fc_id = 485, .cpu_id = 334, .valid = 0, .name = "" },
+	{ .fc_id = 486, .cpu_id = 335, .valid = 0, .name = "" },
+	{ .fc_id = 487, .cpu_id = 336, .valid = 1,
+		.name = "NIC1_CS_DBG_DERR" },
{ .fc_id = 488, .cpu_id = 337, .valid = 0, .name = "" },
{ .fc_id = 489, .cpu_id = 338, .valid = 0, .name = "" },
{ .fc_id = 490, .cpu_id = 339, .valid = 0, .name = "" },
-	{ .fc_id = 491, .cpu_id = 340, .valid = 0, .name = "" },
+	{ .fc_id = 491, .cpu_id = 340, .valid = 1,
+		.name = "NIC2_CS_DBG_DERR" },
{ .fc_id = 492, .cpu_id = 341, .valid = 0, .name = "" },
{ .fc_id = 493, .cpu_id = 342, .valid = 0, .name = "" },
{ .fc_id = 494, .cpu_id = 343, .valid = 0, .name = "" },
-	{ .fc_id = 495, .cpu_id = 344, .valid = 0, .name = "" },
+	{ .fc_id = 495, .cpu_id = 344, .valid = 1,
+		.name = "NIC3_CS_DBG_DERR" },
{ .fc_id = 496, .cpu_id = 345, .valid = 0, .name = "" },
{ .fc_id = 497, .cpu_id = 346, .valid = 0, .name = "" },
{ .fc_id = 498, .cpu_id = 347, .valid = 0, .name = "" },
-	{ .fc_id = 499, .cpu_id = 348, .valid = 0, .name = "" },
+	{ .fc_id = 499, .cpu_id = 348, .valid = 1,
+		.name = "NIC4_CS_DBG_DERR" },
{ .fc_id = 500, .cpu_id = 349, .valid = 0, .name = "" },
{ .fc_id = 501, .cpu_id = 350, .valid = 0, .name = "" },
{ .fc_id = 502, .cpu_id = 351, .valid = 0, .name = "" },
@@ -659,17 +664,17 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = {
{ .fc_id = 632, .cpu_id = 481, .valid = 0, .name = "" },
{ .fc_id = 633, .cpu_id = 482, .valid = 0, .name = "" },
{ .fc_id = 634, .cpu_id = 483, .valid = 0, .name = "" },
-	{ .fc_id = 635, .cpu_id = 484, .valid = 0, .name = "" },
-	{ .fc_id = 636, .cpu_id = 485, .valid = 0, .name = "" },
-	{ .fc_id = 637, .cpu_id = 486, .valid = 0, .name = "" },
-	{ .fc_id = 638, .cpu_id = 487, .valid = 0, .name = "" },
+	{ .fc_id = 635, .cpu_id = 484, .valid = 1, .name = "PI_UPDATE" },
+	{ .fc_id = 636, .cpu_id = 485, .valid = 1, .name = "HALT_MACHINE" },
+	{ .fc_id = 637, .cpu_id = 486, .valid = 1, .name = "INTS_REGISTER" },
+	{ .fc_id = 638, .cpu_id = 487, .valid = 1, .name = "SOFT_RESET" },
{ .fc_id = 639, .cpu_id = 488, .valid = 0, .name = "" },
{ .fc_id = 640, .cpu_id = 489, .valid = 0, .name = "" },
{ .fc_id = 641, .cpu_id = 490, .valid = 0, .name = "" },
{ .fc_id = 642, .cpu_id = 491, .valid = 0, .name = "" },
{ .fc_id = 643, .cpu_id = 492, .valid = 0, .name = "" },
{ .fc_id = 644, .cpu_id = 493, .valid = 0, .name = "" },
-	{ .fc_id = 645, .cpu_id = 494, .valid = 0, .name = "" },
+	{ .fc_id = 645, .cpu_id = 494, .valid = 1, .name = "FW_ALIVE_S" },
{ .fc_id = 646, .cpu_id = 495, .valid = 1, .name = "DEV_RESET_REQ" },
{ .fc_id = 647, .cpu_id = 496, .valid = 1,
.name = "PKT_QUEUE_OUT_SYNC" },


@@ -20,6 +20,9 @@
#define UBOOT_FW_OFFSET 0x100000 /* 1MB in SRAM */
#define LINUX_FW_OFFSET 0x800000 /* 8MB in HBM */
/* HBM thermal delta in [Deg] added to composite (CTemp) */
#define HBM_TEMP_ADJUST_COEFF 6
enum gaudi_nic_axi_error {
RXB,
RXE,
@@ -27,6 +30,7 @@ enum gaudi_nic_axi_error {
TXE,
QPC_RESP,
NON_AXI_ERR,
TMR,
};
/*
@@ -42,6 +46,48 @@ struct eq_nic_sei_event {
__u8 pad[6];
};
/*
* struct gaudi_nic_status - describes the status of a NIC port.
* @port: NIC port index.
* @bad_format_cnt: e.g. CRC.
* @responder_out_of_sequence_psn_cnt: e.g. NAK.
* @high_ber_reinit: link reinit due to high BER.
* @correctable_err_cnt: e.g. bit-flip.
* @uncorrectable_err_cnt: e.g. MAC errors.
* @retraining_cnt: re-training counter.
* @up: is port up.
* @pcs_link: has PCS link.
* @phy_ready: is PHY ready.
* @auto_neg: is Autoneg enabled.
* @timeout_retransmission_cnt: timeout retransmission events.
* @high_ber_cnt: high BER events.
*/
struct gaudi_nic_status {
__u32 port;
__u32 bad_format_cnt;
__u32 responder_out_of_sequence_psn_cnt;
__u32 high_ber_reinit;
__u32 correctable_err_cnt;
__u32 uncorrectable_err_cnt;
__u32 retraining_cnt;
__u8 up;
__u8 pcs_link;
__u8 phy_ready;
__u8 auto_neg;
__u32 timeout_retransmission_cnt;
__u32 high_ber_cnt;
};
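
For illustration, a small userspace-style sketch of consuming such a status snapshot; the function name and output format are invented, and it assumes <stdio.h>:

/* Sketch: summarize one NIC port's health from a status snapshot */
static void print_nic_status(const struct gaudi_nic_status *s)
{
	printf("port %u: up=%u pcs_link=%u phy_ready=%u high_ber=%u\n",
		s->port, s->up, s->pcs_link, s->phy_ready, s->high_ber_cnt);
}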
struct gaudi_flops_2_data {
union {
struct {
__u32 spsram_init_done : 1;
__u32 reserved : 31;
};
__u32 data;
};
};
#define GAUDI_PLL_FREQ_LOW 200000000 /* 200 MHz */
#endif /* GAUDI_FW_IF_H */


@@ -66,7 +66,8 @@
#define PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK (\
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_PQF_STOP_ON_ERR_MASK, 0xF)) | \
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_CQF_STOP_ON_ERR_MASK, 0xF)) | \
-	(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_MASK, 0xF)))
+	(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_MASK, 0xF)) | \
+	(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_ARB_STOP_ON_ERR_MASK, 0x1)))
#define HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK (\
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_PQF_ERR_MSG_EN_MASK, 0xF)) | \
@@ -76,7 +77,8 @@
#define HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK (\
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_PQF_STOP_ON_ERR_MASK, 0xF)) | \
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_CQF_STOP_ON_ERR_MASK, 0x1F)) | \
-	(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_MASK, 0x1F)))
+	(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_MASK, 0x1F)) | \
+	(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_ARB_STOP_ON_ERR_MASK, 0x1)))
#define TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK (\
(FIELD_PREP(TPC0_QM_GLBL_ERR_CFG_PQF_ERR_MSG_EN_MASK, 0xF)) | \
@@ -86,7 +88,8 @@
#define TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK (\
(FIELD_PREP(TPC0_QM_GLBL_ERR_CFG_PQF_STOP_ON_ERR_MASK, 0xF)) | \
(FIELD_PREP(TPC0_QM_GLBL_ERR_CFG_CQF_STOP_ON_ERR_MASK, 0x1F)) | \
-	(FIELD_PREP(TPC0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_MASK, 0x1F)))
+	(FIELD_PREP(TPC0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_MASK, 0x1F)) | \
+	(FIELD_PREP(TPC0_QM_GLBL_ERR_CFG_ARB_STOP_ON_ERR_MASK, 0x1)))
#define MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK (\
(FIELD_PREP(MME0_QM_GLBL_ERR_CFG_PQF_ERR_MSG_EN_MASK, 0xF)) | \
@@ -96,7 +99,8 @@
#define MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK (\
(FIELD_PREP(MME0_QM_GLBL_ERR_CFG_PQF_STOP_ON_ERR_MASK, 0xF)) | \
(FIELD_PREP(MME0_QM_GLBL_ERR_CFG_CQF_STOP_ON_ERR_MASK, 0x1F)) | \
-	(FIELD_PREP(MME0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_MASK, 0x1F)))
+	(FIELD_PREP(MME0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_MASK, 0x1F)) | \
+	(FIELD_PREP(MME0_QM_GLBL_ERR_CFG_ARB_STOP_ON_ERR_MASK, 0x1)))
#define NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK (\
(FIELD_PREP(NIC0_QM0_GLBL_ERR_CFG_PQF_ERR_MSG_EN_MASK, 0xF)) | \
@@ -106,7 +110,8 @@
#define NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK (\
(FIELD_PREP(NIC0_QM0_GLBL_ERR_CFG_PQF_STOP_ON_ERR_MASK, 0xF)) | \
(FIELD_PREP(NIC0_QM0_GLBL_ERR_CFG_CQF_STOP_ON_ERR_MASK, 0xF)) | \
-	(FIELD_PREP(NIC0_QM0_GLBL_ERR_CFG_CP_STOP_ON_ERR_MASK, 0xF)))
+	(FIELD_PREP(NIC0_QM0_GLBL_ERR_CFG_CP_STOP_ON_ERR_MASK, 0xF)) | \
+	(FIELD_PREP(NIC0_QM0_GLBL_ERR_CFG_ARB_STOP_ON_ERR_MASK, 0x1)))
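
Each of these masks now also stops the QMAN arbiter on error, matching the tag's "enable by default stop-on-err" item. A hedged sketch of applying one of them; the register name exists in the gaudi register files, but the surrounding call is illustrative rather than the patch's code:

/* Sketch: arm stop-on-error in the PCI DMA QMAN error config register */
WREG32(mmDMA0_QM_GLBL_ERR_CFG,
	PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK);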
#define QMAN_CGM1_PWR_GATE_EN (FIELD_PREP(DMA0_QM_CGM_CFG1_MASK_TH_MASK, 0xA))


@@ -12,6 +12,16 @@
* PSOC scratch-pad registers
*/
#define mmHW_STATE mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
/* TODO: remove mmGIC_HOST_IRQ_CTRL_POLL_REG */
#define mmGIC_HOST_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
#define mmGIC_HOST_PI_UPD_IRQ_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
#define mmGIC_TPC_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
#define mmGIC_MME_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
#define mmGIC_DMA_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_4
#define mmGIC_NIC_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_5
#define mmGIC_DMA_CR_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_6
#define mmGIC_HOST_HALT_IRQ_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_7
#define mmGIC_HOST_INTS_IRQ_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
#define mmCPU_BOOT_DEV_STS0 mmPSOC_GLOBAL_CONF_SCRATCHPAD_20
#define mmCPU_BOOT_DEV_STS1 mmPSOC_GLOBAL_CONF_SCRATCHPAD_21
#define mmFUSE_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_22


@@ -313,6 +313,7 @@ enum hl_device_status {
* HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore
* HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption
* HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency
* HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls
*/
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
@@ -331,6 +332,7 @@ enum hl_device_status {
#define HL_INFO_TOTAL_ENERGY 15
#define HL_INFO_PLL_FREQUENCY 16
#define HL_INFO_POWER 17
#define HL_INFO_OPEN_STATS 18
#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
@@ -444,6 +446,16 @@ struct hl_pll_frequency_info {
__u16 output[HL_PLL_NUM_OUTPUTS];
};
/**
* struct hl_open_stats_info - device open statistics information
* @open_counter: ever growing counter, increased on each successful dev open
* @last_open_period_ms: duration (ms) device was open last time
*/
struct hl_open_stats_info {
__u64 open_counter;
__u64 last_open_period_ms;
};
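
Userspace retrieves this through the INFO ioctl with the new HL_INFO_OPEN_STATS op. A hedged userspace sketch, assuming an already-open device fd and the usual <sys/ioctl.h>, <stdint.h> and <stdio.h> includes:

/* Sketch: query device-open statistics from userspace */
struct hl_open_stats_info stats = {0};
struct hl_info_args args = {
	.return_pointer = (__u64) (uintptr_t) &stats,
	.return_size = sizeof(stats),
	.op = HL_INFO_OPEN_STATS,
};

if (!ioctl(fd, HL_IOCTL_INFO, &args))
	printf("opens=%llu last_open=%llu ms\n",
		(unsigned long long) stats.open_counter,
		(unsigned long long) stats.last_open_period_ms);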
/**
* struct hl_power_info - power information
* @power: power consumption
@@ -664,6 +676,7 @@ struct hl_cs_chunk {
#define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST 0x80
#define HL_CS_FLAGS_STAGED_SUBMISSION_LAST 0x100
#define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200
#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400
#define HL_CS_STATUS_SUCCESS 0