bnxt_en: Discover firmware error recovery capabilities.

Call the new firmware API HWRM_ERROR_RECOVERY_QCFG if it is supported
to discover the firmware health and recovery capabilities and settings.
This feature allows the driver to reset the chip if firmware crashes and
becomes unresponsive.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Michael Chan 2019-08-29 23:54:53 -04:00 committed by David S. Miller
parent ec5d31e3c1
commit 07f83d72d2
2 changed files with 115 additions and 0 deletions

View File

@ -6847,6 +6847,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
bp->fw_cap |= BNXT_FW_CAP_PCIE_STATS_SUPPORTED;
if (flags & FUNC_QCAPS_RESP_FLAGS_EXT_STATS_SUPPORTED)
bp->fw_cap |= BNXT_FW_CAP_EXT_STATS_SUPPORTED;
if (flags & FUNC_QCAPS_RESP_FLAGS_ERROR_RECOVERY_CAPABLE)
bp->fw_cap |= BNXT_FW_CAP_ERROR_RECOVERY;
bp->tx_push_thresh = 0;
if (flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED)
@ -6948,6 +6950,74 @@ hwrm_cfa_adv_qcaps_exit:
return rc;
}
static int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
{
struct hwrm_error_recovery_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
struct bnxt_fw_health *fw_health = bp->fw_health;
struct hwrm_error_recovery_qcfg_input req = {0};
int rc, i;
if (!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
return 0;
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_ERROR_RECOVERY_QCFG, -1, -1);
mutex_lock(&bp->hwrm_cmd_lock);
rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
goto err_recovery_out;
if (!fw_health) {
fw_health = kzalloc(sizeof(*fw_health), GFP_KERNEL);
bp->fw_health = fw_health;
if (!fw_health) {
rc = -ENOMEM;
goto err_recovery_out;
}
}
fw_health->flags = le32_to_cpu(resp->flags);
if ((fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU) &&
!(bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL)) {
rc = -EINVAL;
goto err_recovery_out;
}
fw_health->polling_dsecs = le32_to_cpu(resp->driver_polling_freq);
fw_health->master_func_wait_dsecs =
le32_to_cpu(resp->master_func_wait_period);
fw_health->normal_func_wait_dsecs =
le32_to_cpu(resp->normal_func_wait_period);
fw_health->post_reset_wait_dsecs =
le32_to_cpu(resp->master_func_wait_period_after_reset);
fw_health->post_reset_max_wait_dsecs =
le32_to_cpu(resp->max_bailout_time_after_reset);
fw_health->regs[BNXT_FW_HEALTH_REG] =
le32_to_cpu(resp->fw_health_status_reg);
fw_health->regs[BNXT_FW_HEARTBEAT_REG] =
le32_to_cpu(resp->fw_heartbeat_reg);
fw_health->regs[BNXT_FW_RESET_CNT_REG] =
le32_to_cpu(resp->fw_reset_cnt_reg);
fw_health->regs[BNXT_FW_RESET_INPROG_REG] =
le32_to_cpu(resp->reset_inprogress_reg);
fw_health->fw_reset_inprog_reg_mask =
le32_to_cpu(resp->reset_inprogress_reg_mask);
fw_health->fw_reset_seq_cnt = resp->reg_array_cnt;
if (fw_health->fw_reset_seq_cnt >= 16) {
rc = -EINVAL;
goto err_recovery_out;
}
for (i = 0; i < fw_health->fw_reset_seq_cnt; i++) {
fw_health->fw_reset_seq_regs[i] =
le32_to_cpu(resp->reset_reg[i]);
fw_health->fw_reset_seq_vals[i] =
le32_to_cpu(resp->reset_reg_val[i]);
fw_health->fw_reset_seq_delay_msec[i] =
resp->delay_after_reset[i];
}
err_recovery_out:
mutex_unlock(&bp->hwrm_cmd_lock);
if (rc)
bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
return rc;
}
static int bnxt_hwrm_func_reset(struct bnxt *bp)
{
struct hwrm_func_reset_input req = {0};
@ -10058,6 +10128,11 @@ static int bnxt_fw_init_one_p2(struct bnxt *bp)
netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n",
rc);
rc = bnxt_hwrm_error_recovery_qcfg(bp);
if (rc)
netdev_warn(bp->dev, "hwrm query error recovery failure rc: %d\n",
rc);
rc = bnxt_hwrm_func_drv_rgtr(bp);
if (rc)
return -ENODEV;
@ -11238,6 +11313,8 @@ init_err_pci_clean:
bnxt_free_ctx_mem(bp);
kfree(bp->ctx);
bp->ctx = NULL;
kfree(bp->fw_health);
bp->fw_health = NULL;
bnxt_cleanup_pci(bp);
init_err_free:

View File

@ -1333,6 +1333,41 @@ struct bnxt_ctx_mem_info {
struct bnxt_ctx_pg_info *tqm_mem[9];
};
struct bnxt_fw_health {
u32 flags;
u32 polling_dsecs;
u32 master_func_wait_dsecs;
u32 normal_func_wait_dsecs;
u32 post_reset_wait_dsecs;
u32 post_reset_max_wait_dsecs;
u32 regs[4];
u32 mapped_regs[4];
#define BNXT_FW_HEALTH_REG 0
#define BNXT_FW_HEARTBEAT_REG 1
#define BNXT_FW_RESET_CNT_REG 2
#define BNXT_FW_RESET_INPROG_REG 3
u32 fw_reset_inprog_reg_mask;
u32 last_fw_heartbeat;
u32 last_fw_reset_cnt;
u8 enabled:1;
u8 master:1;
u8 tmr_multiplier;
u8 tmr_counter;
u8 fw_reset_seq_cnt;
u32 fw_reset_seq_regs[16];
u32 fw_reset_seq_vals[16];
u32 fw_reset_seq_delay_msec[16];
};
#define BNXT_FW_HEALTH_REG_TYPE_MASK 3
#define BNXT_FW_HEALTH_REG_TYPE_CFG 0
#define BNXT_FW_HEALTH_REG_TYPE_GRC 1
#define BNXT_FW_HEALTH_REG_TYPE_BAR0 2
#define BNXT_FW_HEALTH_REG_TYPE_BAR1 3
#define BNXT_FW_HEALTH_REG_TYPE(reg) ((reg) & BNXT_FW_HEALTH_REG_TYPE_MASK)
#define BNXT_FW_HEALTH_REG_OFF(reg) ((reg) & ~BNXT_FW_HEALTH_REG_TYPE_MASK)
struct bnxt {
void __iomem *bar0;
void __iomem *bar1;
@ -1581,6 +1616,7 @@ struct bnxt {
#define BNXT_FW_CAP_KONG_MB_CHNL 0x00000080
#define BNXT_FW_CAP_OVS_64BIT_HANDLE 0x00000400
#define BNXT_FW_CAP_TRUSTED_VF 0x00000800
#define BNXT_FW_CAP_ERROR_RECOVERY 0x00002000
#define BNXT_FW_CAP_PKG_VER 0x00004000
#define BNXT_FW_CAP_CFA_ADV_FLOW 0x00008000
#define BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX 0x00010000
@ -1666,6 +1702,8 @@ struct bnxt {
#define BNXT_UPDATE_PHY_SP_EVENT 16
#define BNXT_RING_COAL_NOW_SP_EVENT 17
struct bnxt_fw_health *fw_health;
struct bnxt_hw_resc hw_resc;
struct bnxt_pf_info pf;
struct bnxt_ctx_mem_info *ctx;