mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-24 05:04:00 +08:00
ipmi:msghandler: retry to get device id on an error
We fail to get the BMCS's device id with low probability when loading the ipmi driver and it causes BMC device registration failed. When this issue occurs we got below kernel prints: [Wed Sep 9 19:52:03 2020] ipmi_si IPI0001:00: IPMI message handler: device id demangle failed: -22 [Wed Sep 9 19:52:03 2020] IPMI BT: using default values [Wed Sep 9 19:52:03 2020] IPMI BT: req2rsp=5 secs retries=2 [Wed Sep 9 19:52:03 2020] ipmi_si IPI0001:00: Unable to get the device id: -5 [Wed Sep 9 19:52:04 2020] ipmi_si IPI0001:00: Unable to register device: error -5 When this issue happens, we want to manually unload the driver and try to load it again, but it can't be unloaded by 'rmmod' as it is already 'in use'. We add a print in handle_one_recv_msg(), when this issue happens, the msg we received is "Recv: 1c 01 d5", which means the data_len is 1, data[0] is 0xd5 (completion code), which means "bmc cannot execute command. Command, or request parameter(s), not supported in present state". Debug code: static int handle_one_recv_msg(struct ipmi_smi *intf, struct ipmi_smi_msg *msg) { printk("Recv: %*ph\n", msg->rsp_size, msg->rsp); ... ... } Then in ipmi_demangle_device_id(), it returned '-EINVAL' as 'data_len < 7' and 'data[0] != 0'. We created this patch to retry the get device id when this error happens. We reproduced this issue again and the retry succeed on the first retry, we finally got the correct msg and then all is ok: Recv: 1c 01 00 01 81 05 84 02 af db 07 00 01 00 b9 00 10 00 So use a retry machanism in this patch to give bmc more opportunity to correctly response kernel when we received specific completion codes. Signed-off-by: Xianting Tian <tian.xianting@h3c.com> Message-Id: <20200915071817.4484-1-tian.xianting@h3c.com> [Cleaned up the verbage a bit in the header and prints.] Signed-off-by: Corey Minyard <cminyard@mvista.com>
This commit is contained in:
parent
c2b1e76d8c
commit
f8910ffa81
@ -34,6 +34,7 @@
|
|||||||
#include <linux/uuid.h>
|
#include <linux/uuid.h>
|
||||||
#include <linux/nospec.h>
|
#include <linux/nospec.h>
|
||||||
#include <linux/vmalloc.h>
|
#include <linux/vmalloc.h>
|
||||||
|
#include <linux/delay.h>
|
||||||
|
|
||||||
#define IPMI_DRIVER_VERSION "39.2"
|
#define IPMI_DRIVER_VERSION "39.2"
|
||||||
|
|
||||||
@ -60,6 +61,9 @@ enum ipmi_panic_event_op {
|
|||||||
#else
|
#else
|
||||||
#define IPMI_PANIC_DEFAULT IPMI_SEND_PANIC_EVENT_NONE
|
#define IPMI_PANIC_DEFAULT IPMI_SEND_PANIC_EVENT_NONE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define GET_DEVICE_ID_MAX_RETRY 5
|
||||||
|
|
||||||
static enum ipmi_panic_event_op ipmi_send_panic_event = IPMI_PANIC_DEFAULT;
|
static enum ipmi_panic_event_op ipmi_send_panic_event = IPMI_PANIC_DEFAULT;
|
||||||
|
|
||||||
static int panic_op_write_handler(const char *val,
|
static int panic_op_write_handler(const char *val,
|
||||||
@ -317,6 +321,7 @@ struct bmc_device {
|
|||||||
int dyn_guid_set;
|
int dyn_guid_set;
|
||||||
struct kref usecount;
|
struct kref usecount;
|
||||||
struct work_struct remove_work;
|
struct work_struct remove_work;
|
||||||
|
char cc; /* completion code */
|
||||||
};
|
};
|
||||||
#define to_bmc_device(x) container_of((x), struct bmc_device, pdev.dev)
|
#define to_bmc_device(x) container_of((x), struct bmc_device, pdev.dev)
|
||||||
|
|
||||||
@ -2381,6 +2386,8 @@ static void bmc_device_id_handler(struct ipmi_smi *intf,
|
|||||||
msg->msg.data, msg->msg.data_len, &intf->bmc->fetch_id);
|
msg->msg.data, msg->msg.data_len, &intf->bmc->fetch_id);
|
||||||
if (rv) {
|
if (rv) {
|
||||||
dev_warn(intf->si_dev, "device id demangle failed: %d\n", rv);
|
dev_warn(intf->si_dev, "device id demangle failed: %d\n", rv);
|
||||||
|
/* record completion code when error */
|
||||||
|
intf->bmc->cc = msg->msg.data[0];
|
||||||
intf->bmc->dyn_id_set = 0;
|
intf->bmc->dyn_id_set = 0;
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
@ -2426,19 +2433,34 @@ send_get_device_id_cmd(struct ipmi_smi *intf)
|
|||||||
static int __get_device_id(struct ipmi_smi *intf, struct bmc_device *bmc)
|
static int __get_device_id(struct ipmi_smi *intf, struct bmc_device *bmc)
|
||||||
{
|
{
|
||||||
int rv;
|
int rv;
|
||||||
|
unsigned int retry_count = 0;
|
||||||
bmc->dyn_id_set = 2;
|
|
||||||
|
|
||||||
intf->null_user_handler = bmc_device_id_handler;
|
intf->null_user_handler = bmc_device_id_handler;
|
||||||
|
|
||||||
|
retry:
|
||||||
|
bmc->cc = 0;
|
||||||
|
bmc->dyn_id_set = 2;
|
||||||
|
|
||||||
rv = send_get_device_id_cmd(intf);
|
rv = send_get_device_id_cmd(intf);
|
||||||
if (rv)
|
if (rv)
|
||||||
goto out_reset_handler;
|
goto out_reset_handler;
|
||||||
|
|
||||||
wait_event(intf->waitq, bmc->dyn_id_set != 2);
|
wait_event(intf->waitq, bmc->dyn_id_set != 2);
|
||||||
|
|
||||||
if (!bmc->dyn_id_set)
|
if (!bmc->dyn_id_set) {
|
||||||
|
if ((bmc->cc == IPMI_DEVICE_IN_FW_UPDATE_ERR
|
||||||
|
|| bmc->cc == IPMI_DEVICE_IN_INIT_ERR
|
||||||
|
|| bmc->cc == IPMI_NOT_IN_MY_STATE_ERR)
|
||||||
|
&& ++retry_count <= GET_DEVICE_ID_MAX_RETRY) {
|
||||||
|
msleep(500);
|
||||||
|
dev_warn(intf->si_dev,
|
||||||
|
"BMC returned 0x%2.2x, retry get bmc device id\n",
|
||||||
|
bmc->cc);
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
|
||||||
rv = -EIO; /* Something went wrong in the fetch. */
|
rv = -EIO; /* Something went wrong in the fetch. */
|
||||||
|
}
|
||||||
|
|
||||||
/* dyn_id_set makes the id data available. */
|
/* dyn_id_set makes the id data available. */
|
||||||
smp_rmb();
|
smp_rmb();
|
||||||
@ -3246,7 +3268,6 @@ channel_handler(struct ipmi_smi *intf, struct ipmi_recv_msg *msg)
|
|||||||
/* It's the one we want */
|
/* It's the one we want */
|
||||||
if (msg->msg.data[0] != 0) {
|
if (msg->msg.data[0] != 0) {
|
||||||
/* Got an error from the channel, just go on. */
|
/* Got an error from the channel, just go on. */
|
||||||
|
|
||||||
if (msg->msg.data[0] == IPMI_INVALID_COMMAND_ERR) {
|
if (msg->msg.data[0] == IPMI_INVALID_COMMAND_ERR) {
|
||||||
/*
|
/*
|
||||||
* If the MC does not support this
|
* If the MC does not support this
|
||||||
|
@ -69,6 +69,8 @@
|
|||||||
#define IPMI_ERR_MSG_TRUNCATED 0xc6
|
#define IPMI_ERR_MSG_TRUNCATED 0xc6
|
||||||
#define IPMI_REQ_LEN_INVALID_ERR 0xc7
|
#define IPMI_REQ_LEN_INVALID_ERR 0xc7
|
||||||
#define IPMI_REQ_LEN_EXCEEDED_ERR 0xc8
|
#define IPMI_REQ_LEN_EXCEEDED_ERR 0xc8
|
||||||
|
#define IPMI_DEVICE_IN_FW_UPDATE_ERR 0xd1
|
||||||
|
#define IPMI_DEVICE_IN_INIT_ERR 0xd2
|
||||||
#define IPMI_NOT_IN_MY_STATE_ERR 0xd5 /* IPMI 2.0 */
|
#define IPMI_NOT_IN_MY_STATE_ERR 0xd5 /* IPMI 2.0 */
|
||||||
#define IPMI_LOST_ARBITRATION_ERR 0x81
|
#define IPMI_LOST_ARBITRATION_ERR 0x81
|
||||||
#define IPMI_BUS_ERR 0x82
|
#define IPMI_BUS_ERR 0x82
|
||||||
|
Loading…
Reference in New Issue
Block a user