mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-26 05:34:13 +08:00
EDAC, amd64: Define and register UMC error decode function
How we need to decode UMC errors is different from how we decode bus errors, so let's define a new function for this. We also need a way to determine the UMC channel since we're not guaranteed that there is a fixed relation between channel and MCA bank. Signed-off-by: Yazen Ghannam <Yazen.Ghannam@amd.com> Cc: Aravind Gopalakrishnan <aravindksg.lkml@gmail.com> Cc: linux-edac <linux-edac@vger.kernel.org> Cc: x86-ml <x86@kernel.org> Link: http://lkml.kernel.org/r/1480359593-80369-1-git-send-email-Yazen.Ghannam@amd.com [ Fold in decode_synd_reg(), simplify. ] Signed-off-by: Borislav Petkov <bp@suse.de>
This commit is contained in:
parent
d27f3a348e
commit
713ad54675
@ -2392,7 +2392,13 @@ static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err,
|
|||||||
string = "Failed to map error addr to a csrow";
|
string = "Failed to map error addr to a csrow";
|
||||||
break;
|
break;
|
||||||
case ERR_CHANNEL:
|
case ERR_CHANNEL:
|
||||||
string = "unknown syndrome - possible error reporting race";
|
string = "Unknown syndrome - possible error reporting race";
|
||||||
|
break;
|
||||||
|
case ERR_SYND:
|
||||||
|
string = "MCA_SYND not valid - unknown syndrome and csrow";
|
||||||
|
break;
|
||||||
|
case ERR_NORM_ADDR:
|
||||||
|
string = "Cannot decode normalized address";
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
string = "WTF error";
|
string = "WTF error";
|
||||||
@ -2441,6 +2447,76 @@ static inline void decode_bus_error(int node_id, struct mce *m)
|
|||||||
__log_ecc_error(mci, &err, ecc_type);
|
__log_ecc_error(mci, &err, ecc_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* To find the UMC channel represented by this bank we need to match on its
|
||||||
|
* instance_id. The instance_id of a bank is held in the lower 32 bits of its
|
||||||
|
* IPID.
|
||||||
|
*/
|
||||||
|
static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
|
||||||
|
{
|
||||||
|
u32 umc_instance_id[] = {0x50f00, 0x150f00};
|
||||||
|
u32 instance_id = m->ipid & GENMASK(31, 0);
|
||||||
|
int i, channel = -1;
|
||||||
|
|
||||||
|
for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
|
||||||
|
if (umc_instance_id[i] == instance_id)
|
||||||
|
channel = i;
|
||||||
|
|
||||||
|
return channel;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void decode_umc_error(int node_id, struct mce *m)
|
||||||
|
{
|
||||||
|
u8 ecc_type = (m->status >> 45) & 0x3;
|
||||||
|
struct mem_ctl_info *mci;
|
||||||
|
struct amd64_pvt *pvt;
|
||||||
|
struct err_info err;
|
||||||
|
u64 sys_addr;
|
||||||
|
|
||||||
|
mci = edac_mc_find(node_id);
|
||||||
|
if (!mci)
|
||||||
|
return;
|
||||||
|
|
||||||
|
pvt = mci->pvt_info;
|
||||||
|
|
||||||
|
memset(&err, 0, sizeof(err));
|
||||||
|
|
||||||
|
if (m->status & MCI_STATUS_DEFERRED)
|
||||||
|
ecc_type = 3;
|
||||||
|
|
||||||
|
err.channel = find_umc_channel(pvt, m);
|
||||||
|
if (err.channel < 0) {
|
||||||
|
err.err_code = ERR_CHANNEL;
|
||||||
|
goto log_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
|
||||||
|
err.err_code = ERR_NORM_ADDR;
|
||||||
|
goto log_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
error_address_to_page_and_offset(sys_addr, &err);
|
||||||
|
|
||||||
|
if (!(m->status & MCI_STATUS_SYNDV)) {
|
||||||
|
err.err_code = ERR_SYND;
|
||||||
|
goto log_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc_type == 2) {
|
||||||
|
u8 length = (m->synd >> 18) & 0x3f;
|
||||||
|
|
||||||
|
if (length)
|
||||||
|
err.syndrome = (m->synd >> 32) & GENMASK(length - 1, 0);
|
||||||
|
else
|
||||||
|
err.err_code = ERR_CHANNEL;
|
||||||
|
}
|
||||||
|
|
||||||
|
err.csrow = m->synd & 0x7;
|
||||||
|
|
||||||
|
log_error:
|
||||||
|
__log_ecc_error(mci, &err, ecc_type);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Use pvt->F3 which contains the F3 CPU PCI device to get the related
|
* Use pvt->F3 which contains the F3 CPU PCI device to get the related
|
||||||
* F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
|
* F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
|
||||||
@ -3232,7 +3308,10 @@ static int init_one_instance(unsigned int nid)
|
|||||||
if (report_gart_errors)
|
if (report_gart_errors)
|
||||||
amd_report_gart_errors(true);
|
amd_report_gart_errors(true);
|
||||||
|
|
||||||
amd_register_ecc_decoder(decode_bus_error);
|
if (pvt->umc)
|
||||||
|
amd_register_ecc_decoder(decode_umc_error);
|
||||||
|
else
|
||||||
|
amd_register_ecc_decoder(decode_bus_error);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@ -3323,7 +3402,11 @@ static void remove_one_instance(unsigned int nid)
|
|||||||
|
|
||||||
/* unregister from EDAC MCE */
|
/* unregister from EDAC MCE */
|
||||||
amd_report_gart_errors(false);
|
amd_report_gart_errors(false);
|
||||||
amd_unregister_ecc_decoder(decode_bus_error);
|
|
||||||
|
if (pvt->umc)
|
||||||
|
amd_unregister_ecc_decoder(decode_umc_error);
|
||||||
|
else
|
||||||
|
amd_unregister_ecc_decoder(decode_bus_error);
|
||||||
|
|
||||||
kfree(ecc_stngs[nid]);
|
kfree(ecc_stngs[nid]);
|
||||||
ecc_stngs[nid] = NULL;
|
ecc_stngs[nid] = NULL;
|
||||||
|
@ -382,6 +382,8 @@ enum err_codes {
|
|||||||
ERR_NODE = -1,
|
ERR_NODE = -1,
|
||||||
ERR_CSROW = -2,
|
ERR_CSROW = -2,
|
||||||
ERR_CHANNEL = -3,
|
ERR_CHANNEL = -3,
|
||||||
|
ERR_SYND = -4,
|
||||||
|
ERR_NORM_ADDR = -5,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct err_info {
|
struct err_info {
|
||||||
|
Loading…
Reference in New Issue
Block a user