From f5e886ef9b45a3dbfd42b054a13c755894ea8402 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Sep 2016 13:23:25 -0700 Subject: [PATCH 01/16] x86/MCE: Do not look at panic_on_oops in the severity grading The MCE tolerance levels control whether we panic on a machine check or do something else like generating a signal and logging error information. This is controlled by the mce= command line parameter. However, if panic_on_oops is set, it will force a panic for such an MCE even though the user didn't want to. So don't check panic_on_oops in the severity grading anymore. One of the use cases for that is recovery from uncorrectable errors with mce=2. [ Boris: rewrite commit message. ] Signed-off-by: Yinghai Lu Acked-by: Tony Luck Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/20160916202325.4972-1-yinghai@kernel.org Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce-severity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 631356c8cca4..c7efbcfbeda6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -311,7 +311,7 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e *msg = s->msg; s->covered = 1; if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) { - if (panic_on_oops || tolerant < 1) + if (tolerant < 1) return MCE_PANIC_SEVERITY; } return s->sev; From 8c203dbb78ca7a9aed4e2570c866b0f43c752e41 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 1 Nov 2016 13:04:41 +0100 Subject: [PATCH 02/16] x86/RAS: Add TSC timestamp to the injected MCE The MCE injection code does not provide the time stamp information for the injected MCE. Add it. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20161101120911.13163-3-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/ras/mce_amd_inj.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index 1ac76479c266..8730c2882fff 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -275,6 +275,8 @@ static void do_inject(void) unsigned int cpu = i_mce.extcpu; u8 b = i_mce.bank; + rdtscll(i_mce.tsc); + if (i_mce.misc) i_mce.status |= MCI_STATUS_MISCV; From cd9c57cad3fe89ea949b9266cddc947c0838f7af Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 1 Nov 2016 12:52:27 +0100 Subject: [PATCH 03/16] x86/MCE: Dump MCE to dmesg if no consumers When there are no error record consumers registered with the kernel, the only thing that appears in dmesg is something like: [ 300.000326] mce: [Hardware Error]: Machine check events logged and the error records are gone. Which is seriously counterproductive. So let's dump them to dmesg instead, in such a case. Requested-by: Eric Morton Signed-off-by: Borislav Petkov Cc: Tony Luck Link: http://lkml.kernel.org/r/20161101120911.13163-4-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce.c | 52 ++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a7fdf453d895..4ca00474804b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -207,8 +207,12 @@ EXPORT_SYMBOL_GPL(mce_inject_log); static struct notifier_block mce_srao_nb; +static atomic_t num_notifiers; + void mce_register_decode_chain(struct notifier_block *nb) { + atomic_inc(&num_notifiers); + /* Ensure SRAO notifier has the highest priority in the decode chain. */ if (nb != &mce_srao_nb && nb->priority == INT_MAX) nb->priority -= 1; @@ -219,6 +223,8 @@ EXPORT_SYMBOL_GPL(mce_register_decode_chain); void mce_unregister_decode_chain(struct notifier_block *nb) { + atomic_dec(&num_notifiers); + atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); } EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); @@ -270,17 +276,17 @@ struct mca_msr_regs msr_ops = { .misc = misc_reg }; -static void print_mce(struct mce *m) +static void __print_mce(struct mce *m) { - int ret = 0; - - pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", - m->extcpu, m->mcgstatus, m->bank, m->status); + pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n", + m->extcpu, + (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""), + m->mcgstatus, m->bank, m->status); if (m->ip) { pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->ip); + m->cs, m->ip); if (m->cs == __KERNEL_CS) print_symbol("{%s}", m->ip); @@ -308,6 +314,13 @@ static void print_mce(struct mce *m) pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, cpu_data(m->extcpu).microcode); +} + +static void print_mce(struct mce *m) +{ + int ret = 0; + + __print_mce(m); /* * Print out human-readable details about the MCE error, @@ -569,6 +582,32 @@ static struct notifier_block mce_srao_nb = { .priority = INT_MAX, }; +static int mce_default_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct mce *m = (struct mce *)data; + + if (!m) + return NOTIFY_DONE; + + /* + * Run the default notifier if we have only the SRAO + * notifier and us registered. + */ + if (atomic_read(&num_notifiers) > 2) + return NOTIFY_DONE; + + __print_mce(m); + + return NOTIFY_DONE; +} + +static struct notifier_block mce_default_nb = { + .notifier_call = mce_default_notifier, + /* lowest prio, we want it to run last. */ + .priority = 0, +}; + /* * Read ADDR and MISC registers. */ @@ -2138,6 +2177,7 @@ int __init mcheck_init(void) { mcheck_intel_therm_init(); mce_register_decode_chain(&mce_srao_nb); + mce_register_decode_chain(&mce_default_nb); mcheck_vendor_init_severity(); INIT_WORK(&mce_work, mce_process_work); From 79349f529ab1a629b9e43e81b4a5b2c22d1e9a65 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 1 Nov 2016 17:33:00 +0100 Subject: [PATCH 04/16] x86/RAS: Simplify SMCA bank descriptor struct Call the struct simply smca_bank, it's instance ID can be simply ->id. Makes the code much more readable. Signed-off-by: Borislav Petkov Tested-by: Yazen Ghannam Link: http://lkml.kernel.org/r/20161103125556.15482-1-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mce.h | 7 ++++--- arch/x86/kernel/cpu/mcheck/mce_amd.c | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 9bd7ff5ffbcc..4d97875d9543 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -371,12 +371,13 @@ struct smca_hwid_mcatype { u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */ }; -struct smca_bank_info { +struct smca_bank { struct smca_hwid_mcatype *type; - u32 type_instance; + /* Instance ID */ + u32 id; }; -extern struct smca_bank_info smca_banks[MAX_NR_BANKS]; +extern struct smca_bank smca_banks[MAX_NR_BANKS]; #endif diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 9b5403462936..ac2f4f2cad18 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -116,7 +116,7 @@ static struct smca_hwid_mcatype smca_hwid_mcatypes[] = { { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 }, }; -struct smca_bank_info smca_banks[MAX_NR_BANKS]; +struct smca_bank smca_banks[MAX_NR_BANKS]; EXPORT_SYMBOL_GPL(smca_banks); /* @@ -150,14 +150,14 @@ static void get_smca_bank_info(unsigned int bank) { unsigned int i, hwid_mcatype, cpu = smp_processor_id(); struct smca_hwid_mcatype *type; - u32 high, instanceId; + u32 high, instance_id; u16 hwid, mcatype; /* Collect bank_info using CPU 0 for now. */ if (cpu) return; - if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) { + if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instance_id, &high)) { pr_warn("Failed to read MCA_IPID for bank %d\n", bank); return; } @@ -170,7 +170,7 @@ static void get_smca_bank_info(unsigned int bank) type = &smca_hwid_mcatypes[i]; if (hwid_mcatype == type->hwid_mcatype) { smca_banks[bank].type = type; - smca_banks[bank].type_instance = instanceId; + smca_banks[bank].id = instance_id; break; } } @@ -839,7 +839,7 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "%s_%x", smca_bank_names[bank_type].name, - smca_banks[bank].type_instance); + smca_banks[bank].id); return buf_mcatype; } From 1ce9cd7f9f0b71af7c496b816734bc2dc699363a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 2 Nov 2016 12:48:01 +0100 Subject: [PATCH 05/16] x86/RAS: Simplify SMCA HWID descriptor struct Call it simply smca_hwid and call local variables "hwid". More readable. Signed-off-by: Borislav Petkov Tested-by: Yazen Ghannam Link: http://lkml.kernel.org/r/20161103125556.15482-2-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mce.h | 4 ++-- arch/x86/kernel/cpu/mcheck/mce_amd.c | 24 +++++++++--------------- drivers/edac/mce_amd.c | 10 +++++----- 3 files changed, 16 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4d97875d9543..ccc801a0da0f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -365,14 +365,14 @@ extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES]; #define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype) -struct smca_hwid_mcatype { +struct smca_hwid { unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ u32 hwid_mcatype; /* (hwid,mcatype) tuple */ u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */ }; struct smca_bank { - struct smca_hwid_mcatype *type; + struct smca_hwid *hwid; /* Instance ID */ u32 id; }; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ac2f4f2cad18..ff81667af2f2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -86,7 +86,7 @@ struct smca_bank_name smca_bank_names[] = { }; EXPORT_SYMBOL_GPL(smca_bank_names); -static struct smca_hwid_mcatype smca_hwid_mcatypes[] = { +static struct smca_hwid smca_hwid_mcatypes[] = { /* { bank_type, hwid_mcatype, xec_bitmap } */ /* ZN Core (HWID=0xB0) MCA types */ @@ -142,16 +142,11 @@ static void default_deferred_error_interrupt(void) } void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; -/* - * CPU Initialization - */ - static void get_smca_bank_info(unsigned int bank) { unsigned int i, hwid_mcatype, cpu = smp_processor_id(); - struct smca_hwid_mcatype *type; + struct smca_hwid *s_hwid; u32 high, instance_id; - u16 hwid, mcatype; /* Collect bank_info using CPU 0 for now. */ if (cpu) @@ -162,14 +157,13 @@ static void get_smca_bank_info(unsigned int bank) return; } - hwid = high & MCI_IPID_HWID; - mcatype = (high & MCI_IPID_MCATYPE) >> 16; - hwid_mcatype = HWID_MCATYPE(hwid, mcatype); + hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID, + (high & MCI_IPID_MCATYPE) >> 16); for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { - type = &smca_hwid_mcatypes[i]; - if (hwid_mcatype == type->hwid_mcatype) { - smca_banks[bank].type = type; + s_hwid = &smca_hwid_mcatypes[i]; + if (hwid_mcatype == s_hwid->hwid_mcatype) { + smca_banks[bank].hwid = s_hwid; smca_banks[bank].id = instance_id; break; } @@ -826,10 +820,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return th_names[bank]; } - if (!smca_banks[bank].type) + if (!smca_banks[bank].hwid) return NULL; - bank_type = smca_banks[bank].type->bank_type; + bank_type = smca_banks[bank].hwid->bank_type; if (b && bank_type == SMCA_UMC) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index daaac2c79ca7..8e96c6ddf272 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -851,7 +851,7 @@ static void decode_mc6_mce(struct mce *m) /* Decode errors according to Scalable MCA specification */ static void decode_smca_errors(struct mce *m) { - struct smca_hwid_mcatype *type; + struct smca_hwid *hwid; unsigned int bank_type; const char *ip_name; u8 xec = XEC(m->status, xec_mask); @@ -862,18 +862,18 @@ static void decode_smca_errors(struct mce *m) if (boot_cpu_data.x86 >= 0x17 && m->bank == 4) pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n"); - type = smca_banks[m->bank].type; - if (!type) + hwid = smca_banks[m->bank].hwid; + if (!hwid) return; - bank_type = type->bank_type; + bank_type = hwid->bank_type; ip_name = smca_bank_names[bank_type].long_name; pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec); /* Only print the decode of valid error codes */ if (xec < smca_mce_descs[bank_type].num_descs && - (type->xec_bitmap & BIT_ULL(xec))) { + (hwid->xec_bitmap & BIT_ULL(xec))) { pr_emerg(HW_ERR "%s Error: ", ip_name); pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]); } From a9a1c0ee04aa771e5523ae33e458c702261ab547 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 2 Nov 2016 13:24:47 +0100 Subject: [PATCH 06/16] x86/RAS: Rename smca_bank_names to smca_names Make it differ more from struct smca_bank_name for better readability. Signed-off-by: Borislav Petkov Tested-by: Yazen Ghannam Link: http://lkml.kernel.org/r/20161103125556.15482-3-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mce.h | 2 +- arch/x86/kernel/cpu/mcheck/mce_amd.c | 6 +++--- drivers/edac/mce_amd.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ccc801a0da0f..8ffd21596dd7 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -361,7 +361,7 @@ struct smca_bank_name { const char *long_name; /* Long name for pretty-printing */ }; -extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES]; +extern struct smca_bank_name smca_names[N_SMCA_BANK_TYPES]; #define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ff81667af2f2..afeb02b87127 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -69,7 +69,7 @@ static const char * const smca_umc_block_names[] = { "misc_umc" }; -struct smca_bank_name smca_bank_names[] = { +struct smca_bank_name smca_names[] = { [SMCA_LS] = { "load_store", "Load Store Unit" }, [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, @@ -84,7 +84,7 @@ struct smca_bank_name smca_bank_names[] = { [SMCA_PSP] = { "psp", "Platform Security Processor" }, [SMCA_SMU] = { "smu", "System Management Unit" }, }; -EXPORT_SYMBOL_GPL(smca_bank_names); +EXPORT_SYMBOL_GPL(smca_names); static struct smca_hwid smca_hwid_mcatypes[] = { /* { bank_type, hwid_mcatype, xec_bitmap } */ @@ -832,7 +832,7 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) } snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, - "%s_%x", smca_bank_names[bank_type].name, + "%s_%x", smca_names[bank_type].name, smca_banks[bank].id); return buf_mcatype; } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 8e96c6ddf272..3dee58583d25 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -867,7 +867,7 @@ static void decode_smca_errors(struct mce *m) return; bank_type = hwid->bank_type; - ip_name = smca_bank_names[bank_type].long_name; + ip_name = smca_names[bank_type].long_name; pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec); From c09a8c40e0a0b4994925ac8eba91b85d76f440a3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 3 Nov 2016 21:12:33 +0100 Subject: [PATCH 07/16] x86/RAS: Hide SMCA bank names Add accessor functions and hide the smca_names array. Also, add a sanity-check to bank HWID assignment in get_smca_bank_info(). Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20161104152317.5r276t35df53qk76@pd.tnic Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mce.h | 8 +------ arch/x86/kernel/cpu/mcheck/mce_amd.c | 32 +++++++++++++++++++++++++--- drivers/edac/mce_amd.c | 2 +- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 8ffd21596dd7..748b8da8e627 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -356,13 +356,6 @@ enum smca_bank_types { N_SMCA_BANK_TYPES }; -struct smca_bank_name { - const char *name; /* Short name for sysfs */ - const char *long_name; /* Long name for pretty-printing */ -}; - -extern struct smca_bank_name smca_names[N_SMCA_BANK_TYPES]; - #define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype) struct smca_hwid { @@ -379,6 +372,7 @@ struct smca_bank { extern struct smca_bank smca_banks[MAX_NR_BANKS]; +extern const char *smca_get_long_name(enum smca_bank_types t); #endif #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index afeb02b87127..e68a305ff666 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -69,7 +69,12 @@ static const char * const smca_umc_block_names[] = { "misc_umc" }; -struct smca_bank_name smca_names[] = { +struct smca_bank_name { + const char *name; /* Short name for sysfs */ + const char *long_name; /* Long name for pretty-printing */ +}; + +static struct smca_bank_name smca_names[] = { [SMCA_LS] = { "load_store", "Load Store Unit" }, [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, @@ -84,7 +89,23 @@ struct smca_bank_name smca_names[] = { [SMCA_PSP] = { "psp", "Platform Security Processor" }, [SMCA_SMU] = { "smu", "System Management Unit" }, }; -EXPORT_SYMBOL_GPL(smca_names); + +const char *smca_get_name(enum smca_bank_types t) +{ + if (t >= N_SMCA_BANK_TYPES) + return NULL; + + return smca_names[t].name; +} + +const char *smca_get_long_name(enum smca_bank_types t) +{ + if (t >= N_SMCA_BANK_TYPES) + return NULL; + + return smca_names[t].long_name; +} +EXPORT_SYMBOL_GPL(smca_get_long_name); static struct smca_hwid smca_hwid_mcatypes[] = { /* { bank_type, hwid_mcatype, xec_bitmap } */ @@ -163,6 +184,11 @@ static void get_smca_bank_info(unsigned int bank) for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { s_hwid = &smca_hwid_mcatypes[i]; if (hwid_mcatype == s_hwid->hwid_mcatype) { + + WARN(smca_banks[bank].hwid, + "Bank %s already initialized!\n", + smca_get_name(s_hwid->bank_type)); + smca_banks[bank].hwid = s_hwid; smca_banks[bank].id = instance_id; break; @@ -832,7 +858,7 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) } snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, - "%s_%x", smca_names[bank_type].name, + "%s_%x", smca_get_name(bank_type), smca_banks[bank].id); return buf_mcatype; } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 3dee58583d25..80762acd8cc8 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -867,7 +867,7 @@ static void decode_smca_errors(struct mce *m) return; bank_type = hwid->bank_type; - ip_name = smca_names[bank_type].long_name; + ip_name = smca_get_long_name(bank_type); pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec); From 54467353a96577f840cd2348981417c559b21b4b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 10 Nov 2016 14:10:53 +0100 Subject: [PATCH 08/16] x86/MCE: Correct TSC timestamping of error records We did have logic in the MCE code which would TSC-timestamp an error record only when it is exact - i.e., when it wasn't detected by polling. This isn't the case anymore. So let's fix that: We have a valid TSC timestamp in the error record only when it has been a precise detection, i.e., either in the #MC handler or in one of the interrupt handlers (thresholding, deferred, ...). All other error records still have mce.time which contains the wall time in order to be able to place the error record in time at least approximately. Also, this fixes another bug where machine_check_poll() would clear mce.tsc unconditionally even if we requested precise MCP_TIMESTAMP logging. The proper fix would be to generate timestamp only when it has been requested and not always. But that would require a more thorough code audit of all mce_gather_info/mce_setup() users. Add a FIXME for now. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Cc: Tony Luck Cc: kernel test robot Cc: linux-edac Cc: lkp@01.org Link: http://lkml.kernel.org/r/20161110131053.kybsijfs5venpjnf@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 16 ++++++++++------ arch/x86/kernel/cpu/mcheck/mce_intel.c | 4 ++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 4ca00474804b..aab96f8d52b0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -706,6 +706,15 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) mce_gather_info(&m, NULL); + /* + * m.tsc was set in mce_setup(). Clear it if not requested. + * + * FIXME: Propagate @flags to mce_gather_info/mce_setup() to avoid + * that dance. + */ + if (!(flags & MCP_TIMESTAMP)) + m.tsc = 0; + for (i = 0; i < mca_cfg.banks; i++) { if (!mce_banks[i].ctl || !test_bit(i, *b)) continue; @@ -713,14 +722,12 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) m.misc = 0; m.addr = 0; m.bank = i; - m.tsc = 0; barrier(); m.status = mce_rdmsrl(msr_ops.status(i)); if (!(m.status & MCI_STATUS_VAL)) continue; - /* * Uncorrected or signalled events are handled by the exception * handler when it is enabled, so don't process those here. @@ -735,9 +742,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) mce_read_aux(&m, i); - if (!(flags & MCP_TIMESTAMP)) - m.tsc = 0; - severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) @@ -1394,7 +1398,7 @@ static void mce_timer_fn(unsigned long data) iv = __this_cpu_read(mce_next_interval); if (mce_available(this_cpu_ptr(&cpu_info))) { - machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks)); + machine_check_poll(0, this_cpu_ptr(&mce_poll_banks)); if (mce_intel_cmci_poll()) { iv = mce_adjust_timer(iv); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 1defb8ea882c..be0b2fad47c5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -130,7 +130,7 @@ bool mce_intel_cmci_poll(void) * Reset the counter if we've logged an error in the last poll * during the storm. */ - if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned))) + if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned))) this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL); else this_cpu_dec(cmci_backoff_cnt); @@ -342,7 +342,7 @@ void cmci_recheck(void) return; local_irq_save(flags); - machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); + machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)); local_irq_restore(flags); } From 859af13a103fc99a9a62064ef8f05f7d9ee58609 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 10 Nov 2016 14:32:35 -0600 Subject: [PATCH 09/16] x86/mce/AMD: Fix HWID_MCATYPE calculation by grouping arguments The calculation of the hwid_mcatype value in get_smca_bank_info() became incorrect after applying the following commit: 1ce9cd7f9f0b ("x86/RAS: Simplify SMCA HWID descriptor struct") This causes the function to not match a bank to its type. Disassembly of hwid_mcatype calculation after change: db: 8b 45 e0 mov -0x20(%rbp),%eax de: 41 89 c4 mov %eax,%r12d e1: 25 00 00 ff 0f and $0xfff0000,%eax e6: 41 c1 ec 10 shr $0x10,%r12d ea: 41 09 c4 or %eax,%r12d Disassembly of hwid_mcatype calculation in original code: 286: 8b 45 d0 mov -0x30(%rbp),%eax 289: 41 89 c5 mov %eax,%r13d 28c: c1 e8 10 shr $0x10,%eax 28f: 41 81 e5 ff 0f 00 00 and $0xfff,%r13d 296: 41 c1 e5 10 shl $0x10,%r13d 29a: 41 09 c5 or %eax,%r13d Grouping the arguments to the HWID_MCATYPE() macro fixes the issue. ( Boris suggested adding parentheses in the macro. ) Signed-off-by: Yazen Ghannam Cc: Aravind Gopalakrishnan Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 748b8da8e627..ecda6a93179f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -356,7 +356,7 @@ enum smca_bank_types { N_SMCA_BANK_TYPES }; -#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype) +#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype)) struct smca_hwid { unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ From 18807ddb7f88d4ac3797302bafb18143d573e66f Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 15 Nov 2016 15:13:53 -0600 Subject: [PATCH 10/16] x86/mce/AMD: Reset Threshold Limit after logging error The error count field in MCA_MISC does not get reset by hardware when the threshold has been reached. Software is expected to reset it. Currently, the threshold limit only gets reset during init or when a user writes to sysfs. If the user is not monitoring threshold interrupts and resetting the limit then the user will only see 1 interrupt when the limit is first hit. So if, for example, the limit is set to 10 then only 1 interrupt will be recorded after 10 errors even if 100 errors have occurred. The user may then assume that only 10 errors have occurred. Signed-off-by: Yazen Ghannam Cc: Aravind Gopalakrishnan Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/1479244433-69267-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index e68a305ff666..5b33702ca86e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -665,6 +665,7 @@ static void amd_threshold_interrupt(void) { u32 low = 0, high = 0, address = 0; unsigned int bank, block, cpu = smp_processor_id(); + struct thresh_restart tr; /* assume first bank caused it */ for (bank = 0; bank < mca_cfg.banks; ++bank) { @@ -701,6 +702,11 @@ static void amd_threshold_interrupt(void) log: __log_error(bank, false, true, ((u64)high << 32) | low); + + /* Reset threshold block after logging error. */ + memset(&tr, 0, sizeof(tr)); + tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block]; + threshold_restart_bank(&tr); } /* From c7993890e772c450764d39ba872444307bbdd460 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 10 Nov 2016 15:10:53 -0600 Subject: [PATCH 11/16] x86/amd_nb: Make amd_northbridges internal to amd_nb.c Hide amd_northbridges in amd_nb.c so that external callers will have to use the exported accessor functions. Also, fix some checkpatch.pl warnings. Signed-off-by: Yazen Ghannam Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/1478812257-5424-2-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/amd_nb.h | 18 +++--------------- arch/x86/kernel/amd_nb.c | 33 +++++++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 5e828da2e18f..b442ad75b3b3 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -66,7 +66,6 @@ struct amd_northbridge_info { u64 flags; struct amd_northbridge *nb; }; -extern struct amd_northbridge_info amd_northbridges; #define AMD_NB_GART BIT(0) #define AMD_NB_L3_INDEX_DISABLE BIT(1) @@ -74,20 +73,9 @@ extern struct amd_northbridge_info amd_northbridges; #ifdef CONFIG_AMD_NB -static inline u16 amd_nb_num(void) -{ - return amd_northbridges.num; -} - -static inline bool amd_nb_has_feature(unsigned feature) -{ - return ((amd_northbridges.flags & feature) == feature); -} - -static inline struct amd_northbridge *node_to_amd_nb(int node) -{ - return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; -} +u16 amd_nb_num(void); +bool amd_nb_has_feature(unsigned int feature); +struct amd_northbridge *node_to_amd_nb(int node); static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev) { diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 4fdf6230d93c..015328901896 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -44,8 +44,25 @@ const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = { { } }; -struct amd_northbridge_info amd_northbridges; -EXPORT_SYMBOL(amd_northbridges); +static struct amd_northbridge_info amd_northbridges; + +u16 amd_nb_num(void) +{ + return amd_northbridges.num; +} +EXPORT_SYMBOL(amd_nb_num); + +bool amd_nb_has_feature(unsigned int feature) +{ + return ((amd_northbridges.flags & feature) == feature); +} +EXPORT_SYMBOL(amd_nb_has_feature); + +struct amd_northbridge *node_to_amd_nb(int node) +{ + return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; +} +EXPORT_SYMBOL(node_to_amd_nb); static struct pci_dev *next_northbridge(struct pci_dev *dev, const struct pci_device_id *ids) @@ -64,7 +81,7 @@ int amd_cache_northbridges(void) struct amd_northbridge *nb; struct pci_dev *misc, *link; - if (amd_nb_num()) + if (amd_northbridges.num) return 0; misc = NULL; @@ -82,7 +99,7 @@ int amd_cache_northbridges(void) amd_northbridges.num = i; link = misc = NULL; - for (i = 0; i != amd_nb_num(); i++) { + for (i = 0; i != amd_northbridges.num; i++) { node_to_amd_nb(i)->misc = misc = next_northbridge(misc, amd_nb_misc_ids); node_to_amd_nb(i)->link = link = @@ -226,14 +243,14 @@ static void amd_cache_gart(void) if (!amd_nb_has_feature(AMD_NB_GART)) return; - flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL); + flush_words = kmalloc_array(amd_northbridges.num, sizeof(u32), GFP_KERNEL); if (!flush_words) { amd_northbridges.flags &= ~AMD_NB_GART; pr_notice("Cannot initialize GART flush words, GART support disabled\n"); return; } - for (i = 0; i != amd_nb_num(); i++) + for (i = 0; i != amd_northbridges.num; i++) pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, &flush_words[i]); } @@ -252,12 +269,12 @@ void amd_flush_garts(void) that it doesn't matter to serialize more. -AK */ spin_lock_irqsave(&gart_lock, flags); flushed = 0; - for (i = 0; i < amd_nb_num(); i++) { + for (i = 0; i < amd_northbridges.num; i++) { pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c, flush_words[i] | 1); flushed++; } - for (i = 0; i < amd_nb_num(); i++) { + for (i = 0; i < amd_northbridges.num; i++) { u32 w; /* Make sure the hardware actually executed the flush*/ for (;;) { From de6bd0835ac148a0882528fe1fe5bcee709fe911 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 10 Nov 2016 15:10:54 -0600 Subject: [PATCH 12/16] x86/amd_nb: Make all exports EXPORT_SYMBOL_GPL Make all EXPORT_SYMBOL's into EXPORT_SYMBOL_GPL. While we're at it let's fix some checkpatch warnings. Signed-off-by: Yazen Ghannam Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/1478812257-5424-3-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/kernel/amd_nb.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 015328901896..3c1cb4595595 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -26,7 +26,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, {} }; -EXPORT_SYMBOL(amd_nb_misc_ids); +EXPORT_SYMBOL_GPL(amd_nb_misc_ids); static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, @@ -50,19 +50,19 @@ u16 amd_nb_num(void) { return amd_northbridges.num; } -EXPORT_SYMBOL(amd_nb_num); +EXPORT_SYMBOL_GPL(amd_nb_num); bool amd_nb_has_feature(unsigned int feature) { return ((amd_northbridges.flags & feature) == feature); } -EXPORT_SYMBOL(amd_nb_has_feature); +EXPORT_SYMBOL_GPL(amd_nb_has_feature); struct amd_northbridge *node_to_amd_nb(int node) { return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } -EXPORT_SYMBOL(node_to_amd_nb); +EXPORT_SYMBOL_GPL(node_to_amd_nb); static struct pci_dev *next_northbridge(struct pci_dev *dev, const struct pci_device_id *ids) @@ -91,7 +91,7 @@ int amd_cache_northbridges(void) if (!i) return -ENODEV; - nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL); + nb = kcalloc(i, sizeof(struct amd_northbridge), GFP_KERNEL); if (!nb) return -ENOMEM; @@ -156,13 +156,13 @@ struct resource *amd_get_mmconfig_range(struct resource *res) { u32 address; u64 base, msr; - unsigned segn_busn_bits; + unsigned int segn_busn_bits; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return NULL; /* assume all cpus from fam10h have mmconfig */ - if (boot_cpu_data.x86 < 0x10) + if (boot_cpu_data.x86 < 0x10) return NULL; address = MSR_FAM10H_MMIO_CONF_BASE; @@ -263,10 +263,12 @@ void amd_flush_garts(void) if (!amd_nb_has_feature(AMD_NB_GART)) return; - /* Avoid races between AGP and IOMMU. In theory it's not needed - but I'm not sure if the hardware won't lose flush requests - when another is pending. This whole thing is so expensive anyways - that it doesn't matter to serialize more. -AK */ + /* + * Avoid races between AGP and IOMMU. In theory it's not needed + * but I'm not sure if the hardware won't lose flush requests + * when another is pending. This whole thing is so expensive anyways + * that it doesn't matter to serialize more. -AK + */ spin_lock_irqsave(&gart_lock, flags); flushed = 0; for (i = 0; i < amd_northbridges.num; i++) { From b791c6b6a55c402367cc544f54921074253db061 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 10 Nov 2016 15:10:55 -0600 Subject: [PATCH 13/16] x86/amd_nb: Add Fam17h Data Fabric as "Northbridge" AMD Fam17h uses a Data Fabric component instead of a traditional Northbridge. However, the DF is similar to a NB in that there is one per die and it uses PCI config D18Fx registers. So let's reuse the existing AMD_NB infrastructure for Data Fabrics. Signed-off-by: Yazen Ghannam Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/1478812257-5424-4-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/kernel/amd_nb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 3c1cb4595595..86c2cec9e6cc 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -13,6 +13,9 @@ #include #include +#define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 +#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 + static u32 *flush_words; const struct pci_device_id amd_nb_misc_ids[] = { @@ -24,6 +27,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, {} }; EXPORT_SYMBOL_GPL(amd_nb_misc_ids); @@ -34,6 +38,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, {} }; From ddfe43cdc0da3189feac4bb9f0f818bef6d6e56e Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 10 Nov 2016 15:10:56 -0600 Subject: [PATCH 14/16] x86/amd_nb: Add SMN and Indirect Data Fabric access for AMD Fam17h Some devices on Fam17h can only be accessed through the System Management Network (SMN). The SMN is accessed by a pair of index/data registers in PCI config space. Add a pair of functions to read from and write to the SMN. The Data Fabric on Fam17h allows multiple devices to use the same register space. The registers of a specific device are accessed indirectly using the device's DF InstanceId. Currently, we only need to read from these devices, so only define a read function for now. Signed-off-by: Yazen Ghannam Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/1478812257-5424-5-git-send-email-Yazen.Ghannam@amd.com [ Boris: make __amd_smn_rw() even more compact. ] Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/amd_nb.h | 5 ++ arch/x86/kernel/amd_nb.c | 108 +++++++++++++++++++++++++++++++++- 2 files changed, 111 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b442ad75b3b3..00c88a01301d 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -21,6 +21,10 @@ extern int amd_numa_init(void); extern int amd_get_subcaches(int); extern int amd_set_subcaches(int, unsigned long); +extern int amd_smn_read(u16 node, u32 address, u32 *value); +extern int amd_smn_write(u16 node, u32 address, u32 value); +extern int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo); + struct amd_l3_cache { unsigned indices; u8 subcaches[4]; @@ -55,6 +59,7 @@ struct threshold_bank { }; struct amd_northbridge { + struct pci_dev *root; struct pci_dev *misc; struct pci_dev *link; struct amd_l3_cache l3_cache; diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 86c2cec9e6cc..458da8509b75 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -13,11 +13,20 @@ #include #include +#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 +/* Protect the PCI config register pairs used for SMN and DF indirect access. */ +static DEFINE_MUTEX(smn_mutex); + static u32 *flush_words; +static const struct pci_device_id amd_root_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) }, + {} +}; + const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, @@ -80,11 +89,104 @@ static struct pci_dev *next_northbridge(struct pci_dev *dev, return dev; } +static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write) +{ + struct pci_dev *root; + int err = -ENODEV; + + if (node >= amd_northbridges.num) + goto out; + + root = node_to_amd_nb(node)->root; + if (!root) + goto out; + + mutex_lock(&smn_mutex); + + err = pci_write_config_dword(root, 0x60, address); + if (err) { + pr_warn("Error programming SMN address 0x%x.\n", address); + goto out_unlock; + } + + err = (write ? pci_write_config_dword(root, 0x64, *value) + : pci_read_config_dword(root, 0x64, value)); + if (err) + pr_warn("Error %s SMN address 0x%x.\n", + (write ? "writing to" : "reading from"), address); + +out_unlock: + mutex_unlock(&smn_mutex); + +out: + return err; +} + +int amd_smn_read(u16 node, u32 address, u32 *value) +{ + return __amd_smn_rw(node, address, value, false); +} +EXPORT_SYMBOL_GPL(amd_smn_read); + +int amd_smn_write(u16 node, u32 address, u32 value) +{ + return __amd_smn_rw(node, address, &value, true); +} +EXPORT_SYMBOL_GPL(amd_smn_write); + +/* + * Data Fabric Indirect Access uses FICAA/FICAD. + * + * Fabric Indirect Configuration Access Address (FICAA): Constructed based + * on the device's Instance Id and the PCI function and register offset of + * the desired register. + * + * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO + * and FICAD HI registers but so far we only need the LO register. + */ +int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) +{ + struct pci_dev *F4; + u32 ficaa; + int err = -ENODEV; + + if (node >= amd_northbridges.num) + goto out; + + F4 = node_to_amd_nb(node)->link; + if (!F4) + goto out; + + ficaa = 1; + ficaa |= reg & 0x3FC; + ficaa |= (func & 0x7) << 11; + ficaa |= instance_id << 16; + + mutex_lock(&smn_mutex); + + err = pci_write_config_dword(F4, 0x5C, ficaa); + if (err) { + pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa); + goto out_unlock; + } + + err = pci_read_config_dword(F4, 0x98, lo); + if (err) + pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa); + +out_unlock: + mutex_unlock(&smn_mutex); + +out: + return err; +} +EXPORT_SYMBOL_GPL(amd_df_indirect_read); + int amd_cache_northbridges(void) { u16 i = 0; struct amd_northbridge *nb; - struct pci_dev *misc, *link; + struct pci_dev *root, *misc, *link; if (amd_northbridges.num) return 0; @@ -103,8 +205,10 @@ int amd_cache_northbridges(void) amd_northbridges.nb = nb; amd_northbridges.num = i; - link = misc = NULL; + link = misc = root = NULL; for (i = 0; i != amd_northbridges.num; i++) { + node_to_amd_nb(i)->root = root = + next_northbridge(root, amd_root_ids); node_to_amd_nb(i)->misc = misc = next_northbridge(misc, amd_nb_misc_ids); node_to_amd_nb(i)->link = link = From f5382de9d4b1d67cc858c49fa6077720621576ea Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 17 Nov 2016 17:57:27 -0500 Subject: [PATCH 15/16] x86/mce/AMD: Add system physical address translation for AMD Fam17h The Unified Memory Controllers (UMCs) on Fam17h log a normalized address in their MCA_ADDR registers. We need to convert that normalized address to a system physical address in order to support a few facilities: 1) To offline poisoned pages in DRAM proactively in the deferred error handler. 2) To print sysaddr and page info for DRAM ECC errors in EDAC. [ Boris: fixes/cleanups ontop: * hi_addr_offset = 0 - no need for that branch. Stick it all under the HiAddrOffsetEn case. It confines hi_addr_offset's declaration too. * Move variables to the innermost scope they're used at so that we save on stack and not blow it up immediately on function entry. * Do not modify *sys_addr prematurely - we want to not exit early and have modified *sys_addr some, which callers get to see. We either convert to a sys_addr or we don't do anything. And we signal that with the retval of the function. * Rename label out -> out_err - because it is the error path. * No need to pr_err of the conversion failed case: imagine a sparsely-populated machine with UMCs which don't have DIMMs. Callers should look at the retval instead and issue a printk only when really necessary. No need for useless info in dmesg. * s/temp_reg/tmp/ and other variable names shortening => shorter code. * Use BIT() everywhere. * Make error messages more informative. * Small build fix for the !CONFIG_X86_MCE_AMD case. * ... and more minor cleanups. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Aravind Gopalakrishnan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/20161122111133.mjzpvzhf7o7yl2oa@pd.tnic [ Typo fixes. ] Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/mce.h | 2 + arch/x86/kernel/cpu/mcheck/mce_amd.c | 200 +++++++++++++++++++++++++++ 3 files changed, 203 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bada636d1065..5c81f3967a5d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1025,7 +1025,7 @@ config X86_MCE_INTEL config X86_MCE_AMD def_bool y prompt "AMD MCE features" - depends on X86_MCE && X86_LOCAL_APIC + depends on X86_MCE && X86_LOCAL_APIC && AMD_NB ---help--- Additional support for AMD specific MCE features such as the DRAM Error Threshold. diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ecda6a93179f..caab413c91ad 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -252,8 +252,10 @@ static inline void cmci_recheck(void) {} #ifdef CONFIG_X86_MCE_AMD void mce_amd_feature_init(struct cpuinfo_x86 *c); +int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr); #else static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } +static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }; #endif int mce_available(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 5b33702ca86e..1a0b0a0e5e2b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -553,6 +553,206 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) deferred_error_interrupt_enable(c); } +int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) +{ + u64 dram_base_addr, dram_limit_addr, dram_hole_base; + /* We start from the normalized address */ + u64 ret_addr = norm_addr; + + u32 tmp; + + u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; + u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; + u8 intlv_addr_sel, intlv_addr_bit; + u8 num_intlv_bits, hashed_bit; + u8 lgcy_mmio_hole_en, base = 0; + u8 cs_mask, cs_id = 0; + bool hash_enabled = false; + + /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ + if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp)) + goto out_err; + + /* Remove HiAddrOffset from normalized address, if enabled: */ + if (tmp & BIT(0)) { + u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8; + + if (norm_addr >= hi_addr_offset) { + ret_addr -= hi_addr_offset; + base = 1; + } + } + + /* Read D18F0x110 (DramBaseAddress). */ + if (amd_df_indirect_read(nid, 0, 0x110 + (8 * base), umc, &tmp)) + goto out_err; + + /* Check if address range is valid. */ + if (!(tmp & BIT(0))) { + pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n", + __func__, tmp); + goto out_err; + } + + lgcy_mmio_hole_en = tmp & BIT(1); + intlv_num_chan = (tmp >> 4) & 0xF; + intlv_addr_sel = (tmp >> 8) & 0x7; + dram_base_addr = (tmp & GENMASK_ULL(31, 12)) << 16; + + /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */ + if (intlv_addr_sel > 3) { + pr_err("%s: Invalid interleave address select %d.\n", + __func__, intlv_addr_sel); + goto out_err; + } + + /* Read D18F0x114 (DramLimitAddress). */ + if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp)) + goto out_err; + + intlv_num_sockets = (tmp >> 8) & 0x1; + intlv_num_dies = (tmp >> 10) & 0x3; + dram_limit_addr = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); + + intlv_addr_bit = intlv_addr_sel + 8; + + /* Re-use intlv_num_chan by setting it equal to log2(#channels) */ + switch (intlv_num_chan) { + case 0: intlv_num_chan = 0; break; + case 1: intlv_num_chan = 1; break; + case 3: intlv_num_chan = 2; break; + case 5: intlv_num_chan = 3; break; + case 7: intlv_num_chan = 4; break; + + case 8: intlv_num_chan = 1; + hash_enabled = true; + break; + default: + pr_err("%s: Invalid number of interleaved channels %d.\n", + __func__, intlv_num_chan); + goto out_err; + } + + num_intlv_bits = intlv_num_chan; + + if (intlv_num_dies > 2) { + pr_err("%s: Invalid number of interleaved nodes/dies %d.\n", + __func__, intlv_num_dies); + goto out_err; + } + + num_intlv_bits += intlv_num_dies; + + /* Add a bit if sockets are interleaved. */ + num_intlv_bits += intlv_num_sockets; + + /* Assert num_intlv_bits <= 4 */ + if (num_intlv_bits > 4) { + pr_err("%s: Invalid interleave bits %d.\n", + __func__, num_intlv_bits); + goto out_err; + } + + if (num_intlv_bits > 0) { + u64 temp_addr_x, temp_addr_i, temp_addr_y; + u8 die_id_bit, sock_id_bit, cs_fabric_id; + + /* + * Read FabricBlockInstanceInformation3_CS[BlockFabricID]. + * This is the fabric id for this coherent slave. Use + * umc/channel# as instance id of the coherent slave + * for FICAA. + */ + if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp)) + goto out_err; + + cs_fabric_id = (tmp >> 8) & 0xFF; + die_id_bit = 0; + + /* If interleaved over more than 1 channel: */ + if (intlv_num_chan) { + die_id_bit = intlv_num_chan; + cs_mask = (1 << die_id_bit) - 1; + cs_id = cs_fabric_id & cs_mask; + } + + sock_id_bit = die_id_bit; + + /* Read D18F1x208 (SystemFabricIdMask). */ + if (intlv_num_dies || intlv_num_sockets) + if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp)) + goto out_err; + + /* If interleaved over more than 1 die. */ + if (intlv_num_dies) { + sock_id_bit = die_id_bit + intlv_num_dies; + die_id_shift = (tmp >> 24) & 0xF; + die_id_mask = (tmp >> 8) & 0xFF; + + cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; + } + + /* If interleaved over more than 1 socket. */ + if (intlv_num_sockets) { + socket_id_shift = (tmp >> 28) & 0xF; + socket_id_mask = (tmp >> 16) & 0xFF; + + cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; + } + + /* + * The pre-interleaved address consists of XXXXXXIIIYYYYY + * where III is the ID for this CS, and XXXXXXYYYYY are the + * address bits from the post-interleaved address. + * "num_intlv_bits" has been calculated to tell us how many "I" + * bits there are. "intlv_addr_bit" tells us how many "Y" bits + * there are (where "I" starts). + */ + temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0); + temp_addr_i = (cs_id << intlv_addr_bit); + temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; + ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; + } + + /* Add dram base address */ + ret_addr += dram_base_addr; + + /* If legacy MMIO hole enabled */ + if (lgcy_mmio_hole_en) { + if (amd_df_indirect_read(nid, 0, 0x104, umc, &tmp)) + goto out_err; + + dram_hole_base = tmp & GENMASK(31, 24); + if (ret_addr >= dram_hole_base) + ret_addr += (BIT_ULL(32) - dram_hole_base); + } + + if (hash_enabled) { + /* Save some parentheses and grab ls-bit at the end. */ + hashed_bit = (ret_addr >> 12) ^ + (ret_addr >> 18) ^ + (ret_addr >> 21) ^ + (ret_addr >> 30) ^ + cs_id; + + hashed_bit &= BIT(0); + + if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0))) + ret_addr ^= BIT(intlv_addr_bit); + } + + /* Is calculated system address is above DRAM limit address? */ + if (ret_addr > dram_limit_addr) + goto out_err; + + *sys_addr = ret_addr; + return 0; + +out_err: + return -EINVAL; +} +EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr); + static void __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) { From 3f5a7896a5096fd50030a04d4c3f28a7441e30a5 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 18 Nov 2016 09:48:36 -0800 Subject: [PATCH 16/16] x86/mce: Include the PPIN in MCE records when available Intel Xeons from Ivy Bridge onwards support a processor identification number set in the factory. To the user this is a handy unique number to identify a particular CPU. Intel can decode this to the fab/production run to track errors. On systems that have it, include it in the machine check record. I'm told that this would be helpful for users that run large data centers with multi-socket servers to keep track of which CPUs are seeing errors. Boris: * Add some clarifying comments and spacing. * Mask out [63:2] in the disabled-but-not-locked case * Call the MSR variable "val" for more readability. Signed-off-by: Tony Luck Cc: Ashok Raj Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/20161123114855.njguoaygp3qnbkia@pd.tnic Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 4 +++ arch/x86/include/uapi/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 4 +++ arch/x86/kernel/cpu/mcheck/mce_intel.c | 37 ++++++++++++++++++++++++++ 5 files changed, 47 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a39629206864..d625b651e526 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -193,6 +193,7 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 78f3760ca1f2..710273c617b8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -37,6 +37,10 @@ #define EFER_FFXSR (1<<_EFER_FFXSR) /* Intel MSRs. Some also available on other CPUs */ + +#define MSR_PPIN_CTL 0x0000004e +#define MSR_PPIN 0x0000004f + #define MSR_IA32_PERFCTR0 0x000000c1 #define MSR_IA32_PERFCTR1 0x000000c2 #define MSR_FSB_FREQ 0x000000cd diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 69a6e07e3149..eb6247a7009b 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -28,6 +28,7 @@ struct mce { __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ + __u64 ppin; /* Protected Processor Inventory Number */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index aab96f8d52b0..a3cb27af4f9b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -43,6 +43,7 @@ #include #include +#include #include #include #include @@ -135,6 +136,9 @@ void mce_setup(struct mce *m) m->socketid = cpu_data(m->extcpu).phys_proc_id; m->apicid = cpu_data(m->extcpu).initial_apicid; rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); + + if (this_cpu_has(X86_FEATURE_INTEL_PPIN)) + rdmsrl(MSR_PPIN, m->ppin); } DEFINE_PER_CPU(struct mce, injectm); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index be0b2fad47c5..190b3e6cef4d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -464,11 +466,46 @@ static void intel_clear_lmce(void) wrmsrl(MSR_IA32_MCG_EXT_CTL, val); } +static void intel_ppin_init(struct cpuinfo_x86 *c) +{ + unsigned long long val; + + /* + * Even if testing the presence of the MSR would be enough, we don't + * want to risk the situation where other models reuse this MSR for + * other purposes. + */ + switch (c->x86_model) { + case INTEL_FAM6_IVYBRIDGE_X: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_X: + case INTEL_FAM6_SKYLAKE_X: + if (rdmsrl_safe(MSR_PPIN_CTL, &val)) + return; + + if ((val & 3UL) == 1UL) { + /* PPIN available but disabled: */ + return; + } + + /* If PPIN is disabled, but not locked, try to enable: */ + if (!(val & 3UL)) { + wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); + rdmsrl_safe(MSR_PPIN_CTL, &val); + } + + if ((val & 3UL) == 2UL) + set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); + } +} + void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); intel_init_cmci(); intel_init_lmce(); + intel_ppin_init(c); } void mce_intel_feature_clear(struct cpuinfo_x86 *c)