mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-29 15:43:59 +08:00
trace, RAS: Add eMCA trace event interface
Add a trace interface that reports all hardware-error-related information in detail. Signed-off-by: Chen, Gong <gong.chen@linux.intel.com> Acked-by: Borislav Petkov <bp@suse.de> Signed-off-by: Tony Luck <tony.luck@intel.com>
This commit is contained in:
parent
d963cd95be
commit
2dfb7d51a6
@ -370,6 +370,7 @@ config ACPI_EXTLOG
|
|||||||
tristate "Extended Error Log support"
|
tristate "Extended Error Log support"
|
||||||
depends on X86_MCE && X86_LOCAL_APIC
|
depends on X86_MCE && X86_LOCAL_APIC
|
||||||
select UEFI_CPER
|
select UEFI_CPER
|
||||||
|
select RAS
|
||||||
default n
|
default n
|
||||||
help
|
help
|
||||||
Certain usages such as Predictive Failure Analysis (PFA) require
|
Certain usages such as Predictive Failure Analysis (PFA) require
|
||||||
@ -384,6 +385,7 @@ config ACPI_EXTLOG
|
|||||||
|
|
||||||
Enhanced MCA Logging allows firmware to provide additional error
|
Enhanced MCA Logging allows firmware to provide additional error
|
||||||
information to system software, synchronous with MCE or CMCI. This
|
information to system software, synchronous with MCE or CMCI. This
|
||||||
driver adds support for that functionality.
|
driver adds support for that functionality, with a corresponding
|
||||||
|
tracepoint that carries that information to userspace.
|
||||||
|
|
||||||
endif # ACPI
|
endif # ACPI
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include <asm/mce.h>
|
#include <asm/mce.h>
|
||||||
|
|
||||||
#include "apei/apei-internal.h"
|
#include "apei/apei-internal.h"
|
||||||
|
#include <ras/ras_event.h>
|
||||||
|
|
||||||
#define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */
|
#define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */
|
||||||
|
|
||||||
@ -137,8 +138,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
|
|||||||
struct mce *mce = (struct mce *)data;
|
struct mce *mce = (struct mce *)data;
|
||||||
int bank = mce->bank;
|
int bank = mce->bank;
|
||||||
int cpu = mce->extcpu;
|
int cpu = mce->extcpu;
|
||||||
struct acpi_generic_status *estatus;
|
struct acpi_generic_status *estatus, *tmp;
|
||||||
int rc;
|
struct acpi_generic_data *gdata;
|
||||||
|
const uuid_le *fru_id = &NULL_UUID_LE;
|
||||||
|
char *fru_text = "";
|
||||||
|
uuid_le *sec_type;
|
||||||
|
static u32 err_seq;
|
||||||
|
|
||||||
estatus = extlog_elog_entry_check(cpu, bank);
|
estatus = extlog_elog_entry_check(cpu, bank);
|
||||||
if (estatus == NULL)
|
if (estatus == NULL)
|
||||||
@ -148,7 +153,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
|
|||||||
/* clear record status to enable BIOS to update it again */
|
/* clear record status to enable BIOS to update it again */
|
||||||
estatus->block_status = 0;
|
estatus->block_status = 0;
|
||||||
|
|
||||||
rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
|
tmp = (struct acpi_generic_status *)elog_buf;
|
||||||
|
print_extlog_rcd(NULL, tmp, cpu);
|
||||||
|
|
||||||
|
/* log event via trace */
|
||||||
|
err_seq++;
|
||||||
|
gdata = (struct acpi_generic_data *)(tmp + 1);
|
||||||
|
if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
|
||||||
|
fru_id = (uuid_le *)gdata->fru_id;
|
||||||
|
if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
|
||||||
|
fru_text = gdata->fru_text;
|
||||||
|
sec_type = (uuid_le *)gdata->section_type;
|
||||||
|
if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
|
||||||
|
struct cper_sec_mem_err *mem = (void *)(gdata + 1);
|
||||||
|
if (gdata->error_data_length >= sizeof(*mem))
|
||||||
|
trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
|
||||||
|
(u8)gdata->error_severity);
|
||||||
|
}
|
||||||
|
|
||||||
return NOTIFY_STOP;
|
return NOTIFY_STOP;
|
||||||
}
|
}
|
||||||
|
@ -207,7 +207,7 @@ const char *cper_mem_err_type_str(unsigned int etype)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
|
EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
|
||||||
|
|
||||||
static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg)
|
static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
|
||||||
{
|
{
|
||||||
u32 len, n;
|
u32 len, n;
|
||||||
|
|
||||||
@ -249,7 +249,7 @@ static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg)
|
|||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg)
|
static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
|
||||||
{
|
{
|
||||||
u32 len, n;
|
u32 len, n;
|
||||||
const char *bank = NULL, *device = NULL;
|
const char *bank = NULL, *device = NULL;
|
||||||
@ -271,8 +271,44 @@ static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg)
|
|||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * cper_mem_err_pack - copy the location-related fields of a CPER memory
 * error section into its compact form.
 * @mem:  source memory error section; only read
 * @cmem: destination; each member assigned below is overwritten
 *
 * Copies validation_bits, the node/card/module/bank/device/row/column/
 * bit_pos fields, the requestor/responder/target ids, rank and the two
 * handle fields. No other member of @mem is read here.
 */
void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
|
||||||
|
struct cper_mem_err_compact *cmem)
|
||||||
|
{
|
||||||
|
cmem->validation_bits = mem->validation_bits;
|
||||||
|
cmem->node = mem->node;
|
||||||
|
cmem->card = mem->card;
|
||||||
|
cmem->module = mem->module;
|
||||||
|
cmem->bank = mem->bank;
|
||||||
|
cmem->device = mem->device;
|
||||||
|
cmem->row = mem->row;
|
||||||
|
cmem->column = mem->column;
|
||||||
|
cmem->bit_pos = mem->bit_pos;
|
||||||
|
cmem->requestor_id = mem->requestor_id;
|
||||||
|
cmem->responder_id = mem->responder_id;
|
||||||
|
cmem->target_id = mem->target_id;
|
||||||
|
cmem->rank = mem->rank;
|
||||||
|
cmem->mem_array_handle = mem->mem_array_handle;
|
||||||
|
cmem->mem_dev_handle = mem->mem_dev_handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * cper_mem_err_unpack - append the decoded location text for @cmem to @p.
 * @p:    trace_seq the text is written into
 * @cmem: compact memory error record to decode
 *
 * Returns a pointer to the position inside @p's buffer where the appended
 * text starts.
 *
 * NOTE(review): rcd_decode_str is not local to this function and is also
 * written by cper_print_mem(); no locking is visible here - confirm that
 * callers cannot run these paths concurrently.
 */
const char *cper_mem_err_unpack(struct trace_seq *p,
|
||||||
|
struct cper_mem_err_compact *cmem)
|
||||||
|
{
|
||||||
|
/* remember where our output begins before anything is appended */
const char *ret = p->buffer + p->len;
|
||||||
|
|
||||||
|
/* a non-zero return from the helper means rcd_decode_str holds text */
if (cper_mem_err_location(cmem, rcd_decode_str))
|
||||||
|
trace_seq_printf(p, "%s", rcd_decode_str);
|
||||||
|
if (cper_dimm_err_location(cmem, rcd_decode_str))
|
||||||
|
trace_seq_printf(p, "%s", rcd_decode_str);
|
||||||
|
/* terminate the appended text so @ret can be printed with %s */
trace_seq_putc(p, '\0');
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
|
static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
|
||||||
{
|
{
|
||||||
|
struct cper_mem_err_compact cmem;
|
||||||
|
|
||||||
if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
|
if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
|
||||||
printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
|
printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
|
||||||
if (mem->validation_bits & CPER_MEM_VALID_PA)
|
if (mem->validation_bits & CPER_MEM_VALID_PA)
|
||||||
@ -281,14 +317,15 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
|
|||||||
if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
|
if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
|
||||||
printk("%s""physical_address_mask: 0x%016llx\n",
|
printk("%s""physical_address_mask: 0x%016llx\n",
|
||||||
pfx, mem->physical_addr_mask);
|
pfx, mem->physical_addr_mask);
|
||||||
if (cper_mem_err_location(mem, rcd_decode_str))
|
cper_mem_err_pack(mem, &cmem);
|
||||||
|
if (cper_mem_err_location(&cmem, rcd_decode_str))
|
||||||
printk("%s%s\n", pfx, rcd_decode_str);
|
printk("%s%s\n", pfx, rcd_decode_str);
|
||||||
if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
|
if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
|
||||||
u8 etype = mem->error_type;
|
u8 etype = mem->error_type;
|
||||||
printk("%s""error_type: %d, %s\n", pfx, etype,
|
printk("%s""error_type: %d, %s\n", pfx, etype,
|
||||||
cper_mem_err_type_str(etype));
|
cper_mem_err_type_str(etype));
|
||||||
}
|
}
|
||||||
if (cper_dimm_err_location(mem, rcd_decode_str))
|
if (cper_dimm_err_location(&cmem, rcd_decode_str))
|
||||||
printk("%s%s\n", pfx, rcd_decode_str);
|
printk("%s%s\n", pfx, rcd_decode_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,4 +23,7 @@ static int __init ras_init(void)
|
|||||||
}
|
}
|
||||||
subsys_initcall(ras_init);
|
subsys_initcall(ras_init);
|
||||||
|
|
||||||
|
#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE)
|
||||||
|
EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
|
||||||
|
#endif
|
||||||
EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
|
EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
#define LINUX_CPER_H
|
#define LINUX_CPER_H
|
||||||
|
|
||||||
#include <linux/uuid.h>
|
#include <linux/uuid.h>
|
||||||
|
#include <linux/trace_seq.h>
|
||||||
|
|
||||||
/* CPER record signature and the size */
|
/* CPER record signature and the size */
|
||||||
#define CPER_SIG_RECORD "CPER"
|
#define CPER_SIG_RECORD "CPER"
|
||||||
@ -363,6 +364,24 @@ struct cper_sec_mem_err {
|
|||||||
__u16 mem_dev_handle; /* module handle in UEFI 2.4 */
|
__u16 mem_dev_handle; /* module handle in UEFI 2.4 */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Compact form of a memory error record. It is filled in by
 * cper_mem_err_pack() from a struct cper_sec_mem_err and decoded by
 * cper_mem_err_unpack(), both declared in this header.
 */
struct cper_mem_err_compact {
|
||||||
|
__u64 validation_bits;
|
||||||
|
__u16 node;
|
||||||
|
__u16 card;
|
||||||
|
__u16 module;
|
||||||
|
__u16 bank;
|
||||||
|
__u16 device;
|
||||||
|
__u16 row;
|
||||||
|
__u16 column;
|
||||||
|
__u16 bit_pos;
|
||||||
|
__u64 requestor_id;
|
||||||
|
__u64 responder_id;
|
||||||
|
__u64 target_id;
|
||||||
|
__u16 rank;
|
||||||
|
__u16 mem_array_handle;
|
||||||
|
__u16 mem_dev_handle;
|
||||||
|
};
|
||||||
|
|
||||||
struct cper_sec_pcie {
|
struct cper_sec_pcie {
|
||||||
__u64 validation_bits;
|
__u64 validation_bits;
|
||||||
__u32 port_type;
|
__u32 port_type;
|
||||||
@ -406,5 +425,9 @@ const char *cper_severity_str(unsigned int);
|
|||||||
const char *cper_mem_err_type_str(unsigned int);
|
const char *cper_mem_err_type_str(unsigned int);
|
||||||
void cper_print_bits(const char *prefix, unsigned int bits,
|
void cper_print_bits(const char *prefix, unsigned int bits,
|
||||||
const char * const strs[], unsigned int strs_size);
|
const char * const strs[], unsigned int strs_size);
|
||||||
|
void cper_mem_err_pack(const struct cper_sec_mem_err *,
|
||||||
|
struct cper_mem_err_compact *);
|
||||||
|
const char *cper_mem_err_unpack(struct trace_seq *,
|
||||||
|
struct cper_mem_err_compact *);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -9,6 +9,70 @@
|
|||||||
#include <linux/edac.h>
|
#include <linux/edac.h>
|
||||||
#include <linux/ktime.h>
|
#include <linux/ktime.h>
|
||||||
#include <linux/aer.h>
|
#include <linux/aer.h>
|
||||||
|
#include <linux/cper.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MCE Extended Error Log trace event
|
||||||
|
*
|
||||||
|
* These events are generated when hardware detects a corrected or
|
||||||
|
* uncorrected error.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* memory trace event */
|
||||||
|
|
||||||
|
#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE)
|
||||||
|
/*
 * extlog_mem_event - trace record for one memory error section.
 * @mem:      memory error section the record is built from
 * @err_seq:  sequence number supplied by the caller
 * @fru_id:   FRU id; dereferenced and copied into the entry
 * @fru_text: FRU text; copied into the entry as a string
 * @sev:      severity, printed via cper_severity_str()
 *
 * etype, pa and pa_mask_lsb are stored as all-ones when the matching
 * validation bit in @mem is clear.
 */
TRACE_EVENT(extlog_mem_event,
|
||||||
|
TP_PROTO(struct cper_sec_mem_err *mem,
|
||||||
|
u32 err_seq,
|
||||||
|
const uuid_le *fru_id,
|
||||||
|
const char *fru_text,
|
||||||
|
u8 sev),
|
||||||
|
|
||||||
|
TP_ARGS(mem, err_seq, fru_id, fru_text, sev),
|
||||||
|
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(u32, err_seq)
|
||||||
|
__field(u8, etype)
|
||||||
|
__field(u8, sev)
|
||||||
|
__field(u64, pa)
|
||||||
|
__field(u8, pa_mask_lsb)
|
||||||
|
__field_struct(uuid_le, fru_id)
|
||||||
|
__string(fru_text, fru_text)
|
||||||
|
__field_struct(struct cper_mem_err_compact, data)
|
||||||
|
),
|
||||||
|
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->err_seq = err_seq;
|
||||||
|
if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
|
||||||
|
__entry->etype = mem->error_type;
|
||||||
|
else
|
||||||
|
__entry->etype = ~0;
|
||||||
|
__entry->sev = sev;
|
||||||
|
if (mem->validation_bits & CPER_MEM_VALID_PA)
|
||||||
|
__entry->pa = mem->physical_addr;
|
||||||
|
else
|
||||||
|
__entry->pa = ~0ull;
|
||||||
|
|
||||||
|
if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
|
||||||
|
/*
 * NOTE(review): presumably __ffs64() is undefined for a zero argument;
 * confirm a record cannot set the valid bit with an all-zero mask.
 */
__entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask);
|
||||||
|
else
|
||||||
|
__entry->pa_mask_lsb = ~0;
|
||||||
|
__entry->fru_id = *fru_id;
|
||||||
|
__assign_str(fru_text, fru_text);
|
||||||
|
/* keep only the compact location fields in the trace entry */
cper_mem_err_pack(mem, &__entry->data);
|
||||||
|
),
|
||||||
|
|
||||||
|
/* "%.20s" prints at most 20 characters of the stored FRU text */
TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s",
|
||||||
|
__entry->err_seq,
|
||||||
|
cper_severity_str(__entry->sev),
|
||||||
|
cper_mem_err_type_str(__entry->etype),
|
||||||
|
__entry->pa,
|
||||||
|
__entry->pa_mask_lsb,
|
||||||
|
cper_mem_err_unpack(p, &__entry->data),
|
||||||
|
&__entry->fru_id,
|
||||||
|
__get_str(fru_text))
|
||||||
|
);
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Hardware Events Report
|
* Hardware Events Report
|
||||||
|
Loading…
Reference in New Issue
Block a user