Merge tag 'edac_for_3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp

Pull EDAC updates from Borislav Petkov:
 "EDAC updates all over the place:

   - Enablement for AMD F15h models 0x60 CPUs.  Most notably DDR4 RAM
     support.  Out of tree stuff is adding the required PCI IDs.  From
     Aravind Gopalakrishnan.

   - Enable amd64_edac for 32-bit due to popular demand.  From Tomasz
     Pala.

   - Convert the AMD MCE injection module to debugfs, where it belongs.

   - Misc EDAC cleanups"

* tag 'edac_for_3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
  EDAC, MCE, AMD: Correct formatting of decoded text
  EDAC, mce_amd_inj: Add an injector function
  EDAC, mce_amd_inj: Add hw-injection attributes
  EDAC, mce_amd_inj: Enable direct writes to MCE MSRs
  EDAC, mce_amd_inj: Convert mce_amd_inj module to debugfs
  EDAC: Delete unnecessary check before calling pci_dev_put()
  EDAC, pci_sysfs: remove unneccessary ifdef around entire file
  ghes_edac: Use snprintf() to silence a static checker warning
  amd64_edac: Build module on x86-32
  EDAC, MCE, AMD: Add decoding table for MC6 xec
  amd64_edac: Add F15h M60h support
  {mv64x60,ppc4xx}_edac,: Remove deprecated IRQF_DISABLED
  EDAC: Sync memory types and names
  EDAC: Add DDR3 LRDIMM entries to edac_mem_types
  x86, amd_nb: Add device IDs to NB tables for F15h M60h
  pci_ids: Add PCI device IDs for F15h M60h
commit 0160928e79 (Linus Torvalds, 2014-12-08 20:17:49 -08:00)
18 changed files with 458 additions and 280 deletions

arch/x86/kernel/amd_nb.c

@ -21,6 +21,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
{}
@ -30,6 +31,7 @@ EXPORT_SYMBOL(amd_nb_misc_ids);
static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
{}
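
For context, and not part of this series: once the two F15h M60h entries are in the misc/link tables above, amd_nb.c caches those PCI functions at boot and other code reaches them through the existing helpers. A rough consumer sketch (the function and message text are hypothetical; amd_nb_num(), node_to_amd_nb() and pci_name() are the real helpers):

#include <linux/kernel.h>
#include <linux/pci.h>
#include <asm/amd_nb.h>

static int __init dump_amd_nbs(void)
{
	u16 i;

	if (!amd_nb_num())			/* nothing was cached at boot */
		return -ENODEV;

	for (i = 0; i < amd_nb_num(); i++) {
		struct amd_northbridge *nb = node_to_amd_nb(i);

		pr_info("NB %u: misc (F3) %s, link (F4) %s\n", i,
			pci_name(nb->misc),
			nb->link ? pci_name(nb->link) : "n/a");
	}
	return 0;
}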

drivers/edac/Kconfig

@ -61,14 +61,14 @@ config EDAC_DECODE_MCE
has been initialized.
config EDAC_MCE_INJ
tristate "Simple MCE injection interface over /sysfs"
depends on EDAC_DECODE_MCE
tristate "Simple MCE injection interface"
depends on EDAC_DECODE_MCE && DEBUG_FS
default n
help
This is a simple interface to inject MCEs over /sysfs and test
the MCE decoding code in EDAC.
This is a simple debugfs interface to inject MCEs and test different
aspects of the MCE handling code.
This is currently AMD-only.
WARNING: Do not even assume this interface is staying stable!
config EDAC_MM_EDAC
tristate "Main Memory EDAC (Error Detection And Correction) reporting"
@ -105,11 +105,11 @@ config EDAC_GHES
In doubt, say 'Y'.
config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64) K8, F10h"
depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE
tristate "AMD64 (Opteron, Athlon64)"
depends on EDAC_MM_EDAC && AMD_NB && EDAC_DECODE_MCE
help
Support for error detection and correction of DRAM ECC errors on
the AMD64 families of memory controllers (K8 and F10h)
the AMD64 families (>= K8) of memory controllers.
config EDAC_AMD64_ERROR_INJECTION
bool "Sysfs HW Error injection facilities"

drivers/edac/Makefile

@ -9,7 +9,7 @@
obj-$(CONFIG_EDAC) := edac_stub.o
obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o
edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o
edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o
edac_core-y += edac_module.o edac_device_sysfs.o
ifdef CONFIG_PCI

drivers/edac/amd64_edac.c

@ -692,9 +692,19 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
{
edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr);
edac_dbg(1, " DIMM type: %sbuffered; all DIMMs support ECC: %s\n",
(dclr & BIT(16)) ? "un" : "",
(dclr & BIT(19)) ? "yes" : "no");
if (pvt->dram_type == MEM_LRDDR3) {
u32 dcsm = pvt->csels[chan].csmasks[0];
/*
* It's assumed all LRDIMMs in a DCT are going to be of
* same 'type' until proven otherwise. So, use a cs
* value of '0' here to get dcsm value.
*/
edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3));
}
edac_dbg(1, "All DIMMs support ECC:%s\n",
(dclr & BIT(19)) ? "yes" : "no");
edac_dbg(1, " PAR/ERR parity: %s\n",
(dclr & BIT(8)) ? "enabled" : "disabled");
@ -756,7 +766,7 @@ static void prep_chip_selects(struct amd64_pvt *pvt)
if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8;
} else if (pvt->fam == 0x15 && pvt->model >= 0x30) {
} else if (pvt->fam == 0x15 && pvt->model == 0x30) {
pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
} else {
@ -813,25 +823,63 @@ static void read_dct_base_mask(struct amd64_pvt *pvt)
}
}
static enum mem_type determine_memory_type(struct amd64_pvt *pvt, int cs)
static void determine_memory_type(struct amd64_pvt *pvt)
{
enum mem_type type;
u32 dram_ctrl, dcsm;
/* F15h supports only DDR3 */
if (pvt->fam >= 0x15)
type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3;
else if (pvt->fam == 0x10 || pvt->ext_model >= K8_REV_F) {
switch (pvt->fam) {
case 0xf:
if (pvt->ext_model >= K8_REV_F)
goto ddr3;
pvt->dram_type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
return;
case 0x10:
if (pvt->dchr0 & DDR3_MODE)
type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3;
goto ddr3;
pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2;
return;
case 0x15:
if (pvt->model < 0x60)
goto ddr3;
/*
* Model 0x60h needs special handling:
*
* We use a Chip Select value of '0' to obtain dcsm.
* Theoretically, it is possible to populate LRDIMMs of different
* 'Rank' value on a DCT. But this is not the common case. So,
* it's reasonable to assume all DIMMs are going to be of same
* 'type' until proven otherwise.
*/
amd64_read_dct_pci_cfg(pvt, 0, DRAM_CONTROL, &dram_ctrl);
dcsm = pvt->csels[0].csmasks[0];
if (((dram_ctrl >> 8) & 0x7) == 0x2)
pvt->dram_type = MEM_DDR4;
else if (pvt->dclr0 & BIT(16))
pvt->dram_type = MEM_DDR3;
else if (dcsm & 0x3)
pvt->dram_type = MEM_LRDDR3;
else
type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2;
} else {
type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
pvt->dram_type = MEM_RDDR3;
return;
case 0x16:
goto ddr3;
default:
WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam);
pvt->dram_type = MEM_EMPTY;
}
return;
amd64_info("CS%d: %s\n", cs, edac_mem_types[type]);
return type;
ddr3:
pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3;
}
/* Get the number of DCT channels the memory controller is using. */
@ -958,8 +1006,12 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
if (WARN_ON(!nb))
return;
pci_func = (pvt->model == 0x30) ? PCI_DEVICE_ID_AMD_15H_M30H_NB_F1
: PCI_DEVICE_ID_AMD_15H_NB_F1;
if (pvt->model == 0x60)
pci_func = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1;
else if (pvt->model == 0x30)
pci_func = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1;
else
pci_func = PCI_DEVICE_ID_AMD_15H_NB_F1;
f1 = pci_get_related_function(nb->misc->vendor, pci_func, nb->misc);
if (WARN_ON(!f1))
@ -1049,7 +1101,7 @@ static int ddr2_cs_size(unsigned i, bool dct_width)
}
static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
unsigned cs_mode)
unsigned cs_mode, int cs_mask_nr)
{
u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
@ -1167,8 +1219,43 @@ static int ddr3_cs_size(unsigned i, bool dct_width)
return cs_size;
}
static int ddr3_lrdimm_cs_size(unsigned i, unsigned rank_multiply)
{
unsigned shift = 0;
int cs_size = 0;
if (i < 4 || i == 6)
cs_size = -1;
else if (i == 12)
shift = 7;
else if (!(i & 0x1))
shift = i >> 1;
else
shift = (i + 1) >> 1;
if (cs_size != -1)
cs_size = rank_multiply * (128 << shift);
return cs_size;
}
static int ddr4_cs_size(unsigned i)
{
int cs_size = 0;
if (i == 0)
cs_size = -1;
else if (i == 1)
cs_size = 1024;
else
/* Min cs_size = 1G */
cs_size = 1024 * (1 << (i >> 1));
return cs_size;
}
static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
unsigned cs_mode)
unsigned cs_mode, int cs_mask_nr)
{
u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
@ -1184,18 +1271,49 @@ static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
* F15h supports only 64bit DCT interfaces
*/
static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
unsigned cs_mode)
unsigned cs_mode, int cs_mask_nr)
{
WARN_ON(cs_mode > 12);
return ddr3_cs_size(cs_mode, false);
}
/* F15h M60h supports DDR4 mapping as well.. */
static int f15_m60h_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
unsigned cs_mode, int cs_mask_nr)
{
int cs_size;
u32 dcsm = pvt->csels[dct].csmasks[cs_mask_nr];
WARN_ON(cs_mode > 12);
if (pvt->dram_type == MEM_DDR4) {
if (cs_mode > 9)
return -1;
cs_size = ddr4_cs_size(cs_mode);
} else if (pvt->dram_type == MEM_LRDDR3) {
unsigned rank_multiply = dcsm & 0xf;
if (rank_multiply == 3)
rank_multiply = 4;
cs_size = ddr3_lrdimm_cs_size(cs_mode, rank_multiply);
} else {
/* Minimum cs size is 512mb for F15hM60h*/
if (cs_mode == 0x1)
return -1;
cs_size = ddr3_cs_size(cs_mode, false);
}
return cs_size;
}
/*
* F16h and F15h model 30h have only limited cs_modes.
*/
static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
unsigned cs_mode)
unsigned cs_mode, int cs_mask_nr)
{
WARN_ON(cs_mode > 12);
@ -1757,13 +1875,20 @@ static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
size0 = 0;
if (dcsb[dimm*2] & DCSB_CS_ENABLE)
/* For f15m60h, need multiplier for LRDIMM cs_size
* calculation. We pass 'dimm' value to the dbam_to_cs
* mapper so we can find the multiplier from the
* corresponding DCSM.
*/
size0 = pvt->ops->dbam_to_cs(pvt, ctrl,
DBAM_DIMM(dimm, dbam));
DBAM_DIMM(dimm, dbam),
dimm);
size1 = 0;
if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE)
size1 = pvt->ops->dbam_to_cs(pvt, ctrl,
DBAM_DIMM(dimm, dbam));
DBAM_DIMM(dimm, dbam),
dimm);
amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
dimm * 2, size0,
@ -1812,6 +1937,16 @@ static struct amd64_family_type family_types[] = {
.dbam_to_cs = f16_dbam_to_chip_select,
}
},
[F15_M60H_CPUS] = {
.ctl_name = "F15h_M60h",
.f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
.f3_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F3,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
.dbam_to_cs = f15_m60h_dbam_to_chip_select,
}
},
[F16_CPUS] = {
.ctl_name = "F16h",
.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
@ -2175,6 +2310,8 @@ static void read_mc_regs(struct amd64_pvt *pvt)
}
pvt->ecc_sym_sz = 4;
determine_memory_type(pvt);
edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
if (pvt->fam >= 0x10) {
amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
@ -2238,7 +2375,8 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
*/
cs_mode = DBAM_DIMM(csrow_nr / 2, dbam);
nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT);
nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, (csrow_nr / 2))
<< (20 - PAGE_SHIFT);
edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
csrow_nr, dct, cs_mode);
@ -2257,7 +2395,6 @@ static int init_csrows(struct mem_ctl_info *mci)
struct csrow_info *csrow;
struct dimm_info *dimm;
enum edac_type edac_mode;
enum mem_type mtype;
int i, j, empty = 1;
int nr_pages = 0;
u32 val;
@ -2302,8 +2439,6 @@ static int init_csrows(struct mem_ctl_info *mci)
nr_pages += row_dct1_pages;
}
mtype = determine_memory_type(pvt, i);
edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);
/*
@ -2317,7 +2452,7 @@ static int init_csrows(struct mem_ctl_info *mci)
for (j = 0; j < pvt->channel_count; j++) {
dimm = csrow->channels[j]->dimm;
dimm->mtype = mtype;
dimm->mtype = pvt->dram_type;
dimm->edac_mode = edac_mode;
}
}
@ -2604,6 +2739,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
fam_type = &family_types[F15_M30H_CPUS];
pvt->ops = &family_types[F15_M30H_CPUS].ops;
break;
} else if (pvt->model == 0x60) {
fam_type = &family_types[F15_M60H_CPUS];
pvt->ops = &family_types[F15_M60H_CPUS].ops;
break;
}
fam_type = &family_types[F15_CPUS];
@ -2828,55 +2967,13 @@ static void remove_one_instance(struct pci_dev *pdev)
* inquiry this table to see if this driver is for a given device found.
*/
static const struct pci_device_id amd64_pci_table[] = {
{
.vendor = PCI_VENDOR_ID_AMD,
.device = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
.subvendor = PCI_ANY_ID,
.subdevice = PCI_ANY_ID,
.class = 0,
.class_mask = 0,
},
{
.vendor = PCI_VENDOR_ID_AMD,
.device = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
.subvendor = PCI_ANY_ID,
.subdevice = PCI_ANY_ID,
.class = 0,
.class_mask = 0,
},
{
.vendor = PCI_VENDOR_ID_AMD,
.device = PCI_DEVICE_ID_AMD_15H_NB_F2,
.subvendor = PCI_ANY_ID,
.subdevice = PCI_ANY_ID,
.class = 0,
.class_mask = 0,
},
{
.vendor = PCI_VENDOR_ID_AMD,
.device = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
.subvendor = PCI_ANY_ID,
.subdevice = PCI_ANY_ID,
.class = 0,
.class_mask = 0,
},
{
.vendor = PCI_VENDOR_ID_AMD,
.device = PCI_DEVICE_ID_AMD_16H_NB_F2,
.subvendor = PCI_ANY_ID,
.subdevice = PCI_ANY_ID,
.class = 0,
.class_mask = 0,
},
{
.vendor = PCI_VENDOR_ID_AMD,
.device = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
.subvendor = PCI_ANY_ID,
.subdevice = PCI_ANY_ID,
.class = 0,
.class_mask = 0,
},
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F2) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F2) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F2) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F2) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F2) },
{0, }
};
MODULE_DEVICE_TABLE(pci, amd64_pci_table);
@ -2938,6 +3035,11 @@ static int __init amd64_edac_init(void)
goto err_no_instances;
setup_pci_device();
#ifdef CONFIG_X86_32
amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR);
#endif
return 0;
err_no_instances:
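
As a quick sanity check of the cs_mode mapping, the two new size helpers above can be copied verbatim into a small user-space harness and tabulated (-1 marks encodings the driver rejects). This is only an illustration, not part of the patch:

#include <stdio.h>

/* Verbatim copies of the helpers added above. */
static int ddr4_cs_size(unsigned i)
{
	int cs_size = 0;

	if (i == 0)
		cs_size = -1;
	else if (i == 1)
		cs_size = 1024;
	else
		/* Min cs_size = 1G */
		cs_size = 1024 * (1 << (i >> 1));

	return cs_size;
}

static int ddr3_lrdimm_cs_size(unsigned i, unsigned rank_multiply)
{
	unsigned shift = 0;
	int cs_size = 0;

	if (i < 4 || i == 6)
		cs_size = -1;
	else if (i == 12)
		shift = 7;
	else if (!(i & 0x1))
		shift = i >> 1;
	else
		shift = (i + 1) >> 1;

	if (cs_size != -1)
		cs_size = rank_multiply * (128 << shift);

	return cs_size;
}

int main(void)
{
	unsigned mode;

	printf("cs_mode  DDR4 (MB)  LRDDR3 4x (MB)\n");
	for (mode = 0; mode <= 12; mode++)
		printf("%7u  %9d  %14d\n", mode,
		       ddr4_cs_size(mode), ddr3_lrdimm_cs_size(mode, 4));

	return 0;
}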

drivers/edac/amd64_edac.h

@ -162,10 +162,12 @@
/*
* PCI-defined configuration space registers
*/
#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 0x141b
#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F2 0x141c
#define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601
#define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602
#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 0x141b
#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F2 0x141c
#define PCI_DEVICE_ID_AMD_15H_M60H_NB_F1 0x1571
#define PCI_DEVICE_ID_AMD_15H_M60H_NB_F2 0x1572
#define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531
#define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532
#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F1 0x1581
@ -221,6 +223,8 @@
#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE)
#define DRAM_CONTROL 0x78
#define DBAM0 0x80
#define DBAM1 0x180
@ -301,6 +305,7 @@ enum amd_families {
F10_CPUS,
F15_CPUS,
F15_M30H_CPUS,
F15_M60H_CPUS,
F16_CPUS,
F16_M30H_CPUS,
NUM_FAMILIES,
@ -379,6 +384,9 @@ struct amd64_pvt {
/* place to store error injection parameters prior to issue */
struct error_injection injection;
/* cache the dram_type */
enum mem_type dram_type;
};
enum err_codes {
@ -480,7 +488,8 @@ struct low_ops {
int (*early_channel_count) (struct amd64_pvt *pvt);
void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr,
struct err_info *);
int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, unsigned cs_mode);
int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct,
unsigned cs_mode, int cs_mask_nr);
};
struct amd64_family_type {

drivers/edac/edac_mc.c

@ -125,27 +125,27 @@ static void edac_mc_dump_mci(struct mem_ctl_info *mci)
#endif /* CONFIG_EDAC_DEBUG */
/*
* keep those in sync with the enum mem_type
*/
const char * const edac_mem_types[] = {
"Empty csrow",
"Reserved csrow type",
"Unknown csrow type",
"Fast page mode RAM",
"Extended data out RAM",
"Burst Extended data out RAM",
"Single data rate SDRAM",
"Registered single data rate SDRAM",
"Double data rate SDRAM",
"Registered Double data rate SDRAM",
"Rambus DRAM",
"Unbuffered DDR2 RAM",
"Fully buffered DDR2",
"Registered DDR2 RAM",
"Rambus XDR",
"Unbuffered DDR3 RAM",
"Registered DDR3 RAM",
[MEM_EMPTY] = "Empty csrow",
[MEM_RESERVED] = "Reserved csrow type",
[MEM_UNKNOWN] = "Unknown csrow type",
[MEM_FPM] = "Fast page mode RAM",
[MEM_EDO] = "Extended data out RAM",
[MEM_BEDO] = "Burst Extended data out RAM",
[MEM_SDR] = "Single data rate SDRAM",
[MEM_RDR] = "Registered single data rate SDRAM",
[MEM_DDR] = "Double data rate SDRAM",
[MEM_RDDR] = "Registered Double data rate SDRAM",
[MEM_RMBS] = "Rambus DRAM",
[MEM_DDR2] = "Unbuffered DDR2 RAM",
[MEM_FB_DDR2] = "Fully buffered DDR2",
[MEM_RDDR2] = "Registered DDR2 RAM",
[MEM_XDR] = "Rambus XDR",
[MEM_DDR3] = "Unbuffered DDR3 RAM",
[MEM_RDDR3] = "Registered DDR3 RAM",
[MEM_LRDDR3] = "Load-Reduced DDR3 RAM",
[MEM_DDR4] = "Unbuffered DDR4 RAM",
[MEM_RDDR4] = "Registered DDR4 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
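
The point of keying the table by enum mem_type is that the names stay attached to the right enumerators even when entries are added in the middle, as MEM_LRDDR3 is here. The same idiom in a stand-alone, generic example (not kernel code):

#include <stdio.h>

enum mem_type { MEM_EMPTY, MEM_DDR3, MEM_LRDDR3, MEM_DDR4, MEM_TYPE_MAX };

static const char * const mem_names[] = {
	[MEM_EMPTY]  = "Empty csrow",
	[MEM_DDR3]   = "Unbuffered DDR3 RAM",
	[MEM_LRDDR3] = "Load-Reduced DDR3 RAM",
	[MEM_DDR4]   = "Unbuffered DDR4 RAM",
};

/* With designated initializers the array size is the highest initialized
 * index + 1, so this catches a table that was not extended up to the
 * newest enumerator. */
_Static_assert(sizeof(mem_names) / sizeof(mem_names[0]) == MEM_TYPE_MAX,
	       "mem_names out of sync with enum mem_type");

int main(void)
{
	printf("%s\n", mem_names[MEM_LRDDR3]);
	return 0;
}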

drivers/edac/edac_pci_sysfs.c

@ -14,9 +14,6 @@
#include "edac_core.h"
#include "edac_module.h"
/* Turn off this whole feature if PCI is not configured */
#ifdef CONFIG_PCI
#define EDAC_PCI_SYMLINK "device"
/* data variables exported via sysfs */
@ -761,5 +758,3 @@ MODULE_PARM_DESC(check_pci_errors,
module_param(edac_pci_panic_on_pe, int, 0644);
MODULE_PARM_DESC(edac_pci_panic_on_pe,
"Panic on PCI Bus Parity error: 0=off 1=on");
#endif /* CONFIG_PCI */

drivers/edac/ghes_edac.c

@ -413,8 +413,8 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
/* Generate the trace event */
grain_bits = fls_long(e->grain);
sprintf(pvt->detail_location, "APEI location: %s %s",
e->location, e->other_detail);
snprintf(pvt->detail_location, sizeof(pvt->detail_location),
"APEI location: %s %s", e->location, e->other_detail);
trace_mc_event(type, e->msg, e->label, e->error_count,
mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,

drivers/edac/i3000_edac.c

@ -542,8 +542,7 @@ fail1:
pci_unregister_driver(&i3000_driver);
fail0:
if (mci_pdev)
pci_dev_put(mci_pdev);
pci_dev_put(mci_pdev);
return pci_rc;
}

drivers/edac/i3200_edac.c

@ -523,8 +523,7 @@ fail1:
pci_unregister_driver(&i3200_driver);
fail0:
if (mci_pdev)
pci_dev_put(mci_pdev);
pci_dev_put(mci_pdev);
return pci_rc;
}

drivers/edac/i82443bxgx_edac.c

@ -458,8 +458,7 @@ static void __exit i82443bxgx_edacmc_exit(void)
if (!i82443bxgx_registered)
i82443bxgx_edacmc_remove_one(mci_pdev);
if (mci_pdev)
pci_dev_put(mci_pdev);
pci_dev_put(mci_pdev);
}
module_init(i82443bxgx_edacmc_init);

drivers/edac/mce_amd.c

@ -138,6 +138,15 @@ static const char * const mc5_mce_desc[] = {
"Retire status queue"
};
static const char * const mc6_mce_desc[] = {
"Hardware Assertion",
"Free List",
"Physical Register File",
"Retire Queue",
"Scheduler table",
"Status Register File",
};
static bool f12h_mc0_mce(u16 ec, u8 xec)
{
bool ret = false;
@ -432,8 +441,8 @@ static bool k8_mc2_mce(u16 ec, u8 xec)
pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
else if (xec == 0x0) {
if (TLB_ERROR(ec))
pr_cont(": %s error in a Page Descriptor Cache or "
"Guest TLB.\n", TT_MSG(ec));
pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n",
TT_MSG(ec));
else if (BUS_ERROR(ec))
pr_cont(": %s/ECC error in data read from NB: %s.\n",
R4_MSG(ec), PP_MSG(ec));
@ -672,38 +681,10 @@ static void decode_mc6_mce(struct mce *m)
pr_emerg(HW_ERR "MC6 Error: ");
switch (xec) {
case 0x0:
pr_cont("Hardware Assertion");
break;
case 0x1:
pr_cont("Free List");
break;
case 0x2:
pr_cont("Physical Register File");
break;
case 0x3:
pr_cont("Retire Queue");
break;
case 0x4:
pr_cont("Scheduler table");
break;
case 0x5:
pr_cont("Status Register File");
break;
default:
if (xec > 0x5)
goto wrong_mc6_mce;
break;
}
pr_cont(" parity error.\n");
pr_cont("%s parity error.\n", mc6_mce_desc[xec]);
return;
wrong_mc6_mce:
@ -800,7 +781,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
pr_cont("]: 0x%016llx\n", m->status);
if (m->status & MCI_STATUS_ADDRV)
pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr);
pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
if (!fam_ops)
goto err_code;
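
The MC6 rework above replaces an open-coded switch with a lookup into mc6_mce_desc[] plus a range check. The same decode-by-table shape in isolation, as a user-space sketch (illustrative only):

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const char * const mc6_mce_desc[] = {
	"Hardware Assertion",
	"Free List",
	"Physical Register File",
	"Retire Queue",
	"Scheduler table",
	"Status Register File",
};

static void decode_mc6(unsigned int xec)
{
	if (xec >= ARRAY_SIZE(mc6_mce_desc)) {
		printf("unknown MC6 extended error code: 0x%x\n", xec);
		return;
	}
	printf("%s parity error.\n", mc6_mce_desc[xec]);
}

int main(void)
{
	decode_mc6(0x2);	/* Physical Register File parity error. */
	decode_mc6(0x9);	/* out of range -> unknown */
	return 0;
}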

drivers/edac/mce_amd_inj.c

@ -1,173 +1,262 @@
/*
* A simple MCE injection facility for testing the MCE decoding code. This
* driver should be built as module so that it can be loaded on production
* kernels for testing purposes.
* A simple MCE injection facility for testing different aspects of the RAS
* code. This driver should be built as module so that it can be loaded
* on production kernels for testing purposes.
*
* This file may be distributed under the terms of the GNU General Public
* License version 2.
*
* Copyright (c) 2010: Borislav Petkov <bp@alien8.de>
* Copyright (c) 2010-14: Borislav Petkov <bp@alien8.de>
* Advanced Micro Devices Inc.
*/
#include <linux/kobject.h>
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/edac.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <asm/mce.h>
#include "mce_amd.h"
struct edac_mce_attr {
struct attribute attr;
ssize_t (*show) (struct kobject *kobj, struct edac_mce_attr *attr, char *buf);
ssize_t (*store)(struct kobject *kobj, struct edac_mce_attr *attr,
const char *buf, size_t count);
};
#define EDAC_MCE_ATTR(_name, _mode, _show, _store) \
static struct edac_mce_attr mce_attr_##_name = __ATTR(_name, _mode, _show, _store)
static struct kobject *mce_kobj;
/*
* Collect all the MCi_XXX settings
*/
static struct mce i_mce;
static struct dentry *dfs_inj;
#define MCE_INJECT_STORE(reg) \
static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \
struct edac_mce_attr *attr, \
const char *data, size_t count)\
#define MCE_INJECT_SET(reg) \
static int inj_##reg##_set(void *data, u64 val) \
{ \
int ret = 0; \
unsigned long value; \
struct mce *m = (struct mce *)data; \
\
ret = kstrtoul(data, 16, &value); \
if (ret < 0) \
printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \
\
i_mce.reg = value; \
\
return count; \
m->reg = val; \
return 0; \
}
MCE_INJECT_STORE(status);
MCE_INJECT_STORE(misc);
MCE_INJECT_STORE(addr);
MCE_INJECT_SET(status);
MCE_INJECT_SET(misc);
MCE_INJECT_SET(addr);
#define MCE_INJECT_SHOW(reg) \
static ssize_t edac_inject_##reg##_show(struct kobject *kobj, \
struct edac_mce_attr *attr, \
char *buf) \
#define MCE_INJECT_GET(reg) \
static int inj_##reg##_get(void *data, u64 *val) \
{ \
return sprintf(buf, "0x%016llx\n", i_mce.reg); \
struct mce *m = (struct mce *)data; \
\
*val = m->reg; \
return 0; \
}
MCE_INJECT_SHOW(status);
MCE_INJECT_SHOW(misc);
MCE_INJECT_SHOW(addr);
MCE_INJECT_GET(status);
MCE_INJECT_GET(misc);
MCE_INJECT_GET(addr);
EDAC_MCE_ATTR(status, 0644, edac_inject_status_show, edac_inject_status_store);
EDAC_MCE_ATTR(misc, 0644, edac_inject_misc_show, edac_inject_misc_store);
EDAC_MCE_ATTR(addr, 0644, edac_inject_addr_show, edac_inject_addr_store);
DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
/*
* Caller needs to be make sure this cpu doesn't disappear
* from under us, i.e.: get_cpu/put_cpu.
*/
static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
{
u32 l, h;
int err;
err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
if (err) {
pr_err("%s: error reading HWCR\n", __func__);
return err;
}
enable ? (l |= BIT(18)) : (l &= ~BIT(18));
err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
if (err)
pr_err("%s: error writing HWCR\n", __func__);
return err;
}
static int flags_get(void *data, u64 *val)
{
struct mce *m = (struct mce *)data;
*val = m->inject_flags;
return 0;
}
static int flags_set(void *data, u64 val)
{
struct mce *m = (struct mce *)data;
m->inject_flags = (u8)val;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(flags_fops, flags_get, flags_set, "%llu\n");
/*
* On which CPU to inject?
*/
MCE_INJECT_GET(extcpu);
static int inj_extcpu_set(void *data, u64 val)
{
struct mce *m = (struct mce *)data;
if (val >= nr_cpu_ids || !cpu_online(val)) {
pr_err("%s: Invalid CPU: %llu\n", __func__, val);
return -EINVAL;
}
m->extcpu = val;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
static void trigger_mce(void *info)
{
asm volatile("int $18");
}
static void do_inject(void)
{
u64 mcg_status = 0;
unsigned int cpu = i_mce.extcpu;
u8 b = i_mce.bank;
if (!(i_mce.inject_flags & MCJ_EXCEPTION)) {
amd_decode_mce(NULL, 0, &i_mce);
return;
}
get_online_cpus();
if (!cpu_online(cpu))
goto err;
/* prep MCE global settings for the injection */
mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
if (!(i_mce.status & MCI_STATUS_PCC))
mcg_status |= MCG_STATUS_RIPV;
toggle_hw_mce_inject(cpu, true);
wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
(u32)mcg_status, (u32)(mcg_status >> 32));
wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
(u32)i_mce.status, (u32)(i_mce.status >> 32));
wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
(u32)i_mce.addr, (u32)(i_mce.addr >> 32));
wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
(u32)i_mce.misc, (u32)(i_mce.misc >> 32));
toggle_hw_mce_inject(cpu, false);
smp_call_function_single(cpu, trigger_mce, NULL, 0);
err:
put_online_cpus();
}
/*
* This denotes into which bank we're injecting and triggers
* the injection, at the same time.
*/
static ssize_t edac_inject_bank_store(struct kobject *kobj,
struct edac_mce_attr *attr,
const char *data, size_t count)
static int inj_bank_set(void *data, u64 val)
{
int ret = 0;
unsigned long value;
struct mce *m = (struct mce *)data;
ret = kstrtoul(data, 10, &value);
if (ret < 0) {
printk(KERN_ERR "Invalid bank value!\n");
return -EINVAL;
}
if (value > 5)
if (boot_cpu_data.x86 != 0x15 || value > 6) {
printk(KERN_ERR "Non-existent MCE bank: %lu\n", value);
if (val > 5) {
if (boot_cpu_data.x86 != 0x15 || val > 6) {
pr_err("Non-existent MCE bank: %llu\n", val);
return -EINVAL;
}
}
i_mce.bank = value;
m->bank = val;
do_inject();
amd_decode_mce(NULL, 0, &i_mce);
return count;
return 0;
}
static ssize_t edac_inject_bank_show(struct kobject *kobj,
struct edac_mce_attr *attr, char *buf)
static int inj_bank_get(void *data, u64 *val)
{
return sprintf(buf, "%d\n", i_mce.bank);
struct mce *m = (struct mce *)data;
*val = m->bank;
return 0;
}
EDAC_MCE_ATTR(bank, 0644, edac_inject_bank_show, edac_inject_bank_store);
DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
static struct edac_mce_attr *sysfs_attrs[] = { &mce_attr_status, &mce_attr_misc,
&mce_attr_addr, &mce_attr_bank
struct dfs_node {
char *name;
struct dentry *d;
const struct file_operations *fops;
} dfs_fls[] = {
{ .name = "status", .fops = &status_fops },
{ .name = "misc", .fops = &misc_fops },
{ .name = "addr", .fops = &addr_fops },
{ .name = "bank", .fops = &bank_fops },
{ .name = "flags", .fops = &flags_fops },
{ .name = "cpu", .fops = &extcpu_fops },
};
static int __init edac_init_mce_inject(void)
{
struct bus_type *edac_subsys = NULL;
int i, err = 0;
edac_subsys = edac_get_sysfs_subsys();
if (!edac_subsys)
return -EINVAL;
mce_kobj = kobject_create_and_add("mce", &edac_subsys->dev_root->kobj);
if (!mce_kobj) {
printk(KERN_ERR "Error creating a mce kset.\n");
err = -ENOMEM;
goto err_mce_kobj;
}
for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) {
err = sysfs_create_file(mce_kobj, &sysfs_attrs[i]->attr);
if (err) {
printk(KERN_ERR "Error creating %s in sysfs.\n",
sysfs_attrs[i]->attr.name);
goto err_sysfs_create;
}
}
return 0;
err_sysfs_create:
while (--i >= 0)
sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
kobject_del(mce_kobj);
err_mce_kobj:
edac_put_sysfs_subsys();
return err;
}
static void __exit edac_exit_mce_inject(void)
static int __init init_mce_inject(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++)
sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
dfs_inj = debugfs_create_dir("mce-inject", NULL);
if (!dfs_inj)
return -EINVAL;
kobject_del(mce_kobj);
for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
S_IRUSR | S_IWUSR,
dfs_inj,
&i_mce,
dfs_fls[i].fops);
edac_put_sysfs_subsys();
if (!dfs_fls[i].d)
goto err_dfs_add;
}
return 0;
err_dfs_add:
while (--i >= 0)
debugfs_remove(dfs_fls[i].d);
debugfs_remove(dfs_inj);
dfs_inj = NULL;
return -ENOMEM;
}
module_init(edac_init_mce_inject);
module_exit(edac_exit_mce_inject);
static void __exit exit_mce_inject(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
debugfs_remove(dfs_fls[i].d);
memset(&dfs_fls, 0, sizeof(dfs_fls));
debugfs_remove(dfs_inj);
dfs_inj = NULL;
}
module_init(init_mce_inject);
module_exit(exit_mce_inject);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Borislav Petkov <bp@alien8.de>");
MODULE_AUTHOR("AMD Inc.");
MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding");
MODULE_DESCRIPTION("MCE injection facility for RAS testing");

drivers/edac/mv64x60_edac.c

@ -178,7 +178,7 @@ static int mv64x60_pci_err_probe(struct platform_device *pdev)
res = devm_request_irq(&pdev->dev,
pdata->irq,
mv64x60_pci_isr,
IRQF_DISABLED,
0,
"[EDAC] PCI err",
pci);
if (res < 0) {
@ -345,7 +345,7 @@ static int mv64x60_sram_err_probe(struct platform_device *pdev)
res = devm_request_irq(&pdev->dev,
pdata->irq,
mv64x60_sram_isr,
IRQF_DISABLED,
0,
"[EDAC] SRAM err",
edac_dev);
if (res < 0) {
@ -540,7 +540,7 @@ static int mv64x60_cpu_err_probe(struct platform_device *pdev)
res = devm_request_irq(&pdev->dev,
pdata->irq,
mv64x60_cpu_isr,
IRQF_DISABLED,
0,
"[EDAC] CPU err",
edac_dev);
if (res < 0) {
@ -800,7 +800,7 @@ static int mv64x60_mc_err_probe(struct platform_device *pdev)
res = devm_request_irq(&pdev->dev,
pdata->irq,
mv64x60_mc_isr,
IRQF_DISABLED,
0,
"[EDAC] MC err",
mci);
if (res < 0) {

drivers/edac/ppc4xx_edac.c

@ -1120,7 +1120,7 @@ static int ppc4xx_edac_register_irq(struct platform_device *op,
status = request_irq(ded_irq,
ppc4xx_edac_isr,
IRQF_DISABLED,
0,
"[EDAC] MC ECCDED",
mci);
@ -1134,7 +1134,7 @@ static int ppc4xx_edac_register_irq(struct platform_device *op,
status = request_irq(sec_irq,
ppc4xx_edac_isr,
IRQF_DISABLED,
0,
"[EDAC] MC ECCSEC",
mci);

drivers/edac/x38_edac.c

@ -500,8 +500,7 @@ fail1:
pci_unregister_driver(&x38_driver);
fail0:
if (mci_pdev)
pci_dev_put(mci_pdev);
pci_dev_put(mci_pdev);
return pci_rc;
}

include/linux/edac.h

@ -194,7 +194,8 @@ static inline char *mc_event_error_type(const unsigned int err_type)
* @MEM_DDR3: DDR3 RAM
* @MEM_RDDR3: Registered DDR3 RAM
* This is a variant of the DDR3 memories.
* @MEM_DDR4: DDR4 RAM
* @MEM_LRDDR3 Load-Reduced DDR3 memory.
* @MEM_DDR4: Unbuffered DDR4 RAM
* @MEM_RDDR4: Registered DDR4 RAM
* This is a variant of the DDR4 memories.
*/
@ -216,6 +217,7 @@ enum mem_type {
MEM_XDR,
MEM_DDR3,
MEM_RDDR3,
MEM_LRDDR3,
MEM_DDR4,
MEM_RDDR4,
};

include/linux/pci_ids.h

@ -522,6 +522,8 @@
#define PCI_DEVICE_ID_AMD_15H_M10H_F3 0x1403
#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F3 0x141d
#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F4 0x141e
#define PCI_DEVICE_ID_AMD_15H_M60H_NB_F3 0x1573
#define PCI_DEVICE_ID_AMD_15H_M60H_NB_F4 0x1574
#define PCI_DEVICE_ID_AMD_15H_NB_F0 0x1600
#define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601
#define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602