From 3a8f21f170dc7c85f9c0fc5cc49a2fd31bd00628 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 21 Mar 2016 11:01:38 -0500 Subject: [PATCH 01/25] EDAC, altera: Make L2C depend on L2x0 cache controller Make L2 cache depend instead of forcibly select the L2 cache support. Signed-off-by: Thor Thayer Cc: devicetree@vger.kernel.org Cc: dinguyen@opensource.altera.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Cc: linux-edac Link: http://lkml.kernel.org/r/1458576106-24505-2-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- drivers/edac/Kconfig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 37755e63cc28..6ca7474baf4a 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -378,12 +378,11 @@ config EDAC_ALTERA config EDAC_ALTERA_L2C bool "Altera L2 Cache ECC" - depends on EDAC_ALTERA=y - select CACHE_L2X0 + depends on EDAC_ALTERA=y && CACHE_L2X0 help Support for error detection and correction on the Altera L2 cache Memory for Altera SoCs. This option - requires L2 cache so it will force that selection. + requires L2 cache. config EDAC_ALTERA_OCRAM bool "Altera On-Chip RAM ECC" From 05b088b6f8f21ed6a451c9a5fc8ec23cec665f12 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 21 Mar 2016 11:01:39 -0500 Subject: [PATCH 02/25] EDAC, altera: Move device structs and defines to the header Move the device structs and defines to altera_edac.h in preparation for adding the Arria10 L2 cache ECC. Signed-off-by: Thor Thayer Cc: devicetree@vger.kernel.org Cc: dinguyen@opensource.altera.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Cc: linux-edac Link: http://lkml.kernel.org/r/1458576106-24505-3-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- drivers/edac/altera_edac.c | 43 ------------------------------------- drivers/edac/altera_edac.h | 44 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 43 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 63e42098726d..eee7a395666e 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -78,27 +78,6 @@ static const struct altr_sdram_prv_data a10_data = { .ue_set_mask = A10_DIAGINT_TDERRA_MASK, }; -/************************** EDAC Device Defines **************************/ - -/* OCRAM ECC Management Group Defines */ -#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04 -#define ALTR_OCR_ECC_EN BIT(0) -#define ALTR_OCR_ECC_INJS BIT(1) -#define ALTR_OCR_ECC_INJD BIT(2) -#define ALTR_OCR_ECC_SERR BIT(3) -#define ALTR_OCR_ECC_DERR BIT(4) - -/* L2 ECC Management Group Defines */ -#define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00 -#define ALTR_L2_ECC_EN BIT(0) -#define ALTR_L2_ECC_INJS BIT(1) -#define ALTR_L2_ECC_INJD BIT(2) - -#define ALTR_UE_TRIGGER_CHAR 'U' /* Trigger for UE */ -#define ALTR_TRIGGER_READ_WRD_CNT 32 /* Line size x 4 */ -#define ALTR_TRIG_OCRAM_BYTE_SIZE 128 /* Line size x 4 */ -#define ALTR_TRIG_L2C_BYTE_SIZE 4096 /* Full Page */ - /*********************** EDAC Memory Controller Functions ****************/ /* The SDRAM controller uses the EDAC Memory Controller framework. */ @@ -571,28 +550,6 @@ module_platform_driver(altr_edac_driver); const struct edac_device_prv_data ocramecc_data; const struct edac_device_prv_data l2ecc_data; -struct edac_device_prv_data { - int (*setup)(struct platform_device *pdev, void __iomem *base); - int ce_clear_mask; - int ue_clear_mask; - char dbgfs_name[20]; - void * (*alloc_mem)(size_t size, void **other); - void (*free_mem)(void *p, size_t size, void *other); - int ecc_enable_mask; - int ce_set_mask; - int ue_set_mask; - int trig_alloc_sz; -}; - -struct altr_edac_device_dev { - void __iomem *base; - int sb_irq; - int db_irq; - const struct edac_device_prv_data *data; - struct dentry *debugfs_dir; - char *edac_dev_name; -}; - static irqreturn_t altr_edac_device_handler(int irq, void *dev_id) { irqreturn_t ret_value = IRQ_NONE; diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 953077d3e4f3..993bb0a7a372 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -195,4 +195,48 @@ struct altr_sdram_mc_data { const struct altr_sdram_prv_data *data; }; +/************************** EDAC Device Defines **************************/ +/***** General Device Trigger Defines *****/ +#define ALTR_UE_TRIGGER_CHAR 'U' /* Trigger for UE */ +#define ALTR_TRIGGER_READ_WRD_CNT 32 /* Line size x 4 */ +#define ALTR_TRIG_OCRAM_BYTE_SIZE 128 /* Line size x 4 */ +#define ALTR_TRIG_L2C_BYTE_SIZE 4096 /* Full Page */ + +/******* Cyclone5 and Arria5 Defines *******/ +/* OCRAM ECC Management Group Defines */ +#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04 +#define ALTR_OCR_ECC_EN BIT(0) +#define ALTR_OCR_ECC_INJS BIT(1) +#define ALTR_OCR_ECC_INJD BIT(2) +#define ALTR_OCR_ECC_SERR BIT(3) +#define ALTR_OCR_ECC_DERR BIT(4) + +/* L2 ECC Management Group Defines */ +#define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00 +#define ALTR_L2_ECC_EN BIT(0) +#define ALTR_L2_ECC_INJS BIT(1) +#define ALTR_L2_ECC_INJD BIT(2) + +struct edac_device_prv_data { + int (*setup)(struct platform_device *pdev, void __iomem *base); + int ce_clear_mask; + int ue_clear_mask; + char dbgfs_name[20]; + void * (*alloc_mem)(size_t size, void **other); + void (*free_mem)(void *p, size_t size, void *other); + int ecc_enable_mask; + int ce_set_mask; + int ue_set_mask; + int trig_alloc_sz; +}; + +struct altr_edac_device_dev { + void __iomem *base; + int sb_irq; + int db_irq; + const struct edac_device_prv_data *data; + struct dentry *debugfs_dir; + char *edac_dev_name; +}; + #endif /* #ifndef _ALTERA_EDAC_H */ From 328ca7ae81dd842ff56b35f1e8422e6af1c80c14 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 21 Mar 2016 11:01:40 -0500 Subject: [PATCH 03/25] EDAC, altera: Remove platform device from check_deps() In preparation for the Arria10 peripheral ECCs, remove the platform device parameter from the check_deps() functions because it is not needed and makes the Arria10 check_deps() cleaner. Signed-off-by: Thor Thayer Cc: devicetree@vger.kernel.org Cc: dinguyen@opensource.altera.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Cc: linux-edac Link: http://lkml.kernel.org/r/1458576106-24505-4-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- drivers/edac/altera_edac.c | 10 +++++----- drivers/edac/altera_edac.h | 4 +++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index eee7a395666e..e20a045cbf20 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -746,7 +746,7 @@ static int altr_edac_device_probe(struct platform_device *pdev) /* Check specific dependencies for the module */ if (drvdata->data->setup) { - res = drvdata->data->setup(pdev, drvdata->base); + res = drvdata->data->setup(drvdata); if (res) goto fail1; } @@ -856,9 +856,9 @@ static void ocram_free_mem(void *p, size_t size, void *other) * Can't turn on ECC here because accessing un-initialized * memory will cause CE/UE errors possibly causing an ABORT. */ -static int altr_ocram_check_deps(struct platform_device *pdev, - void __iomem *base) +static int altr_ocram_check_deps(struct altr_edac_device_dev *device) { + void __iomem *base = device->base; if (readl(base) & ALTR_OCR_ECC_EN) return 0; @@ -923,9 +923,9 @@ static void l2_free_mem(void *p, size_t size, void *other) * Bail if ECC is not enabled. * Note that L2 Cache Enable is forced at build time. */ -static int altr_l2_check_deps(struct platform_device *pdev, - void __iomem *base) +static int altr_l2_check_deps(struct altr_edac_device_dev *device) { + void __iomem *base = device->base; if (readl(base) & ALTR_L2_ECC_EN) return 0; diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 993bb0a7a372..32c798a6f980 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -217,8 +217,10 @@ struct altr_sdram_mc_data { #define ALTR_L2_ECC_INJS BIT(1) #define ALTR_L2_ECC_INJD BIT(2) +struct altr_edac_device_dev; + struct edac_device_prv_data { - int (*setup)(struct platform_device *pdev, void __iomem *base); + int (*setup)(struct altr_edac_device_dev *device); int ce_clear_mask; int ue_clear_mask; char dbgfs_name[20]; From 27439a1a632d9936159863fb11f5bc4d55eaab04 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 21 Mar 2016 11:01:41 -0500 Subject: [PATCH 04/25] EDAC, altera: Abstract ECC Enable Mask in check_deps() In preparation for the Arria10 peripheral ECCs, use the ECC Enable mask in place of hard coded masks in the check dependency functions. Signed-off-by: Thor Thayer Cc: devicetree@vger.kernel.org Cc: dinguyen@opensource.altera.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Cc: linux-edac Link: http://lkml.kernel.org/r/1458576106-24505-5-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- drivers/edac/altera_edac.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index e20a045cbf20..0dbfa473f853 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -859,7 +859,9 @@ static void ocram_free_mem(void *p, size_t size, void *other) static int altr_ocram_check_deps(struct altr_edac_device_dev *device) { void __iomem *base = device->base; - if (readl(base) & ALTR_OCR_ECC_EN) + const struct edac_device_prv_data *prv = device->data; + + if (readl(base) & prv->ecc_enable_mask) return 0; edac_printk(KERN_ERR, EDAC_DEVICE, @@ -926,7 +928,10 @@ static void l2_free_mem(void *p, size_t size, void *other) static int altr_l2_check_deps(struct altr_edac_device_dev *device) { void __iomem *base = device->base; - if (readl(base) & ALTR_L2_ECC_EN) + const struct edac_device_prv_data *prv = device->data; + + if ((readl(base) & prv->ecc_enable_mask) == + prv->ecc_enable_mask) return 0; edac_printk(KERN_ERR, EDAC_DEVICE, From 811fce4f2a7aea0cd93815d0eaf42fbcc98bd930 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 21 Mar 2016 11:01:42 -0500 Subject: [PATCH 05/25] EDAC, altera: Add register offset for ECC Error Inject In preparation for the Arria10 peripheral ECCs, add a register offset from the ECC base to the private data structure to index to the error injection register. Signed-off-by: Thor Thayer Cc: devicetree@vger.kernel.org Cc: dinguyen@opensource.altera.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Cc: linux-edac Link: http://lkml.kernel.org/r/1458576106-24505-6-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- drivers/edac/altera_edac.c | 7 +++++-- drivers/edac/altera_edac.h | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 0dbfa473f853..502bf1fcf9e5 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -622,8 +622,9 @@ static ssize_t altr_edac_device_trig(struct file *file, if (ACCESS_ONCE(ptemp[i])) result = -1; /* Toggle Error bit (it is latched), leave ECC enabled */ - writel(error_mask, drvdata->base); - writel(priv->ecc_enable_mask, drvdata->base); + writel(error_mask, (drvdata->base + priv->set_err_ofst)); + writel(priv->ecc_enable_mask, (drvdata->base + + priv->set_err_ofst)); ptemp[i] = i; } /* Ensure it has been written out */ @@ -879,6 +880,7 @@ const struct edac_device_prv_data ocramecc_data = { .ecc_enable_mask = ALTR_OCR_ECC_EN, .ce_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJS), .ue_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJD), + .set_err_ofst = ALTR_OCR_ECC_REG_OFFSET, .trig_alloc_sz = ALTR_TRIG_OCRAM_BYTE_SIZE, }; @@ -949,6 +951,7 @@ const struct edac_device_prv_data l2ecc_data = { .ecc_enable_mask = ALTR_L2_ECC_EN, .ce_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJS), .ue_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJD), + .set_err_ofst = ALTR_L2_ECC_REG_OFFSET, .trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE, }; diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 32c798a6f980..d7ef94c13b98 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -205,6 +205,7 @@ struct altr_sdram_mc_data { /******* Cyclone5 and Arria5 Defines *******/ /* OCRAM ECC Management Group Defines */ #define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04 +#define ALTR_OCR_ECC_REG_OFFSET 0x00 #define ALTR_OCR_ECC_EN BIT(0) #define ALTR_OCR_ECC_INJS BIT(1) #define ALTR_OCR_ECC_INJD BIT(2) @@ -213,6 +214,7 @@ struct altr_sdram_mc_data { /* L2 ECC Management Group Defines */ #define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00 +#define ALTR_L2_ECC_REG_OFFSET 0x00 #define ALTR_L2_ECC_EN BIT(0) #define ALTR_L2_ECC_INJS BIT(1) #define ALTR_L2_ECC_INJD BIT(2) @@ -229,6 +231,7 @@ struct edac_device_prv_data { int ecc_enable_mask; int ce_set_mask; int ue_set_mask; + int set_err_ofst; int trig_alloc_sz; }; From 8b39ab7290d571b91867b15c02a59edf0a5b00bb Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 21 Mar 2016 11:01:43 -0500 Subject: [PATCH 06/25] Documentation, dt, socfpga: Add Altera Arria10 L2 cache binding Add the device tree bindings needed to support the Altera L2 cache on the Arria10 chip. Since all the peripherals share IRQs, the IRQ fields are now in the ecc_manager. Signed-off-by: Thor Thayer Acked-by: Rob Herring Cc: devicetree@vger.kernel.org Cc: dinguyen@opensource.altera.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Cc: linux-edac Link: http://lkml.kernel.org/r/1458576106-24505-7-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- .../bindings/arm/altera/socfpga-eccmgr.txt | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt index 885f93d14ef9..37ff9bfea5f4 100644 --- a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt +++ b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt @@ -3,6 +3,7 @@ This driver uses the EDAC framework to implement the SOCFPGA ECC Manager. The ECC Manager counts and corrects single bit errors and counts/handles double bit errors which are uncorrectable. +Cyclone5 and Arria5 ECC Manager Required Properties: - compatible : Should be "altr,socfpga-ecc-manager" - #address-cells: must be 1 @@ -47,3 +48,42 @@ Example: interrupts = <0 178 1>, <0 179 1>; }; }; + +Arria10 SoCFPGA ECC Manager +The Arria10 SoC ECC Manager handles the IRQs for each peripheral +in a shared register instead of individual IRQs like the Cyclone5 +and Arria5. Therefore the device tree is different as well. + +Required Properties: +- compatible : Should be "altr,socfpga-a10-ecc-manager" +- altr,sysgr-syscon : phandle to Arria10 System Manager Block + containing the ECC manager registers. +- #address-cells: must be 1 +- #size-cells: must be 1 +- interrupts : Should be single bit error interrupt, then double bit error + interrupt. Note the rising edge type. +- ranges : standard definition, should translate from local addresses + +Subcomponents: + +L2 Cache ECC +Required Properties: +- compatible : Should be "altr,socfpga-a10-l2-ecc" +- reg : Address and size for ECC error interrupt clear registers. + +Example: + + eccmgr: eccmgr@ffd06000 { + compatible = "altr,socfpga-a10-ecc-manager"; + altr,sysmgr-syscon = <&sysmgr>; + #address-cells = <1>; + #size-cells = <1>; + interrupts = <0 2 IRQ_TYPE_LEVEL_HIGH>, + <0 0 IRQ_TYPE_LEVEL_HIGH>; + ranges; + + l2-ecc@ffd06010 { + compatible = "altr,socfpga-a10-l2-ecc"; + reg = <0xffd06010 0x4>; + }; + }; From 588cb03ea208b303e6dee7e916f329043fd0fc26 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 21 Mar 2016 11:01:44 -0500 Subject: [PATCH 07/25] EDAC, altera: Add Arria10 L2 Cache ECC handling Add a private data structure for Arria10 L2 cache ECC and the probe function for it. The Arria10 ECC device IRQs are in a shared register so the ECC Manager parent/child relationship requires a different probe function. Signed-off-by: Thor Thayer Cc: devicetree@vger.kernel.org Cc: dinguyen@opensource.altera.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Cc: linux-edac Link: http://lkml.kernel.org/r/1458576106-24505-8-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- drivers/edac/altera_edac.c | 231 +++++++++++++++++++++++++++++++++++++ drivers/edac/altera_edac.h | 42 +++++++ 2 files changed, 273 insertions(+) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 502bf1fcf9e5..0afdc582766e 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -549,6 +550,7 @@ module_platform_driver(altr_edac_driver); const struct edac_device_prv_data ocramecc_data; const struct edac_device_prv_data l2ecc_data; +const struct edac_device_prv_data a10_l2ecc_data; static irqreturn_t altr_edac_device_handler(int irq, void *dev_id) { @@ -673,6 +675,8 @@ static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci, static const struct of_device_id altr_edac_device_of_match[] = { #ifdef CONFIG_EDAC_ALTERA_L2C { .compatible = "altr,socfpga-l2-ecc", .data = (void *)&l2ecc_data }, + { .compatible = "altr,socfpga-a10-l2-ecc", + .data = (void *)&a10_l2ecc_data }, #endif #ifdef CONFIG_EDAC_ALTERA_OCRAM { .compatible = "altr,socfpga-ocram-ecc", @@ -941,6 +945,24 @@ static int altr_l2_check_deps(struct altr_edac_device_dev *device) return -ENODEV; } +static irqreturn_t altr_edac_a10_l2_irq(struct altr_edac_device_dev *dci, + bool sberr) +{ + if (sberr) { + regmap_write(dci->edac->ecc_mgr_map, + A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST, + A10_SYSGMR_MPU_CLEAR_L2_ECC_SB); + edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name); + } else { + regmap_write(dci->edac->ecc_mgr_map, + A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST, + A10_SYSGMR_MPU_CLEAR_L2_ECC_MB); + edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name); + panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n"); + } + return IRQ_HANDLED; +} + const struct edac_device_prv_data l2ecc_data = { .setup = altr_l2_check_deps, .ce_clear_mask = 0, @@ -955,8 +977,217 @@ const struct edac_device_prv_data l2ecc_data = { .trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE, }; +const struct edac_device_prv_data a10_l2ecc_data = { + .setup = altr_l2_check_deps, + .ce_clear_mask = ALTR_A10_L2_ECC_SERR_CLR, + .ue_clear_mask = ALTR_A10_L2_ECC_MERR_CLR, + .irq_status_mask = A10_SYSMGR_ECC_INTSTAT_L2, + .dbgfs_name = "altr_l2_trigger", + .alloc_mem = l2_alloc_mem, + .free_mem = l2_free_mem, + .ecc_enable_mask = ALTR_A10_L2_ECC_EN_CTL, + .ce_set_mask = ALTR_A10_L2_ECC_CE_INJ_MASK, + .ue_set_mask = ALTR_A10_L2_ECC_UE_INJ_MASK, + .set_err_ofst = ALTR_A10_L2_ECC_INJ_OFST, + .ecc_irq_handler = altr_edac_a10_l2_irq, + .trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE, +}; + #endif /* CONFIG_EDAC_ALTERA_L2C */ +/********************* Arria10 EDAC Device Functions *************************/ + +/* + * The Arria10 EDAC Device Functions differ from the Cyclone5/Arria5 + * because 2 IRQs are shared among the all ECC peripherals. The ECC + * manager manages the IRQs and the children. + * Based on xgene_edac.c peripheral code. + */ + +static irqreturn_t altr_edac_a10_irq_handler(int irq, void *dev_id) +{ + irqreturn_t rc = IRQ_NONE; + struct altr_arria10_edac *edac = dev_id; + struct altr_edac_device_dev *dci; + int irq_status; + bool sberr = (irq == edac->sb_irq) ? 1 : 0; + int sm_offset = sberr ? A10_SYSMGR_ECC_INTSTAT_SERR_OFST : + A10_SYSMGR_ECC_INTSTAT_DERR_OFST; + + regmap_read(edac->ecc_mgr_map, sm_offset, &irq_status); + + if ((irq != edac->sb_irq) && (irq != edac->db_irq)) { + WARN_ON(1); + } else { + list_for_each_entry(dci, &edac->a10_ecc_devices, next) { + if (irq_status & dci->data->irq_status_mask) + rc = dci->data->ecc_irq_handler(dci, sberr); + } + } + + return rc; +} + +static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, + struct device_node *np) +{ + struct edac_device_ctl_info *dci; + struct altr_edac_device_dev *altdev; + char *ecc_name = (char *)np->name; + struct resource res; + int edac_idx; + int rc = 0; + const struct edac_device_prv_data *prv; + /* Get matching node and check for valid result */ + const struct of_device_id *pdev_id = + of_match_node(altr_edac_device_of_match, np); + if (IS_ERR_OR_NULL(pdev_id)) + return -ENODEV; + + /* Get driver specific data for this EDAC device */ + prv = pdev_id->data; + if (IS_ERR_OR_NULL(prv)) + return -ENODEV; + + if (!devres_open_group(edac->dev, altr_edac_a10_device_add, GFP_KERNEL)) + return -ENOMEM; + + rc = of_address_to_resource(np, 0, &res); + if (rc < 0) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: no resource address\n", ecc_name); + goto err_release_group; + } + + edac_idx = edac_device_alloc_index(); + dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name, + 1, ecc_name, 1, 0, NULL, 0, + edac_idx); + + if (!dci) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: Unable to allocate EDAC device\n", ecc_name); + rc = -ENOMEM; + goto err_release_group; + } + + altdev = dci->pvt_info; + dci->dev = edac->dev; + altdev->edac_dev_name = ecc_name; + altdev->edac_idx = edac_idx; + altdev->edac = edac; + altdev->edac_dev = dci; + altdev->data = prv; + altdev->ddev = *edac->dev; + dci->dev = &altdev->ddev; + dci->ctl_name = "Altera ECC Manager"; + dci->mod_name = ecc_name; + dci->dev_name = ecc_name; + + altdev->base = devm_ioremap_resource(edac->dev, &res); + if (IS_ERR(altdev->base)) { + rc = PTR_ERR(altdev->base); + goto err_release_group1; + } + + /* Check specific dependencies for the module */ + if (altdev->data->setup) { + rc = altdev->data->setup(altdev); + if (rc) + goto err_release_group1; + } + + rc = edac_device_add_device(dci); + if (rc) { + dev_err(edac->dev, "edac_device_add_device failed\n"); + rc = -ENOMEM; + goto err_release_group1; + } + + altr_create_edacdev_dbgfs(dci, prv); + + list_add(&altdev->next, &edac->a10_ecc_devices); + + devres_remove_group(edac->dev, altr_edac_a10_device_add); + + return 0; + +err_release_group1: + edac_device_free_ctl_info(dci); +err_release_group: + edac_printk(KERN_ALERT, EDAC_DEVICE, "%s: %d\n", __func__, __LINE__); + devres_release_group(edac->dev, NULL); + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s:Error setting up EDAC device: %d\n", ecc_name, rc); + + return rc; +} + +static int altr_edac_a10_probe(struct platform_device *pdev) +{ + struct altr_arria10_edac *edac; + struct device_node *child; + int rc; + + edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL); + if (!edac) + return -ENOMEM; + + edac->dev = &pdev->dev; + platform_set_drvdata(pdev, edac); + INIT_LIST_HEAD(&edac->a10_ecc_devices); + + edac->ecc_mgr_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "altr,sysmgr-syscon"); + if (IS_ERR(edac->ecc_mgr_map)) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to get syscon altr,sysmgr-syscon\n"); + return PTR_ERR(edac->ecc_mgr_map); + } + + edac->sb_irq = platform_get_irq(pdev, 0); + rc = devm_request_irq(&pdev->dev, edac->sb_irq, + altr_edac_a10_irq_handler, + IRQF_SHARED, dev_name(&pdev->dev), edac); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, "No SBERR IRQ resource\n"); + return rc; + } + + edac->db_irq = platform_get_irq(pdev, 1); + rc = devm_request_irq(&pdev->dev, edac->db_irq, + altr_edac_a10_irq_handler, + IRQF_SHARED, dev_name(&pdev->dev), edac); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n"); + return rc; + } + + for_each_child_of_node(pdev->dev.of_node, child) { + if (!of_device_is_available(child)) + continue; + if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc")) + altr_edac_a10_device_add(edac, child); + } + + return 0; +} + +static const struct of_device_id altr_edac_a10_of_match[] = { + { .compatible = "altr,socfpga-a10-ecc-manager" }, + {}, +}; +MODULE_DEVICE_TABLE(of, altr_edac_a10_of_match); + +static struct platform_driver altr_edac_a10_driver = { + .probe = altr_edac_a10_probe, + .driver = { + .name = "socfpga_a10_ecc_manager", + .of_match_table = altr_edac_a10_of_match, + }, +}; +module_platform_driver(altr_edac_a10_driver); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Thor Thayer"); MODULE_DESCRIPTION("EDAC Driver for Altera Memories"); diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index d7ef94c13b98..b0a17d03c715 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -219,12 +219,39 @@ struct altr_sdram_mc_data { #define ALTR_L2_ECC_INJS BIT(1) #define ALTR_L2_ECC_INJD BIT(2) +/* Arria10 General ECC Block Module Defines */ +#define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C +#define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 +#define A10_SYSMGR_ECC_INTSTAT_L2 BIT(0) + +#define A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST 0xA8 +#define A10_SYSGMR_MPU_CLEAR_L2_ECC_SB BIT(15) +#define A10_SYSGMR_MPU_CLEAR_L2_ECC_MB BIT(31) + +/* Arria 10 L2 ECC Management Group Defines */ +#define ALTR_A10_L2_ECC_CTL_OFST 0x0 +#define ALTR_A10_L2_ECC_EN_CTL BIT(0) + +#define ALTR_A10_L2_ECC_STATUS 0xFFD060A4 +#define ALTR_A10_L2_ECC_STAT_OFST 0xA4 +#define ALTR_A10_L2_ECC_SERR_PEND BIT(0) +#define ALTR_A10_L2_ECC_MERR_PEND BIT(0) + +#define ALTR_A10_L2_ECC_CLR_OFST 0x4 +#define ALTR_A10_L2_ECC_SERR_CLR BIT(15) +#define ALTR_A10_L2_ECC_MERR_CLR BIT(31) + +#define ALTR_A10_L2_ECC_INJ_OFST ALTR_A10_L2_ECC_CTL_OFST +#define ALTR_A10_L2_ECC_CE_INJ_MASK 0x00000101 +#define ALTR_A10_L2_ECC_UE_INJ_MASK 0x00010101 + struct altr_edac_device_dev; struct edac_device_prv_data { int (*setup)(struct altr_edac_device_dev *device); int ce_clear_mask; int ue_clear_mask; + int irq_status_mask; char dbgfs_name[20]; void * (*alloc_mem)(size_t size, void **other); void (*free_mem)(void *p, size_t size, void *other); @@ -232,16 +259,31 @@ struct edac_device_prv_data { int ce_set_mask; int ue_set_mask; int set_err_ofst; + irqreturn_t (*ecc_irq_handler)(struct altr_edac_device_dev *dci, + bool sb); int trig_alloc_sz; }; struct altr_edac_device_dev { + struct list_head next; void __iomem *base; int sb_irq; int db_irq; const struct edac_device_prv_data *data; struct dentry *debugfs_dir; char *edac_dev_name; + struct altr_arria10_edac *edac; + struct edac_device_ctl_info *edac_dev; + struct device ddev; + int edac_idx; +}; + +struct altr_arria10_edac { + struct device *dev; + struct regmap *ecc_mgr_map; + int sb_irq; + int db_irq; + struct list_head a10_ecc_devices; }; #endif /* #ifndef _ALTERA_EDAC_H */ From ff6fd1478c531a40279cf013227279f31ff90b41 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 21 Mar 2016 11:01:45 -0500 Subject: [PATCH 08/25] ARM: socfpga: Enable Arria10 L2 cache ECC on startup Enable ECC for Arria10 L2 cache on machine startup. The ECC has to be enabled before data is stored in memory otherwise the ECC will fail on reads. Use DT_MACHINE to select Arria10 L2 cache function. Signed-off-by: Thor Thayer Acked-by: Dinh Nguyen Cc: devicetree@vger.kernel.org Cc: dinguyen@opensource.altera.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Cc: linux-edac Link: http://lkml.kernel.org/r/1458576106-24505-9-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- arch/arm/mach-socfpga/core.h | 1 + arch/arm/mach-socfpga/l2_cache.c | 49 ++++++++++++++++++++++++++++++++ arch/arm/mach-socfpga/socfpga.c | 10 ++++++- 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h index 575195be6687..bfbc78df15dd 100644 --- a/arch/arm/mach-socfpga/core.h +++ b/arch/arm/mach-socfpga/core.h @@ -38,6 +38,7 @@ extern void socfpga_init_clocks(void); extern void socfpga_sysmgr_init(void); void socfpga_init_l2_ecc(void); void socfpga_init_ocram_ecc(void); +void socfpga_init_arria10_l2_ecc(void); extern void __iomem *sys_manager_base_addr; extern void __iomem *rst_manager_base_addr; diff --git a/arch/arm/mach-socfpga/l2_cache.c b/arch/arm/mach-socfpga/l2_cache.c index e3907ab58d05..4267c95f2158 100644 --- a/arch/arm/mach-socfpga/l2_cache.c +++ b/arch/arm/mach-socfpga/l2_cache.c @@ -17,6 +17,20 @@ #include #include +#include "core.h" + +/* A10 System Manager L2 ECC Control register */ +#define A10_MPU_CTRL_L2_ECC_OFST 0x0 +#define A10_MPU_CTRL_L2_ECC_EN BIT(0) + +/* A10 System Manager Global IRQ Mask register */ +#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 +#define A10_SYSMGR_ECC_INTMASK_CLR_L2 BIT(0) + +/* A10 System Manager L2 ECC IRQ Clear register */ +#define A10_SYSMGR_MPU_CLEAR_L2_ECC_OFST 0xA8 +#define A10_SYSMGR_MPU_CLEAR_L2_ECC (BIT(31) | BIT(15)) + void socfpga_init_l2_ecc(void) { struct device_node *np; @@ -39,3 +53,38 @@ void socfpga_init_l2_ecc(void) writel(0x01, mapped_l2_edac_addr); iounmap(mapped_l2_edac_addr); } + +void socfpga_init_arria10_l2_ecc(void) +{ + struct device_node *np; + void __iomem *mapped_l2_edac_addr; + + /* Find the L2 EDAC device tree node */ + np = of_find_compatible_node(NULL, NULL, "altr,socfpga-a10-l2-ecc"); + if (!np) { + pr_err("Unable to find socfpga-a10-l2-ecc in dtb\n"); + return; + } + + mapped_l2_edac_addr = of_iomap(np, 0); + of_node_put(np); + if (!mapped_l2_edac_addr) { + pr_err("Unable to find L2 ECC mapping in dtb\n"); + return; + } + + if (!sys_manager_base_addr) { + pr_err("System Mananger not mapped for L2 ECC\n"); + goto exit; + } + /* Clear any pending IRQs */ + writel(A10_SYSMGR_MPU_CLEAR_L2_ECC, (sys_manager_base_addr + + A10_SYSMGR_MPU_CLEAR_L2_ECC_OFST)); + /* Enable ECC */ + writel(A10_SYSMGR_ECC_INTMASK_CLR_L2, sys_manager_base_addr + + A10_SYSMGR_ECC_INTMASK_CLR_OFST); + writel(A10_MPU_CTRL_L2_ECC_EN, mapped_l2_edac_addr + + A10_MPU_CTRL_L2_ECC_OFST); +exit: + iounmap(mapped_l2_edac_addr); +} diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c index 7e0aad2ec3d1..e9b5b603df4b 100644 --- a/arch/arm/mach-socfpga/socfpga.c +++ b/arch/arm/mach-socfpga/socfpga.c @@ -66,6 +66,14 @@ static void __init socfpga_init_irq(void) socfpga_init_ocram_ecc(); } +static void __init socfpga_arria10_init_irq(void) +{ + irqchip_init(); + socfpga_sysmgr_init(); + if (IS_ENABLED(CONFIG_EDAC_ALTERA_L2C)) + socfpga_init_arria10_l2_ecc(); +} + static void socfpga_cyclone5_restart(enum reboot_mode mode, const char *cmd) { u32 temp; @@ -113,7 +121,7 @@ static const char *altera_a10_dt_match[] = { DT_MACHINE_START(SOCFPGA_A10, "Altera SOCFPGA Arria10") .l2c_aux_val = 0, .l2c_aux_mask = ~0, - .init_irq = socfpga_init_irq, + .init_irq = socfpga_arria10_init_irq, .restart = socfpga_arria10_restart, .dt_compat = altera_a10_dt_match, MACHINE_END From e17ced2cb61ac63efb5ab54fee8e45aaa437c874 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 31 Mar 2016 13:48:01 -0500 Subject: [PATCH 09/25] EDAC, altera: Extract error inject operations to a struct fops In preparation for the Arria10 peripheral ECCs, extract the inject file operations because the Arria10 IRQ trigger mechanism is different than Cyclone5/Arria5 and Arria10 L2 cache. Signed-off-by: Thor Thayer Cc: devicetree@vger.kernel.org Cc: dinguyen@opensource.altera.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Cc: linux-edac Link: http://lkml.kernel.org/r/1459450087-24792-2-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- drivers/edac/altera_edac.c | 5 ++++- drivers/edac/altera_edac.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 0afdc582766e..fb6fe568d0dc 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -668,7 +668,7 @@ static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci, if (!edac_debugfs_create_file(priv->dbgfs_name, S_IWUSR, drvdata->debugfs_dir, edac_dci, - &altr_edac_device_inject_fops)) + priv->inject_fops)) debugfs_remove_recursive(drvdata->debugfs_dir); } @@ -886,6 +886,7 @@ const struct edac_device_prv_data ocramecc_data = { .ue_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJD), .set_err_ofst = ALTR_OCR_ECC_REG_OFFSET, .trig_alloc_sz = ALTR_TRIG_OCRAM_BYTE_SIZE, + .inject_fops = &altr_edac_device_inject_fops, }; #endif /* CONFIG_EDAC_ALTERA_OCRAM */ @@ -975,6 +976,7 @@ const struct edac_device_prv_data l2ecc_data = { .ue_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJD), .set_err_ofst = ALTR_L2_ECC_REG_OFFSET, .trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE, + .inject_fops = &altr_edac_device_inject_fops, }; const struct edac_device_prv_data a10_l2ecc_data = { @@ -991,6 +993,7 @@ const struct edac_device_prv_data a10_l2ecc_data = { .set_err_ofst = ALTR_A10_L2_ECC_INJ_OFST, .ecc_irq_handler = altr_edac_a10_l2_irq, .trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE, + .inject_fops = &altr_edac_device_inject_fops, }; #endif /* CONFIG_EDAC_ALTERA_L2C */ diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index b0a17d03c715..c995388b415b 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -262,6 +262,7 @@ struct edac_device_prv_data { irqreturn_t (*ecc_irq_handler)(struct altr_edac_device_dev *dci, bool sb); int trig_alloc_sz; + const struct file_operations *inject_fops; }; struct altr_edac_device_dev { From 943ad9179836b12433beadfde2d03b215c4367d6 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 31 Mar 2016 13:48:02 -0500 Subject: [PATCH 10/25] EDAC, altera: Add register offset for ECC Enable In preparation for the Arria10 peripheral ECCs, add a register offset from the ECC base to index to the ECC enable register. Signed-off-by: Thor Thayer Cc: devicetree@vger.kernel.org Cc: dinguyen@opensource.altera.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Cc: linux-edac Link: http://lkml.kernel.org/r/1459450087-24792-3-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- drivers/edac/altera_edac.c | 3 ++- drivers/edac/altera_edac.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index fb6fe568d0dc..f0a6de748fae 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -866,7 +866,7 @@ static int altr_ocram_check_deps(struct altr_edac_device_dev *device) void __iomem *base = device->base; const struct edac_device_prv_data *prv = device->data; - if (readl(base) & prv->ecc_enable_mask) + if (readl(base + prv->ecc_en_ofst) & prv->ecc_enable_mask) return 0; edac_printk(KERN_ERR, EDAC_DEVICE, @@ -882,6 +882,7 @@ const struct edac_device_prv_data ocramecc_data = { .alloc_mem = ocram_alloc_mem, .free_mem = ocram_free_mem, .ecc_enable_mask = ALTR_OCR_ECC_EN, + .ecc_en_ofst = ALTR_OCR_ECC_REG_OFFSET, .ce_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJS), .ue_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJD), .set_err_ofst = ALTR_OCR_ECC_REG_OFFSET, diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index c995388b415b..cb6b2b9079e8 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -256,6 +256,7 @@ struct edac_device_prv_data { void * (*alloc_mem)(size_t size, void **other); void (*free_mem)(void *p, size_t size, void *other); int ecc_enable_mask; + int ecc_en_ofst; int ce_set_mask; int ue_set_mask; int set_err_ofst; From aa1f06dcc0e58e1a1b5118fc265418a2970e11aa Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 31 Mar 2016 13:48:03 -0500 Subject: [PATCH 11/25] EDAC, altera: Make OCRAM ECC dependency check generic In preparation for the Arria10 peripheral ECCs, move the OCRAM ECC dependency check into the general ECC area since this same function can be used by other memories. Signed-off-by: Thor Thayer Cc: devicetree@vger.kernel.org Cc: dinguyen@opensource.altera.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Cc: linux-edac Link: http://lkml.kernel.org/r/1459450087-24792-4-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- drivers/edac/altera_edac.c | 43 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index f0a6de748fae..f7ffc77998a4 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -648,6 +648,26 @@ static ssize_t altr_edac_device_trig(struct file *file, return count; } +/* + * Test for memory's ECC dependencies upon entry because platform specific + * startup should have initialized the memory and enabled the ECC. + * Can't turn on ECC here because accessing un-initialized memory will + * cause CE/UE errors possibly causing an ABORT. + */ +static int altr_check_ecc_deps(struct altr_edac_device_dev *device) +{ + void __iomem *base = device->base; + const struct edac_device_prv_data *prv = device->data; + + if (readl(base + prv->ecc_en_ofst) & prv->ecc_enable_mask) + return 0; + + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: No ECC present or ECC disabled.\n", + device->edac_dev_name); + return -ENODEV; +} + static const struct file_operations altr_edac_device_inject_fops = { .open = simple_open, .write = altr_edac_device_trig, @@ -853,29 +873,8 @@ static void ocram_free_mem(void *p, size_t size, void *other) gen_pool_free((struct gen_pool *)other, (u32)p, size); } -/* - * altr_ocram_check_deps() - * Test for OCRAM cache ECC dependencies upon entry because - * platform specific startup should have initialized the - * On-Chip RAM memory and enabled the ECC. - * Can't turn on ECC here because accessing un-initialized - * memory will cause CE/UE errors possibly causing an ABORT. - */ -static int altr_ocram_check_deps(struct altr_edac_device_dev *device) -{ - void __iomem *base = device->base; - const struct edac_device_prv_data *prv = device->data; - - if (readl(base + prv->ecc_en_ofst) & prv->ecc_enable_mask) - return 0; - - edac_printk(KERN_ERR, EDAC_DEVICE, - "OCRAM: No ECC present or ECC disabled.\n"); - return -ENODEV; -} - const struct edac_device_prv_data ocramecc_data = { - .setup = altr_ocram_check_deps, + .setup = altr_check_ecc_deps, .ce_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_SERR), .ue_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_DERR), .dbgfs_name = "altr_ocram_trigger", From abd56b3c8483ca73413ade0e7c7a0d13b9870016 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 31 Mar 2016 13:48:04 -0500 Subject: [PATCH 12/25] Documentation: dt: socfpga: Add Altera Arria10 OCRAM binding Add the device tree bindings needed to support the Altera On-Chip RAM ECC on the Arria10 chip. Signed-off-by: Thor Thayer Acked-by: Rob Herring Cc: devicetree@vger.kernel.org Cc: dinguyen@opensource.altera.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Cc: linux-edac Link: http://lkml.kernel.org/r/1459450087-24792-5-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- .../devicetree/bindings/arm/altera/socfpga-eccmgr.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt index 37ff9bfea5f4..5a6b16070a33 100644 --- a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt +++ b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt @@ -71,6 +71,11 @@ Required Properties: - compatible : Should be "altr,socfpga-a10-l2-ecc" - reg : Address and size for ECC error interrupt clear registers. +On-Chip RAM ECC +Required Properties: +- compatible : Should be "altr,socfpga-a10-ocram-ecc" +- reg : Address and size for ECC block registers. + Example: eccmgr: eccmgr@ffd06000 { @@ -86,4 +91,9 @@ Example: compatible = "altr,socfpga-a10-l2-ecc"; reg = <0xffd06010 0x4>; }; + + ocram-ecc@ff8c3000 { + compatible = "altr,socfpga-a10-ocram-ecc"; + reg = <0xff8c3000 0x90>; + }; }; From c7b4be8db8bc33ec60d21940b3d78b203cdffaac Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Wed, 6 Apr 2016 20:22:54 -0500 Subject: [PATCH 13/25] EDAC, altera: Add Arria10 OCRAM ECC support Add Arria10 On-Chip RAM ECC handling. Signed-off-by: Thor Thayer Cc: devicetree@vger.kernel.org Cc: dinguyen@opensource.altera.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Cc: linux-edac Link: http://lkml.kernel.org/r/1459992174-8015-1-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- drivers/edac/altera_edac.c | 78 ++++++++++++++++++++++++++++++++++++++ drivers/edac/altera_edac.h | 35 +++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index f7ffc77998a4..11775dc0b139 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -550,6 +550,7 @@ module_platform_driver(altr_edac_driver); const struct edac_device_prv_data ocramecc_data; const struct edac_device_prv_data l2ecc_data; +const struct edac_device_prv_data a10_ocramecc_data; const struct edac_device_prv_data a10_l2ecc_data; static irqreturn_t altr_edac_device_handler(int irq, void *dev_id) @@ -674,6 +675,16 @@ static const struct file_operations altr_edac_device_inject_fops = { .llseek = generic_file_llseek, }; +static ssize_t altr_edac_a10_device_trig(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos); + +static const struct file_operations altr_edac_a10_device_inject_fops = { + .open = simple_open, + .write = altr_edac_a10_device_trig, + .llseek = generic_file_llseek, +}; + static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci, const struct edac_device_prv_data *priv) { @@ -701,6 +712,8 @@ static const struct of_device_id altr_edac_device_of_match[] = { #ifdef CONFIG_EDAC_ALTERA_OCRAM { .compatible = "altr,socfpga-ocram-ecc", .data = (void *)&ocramecc_data }, + { .compatible = "altr,socfpga-a10-ocram-ecc", + .data = (void *)&a10_ocramecc_data }, #endif {}, }; @@ -889,6 +902,24 @@ const struct edac_device_prv_data ocramecc_data = { .inject_fops = &altr_edac_device_inject_fops, }; +static irqreturn_t altr_edac_a10_ecc_irq(struct altr_edac_device_dev *dci, + bool sberr); + +const struct edac_device_prv_data a10_ocramecc_data = { + .setup = altr_check_ecc_deps, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = ALTR_A10_ECC_DERRPENA, + .irq_status_mask = A10_SYSMGR_ECC_INTSTAT_OCRAM, + .dbgfs_name = "altr_ocram_trigger", + .ecc_enable_mask = ALTR_A10_OCRAM_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRA, + .ue_set_mask = ALTR_A10_ECC_TDERRA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + #endif /* CONFIG_EDAC_ALTERA_OCRAM */ /********************* L2 Cache EDAC Device Functions ********************/ @@ -1007,6 +1038,50 @@ const struct edac_device_prv_data a10_l2ecc_data = { * Based on xgene_edac.c peripheral code. */ +static ssize_t altr_edac_a10_device_trig(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dci = file->private_data; + struct altr_edac_device_dev *drvdata = edac_dci->pvt_info; + const struct edac_device_prv_data *priv = drvdata->data; + void __iomem *set_addr = (drvdata->base + priv->set_err_ofst); + unsigned long flags; + u8 trig_type; + + if (!user_buf || get_user(trig_type, user_buf)) + return -EFAULT; + + local_irq_save(flags); + if (trig_type == ALTR_UE_TRIGGER_CHAR) + writel(priv->ue_set_mask, set_addr); + else + writel(priv->ce_set_mask, set_addr); + /* Ensure the interrupt test bits are set */ + wmb(); + local_irq_restore(flags); + + return count; +} + +static irqreturn_t altr_edac_a10_ecc_irq(struct altr_edac_device_dev *dci, + bool sberr) +{ + void __iomem *base = dci->base; + + if (sberr) { + writel(ALTR_A10_ECC_SERRPENA, + base + ALTR_A10_ECC_INTSTAT_OFST); + edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name); + } else { + writel(ALTR_A10_ECC_DERRPENA, + base + ALTR_A10_ECC_INTSTAT_OFST); + edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name); + panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n"); + } + return IRQ_HANDLED; +} + static irqreturn_t altr_edac_a10_irq_handler(int irq, void *dev_id) { irqreturn_t rc = IRQ_NONE; @@ -1171,6 +1246,9 @@ static int altr_edac_a10_probe(struct platform_device *pdev) continue; if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc")) altr_edac_a10_device_add(edac, child); + else if (of_device_is_compatible(child, + "altr,socfpga-a10-ocram-ecc")) + altr_edac_a10_device_add(edac, child); } return 0; diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index cb6b2b9079e8..42090f36ba6e 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -220,9 +220,41 @@ struct altr_sdram_mc_data { #define ALTR_L2_ECC_INJD BIT(2) /* Arria10 General ECC Block Module Defines */ +#define ALTR_A10_ECC_CTRL_OFST 0x08 +#define ALTR_A10_ECC_EN BIT(0) +#define ALTR_A10_ECC_INITA BIT(16) +#define ALTR_A10_ECC_INITB BIT(24) + +#define ALTR_A10_ECC_INITSTAT_OFST 0x0C +#define ALTR_A10_ECC_INITCOMPLETEA BIT(0) +#define ALTR_A10_ECC_INITCOMPLETEB BIT(8) + +#define ALTR_A10_ECC_ERRINTEN_OFST 0x10 +#define ALTR_A10_ECC_SERRINTEN BIT(0) + +#define ALTR_A10_ECC_INTSTAT_OFST 0x20 +#define ALTR_A10_ECC_SERRPENA BIT(0) +#define ALTR_A10_ECC_DERRPENA BIT(8) +#define ALTR_A10_ECC_ERRPENA_MASK (ALTR_A10_ECC_SERRPENA | \ + ALTR_A10_ECC_DERRPENA) +#define ALTR_A10_ECC_SERRPENB BIT(16) +#define ALTR_A10_ECC_DERRPENB BIT(24) +#define ALTR_A10_ECC_ERRPENB_MASK (ALTR_A10_ECC_SERRPENB | \ + ALTR_A10_ECC_DERRPENB) + +#define ALTR_A10_ECC_INTTEST_OFST 0x24 +#define ALTR_A10_ECC_TSERRA BIT(0) +#define ALTR_A10_ECC_TDERRA BIT(8) + +/* ECC Manager Defines */ +#define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94 +#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 +#define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1) + #define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C #define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 #define A10_SYSMGR_ECC_INTSTAT_L2 BIT(0) +#define A10_SYSMGR_ECC_INTSTAT_OCRAM BIT(1) #define A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST 0xA8 #define A10_SYSGMR_MPU_CLEAR_L2_ECC_SB BIT(15) @@ -245,6 +277,9 @@ struct altr_sdram_mc_data { #define ALTR_A10_L2_ECC_CE_INJ_MASK 0x00000101 #define ALTR_A10_L2_ECC_UE_INJ_MASK 0x00010101 +/* Arria 10 OCRAM ECC Management Group Defines */ +#define ALTR_A10_OCRAM_ECC_EN_CTL (BIT(1) | BIT(0)) + struct altr_edac_device_dev; struct edac_device_prv_data { From 2364d423a7b34bd972761eeed1f764c962980c23 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Wed, 6 Apr 2016 20:09:42 -0500 Subject: [PATCH 14/25] ARM: socfpga: Enable Arria10 OCRAM ECC on startup Enable ECC for Arria10 On-Chip RAM on machine startup. The ECC has to be enabled and memory initialized before data is stored in memory otherwise the ECC will fail on reads. Signed-off-by: Thor Thayer Acked-by: Dinh Nguyen Cc: devicetree@vger.kernel.org Cc: dinguyen@opensource.altera.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Cc: linux-edac Link: http://lkml.kernel.org/r/1459991382-7859-1-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- arch/arm/mach-socfpga/ocram.c | 133 ++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/arch/arm/mach-socfpga/ocram.c b/arch/arm/mach-socfpga/ocram.c index 60ec643ac2be..10d673252395 100644 --- a/arch/arm/mach-socfpga/ocram.c +++ b/arch/arm/mach-socfpga/ocram.c @@ -13,12 +13,15 @@ * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ +#include #include #include #include #include #include +#include "core.h" + #define ALTR_OCRAM_CLEAR_ECC 0x00000018 #define ALTR_OCRAM_ECC_EN 0x00000019 @@ -47,3 +50,133 @@ void socfpga_init_ocram_ecc(void) iounmap(mapped_ocr_edac_addr); } + +/* Arria10 OCRAM Section */ +#define ALTR_A10_ECC_CTRL_OFST 0x08 +#define ALTR_A10_OCRAM_ECC_EN_CTL (BIT(1) | BIT(0)) +#define ALTR_A10_ECC_INITA BIT(16) + +#define ALTR_A10_ECC_INITSTAT_OFST 0x0C +#define ALTR_A10_ECC_INITCOMPLETEA BIT(0) +#define ALTR_A10_ECC_INITCOMPLETEB BIT(8) + +#define ALTR_A10_ECC_ERRINTEN_OFST 0x10 +#define ALTR_A10_ECC_SERRINTEN BIT(0) + +#define ALTR_A10_ECC_INTSTAT_OFST 0x20 +#define ALTR_A10_ECC_SERRPENA BIT(0) +#define ALTR_A10_ECC_DERRPENA BIT(8) +#define ALTR_A10_ECC_ERRPENA_MASK (ALTR_A10_ECC_SERRPENA | \ + ALTR_A10_ECC_DERRPENA) +/* ECC Manager Defines */ +#define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94 +#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 +#define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1) + +#define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000 + +static inline void ecc_set_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + value |= bit_mask; + writel(value, ioaddr); +} + +static inline void ecc_clear_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + value &= ~bit_mask; + writel(value, ioaddr); +} + +static inline int ecc_test_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + return (value & bit_mask) ? 1 : 0; +} + +/* + * This function uses the memory initialization block in the Arria10 ECC + * controller to initialize/clear the entire memory data and ECC data. + */ +static int altr_init_memory_port(void __iomem *ioaddr) +{ + int limit = ALTR_A10_ECC_INIT_WATCHDOG_10US; + + ecc_set_bits(ALTR_A10_ECC_INITA, (ioaddr + ALTR_A10_ECC_CTRL_OFST)); + while (limit--) { + if (ecc_test_bits(ALTR_A10_ECC_INITCOMPLETEA, + (ioaddr + ALTR_A10_ECC_INITSTAT_OFST))) + break; + udelay(1); + } + if (limit < 0) + return -EBUSY; + + /* Clear any pending ECC interrupts */ + writel(ALTR_A10_ECC_ERRPENA_MASK, + (ioaddr + ALTR_A10_ECC_INTSTAT_OFST)); + + return 0; +} + +void socfpga_init_arria10_ocram_ecc(void) +{ + struct device_node *np; + int ret = 0; + void __iomem *ecc_block_base; + + if (!sys_manager_base_addr) { + pr_err("SOCFPGA: sys-mgr is not initialized\n"); + return; + } + + /* Find the OCRAM EDAC device tree node */ + np = of_find_compatible_node(NULL, NULL, "altr,socfpga-a10-ocram-ecc"); + if (!np) { + pr_err("Unable to find socfpga-a10-ocram-ecc\n"); + return; + } + + /* Map the ECC Block */ + ecc_block_base = of_iomap(np, 0); + of_node_put(np); + if (!ecc_block_base) { + pr_err("Unable to map OCRAM ECC block\n"); + return; + } + + /* Disable ECC */ + writel(ALTR_A10_OCRAM_ECC_EN_CTL, + sys_manager_base_addr + A10_SYSMGR_ECC_INTMASK_SET_OFST); + ecc_clear_bits(ALTR_A10_ECC_SERRINTEN, + (ecc_block_base + ALTR_A10_ECC_ERRINTEN_OFST)); + ecc_clear_bits(ALTR_A10_OCRAM_ECC_EN_CTL, + (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); + + /* Ensure all writes complete */ + wmb(); + + /* Use HW initialization block to initialize memory for ECC */ + ret = altr_init_memory_port(ecc_block_base); + if (ret) { + pr_err("ECC: cannot init OCRAM PORTA memory\n"); + goto exit; + } + + /* Enable ECC */ + ecc_set_bits(ALTR_A10_OCRAM_ECC_EN_CTL, + (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); + ecc_set_bits(ALTR_A10_ECC_SERRINTEN, + (ecc_block_base + ALTR_A10_ECC_ERRINTEN_OFST)); + writel(ALTR_A10_OCRAM_ECC_EN_CTL, + sys_manager_base_addr + A10_SYSMGR_ECC_INTMASK_CLR_OFST); + + /* Ensure all writes complete */ + wmb(); +exit: + iounmap(ecc_block_base); +} From 2c911f6cac9388830d2afb350d29f43f115b1a28 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 16 Apr 2016 22:13:55 +0200 Subject: [PATCH 15/25] EDAC, altera: Remove useless casts The altera EDAC driver refers to its per-device data using a cast to '(void *)', which makes the pointer non-const, though both the source and destination are actually const. Removing the annotation makes the reference (almost) fit into a single line for improved readability, and ensures that it is actually defined as const. Signed-off-by: Arnd Bergmann Acked-by: Thor Thayer Cc: Alan Tull Cc: Dinh Nguyen Cc: linux-edac Link: http://lkml.kernel.org/r/1460837650-1237650-1-git-send-email-arnd@arndb.de Signed-off-by: Borislav Petkov --- drivers/edac/altera_edac.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 11775dc0b139..cc987b4ce908 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -232,8 +232,8 @@ static unsigned long get_total_mem(void) } static const struct of_device_id altr_sdram_ctrl_of_match[] = { - { .compatible = "altr,sdram-edac", .data = (void *)&c5_data}, - { .compatible = "altr,sdram-edac-a10", .data = (void *)&a10_data}, + { .compatible = "altr,sdram-edac", .data = &c5_data}, + { .compatible = "altr,sdram-edac-a10", .data = &a10_data}, {}, }; MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match); @@ -705,15 +705,12 @@ static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci, static const struct of_device_id altr_edac_device_of_match[] = { #ifdef CONFIG_EDAC_ALTERA_L2C - { .compatible = "altr,socfpga-l2-ecc", .data = (void *)&l2ecc_data }, - { .compatible = "altr,socfpga-a10-l2-ecc", - .data = (void *)&a10_l2ecc_data }, + { .compatible = "altr,socfpga-l2-ecc", .data = &l2ecc_data }, + { .compatible = "altr,socfpga-a10-l2-ecc", .data = &a10_l2ecc_data }, #endif #ifdef CONFIG_EDAC_ALTERA_OCRAM - { .compatible = "altr,socfpga-ocram-ecc", - .data = (void *)&ocramecc_data }, - { .compatible = "altr,socfpga-a10-ocram-ecc", - .data = (void *)&a10_ocramecc_data }, + { .compatible = "altr,socfpga-ocram-ecc", .data = &ocramecc_data }, + { .compatible = "altr,socfpga-a10-ocram-ecc", .data = &a10_ocramecc_data }, #endif {}, }; From 1aa6eb5c5b35771cfb28265eccd99b4b203b4154 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 16 Apr 2016 22:13:56 +0200 Subject: [PATCH 16/25] EDAC, altera: Avoid unused function warnings The recently added Arria10 OCRAM ECC support caused some new harmless warnings about unused functions when it is disabled: drivers/edac/altera_edac.c:1067:20: error: 'altr_edac_a10_ecc_irq' defined but not used [-Werror=unused-function] drivers/edac/altera_edac.c:658:12: error: 'altr_check_ecc_deps' defined but not used [-Werror=unused-function] This rearranges the code slightly to have those two functions inside of the same #ifdef that hides their callers. It also manages to avoid a forward declaration of the IRQ handler in the process. Signed-off-by: Arnd Bergmann Acked-by: Thor Thayer Cc: Alan Tull Cc: Dinh Nguyen Cc: linux-edac Fixes: c7b4be8db8bc ("EDAC, altera: Add Arria10 OCRAM ECC support") Link: http://lkml.kernel.org/r/1460837650-1237650-2-git-send-email-arnd@arndb.de Signed-off-by: Borislav Petkov --- drivers/edac/altera_edac.c | 78 ++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 41 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index cc987b4ce908..5b4d223d6d68 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -649,26 +649,6 @@ static ssize_t altr_edac_device_trig(struct file *file, return count; } -/* - * Test for memory's ECC dependencies upon entry because platform specific - * startup should have initialized the memory and enabled the ECC. - * Can't turn on ECC here because accessing un-initialized memory will - * cause CE/UE errors possibly causing an ABORT. - */ -static int altr_check_ecc_deps(struct altr_edac_device_dev *device) -{ - void __iomem *base = device->base; - const struct edac_device_prv_data *prv = device->data; - - if (readl(base + prv->ecc_en_ofst) & prv->ecc_enable_mask) - return 0; - - edac_printk(KERN_ERR, EDAC_DEVICE, - "%s: No ECC present or ECC disabled.\n", - device->edac_dev_name); - return -ENODEV; -} - static const struct file_operations altr_edac_device_inject_fops = { .open = simple_open, .write = altr_edac_device_trig, @@ -848,6 +828,25 @@ module_platform_driver(altr_edac_device_driver); /*********************** OCRAM EDAC Device Functions *********************/ #ifdef CONFIG_EDAC_ALTERA_OCRAM +/* + * Test for memory's ECC dependencies upon entry because platform specific + * startup should have initialized the memory and enabled the ECC. + * Can't turn on ECC here because accessing un-initialized memory will + * cause CE/UE errors possibly causing an ABORT. + */ +static int altr_check_ecc_deps(struct altr_edac_device_dev *device) +{ + void __iomem *base = device->base; + const struct edac_device_prv_data *prv = device->data; + + if (readl(base + prv->ecc_en_ofst) & prv->ecc_enable_mask) + return 0; + + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: No ECC present or ECC disabled.\n", + device->edac_dev_name); + return -ENODEV; +} static void *ocram_alloc_mem(size_t size, void **other) { @@ -883,6 +882,24 @@ static void ocram_free_mem(void *p, size_t size, void *other) gen_pool_free((struct gen_pool *)other, (u32)p, size); } +static irqreturn_t altr_edac_a10_ecc_irq(struct altr_edac_device_dev *dci, + bool sberr) +{ + void __iomem *base = dci->base; + + if (sberr) { + writel(ALTR_A10_ECC_SERRPENA, + base + ALTR_A10_ECC_INTSTAT_OFST); + edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name); + } else { + writel(ALTR_A10_ECC_DERRPENA, + base + ALTR_A10_ECC_INTSTAT_OFST); + edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name); + panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n"); + } + return IRQ_HANDLED; +} + const struct edac_device_prv_data ocramecc_data = { .setup = altr_check_ecc_deps, .ce_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_SERR), @@ -899,9 +916,6 @@ const struct edac_device_prv_data ocramecc_data = { .inject_fops = &altr_edac_device_inject_fops, }; -static irqreturn_t altr_edac_a10_ecc_irq(struct altr_edac_device_dev *dci, - bool sberr); - const struct edac_device_prv_data a10_ocramecc_data = { .setup = altr_check_ecc_deps, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, @@ -1061,24 +1075,6 @@ static ssize_t altr_edac_a10_device_trig(struct file *file, return count; } -static irqreturn_t altr_edac_a10_ecc_irq(struct altr_edac_device_dev *dci, - bool sberr) -{ - void __iomem *base = dci->base; - - if (sberr) { - writel(ALTR_A10_ECC_SERRPENA, - base + ALTR_A10_ECC_INTSTAT_OFST); - edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name); - } else { - writel(ALTR_A10_ECC_DERRPENA, - base + ALTR_A10_ECC_INTSTAT_OFST); - edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name); - panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n"); - } - return IRQ_HANDLED; -} - static irqreturn_t altr_edac_a10_irq_handler(int irq, void *dev_id) { irqreturn_t rc = IRQ_NONE; From ab67b6c22d8506b060a66ed0ce1a3e14e3b075e4 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 21 Apr 2016 10:34:14 -0700 Subject: [PATCH 17/25] EDAC: Fix used after kfree() error in edac_unregister_sysfs() Code flow looks like this: device_unregister(&mci->dev); -> kobject_put+0x25/0x50 -> kobject_cleanup+0x77/0x190 -> device_release+0x32/0xa0 -> mci_attr_release+0x36/0x70 -> kfree(mci); bus_unregister(mci->bus); Fix is to grab a local copy of "mci->bus" and use that when we call bus_unregister(). Signed-off-by: Tony Luck Acked-by: Aristeu Rozanski Cc: Mauro Carvalho Chehab Cc: linux-edac Link: http://lkml.kernel.org/r/21d595b0ab3d718d9cb206647f4ec91c05e62ec4.1461261078.git.tony.luck@intel.com Signed-off-by: Borislav Petkov --- drivers/edac/edac_mc_sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 26e65ab5932a..10c305b4a2e1 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -998,11 +998,12 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) void edac_unregister_sysfs(struct mem_ctl_info *mci) { + struct bus_type *bus = mci->bus; const char *name = mci->bus->name; edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); - bus_unregister(mci->bus); + bus_unregister(bus); kfree(name); } From ad08c4e97485694fee5ebb181983514facedbb19 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 15 Apr 2016 14:50:32 -0700 Subject: [PATCH 18/25] EDAC, sb_edac: Remove double buffering of error records In the bad old days the functions from x86_mce_decoder_chain could be called in machine check context. So we used to carefully copy them and defer processing until later. But in f29a7aff4bd60 ("x86/mce: Avoid potential deadlock due to printk() in MCE context") we switched the logging code to save the record in a genpool, and call the functions that registered to be notified later from a work queue. So drop all the double buffering and do all the work we want to do as soon as sbridge_mce_check_error() is called. Signed-off-by: Tony Luck Cc: Aristeu Rozanski Cc: Mauro Carvalho Chehab Cc: linux-edac Cc: patrickg@supermicro.com Link: http://lkml.kernel.org/r/100025611cd780d9bca72792b2b2146760da53e0.1460756761.git.tony.luck@intel.com Signed-off-by: Borislav Petkov --- drivers/edac/sb_edac.c | 88 ++---------------------------------------- 1 file changed, 3 insertions(+), 85 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 93f0d4120289..342167496626 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -363,16 +363,6 @@ struct sbridge_pvt { /* Memory type detection */ bool is_mirrored, is_lockstep, is_close_pg; - /* Fifo double buffers */ - struct mce mce_entry[MCE_LOG_LEN]; - struct mce mce_outentry[MCE_LOG_LEN]; - - /* Fifo in/out counters */ - unsigned mce_in, mce_out; - - /* Count indicator to show errors not got */ - unsigned mce_overrun; - /* Memory description */ u64 tolm, tohm; struct knl_pvt knl; @@ -3075,63 +3065,8 @@ err_parsing: } /* - * sbridge_check_error Retrieve and process errors reported by the - * hardware. Called by the Core module. - */ -static void sbridge_check_error(struct mem_ctl_info *mci) -{ - struct sbridge_pvt *pvt = mci->pvt_info; - int i; - unsigned count = 0; - struct mce *m; - - /* - * MCE first step: Copy all mce errors into a temporary buffer - * We use a double buffering here, to reduce the risk of - * loosing an error. - */ - smp_rmb(); - count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in) - % MCE_LOG_LEN; - if (!count) - return; - - m = pvt->mce_outentry; - if (pvt->mce_in + count > MCE_LOG_LEN) { - unsigned l = MCE_LOG_LEN - pvt->mce_in; - - memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l); - smp_wmb(); - pvt->mce_in = 0; - count -= l; - m += l; - } - memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count); - smp_wmb(); - pvt->mce_in += count; - - smp_rmb(); - if (pvt->mce_overrun) { - sbridge_printk(KERN_ERR, "Lost %d memory errors\n", - pvt->mce_overrun); - smp_wmb(); - pvt->mce_overrun = 0; - } - - /* - * MCE second step: parse errors and display - */ - for (i = 0; i < count; i++) - sbridge_mce_output_error(mci, &pvt->mce_outentry[i]); -} - -/* - * sbridge_mce_check_error Replicates mcelog routine to get errors - * This routine simply queues mcelog errors, and - * return. The error itself should be handled later - * by sbridge_check_error. - * WARNING: As this routine should be called at NMI time, extra care should - * be taken to avoid deadlocks, and to be as fast as possible. + * Check that logging is enabled and that this is the right type + * of error for us to handle. */ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) @@ -3176,21 +3111,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, "%u APIC %x\n", mce->cpuvendor, mce->cpuid, mce->time, mce->socketid, mce->apicid); - smp_rmb(); - if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { - smp_wmb(); - pvt->mce_overrun++; - return NOTIFY_DONE; - } - - /* Copy memory error at the ringbuffer */ - memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce)); - smp_wmb(); - pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN; - - /* Handle fatal errors immediately */ - if (mce->mcgstatus & 1) - sbridge_check_error(mci); + sbridge_mce_output_error(mci, mce); /* Advice mcelog that the error were handled */ return NOTIFY_STOP; @@ -3276,9 +3197,6 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) mci->dev_name = pci_name(pdev); mci->ctl_page_to_phys = NULL; - /* Set the function pointer to an actual operation function */ - mci->edac_check = sbridge_check_error; - pvt->info.type = type; switch (type) { case IVY_BRIDGE: From 993f88f1cc7f0879047ff353e824e5cc8f10adfc Mon Sep 17 00:00:00 2001 From: Emmanouil Maroudas Date: Sat, 23 Apr 2016 18:33:00 +0300 Subject: [PATCH 19/25] EDAC: Increment correct counter in edac_inc_ue_error() Fix typo in edac_inc_ue_error() to increment ue_noinfo_count instead of ce_noinfo_count. Signed-off-by: Emmanouil Maroudas Cc: Mauro Carvalho Chehab Cc: linux-edac Fixes: 4275be635597 ("edac: Change internal representation to work with layers") Link: http://lkml.kernel.org/r/1461425580-5898-1-git-send-email-emmanouil.maroudas@gmail.com Signed-off-by: Borislav Petkov --- drivers/edac/edac_mc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 1472f48c8ac6..6aa256b0a1ed 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -923,7 +923,7 @@ static void edac_inc_ue_error(struct mem_ctl_info *mci, mci->ue_mc += count; if (!enable_per_layer_report) { - mci->ce_noinfo_count += count; + mci->ue_noinfo_count += count; return; } From c5fb04cc96c1812eb09a3b0f3672f4a00d76730c Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 11 Apr 2016 12:01:34 -0500 Subject: [PATCH 20/25] ARM: socfpga: Initialize Arria10 OCRAM ECC on startup Initialize ECC for Arria10 On-Chip RAM on machine startup. The OCRAM memory must be initialized before data is stored in memory otherwise the ECC will fail on reads. The previous check-in 2364d423a7b3 ("ARM: socfpga: Enable Arria10 OCRAM ECC on startup") added the OCRAM enable and initialization code but was not called on startup. Signed-off-by: Thor Thayer Acked-by: Dinh Nguyen Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Cc: linux-edac Link: http://lkml.kernel.org/r/1460394094-23326-1-git-send-email-tthayer@opensource.altera.com Signed-off-by: Borislav Petkov --- arch/arm/mach-socfpga/core.h | 1 + arch/arm/mach-socfpga/socfpga.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h index bfbc78df15dd..65e1817d8afe 100644 --- a/arch/arm/mach-socfpga/core.h +++ b/arch/arm/mach-socfpga/core.h @@ -39,6 +39,7 @@ extern void socfpga_sysmgr_init(void); void socfpga_init_l2_ecc(void); void socfpga_init_ocram_ecc(void); void socfpga_init_arria10_l2_ecc(void); +void socfpga_init_arria10_ocram_ecc(void); extern void __iomem *sys_manager_base_addr; extern void __iomem *rst_manager_base_addr; diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c index e9b5b603df4b..dde14f7bf2c3 100644 --- a/arch/arm/mach-socfpga/socfpga.c +++ b/arch/arm/mach-socfpga/socfpga.c @@ -72,6 +72,8 @@ static void __init socfpga_arria10_init_irq(void) socfpga_sysmgr_init(); if (IS_ENABLED(CONFIG_EDAC_ALTERA_L2C)) socfpga_init_arria10_l2_ecc(); + if (IS_ENABLED(CONFIG_EDAC_ALTERA_OCRAM)) + socfpga_init_arria10_ocram_ecc(); } static void socfpga_cyclone5_restart(enum reboot_mode mode, const char *cmd) From de0336b30de1b3e3ad6b76b5ccf0b4920d328313 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 27 Apr 2016 12:21:21 +0200 Subject: [PATCH 21/25] EDAC, amd64_edac: Issue driver banner only on success ... and don't mislead users into thinking that the driver has loaded successfully. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index d87a47547ba5..651d96c93f22 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3006,8 +3006,6 @@ static int __init amd64_edac_init(void) { int err = -ENODEV; - printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION); - opstate_init(); if (amd_cache_northbridges() < 0) @@ -3036,6 +3034,8 @@ static int __init amd64_edac_init(void) amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR); #endif + printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION); + return 0; err_no_instances: From 5359534505c74841dbb2c6baf41db1a395acd34d Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 28 Apr 2016 07:52:11 -0700 Subject: [PATCH 22/25] EDAC, i7core: Remove double buffering of error records In the bad old days the functions from x86_mce_decoder_chain could be called in machine check context. So we used to carefully copy them and defer processing until later. But in f29a7aff4bd60 ("x86/mce: Avoid potential deadlock due to printk() in MCE context") we switched the logging code to save the record in a genpool, and call the functions that registered to be notified later from a work queue. So drop all the double buffering and do all the work we want to do as soon as i7core_mce_check_error() is called. Signed-off-by: Tony Luck Acked-by: Mauro Carvalho Chehab Cc: linux-edac Link: http://lkml.kernel.org/r/29ab2c370915c6e132fc5d88e7b72cb834bedbfe.1461855008.git.tony.luck@intel.com Signed-off-by: Borislav Petkov --- drivers/edac/i7core_edac.c | 81 +++----------------------------------- 1 file changed, 5 insertions(+), 76 deletions(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 01087a38da22..60e0bb53e9c9 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -271,16 +271,6 @@ struct i7core_pvt { bool is_registered, enable_scrub; - /* Fifo double buffers */ - struct mce mce_entry[MCE_LOG_LEN]; - struct mce mce_outentry[MCE_LOG_LEN]; - - /* Fifo in/out counters */ - unsigned mce_in, mce_out; - - /* Count indicator to show errors not got */ - unsigned mce_overrun; - /* DCLK Frequency used for computing scrub rate */ int dclk_freq; @@ -1792,56 +1782,15 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, * i7core_check_error Retrieve and process errors reported by the * hardware. Called by the Core module. */ -static void i7core_check_error(struct mem_ctl_info *mci) +static void i7core_check_error(struct mem_ctl_info *mci, struct mce *m) { struct i7core_pvt *pvt = mci->pvt_info; - int i; - unsigned count = 0; - struct mce *m; - /* - * MCE first step: Copy all mce errors into a temporary buffer - * We use a double buffering here, to reduce the risk of - * losing an error. - */ - smp_rmb(); - count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in) - % MCE_LOG_LEN; - if (!count) - goto check_ce_error; - - m = pvt->mce_outentry; - if (pvt->mce_in + count > MCE_LOG_LEN) { - unsigned l = MCE_LOG_LEN - pvt->mce_in; - - memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l); - smp_wmb(); - pvt->mce_in = 0; - count -= l; - m += l; - } - memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count); - smp_wmb(); - pvt->mce_in += count; - - smp_rmb(); - if (pvt->mce_overrun) { - i7core_printk(KERN_ERR, "Lost %d memory errors\n", - pvt->mce_overrun); - smp_wmb(); - pvt->mce_overrun = 0; - } - - /* - * MCE second step: parse errors and display - */ - for (i = 0; i < count; i++) - i7core_mce_output_error(mci, &pvt->mce_outentry[i]); + i7core_mce_output_error(mci, m); /* * Now, let's increment CE error counts */ -check_ce_error: if (!pvt->is_registered) i7core_udimm_check_mc_ecc_err(mci); else @@ -1849,12 +1798,8 @@ check_ce_error: } /* - * i7core_mce_check_error Replicates mcelog routine to get errors - * This routine simply queues mcelog errors, and - * return. The error itself should be handled later - * by i7core_check_error. - * WARNING: As this routine should be called at NMI time, extra care should - * be taken to avoid deadlocks, and to be as fast as possible. + * Check that logging is enabled and that this is the right type + * of error for us to handle. */ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) @@ -1882,21 +1827,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, if (mce->bank != 8) return NOTIFY_DONE; - smp_rmb(); - if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { - smp_wmb(); - pvt->mce_overrun++; - return NOTIFY_DONE; - } - - /* Copy memory error at the ringbuffer */ - memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce)); - smp_wmb(); - pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN; - - /* Handle fatal errors immediately */ - if (mce->mcgstatus & 1) - i7core_check_error(mci); + i7core_check_error(mci, mce); /* Advise mcelog that the errors were handled */ return NOTIFY_STOP; @@ -2243,8 +2174,6 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) get_dimm_config(mci); /* record ptr to the generic device */ mci->pdev = &i7core_dev->pdev[0]->dev; - /* Set the function pointer to an actual operation function */ - mci->edac_check = i7core_check_error; /* Enable scrubrate setting */ if (pvt->enable_scrub) From 2c1ea4c700af3dcfc8eabd94c91d1830b42c5461 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 28 Apr 2016 15:40:00 -0700 Subject: [PATCH 23/25] EDAC, sb_edac: Use cpu family/model in driver detection Instead of picking a random PCI ID from the dozen or so we need to access, just use x86_match_cpu() to pick based on CPU model number. The choosing of PCI devices has been problematic in the past, see 11249e739929 ("sb_edac: Fix detection on SNB machines") which fixed problems introduced by d0585cd815fa ("sb_edac: Claim a different PCI device"). This is especially ugly if future hardware might not even have EDAC-relevant registers in PCI config space and we would still be required to choose some "random" PCI devices to scan for just so our driver loads. Is this cleaner/clearer? It deletes much more code than it adds. Only tested on Broadwell. The driver loads/unloads and loads again. Still decodes errors too. Signed-off-by: Tony Luck Suggested-by: Borislav Petkov Signed-off-by: Borislav Petkov --- drivers/edac/sb_edac.c | 134 +++++++++++------------------------------ 1 file changed, 36 insertions(+), 98 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 342167496626..be398e0cf08a 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -28,8 +30,6 @@ /* Static vars */ static LIST_HEAD(sbridge_edac_list); -static DEFINE_MUTEX(sbridge_edac_lock); -static int probed; /* * Alter this version for the module when modifications are made @@ -651,18 +651,6 @@ static const struct pci_id_table pci_dev_descr_broadwell_table[] = { {0,} /* 0 terminated list. */ }; -/* - * pci_device_id table for which devices we are looking for - */ -static const struct pci_device_id sbridge_pci_tbl[] = { - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0)}, - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA)}, - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0)}, - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0)}, - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0)}, - {0,} /* 0 terminated list. */ -}; - /**************************************************************************** Ancillary status routines @@ -3344,62 +3332,40 @@ fail0: return rc; } +#define ICPU(model, table) \ + { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table } + +/* Order here must match "enum type" */ +static const struct x86_cpu_id sbridge_cpuids[] = { + ICPU(0x2d, pci_dev_descr_sbridge_table), /* SANDY_BRIDGE */ + ICPU(0x3e, pci_dev_descr_ibridge_table), /* IVY_BRIDGE */ + ICPU(0x3f, pci_dev_descr_haswell_table), /* HASWELL */ + ICPU(0x4f, pci_dev_descr_broadwell_table), /* BROADWELL */ + ICPU(0x57, pci_dev_descr_knl_table), /* KNIGHTS_LANDING */ + { } +}; +MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids); + /* - * sbridge_probe Probe for ONE instance of device to see if it is + * sbridge_probe Get all devices and register memory controllers * present. * return: * 0 for FOUND a device * < 0 for error code */ -static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id) +static int sbridge_probe(const struct x86_cpu_id *id) { int rc = -ENODEV; u8 mc, num_mc = 0; struct sbridge_dev *sbridge_dev; - enum type type = SANDY_BRIDGE; + struct pci_id_table *ptable = (struct pci_id_table *)id->driver_data; /* get the pci devices we want to reserve for our use */ - mutex_lock(&sbridge_edac_lock); + rc = sbridge_get_all_devices(&num_mc, ptable); - /* - * All memory controllers are allocated at the first pass. - */ - if (unlikely(probed >= 1)) { - mutex_unlock(&sbridge_edac_lock); - return -ENODEV; - } - probed++; - - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA: - rc = sbridge_get_all_devices(&num_mc, - pci_dev_descr_ibridge_table); - type = IVY_BRIDGE; - break; - case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0: - rc = sbridge_get_all_devices(&num_mc, - pci_dev_descr_sbridge_table); - type = SANDY_BRIDGE; - break; - case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0: - rc = sbridge_get_all_devices(&num_mc, - pci_dev_descr_haswell_table); - type = HASWELL; - break; - case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0: - rc = sbridge_get_all_devices(&num_mc, - pci_dev_descr_broadwell_table); - type = BROADWELL; - break; - case PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0: - rc = sbridge_get_all_devices_knl(&num_mc, - pci_dev_descr_knl_table); - type = KNIGHTS_LANDING; - break; - } if (unlikely(rc < 0)) { - edac_dbg(0, "couldn't get all devices for 0x%x\n", pdev->device); + edac_dbg(0, "couldn't get all devices\n"); goto fail0; } @@ -3410,14 +3376,13 @@ static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id) mc, mc + 1, num_mc); sbridge_dev->mc = mc++; - rc = sbridge_register_mci(sbridge_dev, type); + rc = sbridge_register_mci(sbridge_dev, id - sbridge_cpuids); if (unlikely(rc < 0)) goto fail1; } sbridge_printk(KERN_INFO, "%s\n", SBRIDGE_REVISION); - mutex_unlock(&sbridge_edac_lock); return 0; fail1: @@ -3426,74 +3391,47 @@ fail1: sbridge_put_all_devices(); fail0: - mutex_unlock(&sbridge_edac_lock); return rc; } /* - * sbridge_remove destructor for one instance of device + * sbridge_remove cleanup * */ -static void sbridge_remove(struct pci_dev *pdev) +static void sbridge_remove(void) { struct sbridge_dev *sbridge_dev; edac_dbg(0, "\n"); - /* - * we have a trouble here: pdev value for removal will be wrong, since - * it will point to the X58 register used to detect that the machine - * is a Nehalem or upper design. However, due to the way several PCI - * devices are grouped together to provide MC functionality, we need - * to use a different method for releasing the devices - */ - - mutex_lock(&sbridge_edac_lock); - - if (unlikely(!probed)) { - mutex_unlock(&sbridge_edac_lock); - return; - } - list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) sbridge_unregister_mci(sbridge_dev); /* Release PCI resources */ sbridge_put_all_devices(); - - probed--; - - mutex_unlock(&sbridge_edac_lock); } -MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl); - -/* - * sbridge_driver pci_driver structure for this module - * - */ -static struct pci_driver sbridge_driver = { - .name = "sbridge_edac", - .probe = sbridge_probe, - .remove = sbridge_remove, - .id_table = sbridge_pci_tbl, -}; - /* * sbridge_init Module entry function * Try to initialize this module for its devices */ static int __init sbridge_init(void) { - int pci_rc; + const struct x86_cpu_id *id; + int rc; edac_dbg(2, "\n"); + id = x86_match_cpu(sbridge_cpuids); + if (!id) + return -ENODEV; + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); - pci_rc = pci_register_driver(&sbridge_driver); - if (pci_rc >= 0) { + rc = sbridge_probe(id); + + if (rc >= 0) { mce_register_decode_chain(&sbridge_mce_dec); if (get_edac_report_status() == EDAC_REPORTING_DISABLED) sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); @@ -3501,9 +3439,9 @@ static int __init sbridge_init(void) } sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n", - pci_rc); + rc); - return pci_rc; + return rc; } /* @@ -3513,7 +3451,7 @@ static int __init sbridge_init(void) static void __exit sbridge_exit(void) { edac_dbg(2, "\n"); - pci_unregister_driver(&sbridge_driver); + sbridge_remove(); mce_unregister_decode_chain(&sbridge_mce_dec); } From 953dee9bbd245f5515173126b9cc8b1a2c340797 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 6 May 2016 11:18:47 -0400 Subject: [PATCH 24/25] EDAC, ie31200_edac: Add Skylake support Skylake adjusts some register locations, but otherwise follows the existing model quite closely. I was able to verify that the 'ce_count' increments when 'bad dimms' are used. The accounting of 'ce_count' and 'ue_count' is the primary functionality of interest for us. Tested on Intel(R) Xeon(R) CPU E3-1260L v5 @ 2.90GHz. Signed-off-by: Jason Baron Acked-by: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1462547927-22679-1-git-send-email-jbaron@akamai.com Signed-off-by: Borislav Petkov --- drivers/edac/ie31200_edac.c | 119 +++++++++++++++++++++++++++--------- 1 file changed, 89 insertions(+), 30 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 18d77ace4813..1c88d9707495 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -17,6 +17,7 @@ * 015c: Xeon E3-1200 v2/3rd Gen Core processor DRAM Controller * 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller * 0c08: Xeon E3-1200 v3 Processor DRAM Controller + * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers * * Based on Intel specification: * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf @@ -55,6 +56,7 @@ #define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c #define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04 #define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x1918 #define IE31200_DIMMS 4 #define IE31200_RANKS 8 @@ -105,8 +107,11 @@ * 1 Multiple Bit Error Status (MERRSTS) * 0 Correctable Error Status (CERRSTS) */ + #define IE31200_C0ECCERRLOG 0x40c8 #define IE31200_C1ECCERRLOG 0x44c8 +#define IE31200_C0ECCERRLOG_SKL 0x4048 +#define IE31200_C1ECCERRLOG_SKL 0x4448 #define IE31200_ECCERRLOG_CE BIT(0) #define IE31200_ECCERRLOG_UE BIT(1) #define IE31200_ECCERRLOG_RANK_BITS GENMASK_ULL(28, 27) @@ -123,17 +128,28 @@ #define IE31200_CAPID0_DDPCD BIT(6) #define IE31200_CAPID0_ECC BIT(1) -#define IE31200_MAD_DIMM_0_OFFSET 0x5004 -#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0) -#define IE31200_MAD_DIMM_A_RANK BIT(17) -#define IE31200_MAD_DIMM_A_WIDTH BIT(19) +#define IE31200_MAD_DIMM_0_OFFSET 0x5004 +#define IE31200_MAD_DIMM_0_OFFSET_SKL 0x500C +#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0) +#define IE31200_MAD_DIMM_A_RANK BIT(17) +#define IE31200_MAD_DIMM_A_RANK_SHIFT 17 +#define IE31200_MAD_DIMM_A_RANK_SKL BIT(10) +#define IE31200_MAD_DIMM_A_RANK_SKL_SHIFT 10 +#define IE31200_MAD_DIMM_A_WIDTH BIT(19) +#define IE31200_MAD_DIMM_A_WIDTH_SHIFT 19 +#define IE31200_MAD_DIMM_A_WIDTH_SKL GENMASK_ULL(9, 8) +#define IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT 8 -#define IE31200_PAGES(n) (n << (28 - PAGE_SHIFT)) +/* Skylake reports 1GB increments, everything else is 256MB */ +#define IE31200_PAGES(n, skl) \ + (n << (28 + (2 * skl) - PAGE_SHIFT)) static int nr_channels; struct ie31200_priv { void __iomem *window; + void __iomem *c0errlog; + void __iomem *c1errlog; }; enum ie31200_chips { @@ -157,9 +173,9 @@ static const struct ie31200_dev_info ie31200_devs[] = { }; struct dimm_data { - u8 size; /* in 256MB multiples */ + u8 size; /* in multiples of 256MB, except Skylake is 1GB */ u8 dual_rank : 1, - x16_width : 1; /* 0 means x8 width */ + x16_width : 2; /* 0 means x8 width */ }; static int how_many_channels(struct pci_dev *pdev) @@ -197,11 +213,10 @@ static bool ecc_capable(struct pci_dev *pdev) return true; } -static int eccerrlog_row(int channel, u64 log) +static int eccerrlog_row(u64 log) { - int rank = ((log & IE31200_ECCERRLOG_RANK_BITS) >> - IE31200_ECCERRLOG_RANK_SHIFT); - return rank | (channel * IE31200_RANKS_PER_CHANNEL); + return ((log & IE31200_ECCERRLOG_RANK_BITS) >> + IE31200_ECCERRLOG_RANK_SHIFT); } static void ie31200_clear_error_info(struct mem_ctl_info *mci) @@ -219,7 +234,6 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, { struct pci_dev *pdev; struct ie31200_priv *priv = mci->pvt_info; - void __iomem *window = priv->window; pdev = to_pci_dev(mci->pdev); @@ -232,9 +246,9 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, if (!(info->errsts & IE31200_ERRSTS_BITS)) return; - info->eccerrlog[0] = lo_hi_readq(window + IE31200_C0ECCERRLOG); + info->eccerrlog[0] = lo_hi_readq(priv->c0errlog); if (nr_channels == 2) - info->eccerrlog[1] = lo_hi_readq(window + IE31200_C1ECCERRLOG); + info->eccerrlog[1] = lo_hi_readq(priv->c1errlog); pci_read_config_word(pdev, IE31200_ERRSTS, &info->errsts2); @@ -245,10 +259,10 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, * should be UE info. */ if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) { - info->eccerrlog[0] = lo_hi_readq(window + IE31200_C0ECCERRLOG); + info->eccerrlog[0] = lo_hi_readq(priv->c0errlog); if (nr_channels == 2) info->eccerrlog[1] = - lo_hi_readq(window + IE31200_C1ECCERRLOG); + lo_hi_readq(priv->c1errlog); } ie31200_clear_error_info(mci); @@ -274,14 +288,14 @@ static void ie31200_process_error_info(struct mem_ctl_info *mci, if (log & IE31200_ECCERRLOG_UE) { edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, - eccerrlog_row(channel, log), + eccerrlog_row(log), channel, -1, "ie31200 UE", ""); } else if (log & IE31200_ECCERRLOG_CE) { edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, IE31200_ECCERRLOG_SYNDROME(log), - eccerrlog_row(channel, log), + eccerrlog_row(log), channel, -1, "ie31200 CE", ""); } @@ -326,6 +340,33 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) return window; } +static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode, + int chan) +{ + dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE; + dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0; + dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >> + (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4))); +} + +static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode, + int chan) +{ + dd->size = (addr_decode >> (chan << 3)) & IE31200_MAD_DIMM_SIZE; + dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << chan)) ? 1 : 0; + dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << chan)) ? 1 : 0; +} + +static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan, + bool skl) +{ + if (skl) + __skl_populate_dimm_info(dd, addr_decode, chan); + else + __populate_dimm_info(dd, addr_decode, chan); +} + + static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) { int i, j, ret; @@ -334,7 +375,8 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL]; void __iomem *window; struct ie31200_priv *priv; - u32 addr_decode; + u32 addr_decode, mad_offset; + bool skl = (pdev->device == PCI_DEVICE_ID_INTEL_IE31200_HB_8); edac_dbg(0, "MC:\n"); @@ -363,7 +405,10 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) edac_dbg(3, "MC: init mci\n"); mci->pdev = &pdev->dev; - mci->mtype_cap = MEM_FLAG_DDR3; + if (skl) + mci->mtype_cap = MEM_FLAG_DDR4; + else + mci->mtype_cap = MEM_FLAG_DDR3; mci->edac_ctl_cap = EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; @@ -374,19 +419,24 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) mci->ctl_page_to_phys = NULL; priv = mci->pvt_info; priv->window = window; + if (skl) { + priv->c0errlog = window + IE31200_C0ECCERRLOG_SKL; + priv->c1errlog = window + IE31200_C1ECCERRLOG_SKL; + mad_offset = IE31200_MAD_DIMM_0_OFFSET_SKL; + } else { + priv->c0errlog = window + IE31200_C0ECCERRLOG; + priv->c1errlog = window + IE31200_C1ECCERRLOG; + mad_offset = IE31200_MAD_DIMM_0_OFFSET; + } /* populate DIMM info */ for (i = 0; i < IE31200_CHANNELS; i++) { - addr_decode = readl(window + IE31200_MAD_DIMM_0_OFFSET + + addr_decode = readl(window + mad_offset + (i * 4)); edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { - dimm_info[i][j].size = (addr_decode >> (j * 8)) & - IE31200_MAD_DIMM_SIZE; - dimm_info[i][j].dual_rank = (addr_decode & - (IE31200_MAD_DIMM_A_RANK << j)) ? 1 : 0; - dimm_info[i][j].x16_width = (addr_decode & - (IE31200_MAD_DIMM_A_WIDTH << j)) ? 1 : 0; + populate_dimm_info(&dimm_info[i][j], addr_decode, j, + skl); edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n", dimm_info[i][j].size, dimm_info[i][j].dual_rank, @@ -405,7 +455,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) struct dimm_info *dimm; unsigned long nr_pages; - nr_pages = IE31200_PAGES(dimm_info[j][i].size); + nr_pages = IE31200_PAGES(dimm_info[j][i].size, skl); if (nr_pages == 0) continue; @@ -417,7 +467,10 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) dimm->nr_pages = nr_pages; edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); dimm->grain = 8; /* just a guess */ - dimm->mtype = MEM_DDR3; + if (skl) + dimm->mtype = MEM_DDR4; + else + dimm->mtype = MEM_DDR3; dimm->dtype = DEV_UNKNOWN; dimm->edac_mode = EDAC_UNKNOWN; } @@ -426,7 +479,10 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) dimm->nr_pages = nr_pages; edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); dimm->grain = 8; /* same guess */ - dimm->mtype = MEM_DDR3; + if (skl) + dimm->mtype = MEM_DDR4; + else + dimm->mtype = MEM_DDR3; dimm->dtype = DEV_UNKNOWN; dimm->edac_mode = EDAC_UNKNOWN; } @@ -500,6 +556,9 @@ static const struct pci_device_id ie31200_pci_tbl[] = { { PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200}, + { + PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + IE31200}, { 0, } /* 0 terminated list. */ From 3f37a36b6282621d7c5a99b6911275f989766996 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 6 May 2016 19:44:27 +0200 Subject: [PATCH 25/25] EDAC, amd64_edac: Drop pci_register_driver() use - remove homegrown instances counting. - take F3 PCI device from amd_nb caching instead of F2 which was used with the PCI core. With those changes, the driver doesn't need to register a PCI driver and relies on the northbridges caching which we do anyway on AMD. Signed-off-by: Borislav Petkov Cc: Yazen Ghannam --- drivers/edac/amd64_edac.c | 125 +++++++++++++------------------------- drivers/edac/amd64_edac.h | 2 +- 2 files changed, 44 insertions(+), 83 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 651d96c93f22..624e2f78339c 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -15,11 +15,6 @@ module_param(ecc_enable_override, int, 0644); static struct msr __percpu *msrs; -/* - * count successfully initialized driver instances for setup_pci_device() - */ -static atomic_t drv_instances = ATOMIC_INIT(0); - /* Per-node stuff */ static struct ecc_settings **ecc_stngs; @@ -1918,7 +1913,7 @@ static struct amd64_family_type family_types[] = { [K8_CPUS] = { .ctl_name = "K8", .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, - .f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC, + .f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, .ops = { .early_channel_count = k8_early_channel_count, .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow, @@ -1928,7 +1923,7 @@ static struct amd64_family_type family_types[] = { [F10_CPUS] = { .ctl_name = "F10h", .f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP, - .f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC, + .f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -1938,7 +1933,7 @@ static struct amd64_family_type family_types[] = { [F15_CPUS] = { .ctl_name = "F15h", .f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1, - .f3_id = PCI_DEVICE_ID_AMD_15H_NB_F3, + .f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -1948,7 +1943,7 @@ static struct amd64_family_type family_types[] = { [F15_M30H_CPUS] = { .ctl_name = "F15h_M30h", .f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1, - .f3_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F3, + .f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -1958,7 +1953,7 @@ static struct amd64_family_type family_types[] = { [F15_M60H_CPUS] = { .ctl_name = "F15h_M60h", .f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1, - .f3_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F3, + .f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -1968,7 +1963,7 @@ static struct amd64_family_type family_types[] = { [F16_CPUS] = { .ctl_name = "F16h", .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1, - .f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3, + .f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -1978,7 +1973,7 @@ static struct amd64_family_type family_types[] = { [F16_M30H_CPUS] = { .ctl_name = "F16h_M30h", .f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1, - .f3_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F3, + .f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2227,13 +2222,13 @@ static inline void decode_bus_error(int node_id, struct mce *m) } /* - * Use pvt->F2 which contains the F2 CPU PCI device to get the related - * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error. + * Use pvt->F3 which contains the F3 CPU PCI device to get the related + * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error. */ -static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id) +static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f2_id) { /* Reserve the ADDRESS MAP Device */ - pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2); + pvt->F1 = pci_get_related_function(pvt->F3->vendor, f1_id, pvt->F3); if (!pvt->F1) { amd64_err("error address map device not found: " "vendor %x device 0x%x (broken BIOS?)\n", @@ -2241,15 +2236,15 @@ static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id) return -ENODEV; } - /* Reserve the MISC Device */ - pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2); - if (!pvt->F3) { + /* Reserve the DCT Device */ + pvt->F2 = pci_get_related_function(pvt->F3->vendor, f2_id, pvt->F3); + if (!pvt->F2) { pci_dev_put(pvt->F1); pvt->F1 = NULL; - amd64_err("error F3 device not found: " + amd64_err("error F2 device not found: " "vendor %x device 0x%x (broken BIOS?)\n", - PCI_VENDOR_ID_AMD, f3_id); + PCI_VENDOR_ID_AMD, f2_id); return -ENODEV; } @@ -2263,7 +2258,7 @@ static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id) static void free_mc_sibling_devs(struct amd64_pvt *pvt) { pci_dev_put(pvt->F1); - pci_dev_put(pvt->F3); + pci_dev_put(pvt->F2); } /* @@ -2778,14 +2773,14 @@ static const struct attribute_group *amd64_edac_attr_groups[] = { NULL }; -static int init_one_instance(struct pci_dev *F2) +static int init_one_instance(unsigned int nid) { - struct amd64_pvt *pvt = NULL; + struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct amd64_family_type *fam_type = NULL; struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; + struct amd64_pvt *pvt = NULL; int err = 0, ret; - u16 nid = amd_pci_dev_to_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2793,7 +2788,7 @@ static int init_one_instance(struct pci_dev *F2) goto err_ret; pvt->mc_node_id = nid; - pvt->F2 = F2; + pvt->F3 = F3; ret = -EINVAL; fam_type = per_family_init(pvt); @@ -2801,7 +2796,7 @@ static int init_one_instance(struct pci_dev *F2) goto err_free; ret = -ENODEV; - err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id); + err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f2_id); if (err) goto err_free; @@ -2836,7 +2831,7 @@ static int init_one_instance(struct pci_dev *F2) goto err_siblings; mci->pvt_info = pvt; - mci->pdev = &pvt->F2->dev; + mci->pdev = &pvt->F3->dev; setup_mci_misc_attrs(mci, fam_type); @@ -2855,8 +2850,6 @@ static int init_one_instance(struct pci_dev *F2) amd_register_ecc_decoder(decode_bus_error); - atomic_inc(&drv_instances); - return 0; err_add_mc: @@ -2872,19 +2865,11 @@ err_ret: return ret; } -static int probe_one_instance(struct pci_dev *pdev, - const struct pci_device_id *mc_type) +static int probe_one_instance(unsigned int nid) { - u16 nid = amd_pci_dev_to_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; - int ret = 0; - - ret = pci_enable_device(pdev); - if (ret < 0) { - edac_dbg(0, "ret=%d\n", ret); - return -EIO; - } + int ret; ret = -ENOMEM; s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL); @@ -2905,7 +2890,7 @@ static int probe_one_instance(struct pci_dev *pdev, goto err_enable; } - ret = init_one_instance(pdev); + ret = init_one_instance(nid); if (ret < 0) { amd64_err("Error probing instance: %d\n", nid); restore_ecc_error_reporting(s, nid, F3); @@ -2921,19 +2906,18 @@ err_out: return ret; } -static void remove_one_instance(struct pci_dev *pdev) +static void remove_one_instance(unsigned int nid) { - struct mem_ctl_info *mci; - struct amd64_pvt *pvt; - u16 nid = amd_pci_dev_to_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s = ecc_stngs[nid]; + struct mem_ctl_info *mci; + struct amd64_pvt *pvt; - mci = find_mci_by_dev(&pdev->dev); + mci = find_mci_by_dev(&F3->dev); WARN_ON(!mci); /* Remove from EDAC CORE tracking list */ - mci = edac_mc_del_mc(&pdev->dev); + mci = edac_mc_del_mc(&F3->dev); if (!mci) return; @@ -2957,31 +2941,6 @@ static void remove_one_instance(struct pci_dev *pdev) edac_mc_free(mci); } -/* - * This table is part of the interface for loading drivers for PCI devices. The - * PCI core identifies what devices are on a system during boot, and then - * inquiry this table to see if this driver is for a given device found. - */ -static const struct pci_device_id amd64_pci_table[] = { - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F2) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F2) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F2) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F2) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F2) }, - {0, } -}; -MODULE_DEVICE_TABLE(pci, amd64_pci_table); - -static struct pci_driver amd64_pci_driver = { - .name = EDAC_MOD_STR, - .probe = probe_one_instance, - .remove = remove_one_instance, - .id_table = amd64_pci_table, - .driver.probe_type = PROBE_FORCE_SYNCHRONOUS, -}; - static void setup_pci_device(void) { struct mem_ctl_info *mci; @@ -3005,6 +2964,7 @@ static void setup_pci_device(void) static int __init amd64_edac_init(void) { int err = -ENODEV; + int i; opstate_init(); @@ -3020,13 +2980,14 @@ static int __init amd64_edac_init(void) if (!msrs) goto err_free; - err = pci_register_driver(&amd64_pci_driver); - if (err) - goto err_pci; + for (i = 0; i < amd_nb_num(); i++) + if (probe_one_instance(i)) { + /* unwind properly */ + while (--i >= 0) + remove_one_instance(i); - err = -ENODEV; - if (!atomic_read(&drv_instances)) - goto err_no_instances; + goto err_pci; + } setup_pci_device(); @@ -3038,9 +2999,6 @@ static int __init amd64_edac_init(void) return 0; -err_no_instances: - pci_unregister_driver(&amd64_pci_driver); - err_pci: msrs_free(msrs); msrs = NULL; @@ -3055,10 +3013,13 @@ err_ret: static void __exit amd64_edac_exit(void) { + int i; + if (pci_ctl) edac_pci_release_generic_ctl(pci_ctl); - pci_unregister_driver(&amd64_pci_driver); + for (i = 0; i < amd_nb_num(); i++) + remove_one_instance(i); kfree(ecc_stngs); ecc_stngs = NULL; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index c0f248f3aaf9..c08870479054 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -422,7 +422,7 @@ struct low_ops { struct amd64_family_type { const char *ctl_name; - u16 f1_id, f3_id; + u16 f1_id, f2_id; struct low_ops ops; };