diff --git a/Documentation/admin-guide/RAS/address-translation.rst b/Documentation/admin-guide/RAS/address-translation.rst
new file mode 100644
index 000000000000..f0ca17b43cd3
--- /dev/null
+++ b/Documentation/admin-guide/RAS/address-translation.rst
@@ -0,0 +1,24 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Address translation
+===================
+
+x86 AMD
+-------
+
+Zen-based AMD systems include a Data Fabric that manages the layout of
+physical memory. Devices attached to the Fabric, like memory controllers,
+I/O, etc., may not have a complete view of the system physical memory map.
+These devices may provide a "normalized", i.e. device physical, address
+when reporting memory errors. Normalized addresses must be translated to
+a system physical address for the kernel to act on the memory.
+
+The AMD Address Translation Library (CONFIG_AMD_ATL) provides translation
+for this case.
+
+Glossary of acronyms used in address translation for Zen-based systems:
+
+* CCM = Cache Coherent Moderator
+* COD = Cluster-on-Die
+* COH_ST = Coherent Station
+* DF = Data Fabric
diff --git a/Documentation/RAS/ras.rst b/Documentation/admin-guide/RAS/error-decoding.rst
similarity index 73%
rename from Documentation/RAS/ras.rst
rename to Documentation/admin-guide/RAS/error-decoding.rst
index 2556b397cd27..26a72f3fe5de 100644
--- a/Documentation/RAS/ras.rst
+++ b/Documentation/admin-guide/RAS/error-decoding.rst
@@ -1,15 +1,10 @@
 .. SPDX-License-Identifier: GPL-2.0
 
-Reliability, Availability and Serviceability features
-=====================================================
-
-This documents different aspects of the RAS functionality present in the
-kernel.
-
 Error decoding
---------------
+==============
 
-* x86
+x86
+---
 
 Error decoding on AMD systems should be done using the rasdaemon tool:
 https://github.com/mchehab/rasdaemon/
diff --git a/Documentation/admin-guide/RAS/index.rst b/Documentation/admin-guide/RAS/index.rst
new file mode 100644
index 000000000000..f4087040a7c0
--- /dev/null
+++ b/Documentation/admin-guide/RAS/index.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. toctree::
+   :maxdepth: 2
+
+   main
+   error-decoding
+   address-translation
diff --git a/Documentation/admin-guide/ras.rst b/Documentation/admin-guide/RAS/main.rst
similarity index 99%
rename from Documentation/admin-guide/ras.rst
rename to Documentation/admin-guide/RAS/main.rst
index 8e03751d126d..7ac1d4ccc509 100644
--- a/Documentation/admin-guide/ras.rst
+++ b/Documentation/admin-guide/RAS/main.rst
@@ -1,8 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
 .. include:: <isonum.txt>
 
-============================================
-Reliability, Availability and Serviceability
-============================================
+==================================================
+Reliability, Availability and Serviceability (RAS)
+==================================================
+
+This document describes different aspects of the RAS functionality present
+in the kernel.
 
 RAS concepts
 ************
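To make the translation interface concrete before the driver hunks below, here is a minimal sketch of a hypothetical reporter using the library's UMC entry point. It mirrors the call site added to decode_umc_error() later in this series and assumes, as the amd64_edac.c include change suggests, that struct atl_err and amd_convert_umc_mca_addr_to_sys_addr() are declared in <linux/ras.h>; the helper name report_umc_error() is illustrative only:

#include <linux/err.h>
#include <linux/ras.h>
#include <asm/mce.h>

/* Hypothetical helper: translate a normalized UMC address from an MCA record. */
static int report_umc_error(struct mce *m)
{
	struct atl_err a_err;
	unsigned long sys_addr;

	a_err.addr = m->addr;	/* normalized (device physical) address */
	a_err.ipid = m->ipid;	/* identifies the reporting UMC instance */
	a_err.cpu  = m->extcpu;	/* CPU that logged the error */

	sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
	if (IS_ERR_VALUE(sys_addr))
		return -EINVAL;	/* library could not translate the address */

	pr_info("UMC error at system physical address 0x%lx\n", sys_addr);
	return 0;
}

The error is returned in-band (IS_ERR_VALUE() on the unsigned long), so callers get either a usable system physical address or a failure they can log, as decode_umc_error() does below.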
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index fb40a1f6f79e..dfc06fab9432 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -122,7 +122,7 @@ configure specific aspects of kernel behavior to your liking.
    pmf
    pnp
    rapidio
-   ras
+   RAS/index
    rtc
    serial-console
    svga
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 36e61783437c..9dfdc826618c 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -113,7 +113,6 @@ to ReStructured Text format, or are simply too old.
    :maxdepth: 1
 
    staging/index
-   RAS/ras
 
 Translations
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 747f492ce6cc..a3c7accd3877 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -897,6 +897,12 @@ Q:	https://patchwork.kernel.org/project/linux-rdma/list/
 F:	drivers/infiniband/hw/efa/
 F:	include/uapi/rdma/efa-abi.h
 
+AMD ADDRESS TRANSLATION LIBRARY (ATL)
+M:	Yazen Ghannam
+L:	linux-edac@vger.kernel.org
+S:	Supported
+F:	drivers/ras/amd/atl/*
+
 AMD AXI W1 DRIVER
 M:	Kris Chaplin
 R:	Thomas Delev
@@ -7583,7 +7589,6 @@ R:	Robert Richter
 L:	linux-edac@vger.kernel.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git edac-for-next
-F:	Documentation/admin-guide/ras.rst
 F:	Documentation/driver-api/edac.rst
 F:	drivers/edac/
 F:	include/linux/edac.h
@@ -18379,11 +18384,17 @@ M:	Tony Luck
 M:	Borislav Petkov
 L:	linux-edac@vger.kernel.org
 S:	Maintained
-F:	Documentation/admin-guide/ras.rst
+F:	Documentation/admin-guide/RAS
 F:	drivers/ras/
 F:	include/linux/ras.h
 F:	include/ras/ras_event.h
 
+RAS FRU MEMORY POISON MANAGER (FMPM)
+M:	Yazen Ghannam
+L:	linux-edac@vger.kernel.org
+S:	Maintained
+F:	drivers/ras/amd/fmpm.c
+
 RC-CORE / LIRC FRAMEWORK
 M:	Sean Young
 L:	linux-media@vger.kernel.org
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 76b1d87f1531..abe3a8f22cbd 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -224,7 +224,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
 static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
 static inline int topology_max_smt_threads(void) { return 1; }
 static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
-static inline unsigned int topology_amd_nodes_per_pkg(void) { return 0; };
+static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
 
 #endif /* !CONFIG_SMP */
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 5a7f3fabee22..16c8de5050e5 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -78,6 +78,7 @@ config EDAC_GHES
 config EDAC_AMD64
 	tristate "AMD64 (Opteron, Athlon64)"
 	depends on AMD_NB && EDAC_DECODE_MCE
+	imply AMD_ATL
 	help
 	  Support for error detection and correction of DRAM ECC errors
 	  on the AMD64 families (>= K8) of memory controllers.
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 2b8c20bb926a..1f3520d76861 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
+#include <linux/ras.h>
 #include "amd64_edac.h"
 #include <asm/amd_nb.h>
 
@@ -1051,281 +1052,6 @@ static int fixup_node_id(int node_id, struct mce *m)
 	return nid - gpu_node_map.base_node_id + 1;
 }
 
-/* Protect the PCI config register pairs used for DF indirect access. */
-static DEFINE_MUTEX(df_indirect_mutex);
-
-/*
- * Data Fabric Indirect Access uses FICAA/FICAD.
- *
- * Fabric Indirect Configuration Access Address (FICAA): Constructed based
- * on the device's Instance Id and the PCI function and register offset of
- * the desired register.
- *
- * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO
- * and FICAD HI registers but so far we only need the LO register.
- * - * Use Instance Id 0xFF to indicate a broadcast read. - */ -#define DF_BROADCAST 0xFF -static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) -{ - struct pci_dev *F4; - u32 ficaa; - int err = -ENODEV; - - if (node >= amd_nb_num()) - goto out; - - F4 = node_to_amd_nb(node)->link; - if (!F4) - goto out; - - ficaa = (instance_id == DF_BROADCAST) ? 0 : 1; - ficaa |= reg & 0x3FC; - ficaa |= (func & 0x7) << 11; - ficaa |= instance_id << 16; - - mutex_lock(&df_indirect_mutex); - - err = pci_write_config_dword(F4, 0x5C, ficaa); - if (err) { - pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa); - goto out_unlock; - } - - err = pci_read_config_dword(F4, 0x98, lo); - if (err) - pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa); - -out_unlock: - mutex_unlock(&df_indirect_mutex); - -out: - return err; -} - -static int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) -{ - return __df_indirect_read(node, func, reg, instance_id, lo); -} - -static int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo) -{ - return __df_indirect_read(node, func, reg, DF_BROADCAST, lo); -} - -struct addr_ctx { - u64 ret_addr; - u32 tmp; - u16 nid; - u8 inst_id; -}; - -static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) -{ - u64 dram_base_addr, dram_limit_addr, dram_hole_base; - - u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; - u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; - u8 intlv_addr_sel, intlv_addr_bit; - u8 num_intlv_bits, hashed_bit; - u8 lgcy_mmio_hole_en, base = 0; - u8 cs_mask, cs_id = 0; - bool hash_enabled = false; - - struct addr_ctx ctx; - - memset(&ctx, 0, sizeof(ctx)); - - /* Start from the normalized address */ - ctx.ret_addr = norm_addr; - - ctx.nid = nid; - ctx.inst_id = umc; - - /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ - if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &ctx.tmp)) - goto out_err; - - /* Remove HiAddrOffset from normalized address, if enabled: */ - if (ctx.tmp & BIT(0)) { - u64 hi_addr_offset = (ctx.tmp & GENMASK_ULL(31, 20)) << 8; - - if (norm_addr >= hi_addr_offset) { - ctx.ret_addr -= hi_addr_offset; - base = 1; - } - } - - /* Read D18F0x110 (DramBaseAddress). */ - if (df_indirect_read_instance(nid, 0, 0x110 + (8 * base), umc, &ctx.tmp)) - goto out_err; - - /* Check if address range is valid. */ - if (!(ctx.tmp & BIT(0))) { - pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n", - __func__, ctx.tmp); - goto out_err; - } - - lgcy_mmio_hole_en = ctx.tmp & BIT(1); - intlv_num_chan = (ctx.tmp >> 4) & 0xF; - intlv_addr_sel = (ctx.tmp >> 8) & 0x7; - dram_base_addr = (ctx.tmp & GENMASK_ULL(31, 12)) << 16; - - /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */ - if (intlv_addr_sel > 3) { - pr_err("%s: Invalid interleave address select %d.\n", - __func__, intlv_addr_sel); - goto out_err; - } - - /* Read D18F0x114 (DramLimitAddress). 
*/ - if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &ctx.tmp)) - goto out_err; - - intlv_num_sockets = (ctx.tmp >> 8) & 0x1; - intlv_num_dies = (ctx.tmp >> 10) & 0x3; - dram_limit_addr = ((ctx.tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); - - intlv_addr_bit = intlv_addr_sel + 8; - - /* Re-use intlv_num_chan by setting it equal to log2(#channels) */ - switch (intlv_num_chan) { - case 0: intlv_num_chan = 0; break; - case 1: intlv_num_chan = 1; break; - case 3: intlv_num_chan = 2; break; - case 5: intlv_num_chan = 3; break; - case 7: intlv_num_chan = 4; break; - - case 8: intlv_num_chan = 1; - hash_enabled = true; - break; - default: - pr_err("%s: Invalid number of interleaved channels %d.\n", - __func__, intlv_num_chan); - goto out_err; - } - - num_intlv_bits = intlv_num_chan; - - if (intlv_num_dies > 2) { - pr_err("%s: Invalid number of interleaved nodes/dies %d.\n", - __func__, intlv_num_dies); - goto out_err; - } - - num_intlv_bits += intlv_num_dies; - - /* Add a bit if sockets are interleaved. */ - num_intlv_bits += intlv_num_sockets; - - /* Assert num_intlv_bits <= 4 */ - if (num_intlv_bits > 4) { - pr_err("%s: Invalid interleave bits %d.\n", - __func__, num_intlv_bits); - goto out_err; - } - - if (num_intlv_bits > 0) { - u64 temp_addr_x, temp_addr_i, temp_addr_y; - u8 die_id_bit, sock_id_bit, cs_fabric_id; - - /* - * Read FabricBlockInstanceInformation3_CS[BlockFabricID]. - * This is the fabric id for this coherent slave. Use - * umc/channel# as instance id of the coherent slave - * for FICAA. - */ - if (df_indirect_read_instance(nid, 0, 0x50, umc, &ctx.tmp)) - goto out_err; - - cs_fabric_id = (ctx.tmp >> 8) & 0xFF; - die_id_bit = 0; - - /* If interleaved over more than 1 channel: */ - if (intlv_num_chan) { - die_id_bit = intlv_num_chan; - cs_mask = (1 << die_id_bit) - 1; - cs_id = cs_fabric_id & cs_mask; - } - - sock_id_bit = die_id_bit; - - /* Read D18F1x208 (SystemFabricIdMask). */ - if (intlv_num_dies || intlv_num_sockets) - if (df_indirect_read_broadcast(nid, 1, 0x208, &ctx.tmp)) - goto out_err; - - /* If interleaved over more than 1 die. */ - if (intlv_num_dies) { - sock_id_bit = die_id_bit + intlv_num_dies; - die_id_shift = (ctx.tmp >> 24) & 0xF; - die_id_mask = (ctx.tmp >> 8) & 0xFF; - - cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; - } - - /* If interleaved over more than 1 socket. */ - if (intlv_num_sockets) { - socket_id_shift = (ctx.tmp >> 28) & 0xF; - socket_id_mask = (ctx.tmp >> 16) & 0xFF; - - cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; - } - - /* - * The pre-interleaved address consists of XXXXXXIIIYYYYY - * where III is the ID for this CS, and XXXXXXYYYYY are the - * address bits from the post-interleaved address. - * "num_intlv_bits" has been calculated to tell us how many "I" - * bits there are. "intlv_addr_bit" tells us how many "Y" bits - * there are (where "I" starts). 
- */ - temp_addr_y = ctx.ret_addr & GENMASK_ULL(intlv_addr_bit - 1, 0); - temp_addr_i = (cs_id << intlv_addr_bit); - temp_addr_x = (ctx.ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; - ctx.ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; - } - - /* Add dram base address */ - ctx.ret_addr += dram_base_addr; - - /* If legacy MMIO hole enabled */ - if (lgcy_mmio_hole_en) { - if (df_indirect_read_broadcast(nid, 0, 0x104, &ctx.tmp)) - goto out_err; - - dram_hole_base = ctx.tmp & GENMASK(31, 24); - if (ctx.ret_addr >= dram_hole_base) - ctx.ret_addr += (BIT_ULL(32) - dram_hole_base); - } - - if (hash_enabled) { - /* Save some parentheses and grab ls-bit at the end. */ - hashed_bit = (ctx.ret_addr >> 12) ^ - (ctx.ret_addr >> 18) ^ - (ctx.ret_addr >> 21) ^ - (ctx.ret_addr >> 30) ^ - cs_id; - - hashed_bit &= BIT(0); - - if (hashed_bit != ((ctx.ret_addr >> intlv_addr_bit) & BIT(0))) - ctx.ret_addr ^= BIT(intlv_addr_bit); - } - - /* Is calculated system address is above DRAM limit address? */ - if (ctx.ret_addr > dram_limit_addr) - goto out_err; - - *sys_addr = ctx.ret_addr; - return 0; - -out_err: - return -EINVAL; -} - static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); /* @@ -3073,9 +2799,10 @@ static void decode_umc_error(int node_id, struct mce *m) { u8 ecc_type = (m->status >> 45) & 0x3; struct mem_ctl_info *mci; + unsigned long sys_addr; struct amd64_pvt *pvt; + struct atl_err a_err; struct err_info err; - u64 sys_addr; node_id = fixup_node_id(node_id, m); @@ -3106,7 +2833,12 @@ static void decode_umc_error(int node_id, struct mce *m) pvt->ops->get_err_info(m, &err); - if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { + a_err.addr = m->addr; + a_err.ipid = m->ipid; + a_err.cpu = m->extcpu; + + sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err); + if (IS_ERR_VALUE(sys_addr)) { err.err_code = ERR_NORM_ADDR; goto log_error; } diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 2b83d6de9352..3fd22a1eb1a9 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -951,6 +951,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = { X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), {} }; MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index 2b0ecdeba5cd..cdd8480e7368 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -238,6 +238,7 @@ static struct work_struct ecclog_work; #define DID_ADL_N_SKU9 0x4678 #define DID_ADL_N_SKU10 0x4679 #define DID_ADL_N_SKU11 0x467c +#define DID_ADL_N_SKU12 0x4632 /* Compute die IDs for Raptor Lake-P with IBECC */ #define DID_RPL_P_SKU1 0xa706 @@ -583,6 +584,7 @@ static const struct pci_device_id igen6_pci_tbl[] = { { PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg }, { PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg }, { PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg }, { PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg }, { PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg }, { PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), 
(kernel_ulong_t)&rpl_p_cfg },
diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index 709babce43ba..5527055b0964 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -1324,11 +1324,9 @@ static int mc_probe(struct platform_device *pdev)
 	struct synps_edac_priv *priv;
 	struct mem_ctl_info *mci;
 	void __iomem *baseaddr;
-	struct resource *res;
 	int rc;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	baseaddr = devm_ioremap_resource(&pdev->dev, res);
+	baseaddr = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(baseaddr))
 		return PTR_ERR(baseaddr);
 
diff --git a/drivers/edac/versal_edac.c b/drivers/edac/versal_edac.c
index 62caf454b567..1688a5050f63 100644
--- a/drivers/edac/versal_edac.c
+++ b/drivers/edac/versal_edac.c
@@ -42,8 +42,11 @@
 
 #define ECCW0_FLIP_CTRL 0x109C
 #define ECCW0_FLIP0_OFFSET 0x10A0
+#define ECCW0_FLIP0_BITS 31
+#define ECCW0_FLIP1_OFFSET 0x10A4
 #define ECCW1_FLIP_CTRL 0x10AC
 #define ECCW1_FLIP0_OFFSET 0x10B0
+#define ECCW1_FLIP1_OFFSET 0x10B4
 #define ECCR0_CERR_STAT_OFFSET 0x10BC
 #define ECCR0_CE_ADDR_LO_OFFSET 0x10C0
 #define ECCR0_CE_ADDR_HI_OFFSET 0x10C4
@@ -116,9 +119,6 @@
 #define XDDR_BUS_WIDTH_32 1
 #define XDDR_BUS_WIDTH_16 2
 
-#define ECC_CEPOISON_MASK 0x1
-#define ECC_UEPOISON_MASK 0x3
-
 #define XDDR_MAX_ROW_CNT 18
 #define XDDR_MAX_COL_CNT 10
 #define XDDR_MAX_RANK_CNT 2
@@ -133,6 +133,7 @@
  * https://docs.xilinx.com/r/en-US/am012-versal-register-reference/PCSR_LOCK-XRAM_SLCR-Register
  */
 #define PCSR_UNLOCK_VAL 0xF9E8D7C6
+#define PCSR_LOCK_VAL 1
 
 #define XDDR_ERR_TYPE_CE 0
 #define XDDR_ERR_TYPE_UE 1
@@ -142,6 +143,7 @@
 #define XILINX_DRAM_SIZE_12G 3
 #define XILINX_DRAM_SIZE_16G 4
 #define XILINX_DRAM_SIZE_32G 5
+#define NUM_UE_BITPOS 2
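The FLIP0/FLIP1 register pairing introduced above splits an injected bit position across two 32-bit registers: positions below ECCW0_FLIP0_BITS (31) set a bit in a FLIP0 register, while higher positions are rebased into the matching FLIP1 register. A condensed sketch of that mapping, using only the macros defined in this hunk (the helper name is illustrative, not part of the patch):

/* Illustrative helper: map one ECC flip bit position to FLIP0/FLIP1 masks. */
static void ce_bitpos_to_flip_masks(u8 bitpos, u32 *flip0, u32 *flip1)
{
	if (bitpos < ECCW0_FLIP0_BITS) {
		/* Bits 0..30 live in the FLIP0 register. */
		*flip0 = BIT(bitpos);
		*flip1 = 0;
	} else {
		/* Higher bits are rebased to start at FLIP1 bit 0. */
		*flip0 = 0;
		*flip1 = BIT(bitpos - ECCW0_FLIP0_BITS);
	}
}

The CE and UE injection paths added later in this file both follow exactly this split.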
 
 /**
  * struct ecc_error_info - ECC error log information.
@@ -479,7 +481,7 @@ static void err_callback(const u32 *payload, void *data)
 	writel(regval, priv->ddrmc_baseaddr + XDDR_ISR_OFFSET);
 
 	/* Lock the PCSR registers */
-	writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
 	edac_dbg(3, "Total error count CE %d UE %d\n",
 		 priv->ce_cnt, priv->ue_cnt);
 }
@@ -650,7 +652,7 @@ static void enable_intr(struct edac_priv *priv)
 	writel(XDDR_IRQ_UE_MASK,
 	       priv->ddrmc_baseaddr + XDDR_IRQ1_EN_OFFSET);
 
 	/* Lock the PCSR registers */
-	writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
 }
 
 static void disable_intr(struct edac_priv *priv)
@@ -663,7 +665,7 @@ static void disable_intr(struct edac_priv *priv)
 	       priv->ddrmc_baseaddr + XDDR_IRQ_DIS_OFFSET);
 
 	/* Lock the PCSR registers */
-	writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
 }
 
 #define to_mci(k) container_of(k, struct mem_ctl_info, dev)
@@ -734,38 +736,63 @@ static void poison_setup(struct edac_priv *priv)
 	writel(regval, priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC15_OFFSET);
 }
 
-static ssize_t xddr_inject_data_poison_store(struct mem_ctl_info *mci,
-					     const char __user *data)
+static void xddr_inject_data_ce_store(struct mem_ctl_info *mci, u8 ce_bitpos)
 {
+	u32 ecc0_flip0, ecc1_flip0, ecc0_flip1, ecc1_flip1;
 	struct edac_priv *priv = mci->pvt_info;
 
-	writel(0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET);
-	writel(0, priv->ddrmc_baseaddr + ECCW1_FLIP0_OFFSET);
-
-	if (strncmp(data, "CE", 2) == 0) {
-		writel(ECC_CEPOISON_MASK, priv->ddrmc_baseaddr +
-		       ECCW0_FLIP0_OFFSET);
-		writel(ECC_CEPOISON_MASK, priv->ddrmc_baseaddr +
-		       ECCW1_FLIP0_OFFSET);
+	if (ce_bitpos < ECCW0_FLIP0_BITS) {
+		ecc0_flip0 = BIT(ce_bitpos);
+		ecc1_flip0 = BIT(ce_bitpos);
+		ecc0_flip1 = 0;
+		ecc1_flip1 = 0;
 	} else {
-		writel(ECC_UEPOISON_MASK, priv->ddrmc_baseaddr +
-		       ECCW0_FLIP0_OFFSET);
-		writel(ECC_UEPOISON_MASK, priv->ddrmc_baseaddr +
-		       ECCW1_FLIP0_OFFSET);
+		ce_bitpos = ce_bitpos - ECCW0_FLIP0_BITS;
+		ecc0_flip1 = BIT(ce_bitpos);
+		ecc1_flip1 = BIT(ce_bitpos);
+		ecc0_flip0 = 0;
+		ecc1_flip0 = 0;
 	}
 
-	/* Lock the PCSR registers */
-	writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
-
-	return 0;
+	writel(ecc0_flip0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET);
+	writel(ecc1_flip0, priv->ddrmc_baseaddr + ECCW1_FLIP0_OFFSET);
+	writel(ecc0_flip1, priv->ddrmc_baseaddr + ECCW0_FLIP1_OFFSET);
+	writel(ecc1_flip1, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET);
 }
 
-static ssize_t inject_data_poison_store(struct file *file, const char __user *data,
-					size_t count, loff_t *ppos)
+/*
+ * To inject a correctable error, the following steps are needed:
+ *
+ * - Write the correctable error bit position value:
+ *	echo <bit_position> > /sys/kernel/debug/edac/<mc instance>/inject_ce
+ *
+ * poison_setup() derives the row, column, bank, group and rank and
+ * writes to the ADEC registers based on the address given by the user.
+ *
+ * The ADEC12 and ADEC13 are mask registers; write 0 to make sure default
+ * configuration is there and no addresses are masked.
+ *
+ * The row, column, bank, group and rank registers are written to the
+ * match ADEC bit to generate errors at the particular address. ADEC14
+ * and ADEC15 have the match bits.
+ *
+ * xddr_inject_data_ce_store() updates the ECC FLIP registers with the
+ * bits to be corrupted based on the bit position given by the user.
+ *
+ * Upon doing a read to the address the errors are injected.
+ */
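Putting the steps above together with the "address" debugfs node created later in this patch (via debugfs_create_x64()), a hypothetical correctable-error injection session looks like this (instance name, address, and bit position are illustrative):

	echo 0x12340000 > /sys/kernel/debug/edac/<mc instance>/address
	echo 5 > /sys/kernel/debug/edac/<mc instance>/inject_ce

A subsequent read of the poisoned address triggers the injected error and it shows up in the driver's error counts.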
+static ssize_t inject_data_ce_store(struct file *file, const char __user *data,
+				    size_t count, loff_t *ppos)
 {
 	struct device *dev = file->private_data;
 	struct mem_ctl_info *mci = to_mci(dev);
 	struct edac_priv *priv = mci->pvt_info;
+	u8 ce_bitpos;
+	int ret;
+
+	ret = kstrtou8_from_user(data, count, 0, &ce_bitpos);
+	if (ret)
+		return ret;
 
 	/* Unlock the PCSR registers */
 	writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
@@ -773,17 +800,110 @@ static ssize_t inject_data_poison_store(struct file *file, const char __user *da
 
 	poison_setup(priv);
 
+	xddr_inject_data_ce_store(mci, ce_bitpos);
+	ret = count;
+
 	/* Lock the PCSR registers */
-	writel(1, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_LOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
 
-	xddr_inject_data_poison_store(mci, data);
+	return ret;
+}
+
+static const struct file_operations xddr_inject_ce_fops = {
+	.open = simple_open,
+	.write = inject_data_ce_store,
+	.llseek = generic_file_llseek,
+};
+
+static void xddr_inject_data_ue_store(struct mem_ctl_info *mci, u32 val0, u32 val1)
+{
+	struct edac_priv *priv = mci->pvt_info;
+
+	/* val0 holds the flips for bits [30:0], val1 those for the higher bits. */
+	writel(val0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET);
+	writel(val1, priv->ddrmc_baseaddr + ECCW0_FLIP1_OFFSET);
+	writel(val0, priv->ddrmc_baseaddr + ECCW1_FLIP0_OFFSET);
+	writel(val1, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET);
+}
+
+/*
+ * To inject an uncorrectable error, the following steps are needed:
+ *	echo <bit_position1>,<bit_position2> > /sys/kernel/debug/edac/<mc instance>/inject_ue
+ *
+ * poison_setup() derives the row, column, bank, group and rank and
+ * writes to the ADEC registers based on the address given by the user.
+ *
+ * The ADEC12 and ADEC13 are mask registers; write 0 so that none of the
+ * addresses are masked. The row, column, bank, group and rank registers
+ * are written to the match ADEC bit to generate errors at the
+ * particular address. ADEC14 and ADEC15 have the match bits.
+ *
+ * xddr_inject_data_ue_store() updates the ECC FLIP registers with the
+ * bits to be corrupted based on the bit positions given by the user.
+ * For uncorrectable errors, 2-bit errors are injected.
+ *
+ * Upon doing a read to the address the errors are injected.
+ */
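Two distinct positions are required because, for SECDED-style ECC, a single flipped bit is still correctable; only a double-bit flip is guaranteed to be uncorrectable. A hypothetical session (values illustrative):

	echo 0x12340000 > /sys/kernel/debug/edac/<mc instance>/address
	echo 3,31 > /sys/kernel/debug/edac/<mc instance>/inject_ue

Here bit 3 lands in the FLIP0 mask and bit 31 is rebased into the FLIP1 mask, matching the val0/val1 split performed by the parser below.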
+static ssize_t inject_data_ue_store(struct file *file, const char __user *data,
+				    size_t count, loff_t *ppos)
+{
+	struct device *dev = file->private_data;
+	struct mem_ctl_info *mci = to_mci(dev);
+	struct edac_priv *priv = mci->pvt_info;
+	char buf[6], *pbuf, *token[2];
+	u32 val0 = 0, val1 = 0;
+	u8 len, ue0, ue1;
+	int i, ret;
+
+	/* Leave room for the NUL terminator. */
+	len = min_t(size_t, count, sizeof(buf) - 1);
+	if (copy_from_user(buf, data, len))
+		return -EFAULT;
+
+	buf[len] = '\0';
+	pbuf = &buf[0];
+	for (i = 0; i < NUM_UE_BITPOS; i++)
+		token[i] = strsep(&pbuf, ",");
+
+	/* Both bit positions must be given, separated by a comma. */
+	if (!token[0] || !token[1])
+		return -EINVAL;
+
+	ret = kstrtou8(token[0], 0, &ue0);
+	if (ret)
+		return ret;
+
+	ret = kstrtou8(token[1], 0, &ue1);
+	if (ret)
+		return ret;
+
+	if (ue0 < ECCW0_FLIP0_BITS) {
+		val0 = BIT(ue0);
+	} else {
+		ue0 = ue0 - ECCW0_FLIP0_BITS;
+		val1 = BIT(ue0);
+	}
+
+	if (ue1 < ECCW0_FLIP0_BITS) {
+		val0 |= BIT(ue1);
+	} else {
+		ue1 = ue1 - ECCW0_FLIP0_BITS;
+		val1 |= BIT(ue1);
+	}
+
+	/* Unlock the PCSR registers */
+	writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_UNLOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+
+	poison_setup(priv);
+
+	xddr_inject_data_ue_store(mci, val0, val1);
+
+	/* Lock the PCSR registers */
+	writel(PCSR_LOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
 
 	return count;
 }
 
-static const struct file_operations xddr_inject_enable_fops = {
+static const struct file_operations xddr_inject_ue_fops = {
 	.open = simple_open,
-	.write = inject_data_poison_store,
+	.write = inject_data_ue_store,
 	.llseek = generic_file_llseek,
 };
 
@@ -795,8 +915,17 @@ static void create_debugfs_attributes(struct mem_ctl_info *mci)
 	if (!priv->debugfs)
 		return;
 
-	edac_debugfs_create_file("inject_error", 0200, priv->debugfs,
-				 &mci->dev, &xddr_inject_enable_fops);
+	if (!edac_debugfs_create_file("inject_ce", 0200, priv->debugfs,
+				      &mci->dev, &xddr_inject_ce_fops)) {
+		debugfs_remove_recursive(priv->debugfs);
+		return;
+	}
+
+	if (!edac_debugfs_create_file("inject_ue", 0200, priv->debugfs,
+				      &mci->dev, &xddr_inject_ue_fops)) {
+		debugfs_remove_recursive(priv->debugfs);
+		return;
+	}
 	debugfs_create_x64("address", 0600, priv->debugfs,
 			   &priv->err_inject_addr);
 	mci->debugfs = priv->debugfs;
@@ -1031,7 +1160,7 @@ free_edac_mc:
 	return rc;
 }
 
-static int mc_remove(struct platform_device *pdev)
+static void mc_remove(struct platform_device *pdev)
 {
 	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
 	struct edac_priv *priv = mci->pvt_info;
@@ -1049,8 +1178,6 @@ static int mc_remove(struct platform_device *pdev)
 				     XPM_EVENT_ERROR_MASK_DDRMC_NCR, err_callback, mci);
 	edac_mc_del_mc(&pdev->dev);
 	edac_mc_free(mci);
-
-	return 0;
 }
 
 static struct platform_driver xilinx_ddr_edac_mc_driver = {
@@ -1059,7 +1186,7 @@ static struct platform_driver xilinx_ddr_edac_mc_driver = {
 		.of_match_table = xlnx_edac_match,
 	},
 	.probe = mc_probe,
-	.remove = mc_remove,
+	.remove_new = mc_remove,
 };
 
 module_platform_driver(xilinx_ddr_edac_mc_driver);
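The mc_remove() conversion above follows the subsystem-wide move to void-returning platform remove callbacks, wired up through .remove_new in this kernel generation. A minimal sketch of the pattern with hypothetical names (not part of this patch):

#include <linux/module.h>
#include <linux/platform_device.h>

static int demo_probe(struct platform_device *pdev)
{
	return 0;	/* acquire resources here */
}

/* Returning void: the core ignores remove errors, so none can be reported. */
static void demo_remove(struct platform_device *pdev)
{
	/* release resources; this path must not fail */
}

static struct platform_driver demo_driver = {
	.driver = { .name = "demo" },
	.probe = demo_probe,
	.remove_new = demo_remove,	/* void-returning remove callback */
};
module_platform_driver(demo_driver);

MODULE_LICENSE("GPL");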
diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig
index c2a236f2e846..fc4f4bb94a4c 100644
--- a/drivers/ras/Kconfig
+++ b/drivers/ras/Kconfig
@@ -32,5 +32,18 @@ menuconfig RAS
 if RAS
 
 source "arch/x86/ras/Kconfig"
+source "drivers/ras/amd/atl/Kconfig"
+
+config RAS_FMPM
+	tristate "FRU Memory Poison Manager"
+	default m
+	depends on AMD_ATL && ACPI_APEI
+	help
+	  Support saving and restoring memory error information across reboot
+	  using ACPI ERST as persistent storage. Error information is saved with
+	  the UEFI CPER "FRU Memory Poison" section format.
+
+	  Memory will be retired during boot time and run time depending on
+	  platform-specific policies.
 
 endif
diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile
index 6f0404f50107..11f95d59d397 100644
--- a/drivers/ras/Makefile
+++ b/drivers/ras/Makefile
@@ -2,3 +2,6 @@
 obj-$(CONFIG_RAS)	+= ras.o
 obj-$(CONFIG_DEBUG_FS)	+= debugfs.o
 obj-$(CONFIG_RAS_CEC)	+= cec.o
+
+obj-$(CONFIG_RAS_FMPM)	+= amd/fmpm.o
+obj-y			+= amd/atl/
diff --git a/drivers/ras/amd/atl/Kconfig b/drivers/ras/amd/atl/Kconfig
new file mode 100644
index 000000000000..df49c23e7f62
--- /dev/null
+++ b/drivers/ras/amd/atl/Kconfig
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# AMD Address Translation Library Kconfig
+#
+# Copyright (c) 2023, Advanced Micro Devices, Inc.
+# All Rights Reserved.
+#
+# Author: Yazen Ghannam
+
+config AMD_ATL
+	tristate "AMD Address Translation Library"
+	depends on AMD_NB && X86_64 && RAS
+	depends on MEMORY_FAILURE
+	default n
+	help
+	  This library includes support for implementation-specific
+	  address translation procedures needed for various error
+	  handling cases.
+
+	  Enable this option if using DRAM ECC on Zen-based systems
+	  and OS-based error handling.
diff --git a/drivers/ras/amd/atl/Makefile b/drivers/ras/amd/atl/Makefile
new file mode 100644
index 000000000000..4acd5f05bd9c
--- /dev/null
+++ b/drivers/ras/amd/atl/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# AMD Address Translation Library Makefile
+#
+# Copyright (c) 2023, Advanced Micro Devices, Inc.
+# All Rights Reserved.
+#
+# Author: Yazen Ghannam
+
+amd_atl-y		:= access.o
+amd_atl-y		+= core.o
+amd_atl-y		+= dehash.o
+amd_atl-y		+= denormalize.o
+amd_atl-y		+= map.o
+amd_atl-y		+= system.o
+amd_atl-y		+= umc.o
+
+obj-$(CONFIG_AMD_ATL)	+= amd_atl.o
diff --git a/drivers/ras/amd/atl/access.c b/drivers/ras/amd/atl/access.c
new file mode 100644
index 000000000000..ee4661ed28ba
--- /dev/null
+++ b/drivers/ras/amd/atl/access.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Address Translation Library
+ *
+ * access.c : DF Indirect Access functions
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam
+ */
+
+#include "internal.h"
+
+/* Protect the PCI config register pairs used for DF indirect access. */
+static DEFINE_MUTEX(df_indirect_mutex);
+
+/*
+ * Data Fabric Indirect Access uses FICAA/FICAD.
+ *
+ * Fabric Indirect Configuration Access Address (FICAA): constructed based
+ * on the device's Instance Id and the PCI function and register offset of
+ * the desired register.
+ *
+ * Fabric Indirect Configuration Access Data (FICAD): there are FICAD
+ * low and high registers but so far only the low register is needed.
+ *
+ * Use Instance Id 0xFF to indicate a broadcast read.
+ */
+#define DF_BROADCAST	0xFF
+
+#define DF_FICAA_INST_EN	BIT(0)
+#define DF_FICAA_REG_NUM	GENMASK(10, 1)
+#define DF_FICAA_FUNC_NUM	GENMASK(13, 11)
+#define DF_FICAA_INST_ID	GENMASK(23, 16)
+
+#define DF_FICAA_REG_NUM_LEGACY	GENMASK(10, 2)
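To make the register layout concrete, here is how the bitfields above compose a FICAA value for an instance-specific read; this is a sketch mirroring __df_indirect_read() below, with illustrative function/register/instance numbers and a hypothetical helper name:

#include <linux/bitfield.h>

/* Illustrative: build the FICAA word for func 0, register 0x110, instance 5. */
static u32 example_ficaa(void)
{
	u32 ficaa = 0;

	ficaa |= FIELD_PREP(DF_FICAA_INST_EN, 1);	/* instance read, not broadcast */
	ficaa |= FIELD_PREP(DF_FICAA_INST_ID, 5);	/* target Coherent Station */
	ficaa |= FIELD_PREP(DF_FICAA_FUNC_NUM, 0);	/* PCI function of the register */
	/* The register offset is right-shifted by 2 before insertion. */
	ficaa |= FIELD_PREP(DF_FICAA_REG_NUM, 0x110 >> 2);

	return ficaa;
}

The composed word is then written to the FICAA config register and the result read back through FICAD, under df_indirect_mutex, as the function below shows.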
+
+static u16 get_accessible_node(u16 node)
+{
+	/*
+	 * On heterogeneous systems, not all AMD Nodes are accessible
+	 * through software-visible registers. The Node ID needs to be
+	 * adjusted for register accesses. But its value should not be
+	 * changed for the translation methods.
+	 */
+	if (df_cfg.flags.heterogeneous) {
+		/* Only Node 0 is accessible on DF3.5 systems. */
+		if (df_cfg.rev == DF3p5)
+			node = 0;
+
+		/*
+		 * Only the first Node in each Socket is accessible on
+		 * DF4.5 systems, and this is visible to software as one
+		 * Fabric per Socket. The Socket ID can be derived from
+		 * the Node ID and global shift values.
+		 */
+		if (df_cfg.rev == DF4p5)
+			node >>= df_cfg.socket_id_shift - df_cfg.node_id_shift;
+	}
+
+	return node;
+}
+
+static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
+{
+	u32 ficaa_addr = 0x8C, ficad_addr = 0xB8;
+	struct pci_dev *F4;
+	int err = -ENODEV;
+	u32 ficaa = 0;
+
+	node = get_accessible_node(node);
+	if (node >= amd_nb_num())
+		goto out;
+
+	F4 = node_to_amd_nb(node)->link;
+	if (!F4)
+		goto out;
+
+	/* Enable instance-specific access. */
+	if (instance_id != DF_BROADCAST) {
+		ficaa |= FIELD_PREP(DF_FICAA_INST_EN, 1);
+		ficaa |= FIELD_PREP(DF_FICAA_INST_ID, instance_id);
+	}
+
+	/*
+	 * The two least-significant bits are masked when inputting the
+	 * register offset to FICAA.
+	 */
+	reg >>= 2;
+
+	if (df_cfg.flags.legacy_ficaa) {
+		ficaa_addr = 0x5C;
+		ficad_addr = 0x98;
+
+		ficaa |= FIELD_PREP(DF_FICAA_REG_NUM_LEGACY, reg);
+	} else {
+		ficaa |= FIELD_PREP(DF_FICAA_REG_NUM, reg);
+	}
+
+	ficaa |= FIELD_PREP(DF_FICAA_FUNC_NUM, func);
+
+	mutex_lock(&df_indirect_mutex);
+
+	err = pci_write_config_dword(F4, ficaa_addr, ficaa);
+	if (err) {
+		pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa);
+		goto out_unlock;
+	}
+
+	err = pci_read_config_dword(F4, ficad_addr, lo);
+	if (err)
+		pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa);
+
+	pr_debug("node=%u inst=0x%x func=0x%x reg=0x%x val=0x%x",
+		 node, instance_id, func, reg << 2, *lo);
+
+out_unlock:
+	mutex_unlock(&df_indirect_mutex);
+
+out:
+	return err;
+}
+
+int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
+{
+	return __df_indirect_read(node, func, reg, instance_id, lo);
+}
+
+int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo)
+{
+	return __df_indirect_read(node, func, reg, DF_BROADCAST, lo);
+}
diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
new file mode 100644
index 000000000000..6dc4e06305f7
--- /dev/null
+++ b/drivers/ras/amd/atl/core.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Address Translation Library
+ *
+ * core.c : Module init and base translation functions
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam
+ */
+
+#include <linux/module.h>
+#include <linux/ras.h>
+
+#include "internal.h"
+
+struct df_config df_cfg __read_mostly;
+
+static int addr_over_limit(struct addr_ctx *ctx)
+{
+	u64 dram_limit_addr;
+
+	if (df_cfg.rev >= DF4)
+		dram_limit_addr = FIELD_GET(DF4_DRAM_LIMIT_ADDR, ctx->map.limit);
+	else
+		dram_limit_addr = FIELD_GET(DF2_DRAM_LIMIT_ADDR, ctx->map.limit);
+
+	dram_limit_addr <<= DF_DRAM_BASE_LIMIT_LSB;
+	dram_limit_addr |= GENMASK(DF_DRAM_BASE_LIMIT_LSB - 1, 0);
+
+	/* Is calculated system address above DRAM limit address?
*/ + if (ctx->ret_addr > dram_limit_addr) { + atl_debug(ctx, "Calculated address (0x%016llx) > DRAM limit (0x%016llx)", + ctx->ret_addr, dram_limit_addr); + return -EINVAL; + } + + return 0; +} + +static bool legacy_hole_en(struct addr_ctx *ctx) +{ + u32 reg = ctx->map.base; + + if (df_cfg.rev >= DF4) + reg = ctx->map.ctl; + + return FIELD_GET(DF_LEGACY_MMIO_HOLE_EN, reg); +} + +static int add_legacy_hole(struct addr_ctx *ctx) +{ + u32 dram_hole_base; + u8 func = 0; + + if (!legacy_hole_en(ctx)) + return 0; + + if (df_cfg.rev >= DF4) + func = 7; + + if (df_indirect_read_broadcast(ctx->node_id, func, 0x104, &dram_hole_base)) + return -EINVAL; + + dram_hole_base &= DF_DRAM_HOLE_BASE_MASK; + + if (ctx->ret_addr >= dram_hole_base) + ctx->ret_addr += (BIT_ULL(32) - dram_hole_base); + + return 0; +} + +static u64 get_base_addr(struct addr_ctx *ctx) +{ + u64 base_addr; + + if (df_cfg.rev >= DF4) + base_addr = FIELD_GET(DF4_BASE_ADDR, ctx->map.base); + else + base_addr = FIELD_GET(DF2_BASE_ADDR, ctx->map.base); + + return base_addr << DF_DRAM_BASE_LIMIT_LSB; +} + +static int add_base_and_hole(struct addr_ctx *ctx) +{ + ctx->ret_addr += get_base_addr(ctx); + + if (add_legacy_hole(ctx)) + return -EINVAL; + + return 0; +} + +static bool late_hole_remove(struct addr_ctx *ctx) +{ + if (df_cfg.rev == DF3p5) + return true; + + if (df_cfg.rev == DF4) + return true; + + if (ctx->map.intlv_mode == DF3_6CHAN) + return true; + + return false; +} + +unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr) +{ + struct addr_ctx ctx; + + if (df_cfg.rev == UNKNOWN) + return -EINVAL; + + memset(&ctx, 0, sizeof(ctx)); + + /* Start from the normalized address */ + ctx.ret_addr = addr; + ctx.inst_id = coh_st_inst_id; + + ctx.inputs.norm_addr = addr; + ctx.inputs.socket_id = socket_id; + ctx.inputs.die_id = die_id; + ctx.inputs.coh_st_inst_id = coh_st_inst_id; + + if (determine_node_id(&ctx, socket_id, die_id)) + return -EINVAL; + + if (get_address_map(&ctx)) + return -EINVAL; + + if (denormalize_address(&ctx)) + return -EINVAL; + + if (!late_hole_remove(&ctx) && add_base_and_hole(&ctx)) + return -EINVAL; + + if (dehash_address(&ctx)) + return -EINVAL; + + if (late_hole_remove(&ctx) && add_base_and_hole(&ctx)) + return -EINVAL; + + if (addr_over_limit(&ctx)) + return -EINVAL; + + return ctx.ret_addr; +} + +static void check_for_legacy_df_access(void) +{ + /* + * All Zen-based systems before Family 19h use the legacy + * DF Indirect Access (FICAA/FICAD) offsets. + */ + if (boot_cpu_data.x86 < 0x19) { + df_cfg.flags.legacy_ficaa = true; + return; + } + + /* All systems after Family 19h use the current offsets. */ + if (boot_cpu_data.x86 > 0x19) + return; + + /* Some Family 19h systems use the legacy offsets. */ + switch (boot_cpu_data.x86_model) { + case 0x00 ... 0x0f: + case 0x20 ... 0x5f: + df_cfg.flags.legacy_ficaa = true; + } +} + +/* + * This library provides functionality for AMD-based systems with a Data Fabric. + * The set of systems with a Data Fabric is equivalent to the set of Zen-based systems + * and the set of systems with the Scalable MCA feature at this time. However, these + * are technically independent things. + * + * It's possible to match on the PCI IDs of the Data Fabric devices, but this will be + * an ever expanding list. Instead, match on the SMCA and Zen features to cover all + * relevant systems. 
+ */
+static const struct x86_cpu_id amd_atl_cpuids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_SMCA, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_ZEN, NULL),
+	{ }
+};
+MODULE_DEVICE_TABLE(x86cpu, amd_atl_cpuids);
+
+static int __init amd_atl_init(void)
+{
+	if (!x86_match_cpu(amd_atl_cpuids))
+		return -ENODEV;
+
+	if (!amd_nb_num())
+		return -ENODEV;
+
+	check_for_legacy_df_access();
+
+	if (get_df_system_info())
+		return -ENODEV;
+
+	/* Increment this module's refcount so that it can't be easily unloaded. */
+	__module_get(THIS_MODULE);
+	amd_atl_register_decoder(convert_umc_mca_addr_to_sys_addr);
+
+	pr_info("AMD Address Translation Library initialized");
+	return 0;
+}
+
+/*
+ * Exit function is only needed for testing and debug. Module unload must be
+ * forced to override refcount check.
+ */
+static void __exit amd_atl_exit(void)
+{
+	amd_atl_unregister_decoder();
+}
+
+module_init(amd_atl_init);
+module_exit(amd_atl_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c
new file mode 100644
index 000000000000..4ea46262c4f5
--- /dev/null
+++ b/drivers/ras/amd/atl/dehash.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Address Translation Library
+ *
+ * dehash.c : Functions to account for hashing bits
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam
+ */
+
+#include "internal.h"
+
+/*
+ * Verify the interleave bits are correct in the different interleaving
+ * settings.
+ *
+ * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the
+ * respective interleaving is disabled.
+ */
+static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
+				  u8 num_intlv_dies, u8 num_intlv_sockets)
+{
+	if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) {
+		pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos);
+		return false;
+	}
+
+	if (ctx->map.num_intlv_dies > num_intlv_dies) {
+		pr_debug("Invalid number of interleave dies: %u", ctx->map.num_intlv_dies);
+		return false;
+	}
+
+	if (ctx->map.num_intlv_sockets > num_intlv_sockets) {
+		pr_debug("Invalid number of interleave sockets: %u", ctx->map.num_intlv_sockets);
+		return false;
+	}
+
+	return true;
+}
+
+static int df2_dehash_addr(struct addr_ctx *ctx)
+{
+	u8 hashed_bit, intlv_bit, intlv_bit_pos;
+
+	if (!map_bits_valid(ctx, 8, 9, 1, 1))
+		return -EINVAL;
+
+	intlv_bit_pos = ctx->map.intlv_bit_pos;
+	intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr);
+
+	hashed_bit = intlv_bit;
+	hashed_bit ^= FIELD_GET(BIT_ULL(12), ctx->ret_addr);
+	hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr);
+	hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr);
+	hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr);
+
+	if (hashed_bit != intlv_bit)
+		ctx->ret_addr ^= BIT_ULL(intlv_bit_pos);
+
+	return 0;
+}
+
+static int df3_dehash_addr(struct addr_ctx *ctx)
+{
+	bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
+	u8 hashed_bit, intlv_bit, intlv_bit_pos;
+
+	if (!map_bits_valid(ctx, 8, 9, 1, 1))
+		return -EINVAL;
+
+	hash_ctl_64k = FIELD_GET(DF3_HASH_CTL_64K, ctx->map.ctl);
+	hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl);
+	hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl);
+
+	intlv_bit_pos = ctx->map.intlv_bit_pos;
+	intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr);
+
+	hashed_bit = intlv_bit;
+	hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr);
+	hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k;
+	hashed_bit ^= FIELD_GET(BIT_ULL(23),
ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + /* Calculation complete for 2 channels. Continue for 4 and 8 channels. */ + if (ctx->map.intlv_mode == DF3_COD4_2CHAN_HASH) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(12); + + /* Calculation complete for 4 channels. Continue for 8 channels. */ + if (ctx->map.intlv_mode == DF3_COD2_4CHAN_HASH) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(13), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(13); + + return 0; +} + +static int df3_6chan_dehash_addr(struct addr_ctx *ctx) +{ + u8 intlv_bit_pos = ctx->map.intlv_bit_pos; + u8 hashed_bit, intlv_bit, num_intlv_bits; + bool hash_ctl_2M, hash_ctl_1G; + + if (ctx->map.intlv_mode != DF3_6CHAN) { + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } + + num_intlv_bits = ilog2(ctx->map.num_intlv_chan) + 1; + + hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl); + + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= !!(BIT_ULL(intlv_bit_pos + num_intlv_bits) & ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + intlv_bit_pos++; + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + intlv_bit_pos++; + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + return 0; +} + +static int df4_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; + u8 hashed_bit, intlv_bit; + + if (!map_bits_valid(ctx, 8, 8, 1, 2)) + return -EINVAL; + + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + + intlv_bit = FIELD_GET(BIT_ULL(8), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + if (ctx->map.num_intlv_sockets == 1) + hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr); + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(8); + + /* + * Hashing is possible with socket interleaving, so 
check the total number + * of channels in the system rather than DRAM map interleaving mode. + * + * Calculation complete for 2 channels. Continue for 4, 8, and 16 channels. + */ + if (ctx->map.total_intlv_chan <= 2) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(12); + + /* Calculation complete for 4 channels. Continue for 8 and 16 channels. */ + if (ctx->map.total_intlv_chan <= 4) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(13), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(13); + + /* Calculation complete for 8 channels. Continue for 16 channels. */ + if (ctx->map.total_intlv_chan <= 8) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(14), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(19), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(24), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(33), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(14); + + return 0; +} + +static int df4p5_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T; + u8 hashed_bit, intlv_bit; + u64 rehash_vector; + + if (!map_bits_valid(ctx, 8, 8, 1, 2)) + return -EINVAL; + + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); + + /* + * Generate a unique address to determine which bits + * need to be dehashed. + * + * Start with a contiguous bitmask for the total + * number of channels starting at bit 8. + * + * Then make a gap in the proper place based on + * interleave mode. 
+ */ + rehash_vector = ctx->map.total_intlv_chan - 1; + rehash_vector <<= 8; + + if (ctx->map.intlv_mode == DF4p5_NPS2_4CHAN_1K_HASH || + ctx->map.intlv_mode == DF4p5_NPS1_8CHAN_1K_HASH || + ctx->map.intlv_mode == DF4p5_NPS1_16CHAN_1K_HASH) + rehash_vector = expand_bits(10, 2, rehash_vector); + else + rehash_vector = expand_bits(9, 3, rehash_vector); + + if (rehash_vector & BIT_ULL(8)) { + intlv_bit = FIELD_GET(BIT_ULL(8), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(40), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(8); + } + + if (rehash_vector & BIT_ULL(9)) { + intlv_bit = FIELD_GET(BIT_ULL(9), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(41), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(9); + } + + if (rehash_vector & BIT_ULL(12)) { + intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(42), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(12); + } + + if (rehash_vector & BIT_ULL(13)) { + intlv_bit = FIELD_GET(BIT_ULL(13), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(19), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(24), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(33), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(43), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(13); + } + + if (rehash_vector & BIT_ULL(14)) { + intlv_bit = FIELD_GET(BIT_ULL(14), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(20), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(25), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(34), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(44), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(14); + } + + return 0; +} + +/* + * MI300 hash bits + * 4K 64K 2M 1G 1T 1T + * COH_ST_Select[0] = XOR of addr{8, 12, 15, 22, 29, 36, 43} + * COH_ST_Select[1] = XOR of addr{9, 13, 16, 23, 30, 37, 44} + * COH_ST_Select[2] = XOR of addr{10, 14, 17, 24, 31, 38, 45} + * COH_ST_Select[3] = XOR of addr{11, 18, 25, 32, 39, 46} + * COH_ST_Select[4] = XOR of addr{14, 19, 26, 33, 40, 47} aka Stack + * DieID[0] = XOR of addr{12, 20, 27, 34, 41 } + * DieID[1] = XOR of addr{13, 21, 28, 35, 42 } + */ +static int mi300_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_4k, hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T; + bool hashed_bit, intlv_bit, test_bit; + u8 num_intlv_bits, base_bit, i; + + if (!map_bits_valid(ctx, 8, 8, 4, 1)) + return -EINVAL; + + hash_ctl_4k = FIELD_GET(DF4p5_HASH_CTL_4K, ctx->map.ctl); + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + 
hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); + + /* Channel bits */ + num_intlv_bits = ilog2(ctx->map.num_intlv_chan); + + for (i = 0; i < num_intlv_bits; i++) { + base_bit = 8 + i; + + /* COH_ST_Select[4] jumps to a base bit of 14. */ + if (i == 4) + base_bit = 14; + + intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr; + + hashed_bit = intlv_bit; + + /* 4k hash bit only applies to the first 3 bits. */ + if (i <= 2) { + test_bit = BIT_ULL(12 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_4k; + } + + /* Use temporary 'test_bit' value to avoid Sparse warnings. */ + test_bit = BIT_ULL(15 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_64k; + test_bit = BIT_ULL(22 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_2M; + test_bit = BIT_ULL(29 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1G; + test_bit = BIT_ULL(36 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + test_bit = BIT_ULL(43 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(base_bit); + } + + /* Die bits */ + num_intlv_bits = ilog2(ctx->map.num_intlv_dies); + + for (i = 0; i < num_intlv_bits; i++) { + base_bit = 12 + i; + + intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr; + + hashed_bit = intlv_bit; + + test_bit = BIT_ULL(20 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_64k; + test_bit = BIT_ULL(27 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_2M; + test_bit = BIT_ULL(34 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1G; + test_bit = BIT_ULL(41 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(base_bit); + } + + return 0; +} + +int dehash_address(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + /* No hashing cases. */ + case NONE: + case NOHASH_2CHAN: + case NOHASH_4CHAN: + case NOHASH_8CHAN: + case NOHASH_16CHAN: + case NOHASH_32CHAN: + /* Hashing bits handled earlier during CS ID calculation. */ + case DF4_NPS4_3CHAN_HASH: + case DF4_NPS2_5CHAN_HASH: + case DF4_NPS2_6CHAN_HASH: + case DF4_NPS1_10CHAN_HASH: + case DF4_NPS1_12CHAN_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS0_24CHAN_2K_HASH: + /* No hash physical address bits, so nothing to do. 
*/ + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + return 0; + + case DF2_2CHAN_HASH: + return df2_dehash_addr(ctx); + + case DF3_COD4_2CHAN_HASH: + case DF3_COD2_4CHAN_HASH: + case DF3_COD1_8CHAN_HASH: + return df3_dehash_addr(ctx); + + case DF3_6CHAN: + return df3_6chan_dehash_addr(ctx); + + case DF4_NPS4_2CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + return df4_dehash_addr(ctx); + + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + case DF4p5_NPS1_16CHAN_1K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return df4p5_dehash_addr(ctx); + + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + return mi300_dehash_addr(ctx); + + default: + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } +} diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c new file mode 100644 index 000000000000..e279224288d6 --- /dev/null +++ b/drivers/ras/amd/atl/denormalize.c @@ -0,0 +1,718 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * denormalize.c : Functions to account for interleaving bits + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +/* + * Returns the Destination Fabric ID. This is the first (lowest) + * COH_ST Fabric ID used within a DRAM Address map. + */ +static u16 get_dst_fabric_id(struct addr_ctx *ctx) +{ + switch (df_cfg.rev) { + case DF2: return FIELD_GET(DF2_DST_FABRIC_ID, ctx->map.limit); + case DF3: return FIELD_GET(DF3_DST_FABRIC_ID, ctx->map.limit); + case DF3p5: return FIELD_GET(DF3p5_DST_FABRIC_ID, ctx->map.limit); + case DF4: return FIELD_GET(DF4_DST_FABRIC_ID, ctx->map.ctl); + case DF4p5: return FIELD_GET(DF4p5_DST_FABRIC_ID, ctx->map.ctl); + default: + atl_debug_on_bad_df_rev(); + return 0; + } +} + +/* + * Make a contiguous gap in address for N bits starting at bit P. + * + * Example: + * address bits: [20:0] + * # of interleave bits (n): 3 + * starting interleave bit (p): 8 + * + * expanded address bits: [20+n : n+p][n+p-1 : p][p-1 : 0] + * [23 : 11][10 : 8][7 : 0] + */ +static u64 make_space_for_coh_st_id_at_intlv_bit(struct addr_ctx *ctx) +{ + return expand_bits(ctx->map.intlv_bit_pos, + ctx->map.total_intlv_bits, + ctx->ret_addr); +} + +/* + * Make two gaps in address for N bits. + * First gap is a single bit at bit P. + * Second gap is the remaining N-1 bits at bit 12. + * + * Example: + * address bits: [20:0] + * # of interleave bits (n): 3 + * starting interleave bit (p): 8 + * + * First gap + * expanded address bits: [20+1 : p+1][p][p-1 : 0] + * [21 : 9][8][7 : 0] + * + * Second gap uses result from first. + * r = n - 1; remaining interleave bits + * expanded address bits: [21+r : 12+r][12+r-1: 12][11 : 0] + * [23 : 14][13 : 12][11 : 0] + */ +static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx) +{ + /* Make a single space at the interleave bit. */ + u64 denorm_addr = expand_bits(ctx->map.intlv_bit_pos, 1, ctx->ret_addr); + + /* Done if there's only a single interleave bit. */ + if (ctx->map.total_intlv_bits <= 1) + return denorm_addr; + + /* Make spaces for the remaining interleave bits starting at bit 12. 
*/ + return expand_bits(12, ctx->map.total_intlv_bits - 1, denorm_addr); +} + +/* + * Make space for CS ID at bits [14:8] as follows: + * + * 8 channels -> bits [10:8] + * 16 channels -> bits [11:8] + * 32 channels -> bits [14,11:8] + * + * 1 die -> N/A + * 2 dies -> bit [12] + * 4 dies -> bits [13:12] + */ +static u64 make_space_for_coh_st_id_mi300(struct addr_ctx *ctx) +{ + u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan); + u64 denorm_addr; + + if (ctx->map.intlv_bit_pos != 8) { + pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos); + return ~0ULL; + } + + /* Channel bits. Covers up to 4 bits at [11:8]. */ + denorm_addr = expand_bits(8, min(num_intlv_bits, 4), ctx->ret_addr); + + /* Die bits. Always starts at [12]. */ + denorm_addr = expand_bits(12, ilog2(ctx->map.num_intlv_dies), denorm_addr); + + /* Additional channel bit at [14]. */ + if (num_intlv_bits > 4) + denorm_addr = expand_bits(14, 1, denorm_addr); + + return denorm_addr; +} + +/* + * Take the current calculated address and shift enough bits in the middle + * to make a gap where the interleave bits will be inserted. + */ +static u64 make_space_for_coh_st_id(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + case NOHASH_2CHAN: + case NOHASH_4CHAN: + case NOHASH_8CHAN: + case NOHASH_16CHAN: + case NOHASH_32CHAN: + case DF2_2CHAN_HASH: + return make_space_for_coh_st_id_at_intlv_bit(ctx); + + case DF3_COD4_2CHAN_HASH: + case DF3_COD2_4CHAN_HASH: + case DF3_COD1_8CHAN_HASH: + case DF4_NPS4_2CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return make_space_for_coh_st_id_split_2_1(ctx); + + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + return make_space_for_coh_st_id_mi300(ctx); + + default: + atl_debug_on_bad_intlv_mode(ctx); + return ~0ULL; + } +} + +static u16 get_coh_st_id_df2(struct addr_ctx *ctx) +{ + u8 num_socket_intlv_bits = ilog2(ctx->map.num_intlv_sockets); + u8 num_die_intlv_bits = ilog2(ctx->map.num_intlv_dies); + u8 num_intlv_bits; + u16 coh_st_id, mask; + + coh_st_id = ctx->coh_st_fabric_id - get_dst_fabric_id(ctx); + + /* Channel interleave bits */ + num_intlv_bits = order_base_2(ctx->map.num_intlv_chan); + mask = GENMASK(num_intlv_bits - 1, 0); + coh_st_id &= mask; + + /* Die interleave bits */ + if (num_die_intlv_bits) { + u16 die_bits; + + mask = GENMASK(num_die_intlv_bits - 1, 0); + die_bits = ctx->coh_st_fabric_id & df_cfg.die_id_mask; + die_bits >>= df_cfg.die_id_shift; + + coh_st_id |= (die_bits & mask) << num_intlv_bits; + num_intlv_bits += num_die_intlv_bits; + } + + /* Socket interleave bits */ + if (num_socket_intlv_bits) { + u16 socket_bits; + + mask = GENMASK(num_socket_intlv_bits - 1, 0); + socket_bits = ctx->coh_st_fabric_id & df_cfg.socket_id_mask; + socket_bits >>= df_cfg.socket_id_shift; + + coh_st_id |= (socket_bits & mask) << num_intlv_bits; + } + + return coh_st_id; +} + +static u16 get_coh_st_id_df4(struct addr_ctx *ctx) +{ + /* + * Start with the original component mask and the number of interleave + * bits for the channels in this map. + */ + u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan); + u16 mask = df_cfg.component_id_mask; + + u16 socket_bits; + + /* Set the derived Coherent Station ID to the input Coherent Station Fabric ID. */ + u16 coh_st_id = ctx->coh_st_fabric_id & mask; + + /* + * Subtract the "base" Destination Fabric ID. 
+ * This accounts for systems with disabled Coherent Stations. + */ + coh_st_id -= get_dst_fabric_id(ctx) & mask; + + /* + * Generate and use a new mask based on the number of bits + * needed for channel interleaving in this map. + */ + mask = GENMASK(num_intlv_bits - 1, 0); + coh_st_id &= mask; + + /* Done if socket interleaving is not enabled. */ + if (ctx->map.num_intlv_sockets <= 1) + return coh_st_id; + + /* + * Figure out how many bits are needed for the number of + * interleaved sockets. And shift the derived Coherent Station ID to account + * for these. + */ + num_intlv_bits = ilog2(ctx->map.num_intlv_sockets); + coh_st_id <<= num_intlv_bits; + + /* Generate a new mask for the socket interleaving bits. */ + mask = GENMASK(num_intlv_bits - 1, 0); + + /* Get the socket interleave bits from the original Coherent Station Fabric ID. */ + socket_bits = (ctx->coh_st_fabric_id & df_cfg.socket_id_mask) >> df_cfg.socket_id_shift; + + /* Apply the appropriate socket bits to the derived Coherent Station ID. */ + coh_st_id |= socket_bits & mask; + + return coh_st_id; +} + +/* + * MI300 hash has: + * (C)hannel[3:0] = coh_st_id[3:0] + * (S)tack[0] = coh_st_id[4] + * (D)ie[1:0] = coh_st_id[6:5] + * + * Hashed coh_st_id is swizzled so that Stack bit is at the end. + * coh_st_id = SDDCCCC + */ +static u16 get_coh_st_id_mi300(struct addr_ctx *ctx) +{ + u8 channel_bits, die_bits, stack_bit; + u16 die_id; + + /* Subtract the "base" Destination Fabric ID. */ + ctx->coh_st_fabric_id -= get_dst_fabric_id(ctx); + + die_id = (ctx->coh_st_fabric_id & df_cfg.die_id_mask) >> df_cfg.die_id_shift; + + channel_bits = FIELD_GET(GENMASK(3, 0), ctx->coh_st_fabric_id); + stack_bit = FIELD_GET(BIT(4), ctx->coh_st_fabric_id) << 6; + die_bits = die_id << 4; + + return stack_bit | die_bits | channel_bits; +} + +/* + * Derive the correct Coherent Station ID that represents the interleave bits + * used within the system physical address. This accounts for the + * interleave mode, number of interleaved channels/dies/sockets, and + * other system/mode-specific bit swizzling. + * + * Returns: Coherent Station ID on success. + * All bits set on error. + */ +static u16 calculate_coh_st_id(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + case NOHASH_2CHAN: + case NOHASH_4CHAN: + case NOHASH_8CHAN: + case NOHASH_16CHAN: + case NOHASH_32CHAN: + case DF3_COD4_2CHAN_HASH: + case DF3_COD2_4CHAN_HASH: + case DF3_COD1_8CHAN_HASH: + case DF2_2CHAN_HASH: + return get_coh_st_id_df2(ctx); + + case DF4_NPS4_2CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return get_coh_st_id_df4(ctx); + + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + return get_coh_st_id_mi300(ctx); + + /* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. */ + case DF4p5_NPS2_4CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_1K_HASH: + case DF4p5_NPS1_16CHAN_1K_HASH: + return ctx->coh_st_fabric_id - get_dst_fabric_id(ctx); + + default: + atl_debug_on_bad_intlv_mode(ctx); + return ~0; + } +} + +static u64 insert_coh_st_id_at_intlv_bit(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) +{ + return denorm_addr | (coh_st_id << ctx->map.intlv_bit_pos); +} + +static u64 insert_coh_st_id_split_2_1(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) +{ + /* Insert coh_st_id[0] at the interleave bit. 
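+ * For a hypothetical coh_st_id = 0x5 (101b) with intlv_bit_pos = 8,
+ * bit 8 receives coh_st_id[0] = 1, and coh_st_id[2:1] = 10b lands at
+ * bits [13:12] in the next step.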
*/ + denorm_addr |= (coh_st_id & BIT(0)) << ctx->map.intlv_bit_pos; + + /* Insert coh_st_id[2:1] at bit 12. */ + denorm_addr |= (coh_st_id & GENMASK(2, 1)) << 11; + + return denorm_addr; +} + +static u64 insert_coh_st_id_split_2_2(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) +{ + /* Insert coh_st_id[1:0] at bit 8. */ + denorm_addr |= (coh_st_id & GENMASK(1, 0)) << 8; + + /* + * Insert coh_st_id[n:2] at bit 12. 'n' could be 2 or 3. + * Grab both because bit 3 will be clear if unused. + */ + denorm_addr |= (coh_st_id & GENMASK(3, 2)) << 10; + + return denorm_addr; +} + +static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) +{ + switch (ctx->map.intlv_mode) { + case NOHASH_2CHAN: + case NOHASH_4CHAN: + case NOHASH_8CHAN: + case NOHASH_16CHAN: + case NOHASH_32CHAN: + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + case DF2_2CHAN_HASH: + return insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id); + + case DF3_COD4_2CHAN_HASH: + case DF3_COD2_4CHAN_HASH: + case DF3_COD1_8CHAN_HASH: + case DF4_NPS4_2CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return insert_coh_st_id_split_2_1(ctx, denorm_addr, coh_st_id); + + case DF4p5_NPS2_4CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_1K_HASH: + case DF4p5_NPS1_16CHAN_1K_HASH: + return insert_coh_st_id_split_2_2(ctx, denorm_addr, coh_st_id); + + default: + atl_debug_on_bad_intlv_mode(ctx); + return ~0ULL; + } +} + +/* + * MI300 systems have a fixed, hardware-defined physical-to-logical + * Coherent Station mapping. The Remap registers are not used. + */ +static const u16 phy_to_log_coh_st_map_mi300[] = { + 12, 13, 14, 15, + 8, 9, 10, 11, + 4, 5, 6, 7, + 0, 1, 2, 3, + 28, 29, 30, 31, + 24, 25, 26, 27, + 20, 21, 22, 23, + 16, 17, 18, 19, +}; + +static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx) +{ + if (ctx->inst_id >= ARRAY_SIZE(phy_to_log_coh_st_map_mi300)) { + atl_debug(ctx, "Instance ID out of range"); + return ~0; + } + + return phy_to_log_coh_st_map_mi300[ctx->inst_id] | (ctx->node_id << df_cfg.node_id_shift); +} + +static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx) +{ + u16 component_id, log_fabric_id; + + /* Start with the physical COH_ST Fabric ID. */ + u16 phys_fabric_id = ctx->coh_st_fabric_id; + + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return get_logical_coh_st_fabric_id_mi300(ctx); + + /* Skip logical ID lookup if remapping is disabled. */ + if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) && + ctx->map.intlv_mode != DF3_6CHAN) + return phys_fabric_id; + + /* Mask off the Node ID bits to get the "local" Component ID. */ + component_id = phys_fabric_id & df_cfg.component_id_mask; + + /* + * Search the list of logical Component IDs for the one that + * matches this physical Component ID. + */ + for (log_fabric_id = 0; log_fabric_id < MAX_COH_ST_CHANNELS; log_fabric_id++) { + if (ctx->map.remap_array[log_fabric_id] == component_id) + break; + } + + if (log_fabric_id == MAX_COH_ST_CHANNELS) + atl_debug(ctx, "COH_ST remap entry not found for 0x%x", + log_fabric_id); + + /* Get the Node ID bits from the physical and apply to the logical. 
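+ * e.g. with a hypothetical node_id_mask = 0x700, physical ID 0x523
+ * and logical Component ID 0x3 combine into 0x503.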
*/ + return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id; +} + +static int denorm_addr_common(struct addr_ctx *ctx) +{ + u64 denorm_addr; + u16 coh_st_id; + + /* + * Convert the original physical COH_ST Fabric ID to a logical value. + * This is required for non-power-of-two and other interleaving modes. + */ + ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx); + + denorm_addr = make_space_for_coh_st_id(ctx); + coh_st_id = calculate_coh_st_id(ctx); + ctx->ret_addr = insert_coh_st_id(ctx, denorm_addr, coh_st_id); + return 0; +} + +static int denorm_addr_df3_6chan(struct addr_ctx *ctx) +{ + u16 coh_st_id = ctx->coh_st_fabric_id & df_cfg.component_id_mask; + u8 total_intlv_bits = ctx->map.total_intlv_bits; + u8 low_bit, intlv_bit = ctx->map.intlv_bit_pos; + u64 msb_intlv_bits, temp_addr_a, temp_addr_b; + u8 np2_bits = ctx->map.np2_bits; + + if (ctx->map.intlv_mode != DF3_6CHAN) + return -EINVAL; + + /* + * 'np2_bits' holds the number of bits needed to cover the + * amount of memory (rounded up) in this map using 64K chunks. + * + * Example: + * Total memory in map: 6GB + * Rounded up to next power-of-2: 8GB + * Number of 64K chunks: 0x20000 + * np2_bits = log2(# of chunks): 17 + * + * Get the two most-significant interleave bits from the + * input address based on the following: + * + * [15 + np2_bits - total_intlv_bits : 14 + np2_bits - total_intlv_bits] + */ + low_bit = 14 + np2_bits - total_intlv_bits; + msb_intlv_bits = ctx->ret_addr >> low_bit; + msb_intlv_bits &= 0x3; + + /* + * If MSB are 11b, then logical COH_ST ID is 6 or 7. + * Need to adjust based on the mod3 result. + */ + if (msb_intlv_bits == 3) { + u8 addr_mod, phys_addr_msb, msb_coh_st_id; + + /* Get the remaining interleave bits from the input address. */ + temp_addr_b = GENMASK_ULL(low_bit - 1, intlv_bit) & ctx->ret_addr; + temp_addr_b >>= intlv_bit; + + /* Calculate the logical COH_ST offset based on mod3. */ + addr_mod = temp_addr_b % 3; + + /* Get COH_ST ID bits [2:1]. */ + msb_coh_st_id = (coh_st_id >> 1) & 0x3; + + /* Get the bit that starts the physical address bits. */ + phys_addr_msb = (intlv_bit + np2_bits + 1); + phys_addr_msb &= BIT(0); + phys_addr_msb++; + phys_addr_msb *= 3 - addr_mod + msb_coh_st_id; + phys_addr_msb %= 3; + + /* Move the physical address MSB to the correct place. 
*/ + temp_addr_b |= phys_addr_msb << (low_bit - total_intlv_bits - intlv_bit); + + /* Generate a new COH_ST ID as follows: coh_st_id = [1, 1, coh_st_id[0]] */ + coh_st_id &= BIT(0); + coh_st_id |= GENMASK(2, 1); + } else { + temp_addr_b = GENMASK_ULL(63, intlv_bit) & ctx->ret_addr; + temp_addr_b >>= intlv_bit; + } + + temp_addr_a = GENMASK_ULL(intlv_bit - 1, 0) & ctx->ret_addr; + temp_addr_b <<= intlv_bit + total_intlv_bits; + + ctx->ret_addr = temp_addr_a | temp_addr_b; + ctx->ret_addr |= coh_st_id << intlv_bit; + return 0; +} + +static int denorm_addr_df4_np2(struct addr_ctx *ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; + u16 group, group_offset, log_coh_st_offset; + unsigned int mod_value, shift_value; + u16 mask = df_cfg.component_id_mask; + u64 temp_addr_a, temp_addr_b; + bool hash_pa8, hashed_bit; + + switch (ctx->map.intlv_mode) { + case DF4_NPS4_3CHAN_HASH: + mod_value = 3; + shift_value = 13; + break; + case DF4_NPS2_6CHAN_HASH: + mod_value = 3; + shift_value = 12; + break; + case DF4_NPS1_12CHAN_HASH: + mod_value = 3; + shift_value = 11; + break; + case DF4_NPS2_5CHAN_HASH: + mod_value = 5; + shift_value = 13; + break; + case DF4_NPS1_10CHAN_HASH: + mod_value = 5; + shift_value = 12; + break; + default: + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + }; + + if (ctx->map.num_intlv_sockets == 1) { + hash_pa8 = BIT_ULL(shift_value) & ctx->ret_addr; + temp_addr_a = remove_bits(shift_value, shift_value, ctx->ret_addr); + } else { + hash_pa8 = ctx->coh_st_fabric_id & df_cfg.socket_id_mask; + temp_addr_a = ctx->ret_addr; + } + + /* Make a gap for the real bit [8]. */ + temp_addr_a = expand_bits(8, 1, temp_addr_a); + + /* Make an additional gap for bits [13:12], as appropriate.*/ + if (ctx->map.intlv_mode == DF4_NPS2_6CHAN_HASH || + ctx->map.intlv_mode == DF4_NPS1_10CHAN_HASH) { + temp_addr_a = expand_bits(13, 1, temp_addr_a); + } else if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) { + temp_addr_a = expand_bits(12, 2, temp_addr_a); + } + + /* Keep bits [13:0]. */ + temp_addr_a &= GENMASK_ULL(13, 0); + + /* Get the appropriate high bits. */ + shift_value += 1 - ilog2(ctx->map.num_intlv_sockets); + temp_addr_b = GENMASK_ULL(63, shift_value) & ctx->ret_addr; + temp_addr_b >>= shift_value; + temp_addr_b *= mod_value; + + /* + * Coherent Stations are divided into groups. + * + * Multiples of 3 (mod3) are divided into quadrants. + * e.g. NP4_3CHAN -> [0, 1, 2] [6, 7, 8] + * [3, 4, 5] [9, 10, 11] + * + * Multiples of 5 (mod5) are divided into sides. + * e.g. NP2_5CHAN -> [0, 1, 2, 3, 4] [5, 6, 7, 8, 9] + */ + + /* + * Calculate the logical offset for the COH_ST within its DRAM Address map. + * e.g. if map includes [5, 6, 7, 8, 9] and target instance is '8', then + * log_coh_st_offset = 8 - 5 = 3 + */ + log_coh_st_offset = (ctx->coh_st_fabric_id & mask) - (get_dst_fabric_id(ctx) & mask); + + /* + * Figure out the group number. + * + * Following above example, + * log_coh_st_offset = 3 + * mod_value = 5 + * group = 3 / 5 = 0 + */ + group = log_coh_st_offset / mod_value; + + /* + * Figure out the offset within the group. + * + * Following above example, + * log_coh_st_offset = 3 + * mod_value = 5 + * group_offset = 3 % 5 = 3 + */ + group_offset = log_coh_st_offset % mod_value; + + /* Adjust group_offset if the hashed bit [8] is set. */ + if (hash_pa8) { + if (!group_offset) + group_offset = mod_value - 1; + else + group_offset--; + } + + /* Add in the group offset to the high bits. */ + temp_addr_b += group_offset; + + /* Shift the high bits to the proper starting position. 
*/ + temp_addr_b <<= 14; + + /* Combine the high and low bits together. */ + ctx->ret_addr = temp_addr_a | temp_addr_b; + + /* Account for hashing here instead of in dehash_address(). */ + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + + hashed_bit = !!hash_pa8; + hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + ctx->ret_addr |= hashed_bit << 8; + + /* Done for 3 and 5 channel. */ + if (ctx->map.intlv_mode == DF4_NPS4_3CHAN_HASH || + ctx->map.intlv_mode == DF4_NPS2_5CHAN_HASH) + return 0; + + /* Select the proper 'group' bit to use for Bit 13. */ + if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) + hashed_bit = !!(group & BIT(1)); + else + hashed_bit = group & BIT(0); + + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + ctx->ret_addr |= hashed_bit << 13; + + /* Done for 6 and 10 channel. */ + if (ctx->map.intlv_mode != DF4_NPS1_12CHAN_HASH) + return 0; + + hashed_bit = group & BIT(0); + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + ctx->ret_addr |= hashed_bit << 12; + return 0; +} + +int denormalize_address(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + case NONE: + return 0; + case DF4_NPS4_3CHAN_HASH: + case DF4_NPS2_6CHAN_HASH: + case DF4_NPS1_12CHAN_HASH: + case DF4_NPS2_5CHAN_HASH: + case DF4_NPS1_10CHAN_HASH: + return denorm_addr_df4_np2(ctx); + case DF3_6CHAN: + return denorm_addr_df3_6chan(ctx); + default: + return denorm_addr_common(ctx); + } +} diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h new file mode 100644 index 000000000000..5de69e0bb0f9 --- /dev/null +++ b/drivers/ras/amd/atl/internal.h @@ -0,0 +1,306 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Address Translation Library + * + * internal.h : Helper functions and common defines + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#ifndef __AMD_ATL_INTERNAL_H__ +#define __AMD_ATL_INTERNAL_H__ + +#include <linux/bitfield.h> +#include <linux/bitops.h> +#include <linux/ras.h> + +#include <asm/amd_nb.h> + +#include "reg_fields.h" + +/* Maximum possible number of Coherent Stations within a single Data Fabric. */ +#define MAX_COH_ST_CHANNELS 32 + +/* PCI ID for Zen4 Server DF Function 0. */ +#define DF_FUNC0_ID_ZEN4_SERVER 0x14AD1022 + +/* PCI IDs for MI300 DF Function 0. */ +#define DF_FUNC0_ID_MI300 0x15281022 + +/* Shift needed for adjusting register values to true values. */ +#define DF_DRAM_BASE_LIMIT_LSB 28 +#define MI300_DRAM_LIMIT_LSB 20 + +enum df_revisions { + UNKNOWN, + DF2, + DF3, + DF3p5, + DF4, + DF4p5, +}; + +/* These are mapped 1:1 to the hardware values. Special cases are set at > 0x20. 
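+ * e.g. DF2 hashing reads as 8 from the register but is stored as
+ * DF2_2CHAN_HASH (0x21), and DF4.5 register values get 0x20 added,
+ * so a raw 0x11 becomes DF4p5_NPS2_4CHAN_1K_HASH (0x31).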
*/ +enum intlv_modes { + NONE = 0x00, + NOHASH_2CHAN = 0x01, + NOHASH_4CHAN = 0x03, + NOHASH_8CHAN = 0x05, + DF3_6CHAN = 0x06, + NOHASH_16CHAN = 0x07, + NOHASH_32CHAN = 0x08, + DF3_COD4_2CHAN_HASH = 0x0C, + DF3_COD2_4CHAN_HASH = 0x0D, + DF3_COD1_8CHAN_HASH = 0x0E, + DF4_NPS4_2CHAN_HASH = 0x10, + DF4_NPS2_4CHAN_HASH = 0x11, + DF4_NPS1_8CHAN_HASH = 0x12, + DF4_NPS4_3CHAN_HASH = 0x13, + DF4_NPS2_6CHAN_HASH = 0x14, + DF4_NPS1_12CHAN_HASH = 0x15, + DF4_NPS2_5CHAN_HASH = 0x16, + DF4_NPS1_10CHAN_HASH = 0x17, + MI3_HASH_8CHAN = 0x18, + MI3_HASH_16CHAN = 0x19, + MI3_HASH_32CHAN = 0x1A, + DF2_2CHAN_HASH = 0x21, + /* DF4.5 modes are all IntLvNumChan + 0x20 */ + DF4p5_NPS1_16CHAN_1K_HASH = 0x2C, + DF4p5_NPS0_24CHAN_1K_HASH = 0x2E, + DF4p5_NPS4_2CHAN_1K_HASH = 0x30, + DF4p5_NPS2_4CHAN_1K_HASH = 0x31, + DF4p5_NPS1_8CHAN_1K_HASH = 0x32, + DF4p5_NPS4_3CHAN_1K_HASH = 0x33, + DF4p5_NPS2_6CHAN_1K_HASH = 0x34, + DF4p5_NPS1_12CHAN_1K_HASH = 0x35, + DF4p5_NPS2_5CHAN_1K_HASH = 0x36, + DF4p5_NPS1_10CHAN_1K_HASH = 0x37, + DF4p5_NPS4_2CHAN_2K_HASH = 0x40, + DF4p5_NPS2_4CHAN_2K_HASH = 0x41, + DF4p5_NPS1_8CHAN_2K_HASH = 0x42, + DF4p5_NPS1_16CHAN_2K_HASH = 0x43, + DF4p5_NPS4_3CHAN_2K_HASH = 0x44, + DF4p5_NPS2_6CHAN_2K_HASH = 0x45, + DF4p5_NPS1_12CHAN_2K_HASH = 0x46, + DF4p5_NPS0_24CHAN_2K_HASH = 0x47, + DF4p5_NPS2_5CHAN_2K_HASH = 0x48, + DF4p5_NPS1_10CHAN_2K_HASH = 0x49, +}; + +struct df_flags { + __u8 legacy_ficaa : 1, + socket_id_shift_quirk : 1, + heterogeneous : 1, + __reserved_0 : 5; +}; + +struct df_config { + enum df_revisions rev; + + /* + * These masks operate on the 16-bit Coherent Station IDs, + * e.g. Instance, Fabric, Destination, etc. + */ + u16 component_id_mask; + u16 die_id_mask; + u16 node_id_mask; + u16 socket_id_mask; + + /* + * Least-significant bit of Node ID portion of the + * system-wide Coherent Station Fabric ID. + */ + u8 node_id_shift; + + /* + * Least-significant bit of Die portion of the Node ID. + * Adjusted to include the Node ID shift in order to apply + * to the Coherent Station Fabric ID. + */ + u8 die_id_shift; + + /* + * Least-significant bit of Socket portion of the Node ID. + * Adjusted to include the Node ID shift in order to apply + * to the Coherent Station Fabric ID. + */ + u8 socket_id_shift; + + /* Number of DRAM Address maps visible in a Coherent Station. */ + u8 num_coh_st_maps; + + /* Global flags to handle special cases. */ + struct df_flags flags; +}; + +extern struct df_config df_cfg; + +struct dram_addr_map { + /* + * Each DRAM Address Map can operate independently + * in different interleaving modes. + */ + enum intlv_modes intlv_mode; + + /* System-wide number for this address map. */ + u8 num; + + /* Raw register values */ + u32 base; + u32 limit; + u32 ctl; + u32 intlv; + + /* + * Logical to Physical Coherent Station Remapping array + * + * Index: Logical Coherent Station Instance ID + * Value: Physical Coherent Station Instance ID + * + * phys_coh_st_inst_id = remap_array[log_coh_st_inst_id] + */ + u8 remap_array[MAX_COH_ST_CHANNELS]; + + /* + * Number of bits covering DRAM Address map 0 + * when interleaving is non-power-of-2. + * + * Used only for DF3_6CHAN. + */ + u8 np2_bits; + + /* Position of the 'interleave bit'. */ + u8 intlv_bit_pos; + /* Number of channels interleaved in this map. */ + u8 num_intlv_chan; + /* Number of dies interleaved in this map. */ + u8 num_intlv_dies; + /* Number of sockets interleaved in this map. */ + u8 num_intlv_sockets; + /* + * Total number of channels interleaved accounting + * for die and socket interleaving. 
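+ * e.g. 8 channels * 2 dies * 2 sockets = 32.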
+ */ + u8 total_intlv_chan; + /* Total bits needed to cover 'total_intlv_chan'. */ + u8 total_intlv_bits; +}; + +/* Original input values cached for debug printing. */ +struct addr_ctx_inputs { + u64 norm_addr; + u8 socket_id; + u8 die_id; + u8 coh_st_inst_id; +}; + +struct addr_ctx { + u64 ret_addr; + + struct addr_ctx_inputs inputs; + struct dram_addr_map map; + + /* AMD Node ID calculated from Socket and Die IDs. */ + u8 node_id; + + /* + * Coherent Station Instance ID + * Local ID used within a 'node'. + */ + u16 inst_id; + + /* + * Coherent Station Fabric ID + * System-wide ID that includes 'node' bits. + */ + u16 coh_st_fabric_id; +}; + +int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo); +int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo); + +int get_df_system_info(void); +int determine_node_id(struct addr_ctx *ctx, u8 socket_num, u8 die_num); +int get_addr_hash_mi300(void); + +int get_address_map(struct addr_ctx *ctx); + +int denormalize_address(struct addr_ctx *ctx); +int dehash_address(struct addr_ctx *ctx); + +unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr); +unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err); + +/* + * Make a gap in @data that is @num_bits long starting at @bit_num. + * e.g. data = 11111111'b + * bit_num = 3 + * num_bits = 2 + * result = 1111100111'b + */ +static inline u64 expand_bits(u8 bit_num, u8 num_bits, u64 data) +{ + u64 temp1, temp2; + + if (!num_bits) + return data; + + if (!bit_num) { + WARN_ON_ONCE(num_bits >= BITS_PER_LONG); + return data << num_bits; + } + + WARN_ON_ONCE(bit_num >= BITS_PER_LONG); + + temp1 = data & GENMASK_ULL(bit_num - 1, 0); + + temp2 = data & GENMASK_ULL(63, bit_num); + temp2 <<= num_bits; + + return temp1 | temp2; +} + +/* + * Remove bits in @data between @low_bit and @high_bit inclusive. + * e.g. data = XXXYYZZZ'b + * low_bit = 3 + * high_bit = 4 + * result = XXXZZZ'b + */ +static inline u64 remove_bits(u8 low_bit, u8 high_bit, u64 data) +{ + u64 temp1, temp2; + + WARN_ON_ONCE(high_bit >= BITS_PER_LONG); + WARN_ON_ONCE(low_bit >= BITS_PER_LONG); + WARN_ON_ONCE(low_bit > high_bit); + + if (!low_bit) + return data >> (high_bit + 1); + + temp1 = GENMASK_ULL(low_bit - 1, 0) & data; + temp2 = GENMASK_ULL(63, high_bit + 1) & data; + temp2 >>= high_bit - low_bit + 1; + + return temp1 | temp2; +} + +#define atl_debug(ctx, fmt, arg...) \ + pr_debug("socket_id=%u die_id=%u coh_st_inst_id=%u norm_addr=0x%016llx: " fmt,\ + (ctx)->inputs.socket_id, (ctx)->inputs.die_id,\ + (ctx)->inputs.coh_st_inst_id, (ctx)->inputs.norm_addr, ##arg) + +static inline void atl_debug_on_bad_df_rev(void) +{ + pr_debug("Unrecognized DF rev: %u", df_cfg.rev); +} + +static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx) +{ + atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode); +} + +#endif /* __AMD_ATL_INTERNAL_H__ */ diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c new file mode 100644 index 000000000000..8b908e8d7495 --- /dev/null +++ b/drivers/ras/amd/atl/map.c @@ -0,0 +1,682 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * map.c : Functions to read and decode DRAM address maps + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +static int df2_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF2_INTLV_NUM_CHAN, ctx->map.base); + + if (ctx->map.intlv_mode == 8) + ctx->map.intlv_mode = DF2_2CHAN_HASH; + + if (ctx->map.intlv_mode != NONE && + ctx->map.intlv_mode != NOHASH_2CHAN && + ctx->map.intlv_mode != DF2_2CHAN_HASH) + return -EINVAL; + + return 0; +} + +static int df3_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF3_INTLV_NUM_CHAN, ctx->map.base); + return 0; +} + +static int df3p5_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF3p5_INTLV_NUM_CHAN, ctx->map.base); + + if (ctx->map.intlv_mode == DF3_6CHAN) + return -EINVAL; + + return 0; +} + +static int df4_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF4_INTLV_NUM_CHAN, ctx->map.intlv); + + if (ctx->map.intlv_mode == DF3_COD4_2CHAN_HASH || + ctx->map.intlv_mode == DF3_COD2_4CHAN_HASH || + ctx->map.intlv_mode == DF3_COD1_8CHAN_HASH || + ctx->map.intlv_mode == DF3_6CHAN) + return -EINVAL; + + return 0; +} + +static int df4p5_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF4p5_INTLV_NUM_CHAN, ctx->map.intlv); + + if (ctx->map.intlv_mode <= NOHASH_32CHAN) + return 0; + + if (ctx->map.intlv_mode >= MI3_HASH_8CHAN && + ctx->map.intlv_mode <= MI3_HASH_32CHAN) + return 0; + + /* + * Modes matching the ranges above are returned as-is. + * + * All other modes are "fixed up" by adding 20h to make a unique value. + */ + ctx->map.intlv_mode += 0x20; + + return 0; +} + +static int get_intlv_mode(struct addr_ctx *ctx) +{ + int ret; + + switch (df_cfg.rev) { + case DF2: + ret = df2_get_intlv_mode(ctx); + break; + case DF3: + ret = df3_get_intlv_mode(ctx); + break; + case DF3p5: + ret = df3p5_get_intlv_mode(ctx); + break; + case DF4: + ret = df4_get_intlv_mode(ctx); + break; + case DF4p5: + ret = df4p5_get_intlv_mode(ctx); + break; + default: + ret = -EINVAL; + } + + if (ret) + atl_debug_on_bad_df_rev(); + + return ret; +} + +static u64 get_hi_addr_offset(u32 reg_dram_offset) +{ + u8 shift = DF_DRAM_BASE_LIMIT_LSB; + u64 hi_addr_offset; + + switch (df_cfg.rev) { + case DF2: + hi_addr_offset = FIELD_GET(DF2_HI_ADDR_OFFSET, reg_dram_offset); + break; + case DF3: + case DF3p5: + hi_addr_offset = FIELD_GET(DF3_HI_ADDR_OFFSET, reg_dram_offset); + break; + case DF4: + case DF4p5: + hi_addr_offset = FIELD_GET(DF4_HI_ADDR_OFFSET, reg_dram_offset); + break; + default: + hi_addr_offset = 0; + atl_debug_on_bad_df_rev(); + } + + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + shift = MI300_DRAM_LIMIT_LSB; + + return hi_addr_offset << shift; +} + +/* + * Returns: 0 if offset is disabled. + * 1 if offset is enabled. + * -EINVAL on error. + */ +static int get_dram_offset(struct addr_ctx *ctx, u64 *norm_offset) +{ + u32 reg_dram_offset; + u8 map_num; + + /* Should not be called for map 0. */ + if (!ctx->map.num) { + atl_debug(ctx, "Trying to find DRAM offset for map 0"); + return -EINVAL; + } + + /* + * DramOffset registers don't exist for map 0, so the base register + * actually refers to map 1. + * Adjust the map_num for the register offsets. 
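+ * e.g. map 2 uses register index 1, i.e. D18F7x144 on DF4 and
+ * D18F0x1B8 on earlier revisions.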
+ */ + map_num = ctx->map.num - 1; + + if (df_cfg.rev >= DF4) { + /* Read D18F7x140 (DramOffset) */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x140 + (4 * map_num), + ctx->inst_id, ®_dram_offset)) + return -EINVAL; + + } else { + /* Read D18F0x1B4 (DramOffset) */ + if (df_indirect_read_instance(ctx->node_id, 0, 0x1B4 + (4 * map_num), + ctx->inst_id, ®_dram_offset)) + return -EINVAL; + } + + if (!FIELD_GET(DF_HI_ADDR_OFFSET_EN, reg_dram_offset)) + return 0; + + *norm_offset = get_hi_addr_offset(reg_dram_offset); + + return 1; +} + +static int df3_6ch_get_dram_addr_map(struct addr_ctx *ctx) +{ + u16 dst_fabric_id = FIELD_GET(DF3_DST_FABRIC_ID, ctx->map.limit); + u8 i, j, shift = 4, mask = 0xF; + u32 reg, offset = 0x60; + u16 dst_node_id; + + /* Get Socket 1 register. */ + if (dst_fabric_id & df_cfg.socket_id_mask) + offset = 0x68; + + /* Read D18F0x06{0,8} (DF::Skt0CsTargetRemap0)/(DF::Skt0CsTargetRemap1) */ + if (df_indirect_read_broadcast(ctx->node_id, 0, offset, ®)) + return -EINVAL; + + /* Save 8 remap entries. */ + for (i = 0, j = 0; i < 8; i++, j++) + ctx->map.remap_array[i] = (reg >> (j * shift)) & mask; + + dst_node_id = dst_fabric_id & df_cfg.node_id_mask; + dst_node_id >>= df_cfg.node_id_shift; + + /* Read D18F2x090 (DF::Np2ChannelConfig) */ + if (df_indirect_read_broadcast(dst_node_id, 2, 0x90, ®)) + return -EINVAL; + + ctx->map.np2_bits = FIELD_GET(DF_LOG2_ADDR_64K_SPACE0, reg); + return 0; +} + +static int df2_get_dram_addr_map(struct addr_ctx *ctx) +{ + /* Read D18F0x110 (DramBaseAddress). */ + if (df_indirect_read_instance(ctx->node_id, 0, 0x110 + (8 * ctx->map.num), + ctx->inst_id, &ctx->map.base)) + return -EINVAL; + + /* Read D18F0x114 (DramLimitAddress). */ + if (df_indirect_read_instance(ctx->node_id, 0, 0x114 + (8 * ctx->map.num), + ctx->inst_id, &ctx->map.limit)) + return -EINVAL; + + return 0; +} + +static int df3_get_dram_addr_map(struct addr_ctx *ctx) +{ + if (df2_get_dram_addr_map(ctx)) + return -EINVAL; + + /* Read D18F0x3F8 (DfGlobalCtl). */ + if (df_indirect_read_instance(ctx->node_id, 0, 0x3F8, + ctx->inst_id, &ctx->map.ctl)) + return -EINVAL; + + return 0; +} + +static int df4_get_dram_addr_map(struct addr_ctx *ctx) +{ + u8 remap_sel, i, j, shift = 4, mask = 0xF; + u32 remap_reg; + + /* Read D18F7xE00 (DramBaseAddress). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0xE00 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.base)) + return -EINVAL; + + /* Read D18F7xE04 (DramLimitAddress). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0xE04 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.limit)) + return -EINVAL; + + /* Read D18F7xE08 (DramAddressCtl). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0xE08 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.ctl)) + return -EINVAL; + + /* Read D18F7xE0C (DramAddressIntlv). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0xE0C + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.intlv)) + return -EINVAL; + + /* Check if Remap Enable bit is valid. */ + if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl)) + return 0; + + /* Fill with bogus values, because '0' is a valid value. */ + memset(&ctx->map.remap_array, 0xFF, sizeof(ctx->map.remap_array)); + + /* Get Remap registers. */ + remap_sel = FIELD_GET(DF4_REMAP_SEL, ctx->map.ctl); + + /* Read D18F7x180 (CsTargetRemap0A). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x180 + (8 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save first 8 remap entries. 
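+ * Each register packs eight 4-bit entries: entry j sits at bits
+ * [4j+3:4j], hence the shift/mask loop below.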
*/ + for (i = 0, j = 0; i < 8; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + /* Read D18F7x184 (CsTargetRemap0B). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x184 + (8 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save next 8 remap entries. */ + for (i = 8, j = 0; i < 16; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + return 0; +} + +static int df4p5_get_dram_addr_map(struct addr_ctx *ctx) +{ + u8 remap_sel, i, j, shift = 5, mask = 0x1F; + u32 remap_reg; + + /* Read D18F7x200 (DramBaseAddress). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x200 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.base)) + return -EINVAL; + + /* Read D18F7x204 (DramLimitAddress). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x204 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.limit)) + return -EINVAL; + + /* Read D18F7x208 (DramAddressCtl). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x208 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.ctl)) + return -EINVAL; + + /* Read D18F7x20C (DramAddressIntlv). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x20C + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.intlv)) + return -EINVAL; + + /* Check if Remap Enable bit is valid. */ + if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl)) + return 0; + + /* Fill with bogus values, because '0' is a valid value. */ + memset(&ctx->map.remap_array, 0xFF, sizeof(ctx->map.remap_array)); + + /* Get Remap registers. */ + remap_sel = FIELD_GET(DF4p5_REMAP_SEL, ctx->map.ctl); + + /* Read D18F7x180 (CsTargetRemap0A). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x180 + (24 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save first 6 remap entries. */ + for (i = 0, j = 0; i < 6; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + /* Read D18F7x184 (CsTargetRemap0B). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x184 + (24 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save next 6 remap entries. */ + for (i = 6, j = 0; i < 12; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + /* Read D18F7x188 (CsTargetRemap0C). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x188 + (24 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save next 6 remap entries. */ + for (i = 12, j = 0; i < 18; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + return 0; +} + +static int get_dram_addr_map(struct addr_ctx *ctx) +{ + switch (df_cfg.rev) { + case DF2: return df2_get_dram_addr_map(ctx); + case DF3: + case DF3p5: return df3_get_dram_addr_map(ctx); + case DF4: return df4_get_dram_addr_map(ctx); + case DF4p5: return df4p5_get_dram_addr_map(ctx); + default: + atl_debug_on_bad_df_rev(); + return -EINVAL; + } +} + +static int get_coh_st_fabric_id(struct addr_ctx *ctx) +{ + u32 reg; + + /* + * On MI300 systems, the Coherent Station Fabric ID is derived + * later. And it does not depend on the register value. + */ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return 0; + + /* Read D18F0x50 (FabricBlockInstanceInformation3). 
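+ * The Fabric ID field sits at bits [19:8] before DF4.5 and at [15:8]
+ * on DF4.5; see DF2_COH_ST_FABRIC_ID and DF4p5_COH_ST_FABRIC_ID.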
*/ + if (df_indirect_read_instance(ctx->node_id, 0, 0x50, ctx->inst_id, ®)) + return -EINVAL; + + if (df_cfg.rev < DF4p5) + ctx->coh_st_fabric_id = FIELD_GET(DF2_COH_ST_FABRIC_ID, reg); + else + ctx->coh_st_fabric_id = FIELD_GET(DF4p5_COH_ST_FABRIC_ID, reg); + + return 0; +} + +static int find_normalized_offset(struct addr_ctx *ctx, u64 *norm_offset) +{ + u64 last_offset = 0; + int ret; + + for (ctx->map.num = 1; ctx->map.num < df_cfg.num_coh_st_maps; ctx->map.num++) { + ret = get_dram_offset(ctx, norm_offset); + if (ret < 0) + return ret; + + /* Continue search if this map's offset is not enabled. */ + if (!ret) + continue; + + /* Enabled offsets should never be 0. */ + if (*norm_offset == 0) { + atl_debug(ctx, "Enabled map %u offset is 0", ctx->map.num); + return -EINVAL; + } + + /* Offsets should always increase from one map to the next. */ + if (*norm_offset <= last_offset) { + atl_debug(ctx, "Map %u offset (0x%016llx) <= previous (0x%016llx)", + ctx->map.num, *norm_offset, last_offset); + return -EINVAL; + } + + /* Match if this map's offset is less than the current calculated address. */ + if (ctx->ret_addr >= *norm_offset) + break; + + last_offset = *norm_offset; + } + + /* + * Finished search without finding a match. + * Reset to map 0 and no offset. + */ + if (ctx->map.num >= df_cfg.num_coh_st_maps) { + ctx->map.num = 0; + *norm_offset = 0; + } + + return 0; +} + +static bool valid_map(struct addr_ctx *ctx) +{ + if (df_cfg.rev >= DF4) + return FIELD_GET(DF_ADDR_RANGE_VAL, ctx->map.ctl); + else + return FIELD_GET(DF_ADDR_RANGE_VAL, ctx->map.base); +} + +static int get_address_map_common(struct addr_ctx *ctx) +{ + u64 norm_offset = 0; + + if (get_coh_st_fabric_id(ctx)) + return -EINVAL; + + if (find_normalized_offset(ctx, &norm_offset)) + return -EINVAL; + + if (get_dram_addr_map(ctx)) + return -EINVAL; + + if (!valid_map(ctx)) + return -EINVAL; + + ctx->ret_addr -= norm_offset; + + return 0; +} + +static u8 get_num_intlv_chan(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + case NONE: + return 1; + case NOHASH_2CHAN: + case DF2_2CHAN_HASH: + case DF3_COD4_2CHAN_HASH: + case DF4_NPS4_2CHAN_HASH: + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + return 2; + case DF4_NPS4_3CHAN_HASH: + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + return 3; + case NOHASH_4CHAN: + case DF3_COD2_4CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4p5_NPS2_4CHAN_1K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + return 4; + case DF4_NPS2_5CHAN_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + return 5; + case DF3_6CHAN: + case DF4_NPS2_6CHAN_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + return 6; + case NOHASH_8CHAN: + case DF3_COD1_8CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + case MI3_HASH_8CHAN: + case DF4p5_NPS1_8CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + return 8; + case DF4_NPS1_10CHAN_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + return 10; + case DF4_NPS1_12CHAN_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + return 12; + case NOHASH_16CHAN: + case MI3_HASH_16CHAN: + case DF4p5_NPS1_16CHAN_1K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return 16; + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS0_24CHAN_2K_HASH: + return 24; + case NOHASH_32CHAN: + case MI3_HASH_32CHAN: + return 32; + default: + atl_debug_on_bad_intlv_mode(ctx); + return 0; + } +} + +static void calculate_intlv_bits(struct addr_ctx *ctx) +{ + ctx->map.num_intlv_chan = 
get_num_intlv_chan(ctx); + + ctx->map.total_intlv_chan = ctx->map.num_intlv_chan; + ctx->map.total_intlv_chan *= ctx->map.num_intlv_dies; + ctx->map.total_intlv_chan *= ctx->map.num_intlv_sockets; + + /* + * Get the number of bits needed to cover this many channels. + * order_base_2() rounds up automatically. + */ + ctx->map.total_intlv_bits = order_base_2(ctx->map.total_intlv_chan); +} + +static u8 get_intlv_bit_pos(struct addr_ctx *ctx) +{ + u8 addr_sel = 0; + + switch (df_cfg.rev) { + case DF2: + addr_sel = FIELD_GET(DF2_INTLV_ADDR_SEL, ctx->map.base); + break; + case DF3: + case DF3p5: + addr_sel = FIELD_GET(DF3_INTLV_ADDR_SEL, ctx->map.base); + break; + case DF4: + case DF4p5: + addr_sel = FIELD_GET(DF4_INTLV_ADDR_SEL, ctx->map.intlv); + break; + default: + atl_debug_on_bad_df_rev(); + break; + } + + /* Add '8' to get the 'interleave bit position'. */ + return addr_sel + 8; +} + +static u8 get_num_intlv_dies(struct addr_ctx *ctx) +{ + u8 dies = 0; + + switch (df_cfg.rev) { + case DF2: + dies = FIELD_GET(DF2_INTLV_NUM_DIES, ctx->map.limit); + break; + case DF3: + dies = FIELD_GET(DF3_INTLV_NUM_DIES, ctx->map.base); + break; + case DF3p5: + dies = FIELD_GET(DF3p5_INTLV_NUM_DIES, ctx->map.base); + break; + case DF4: + case DF4p5: + dies = FIELD_GET(DF4_INTLV_NUM_DIES, ctx->map.intlv); + break; + default: + atl_debug_on_bad_df_rev(); + break; + } + + /* Register value is log2, e.g. 0 -> 1 die, 1 -> 2 dies, etc. */ + return 1 << dies; +} + +static u8 get_num_intlv_sockets(struct addr_ctx *ctx) +{ + u8 sockets = 0; + + switch (df_cfg.rev) { + case DF2: + sockets = FIELD_GET(DF2_INTLV_NUM_SOCKETS, ctx->map.limit); + break; + case DF3: + case DF3p5: + sockets = FIELD_GET(DF2_INTLV_NUM_SOCKETS, ctx->map.base); + break; + case DF4: + case DF4p5: + sockets = FIELD_GET(DF4_INTLV_NUM_SOCKETS, ctx->map.intlv); + break; + default: + atl_debug_on_bad_df_rev(); + break; + } + + /* Register value is log2, e.g. 0 -> 1 sockets, 1 -> 2 sockets, etc. 
*/ + return 1 << sockets; +} + +static int get_global_map_data(struct addr_ctx *ctx) +{ + if (get_intlv_mode(ctx)) + return -EINVAL; + + if (ctx->map.intlv_mode == DF3_6CHAN && + df3_6ch_get_dram_addr_map(ctx)) + return -EINVAL; + + ctx->map.intlv_bit_pos = get_intlv_bit_pos(ctx); + ctx->map.num_intlv_dies = get_num_intlv_dies(ctx); + ctx->map.num_intlv_sockets = get_num_intlv_sockets(ctx); + calculate_intlv_bits(ctx); + + return 0; +} + +static void dump_address_map(struct dram_addr_map *map) +{ + u8 i; + + pr_debug("intlv_mode=0x%x", map->intlv_mode); + pr_debug("num=0x%x", map->num); + pr_debug("base=0x%x", map->base); + pr_debug("limit=0x%x", map->limit); + pr_debug("ctl=0x%x", map->ctl); + pr_debug("intlv=0x%x", map->intlv); + + for (i = 0; i < MAX_COH_ST_CHANNELS; i++) + pr_debug("remap_array[%u]=0x%x", i, map->remap_array[i]); + + pr_debug("intlv_bit_pos=%u", map->intlv_bit_pos); + pr_debug("num_intlv_chan=%u", map->num_intlv_chan); + pr_debug("num_intlv_dies=%u", map->num_intlv_dies); + pr_debug("num_intlv_sockets=%u", map->num_intlv_sockets); + pr_debug("total_intlv_chan=%u", map->total_intlv_chan); + pr_debug("total_intlv_bits=%u", map->total_intlv_bits); +} + +int get_address_map(struct addr_ctx *ctx) +{ + int ret; + + ret = get_address_map_common(ctx); + if (ret) + return ret; + + ret = get_global_map_data(ctx); + if (ret) + return ret; + + dump_address_map(&ctx->map); + + return ret; +} diff --git a/drivers/ras/amd/atl/reg_fields.h b/drivers/ras/amd/atl/reg_fields.h new file mode 100644 index 000000000000..9dcdf6e4a856 --- /dev/null +++ b/drivers/ras/amd/atl/reg_fields.h @@ -0,0 +1,606 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Address Translation Library + * + * reg_fields.h : Register field definitions + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +/* + * Notes on naming: + * 1) Use "DF_" prefix for fields that are the same for all revisions. + * 2) Use "DFx_" prefix for fields that differ between revisions. + * a) "x" is the first major revision where the new field appears. + * b) E.g., if DF2 and DF3 have the same field, then call it DF2. + * c) E.g., if DF3p5 and DF4 have the same field, then call it DF4. 
*/ + +/* + * Coherent Station Fabric ID + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x50 [Fabric Block Instance Information 3] + * DF2 BlockFabricId [19:8] + * DF3 BlockFabricId [19:8] + * DF3p5 BlockFabricId [19:8] + * DF4 BlockFabricId [19:8] + * DF4p5 BlockFabricId [15:8] + */ +#define DF2_COH_ST_FABRIC_ID GENMASK(19, 8) +#define DF4p5_COH_ST_FABRIC_ID GENMASK(15, 8) + +/* + * Component ID Mask + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F1x208 [System Fabric ID Mask 0] + * DF3 ComponentIdMask [9:0] + * + * D18F1x150 [System Fabric ID Mask 0] + * DF3p5 ComponentIdMask [15:0] + * + * D18F4x1B0 [System Fabric ID Mask 0] + * DF4 ComponentIdMask [15:0] + * DF4p5 ComponentIdMask [15:0] + */ +#define DF3_COMPONENT_ID_MASK GENMASK(9, 0) +#define DF4_COMPONENT_ID_MASK GENMASK(15, 0) + +/* + * Destination Fabric ID + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x114 [DRAM Limit Address] + * DF2 DstFabricID [7:0] + * DF3 DstFabricID [9:0] + * DF3p5 DstFabricID [11:0] + * + * D18F7xE08 [DRAM Address Control] + * DF4 DstFabricID [27:16] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 DstFabricID [23:16] + */ +#define DF2_DST_FABRIC_ID GENMASK(7, 0) +#define DF3_DST_FABRIC_ID GENMASK(9, 0) +#define DF3p5_DST_FABRIC_ID GENMASK(11, 0) +#define DF4_DST_FABRIC_ID GENMASK(27, 16) +#define DF4p5_DST_FABRIC_ID GENMASK(23, 16) + +/* + * Die ID Mask + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F1x208 [System Fabric ID Mask] + * DF2 DieIdMask [15:8] + * + * D18F1x20C [System Fabric ID Mask 1] + * DF3 DieIdMask [18:16] + * + * D18F1x158 [System Fabric ID Mask 2] + * DF3p5 DieIdMask [15:0] + * + * D18F4x1B8 [System Fabric ID Mask 2] + * DF4 DieIdMask [15:0] + * DF4p5 DieIdMask [15:0] + */ +#define DF2_DIE_ID_MASK GENMASK(15, 8) +#define DF3_DIE_ID_MASK GENMASK(18, 16) +#define DF4_DIE_ID_MASK GENMASK(15, 0) + +/* + * Die ID Shift + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F1x208 [System Fabric ID Mask] + * DF2 DieIdShift [27:24] + * + * DF3 N/A + * DF3p5 N/A + * DF4 N/A + * DF4p5 N/A + */ +#define DF2_DIE_ID_SHIFT GENMASK(27, 24) + +/* + * DRAM Address Range Valid + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM Base Address] + * DF2 AddrRngVal [0] + * DF3 AddrRngVal [0] + * DF3p5 AddrRngVal [0] + * + * D18F7xE08 [DRAM Address Control] + * DF4 AddrRngVal [0] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 AddrRngVal [0] + */ +#define DF_ADDR_RANGE_VAL BIT(0) + +/* + * DRAM Base Address + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM Base Address] + * DF2 DramBaseAddr [31:12] + * DF3 DramBaseAddr [31:12] + * DF3p5 DramBaseAddr [31:12] + * + * D18F7xE00 [DRAM Base Address] + * DF4 DramBaseAddr [27:0] + * + * D18F7x200 [DRAM Base Address] + * DF4p5 DramBaseAddr [27:0] + */ +#define DF2_BASE_ADDR GENMASK(31, 12) +#define DF4_BASE_ADDR GENMASK(27, 0) + +/* + * DRAM Hole Base + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F0x104 [DRAM Hole Control] + * DF2 DramHoleBase [31:24] + * DF3 DramHoleBase [31:24] + * DF3p5 DramHoleBase [31:24] + * + * D18F7x104 [DRAM Hole Control] + * DF4 DramHoleBase [31:24] + * DF4p5 DramHoleBase [31:24] + */ +#define DF_DRAM_HOLE_BASE_MASK GENMASK(31, 24) + +/* + * DRAM Limit Address + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + 
* D18F0x114 [DRAM Limit Address] + * DF2 DramLimitAddr [31:12] + * DF3 DramLimitAddr [31:12] + * DF3p5 DramLimitAddr [31:12] + * + * D18F7xE04 [DRAM Limit Address] + * DF4 DramLimitAddr [27:0] + * + * D18F7x204 [DRAM Limit Address] + * DF4p5 DramLimitAddr [27:0] + */ +#define DF2_DRAM_LIMIT_ADDR GENMASK(31, 12) +#define DF4_DRAM_LIMIT_ADDR GENMASK(27, 0) + +/* + * Hash Interleave Controls + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F0x3F8 [DF Global Control] + * DF3 GlbHashIntlvCtl64K [20] + * GlbHashIntlvCtl2M [21] + * GlbHashIntlvCtl1G [22] + * + * DF3p5 GlbHashIntlvCtl64K [20] + * GlbHashIntlvCtl2M [21] + * GlbHashIntlvCtl1G [22] + * + * D18F7xE08 [DRAM Address Control] + * DF4 HashIntlvCtl64K [8] + * HashIntlvCtl2M [9] + * HashIntlvCtl1G [10] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 HashIntlvCtl4K [7] + * HashIntlvCtl64K [8] + * HashIntlvCtl2M [9] + * HashIntlvCtl1G [10] + * HashIntlvCtl1T [15] + */ +#define DF3_HASH_CTL_64K BIT(20) +#define DF3_HASH_CTL_2M BIT(21) +#define DF3_HASH_CTL_1G BIT(22) +#define DF4_HASH_CTL_64K BIT(8) +#define DF4_HASH_CTL_2M BIT(9) +#define DF4_HASH_CTL_1G BIT(10) +#define DF4p5_HASH_CTL_4K BIT(7) +#define DF4p5_HASH_CTL_1T BIT(15) + +/* + * High Address Offset + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x1B4 [DRAM Offset] + * DF2 HiAddrOffset [31:20] + * DF3 HiAddrOffset [31:12] + * DF3p5 HiAddrOffset [31:12] + * + * D18F7x140 [DRAM Offset] + * DF4 HiAddrOffset [24:1] + * DF4p5 HiAddrOffset [24:1] + * MI300 HiAddrOffset [31:1] + */ +#define DF2_HI_ADDR_OFFSET GENMASK(31, 20) +#define DF3_HI_ADDR_OFFSET GENMASK(31, 12) + +/* Follow reference code by including reserved bits for simplicity. */ +#define DF4_HI_ADDR_OFFSET GENMASK(31, 1) + +/* + * High Address Offset Enable + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x1B4 [DRAM Offset] + * DF2 HiAddrOffsetEn [0] + * DF3 HiAddrOffsetEn [0] + * DF3p5 HiAddrOffsetEn [0] + * + * D18F7x140 [DRAM Offset] + * DF4 HiAddrOffsetEn [0] + * DF4p5 HiAddrOffsetEn [0] + */ +#define DF_HI_ADDR_OFFSET_EN BIT(0) + +/* + * Interleave Address Select + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM Base Address] + * DF2 IntLvAddrSel [10:8] + * DF3 IntLvAddrSel [11:9] + * DF3p5 IntLvAddrSel [11:9] + * + * D18F7xE0C [DRAM Address Interleave] + * DF4 IntLvAddrSel [2:0] + * + * D18F7x20C [DRAM Address Interleave] + * DF4p5 IntLvAddrSel [2:0] + */ +#define DF2_INTLV_ADDR_SEL GENMASK(10, 8) +#define DF3_INTLV_ADDR_SEL GENMASK(11, 9) +#define DF4_INTLV_ADDR_SEL GENMASK(2, 0) + +/* + * Interleave Number of Channels + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM Base Address] + * DF2 IntLvNumChan [7:4] + * DF3 IntLvNumChan [5:2] + * DF3p5 IntLvNumChan [6:2] + * + * D18F7xE0C [DRAM Address Interleave] + * DF4 IntLvNumChan [8:4] + * + * D18F7x20C [DRAM Address Interleave] + * DF4p5 IntLvNumChan [9:4] + */ +#define DF2_INTLV_NUM_CHAN GENMASK(7, 4) +#define DF3_INTLV_NUM_CHAN GENMASK(5, 2) +#define DF3p5_INTLV_NUM_CHAN GENMASK(6, 2) +#define DF4_INTLV_NUM_CHAN GENMASK(8, 4) +#define DF4p5_INTLV_NUM_CHAN GENMASK(9, 4) + +/* + * Interleave Number of Dies + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x114 [DRAM Limit Address] + * DF2 IntLvNumDies [11:10] + * + * D18F0x110 [DRAM Base Address] + * DF3 IntLvNumDies [7:6] + * DF3p5 IntLvNumDies [7] + * + * D18F7xE0C [DRAM Address 
Interleave] + * DF4 IntLvNumDies [13:12] + * + * D18F7x20C [DRAM Address Interleave] + * DF4p5 IntLvNumDies [13:12] + */ +#define DF2_INTLV_NUM_DIES GENMASK(11, 10) +#define DF3_INTLV_NUM_DIES GENMASK(7, 6) +#define DF3p5_INTLV_NUM_DIES BIT(7) +#define DF4_INTLV_NUM_DIES GENMASK(13, 12) + +/* + * Interleave Number of Sockets + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x114 [DRAM Limit Address] + * DF2 IntLvNumSockets [8] + * + * D18F0x110 [DRAM Base Address] + * DF3 IntLvNumSockets [8] + * DF3p5 IntLvNumSockets [8] + * + * D18F7xE0C [DRAM Address Interleave] + * DF4 IntLvNumSockets [18] + * + * D18F7x20C [DRAM Address Interleave] + * DF4p5 IntLvNumSockets [18] + */ +#define DF2_INTLV_NUM_SOCKETS BIT(8) +#define DF4_INTLV_NUM_SOCKETS BIT(18) + +/* + * Legacy MMIO Hole Enable + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM Base Address] + * DF2 LgcyMmioHoleEn [1] + * DF3 LgcyMmioHoleEn [1] + * DF3p5 LgcyMmioHoleEn [1] + * + * D18F7xE08 [DRAM Address Control] + * DF4 LgcyMmioHoleEn [1] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 LgcyMmioHoleEn [1] + */ +#define DF_LEGACY_MMIO_HOLE_EN BIT(1) + +/* + * Log2 Address 64K Space 0 + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F2x90 [Non-power-of-2 channel Configuration Register for COH_ST DRAM Address Maps] + * DF3 Log2Addr64KSpace0 [5:0] + * + * DF3p5 N/A + * DF4 N/A + * DF4p5 N/A + */ +#define DF_LOG2_ADDR_64K_SPACE0 GENMASK(5, 0) + +/* + * Major Revision + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * DF3 N/A + * DF3p5 N/A + * + * D18F0x040 [Fabric Block Instance Count] + * DF4 MajorRevision [27:24] + * DF4p5 MajorRevision [27:24] + */ +#define DF_MAJOR_REVISION GENMASK(27, 24) + +/* + * Minor Revision + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * DF3 N/A + * DF3p5 N/A + * + * D18F0x040 [Fabric Block Instance Count] + * DF4 MinorRevision [23:16] + * DF4p5 MinorRevision [23:16] + */ +#define DF_MINOR_REVISION GENMASK(23, 16) + +/* + * Node ID Mask + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F1x208 [System Fabric ID Mask 0] + * DF3 NodeIdMask [25:16] + * + * D18F1x150 [System Fabric ID Mask 0] + * DF3p5 NodeIdMask [31:16] + * + * D18F4x1B0 [System Fabric ID Mask 0] + * DF4 NodeIdMask [31:16] + * DF4p5 NodeIdMask [31:16] + */ +#define DF3_NODE_ID_MASK GENMASK(25, 16) +#define DF4_NODE_ID_MASK GENMASK(31, 16) + +/* + * Node ID Shift + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F1x20C [System Fabric ID Mask 1] + * DF3 NodeIdShift [3:0] + * + * D18F1x154 [System Fabric ID Mask 1] + * DF3p5 NodeIdShift [3:0] + * + * D18F4x1B4 [System Fabric ID Mask 1] + * DF4 NodeIdShift [3:0] + * DF4p5 NodeIdShift [3:0] + */ +#define DF3_NODE_ID_SHIFT GENMASK(3, 0) + +/* + * Remap Enable + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * DF3 N/A + * DF3p5 N/A + * + * D18F7xE08 [DRAM Address Control] + * DF4 RemapEn [4] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 RemapEn [4] + */ +#define DF4_REMAP_EN BIT(4) + +/* + * Remap Select + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * DF3 N/A + * DF3p5 N/A + * + * D18F7xE08 [DRAM Address Control] + * DF4 RemapSel [7:5] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 RemapSel [6:5] + */ +#define 
DF4_REMAP_SEL GENMASK(7, 5) +#define DF4p5_REMAP_SEL GENMASK(6, 5) + +/* + * Socket ID Mask + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F1x208 [System Fabric ID Mask] + * DF2 SocketIdMask [23:16] + * + * D18F1x20C [System Fabric ID Mask 1] + * DF3 SocketIdMask [26:24] + * + * D18F1x158 [System Fabric ID Mask 2] + * DF3p5 SocketIdMask [31:16] + * + * D18F4x1B8 [System Fabric ID Mask 2] + * DF4 SocketIdMask [31:16] + * DF4p5 SocketIdMask [31:16] + */ +#define DF2_SOCKET_ID_MASK GENMASK(23, 16) +#define DF3_SOCKET_ID_MASK GENMASK(26, 24) +#define DF4_SOCKET_ID_MASK GENMASK(31, 16) + +/* + * Socket ID Shift + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F1x208 [System Fabric ID Mask] + * DF2 SocketIdShift [31:28] + * + * D18F1x20C [System Fabric ID Mask 1] + * DF3 SocketIdShift [9:8] + * + * D18F1x158 [System Fabric ID Mask 2] + * DF3p5 SocketIdShift [11:8] + * + * D18F4x1B4 [System Fabric ID Mask 1] + * DF4 SocketIdShift [11:8] + * DF4p5 SocketIdShift [11:8] + */ +#define DF2_SOCKET_ID_SHIFT GENMASK(31, 28) +#define DF3_SOCKET_ID_SHIFT GENMASK(9, 8) +#define DF4_SOCKET_ID_SHIFT GENMASK(11, 8) diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c new file mode 100644 index 000000000000..701349e84942 --- /dev/null +++ b/drivers/ras/amd/atl/system.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * system.c : Functions to read and save system-wide data + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +int determine_node_id(struct addr_ctx *ctx, u8 socket_id, u8 die_id) +{ + u16 socket_id_bits, die_id_bits; + + if (socket_id > 0 && df_cfg.socket_id_mask == 0) { + atl_debug(ctx, "Invalid socket inputs: socket_id=%u socket_id_mask=0x%x", + socket_id, df_cfg.socket_id_mask); + return -EINVAL; + } + + /* Do each step independently to avoid shift out-of-bounds issues. */ + socket_id_bits = socket_id; + socket_id_bits <<= df_cfg.socket_id_shift; + socket_id_bits &= df_cfg.socket_id_mask; + + if (die_id > 0 && df_cfg.die_id_mask == 0) { + atl_debug(ctx, "Invalid die inputs: die_id=%u die_id_mask=0x%x", + die_id, df_cfg.die_id_mask); + return -EINVAL; + } + + /* Do each step independently to avoid shift out-of-bounds issues. 
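+ * e.g. a hypothetical die_id = 1 with die_id_mask = 0x700 and
+ * die_id_shift = 8 contributes 0x100 to the Node ID bits.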
*/ + die_id_bits = die_id; + die_id_bits <<= df_cfg.die_id_shift; + die_id_bits &= df_cfg.die_id_mask; + + ctx->node_id = (socket_id_bits | die_id_bits) >> df_cfg.node_id_shift; + return 0; +} + +static void df2_get_masks_shifts(u32 mask0) +{ + df_cfg.socket_id_shift = FIELD_GET(DF2_SOCKET_ID_SHIFT, mask0); + df_cfg.socket_id_mask = FIELD_GET(DF2_SOCKET_ID_MASK, mask0); + df_cfg.die_id_shift = FIELD_GET(DF2_DIE_ID_SHIFT, mask0); + df_cfg.die_id_mask = FIELD_GET(DF2_DIE_ID_MASK, mask0); + df_cfg.node_id_shift = df_cfg.die_id_shift; + df_cfg.node_id_mask = df_cfg.socket_id_mask | df_cfg.die_id_mask; + df_cfg.component_id_mask = ~df_cfg.node_id_mask; +} + +static void df3_get_masks_shifts(u32 mask0, u32 mask1) +{ + df_cfg.component_id_mask = FIELD_GET(DF3_COMPONENT_ID_MASK, mask0); + df_cfg.node_id_mask = FIELD_GET(DF3_NODE_ID_MASK, mask0); + + df_cfg.node_id_shift = FIELD_GET(DF3_NODE_ID_SHIFT, mask1); + df_cfg.socket_id_shift = FIELD_GET(DF3_SOCKET_ID_SHIFT, mask1); + df_cfg.socket_id_mask = FIELD_GET(DF3_SOCKET_ID_MASK, mask1); + df_cfg.die_id_mask = FIELD_GET(DF3_DIE_ID_MASK, mask1); +} + +static void df3p5_get_masks_shifts(u32 mask0, u32 mask1, u32 mask2) +{ + df_cfg.component_id_mask = FIELD_GET(DF4_COMPONENT_ID_MASK, mask0); + df_cfg.node_id_mask = FIELD_GET(DF4_NODE_ID_MASK, mask0); + + df_cfg.node_id_shift = FIELD_GET(DF3_NODE_ID_SHIFT, mask1); + df_cfg.socket_id_shift = FIELD_GET(DF4_SOCKET_ID_SHIFT, mask1); + + df_cfg.socket_id_mask = FIELD_GET(DF4_SOCKET_ID_MASK, mask2); + df_cfg.die_id_mask = FIELD_GET(DF4_DIE_ID_MASK, mask2); +} + +static void df4_get_masks_shifts(u32 mask0, u32 mask1, u32 mask2) +{ + df3p5_get_masks_shifts(mask0, mask1, mask2); + + if (!(df_cfg.flags.socket_id_shift_quirk && df_cfg.socket_id_shift == 1)) + return; + + df_cfg.socket_id_shift = 0; + df_cfg.socket_id_mask = 1; + df_cfg.die_id_shift = 0; + df_cfg.die_id_mask = 0; + df_cfg.node_id_shift = 8; + df_cfg.node_id_mask = 0x100; +} + +static int df4_get_fabric_id_mask_registers(void) +{ + u32 mask0, mask1, mask2; + + /* Read D18F4x1B0 (SystemFabricIdMask0) */ + if (df_indirect_read_broadcast(0, 4, 0x1B0, &mask0)) + return -EINVAL; + + /* Read D18F4x1B4 (SystemFabricIdMask1) */ + if (df_indirect_read_broadcast(0, 4, 0x1B4, &mask1)) + return -EINVAL; + + /* Read D18F4x1B8 (SystemFabricIdMask2) */ + if (df_indirect_read_broadcast(0, 4, 0x1B8, &mask2)) + return -EINVAL; + + df4_get_masks_shifts(mask0, mask1, mask2); + return 0; +} + +static int df4_determine_df_rev(u32 reg) +{ + df_cfg.rev = FIELD_GET(DF_MINOR_REVISION, reg) < 5 ? DF4 : DF4p5; + + /* Check for special cases or quirks based on Device/Vendor IDs.*/ + + /* Read D18F0x000 (DeviceVendorId0) */ + if (df_indirect_read_broadcast(0, 0, 0, ®)) + return -EINVAL; + + if (reg == DF_FUNC0_ID_ZEN4_SERVER) + df_cfg.flags.socket_id_shift_quirk = 1; + + if (reg == DF_FUNC0_ID_MI300) { + df_cfg.flags.heterogeneous = 1; + + if (get_addr_hash_mi300()) + return -EINVAL; + } + + return df4_get_fabric_id_mask_registers(); +} + +static int determine_df_rev_legacy(void) +{ + u32 fabric_id_mask0, fabric_id_mask1, fabric_id_mask2; + + /* + * Check for DF3.5. + * + * Component ID Mask must be non-zero. Register D18F1x150 is + * reserved pre-DF3.5, so value will be Read-as-Zero. + */ + + /* Read D18F1x150 (SystemFabricIdMask0). 
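+ * Its ComponentIdMask field occupies bits [15:0], matching the
+ * DF4_COMPONENT_ID_MASK definition used for the check below.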
*/ + if (df_indirect_read_broadcast(0, 1, 0x150, &fabric_id_mask0)) + return -EINVAL; + + if (FIELD_GET(DF4_COMPONENT_ID_MASK, fabric_id_mask0)) { + df_cfg.rev = DF3p5; + + /* Read D18F1x154 (SystemFabricIdMask1) */ + if (df_indirect_read_broadcast(0, 1, 0x154, &fabric_id_mask1)) + return -EINVAL; + + /* Read D18F1x158 (SystemFabricIdMask2) */ + if (df_indirect_read_broadcast(0, 1, 0x158, &fabric_id_mask2)) + return -EINVAL; + + df3p5_get_masks_shifts(fabric_id_mask0, fabric_id_mask1, fabric_id_mask2); + return 0; + } + + /* + * Check for DF3. + * + * Component ID Mask must be non-zero. Field is Read-as-Zero on DF2. + */ + + /* Read D18F1x208 (SystemFabricIdMask). */ + if (df_indirect_read_broadcast(0, 1, 0x208, &fabric_id_mask0)) + return -EINVAL; + + if (FIELD_GET(DF3_COMPONENT_ID_MASK, fabric_id_mask0)) { + df_cfg.rev = DF3; + + /* Read D18F1x20C (SystemFabricIdMask1) */ + if (df_indirect_read_broadcast(0, 1, 0x20C, &fabric_id_mask1)) + return -EINVAL; + + df3_get_masks_shifts(fabric_id_mask0, fabric_id_mask1); + return 0; + } + + /* Default to DF2. */ + df_cfg.rev = DF2; + df2_get_masks_shifts(fabric_id_mask0); + return 0; +} + +static int determine_df_rev(void) +{ + u32 reg; + u8 rev; + + if (df_cfg.rev != UNKNOWN) + return 0; + + /* Read D18F0x40 (FabricBlockInstanceCount). */ + if (df_indirect_read_broadcast(0, 0, 0x40, ®)) + return -EINVAL; + + /* + * Revision fields added for DF4 and later. + * + * Major revision of '0' is found pre-DF4. Field is Read-as-Zero. + */ + rev = FIELD_GET(DF_MAJOR_REVISION, reg); + if (!rev) + return determine_df_rev_legacy(); + + /* + * Fail out for major revisions other than '4'. + * + * Explicit support should be added for newer systems to avoid issues. + */ + if (rev == 4) + return df4_determine_df_rev(reg); + + return -EINVAL; +} + +static void get_num_maps(void) +{ + switch (df_cfg.rev) { + case DF2: + case DF3: + case DF3p5: + df_cfg.num_coh_st_maps = 2; + break; + case DF4: + case DF4p5: + df_cfg.num_coh_st_maps = 4; + break; + default: + atl_debug_on_bad_df_rev(); + } +} + +static void apply_node_id_shift(void) +{ + if (df_cfg.rev == DF2) + return; + + df_cfg.die_id_shift = df_cfg.node_id_shift; + df_cfg.die_id_mask <<= df_cfg.node_id_shift; + df_cfg.socket_id_mask <<= df_cfg.node_id_shift; + df_cfg.socket_id_shift += df_cfg.node_id_shift; +} + +static void dump_df_cfg(void) +{ + pr_debug("rev=0x%x", df_cfg.rev); + + pr_debug("component_id_mask=0x%x", df_cfg.component_id_mask); + pr_debug("die_id_mask=0x%x", df_cfg.die_id_mask); + pr_debug("node_id_mask=0x%x", df_cfg.node_id_mask); + pr_debug("socket_id_mask=0x%x", df_cfg.socket_id_mask); + + pr_debug("die_id_shift=0x%x", df_cfg.die_id_shift); + pr_debug("node_id_shift=0x%x", df_cfg.node_id_shift); + pr_debug("socket_id_shift=0x%x", df_cfg.socket_id_shift); + + pr_debug("num_coh_st_maps=%u", df_cfg.num_coh_st_maps); + + pr_debug("flags.legacy_ficaa=%u", df_cfg.flags.legacy_ficaa); + pr_debug("flags.socket_id_shift_quirk=%u", df_cfg.flags.socket_id_shift_quirk); +} + +int get_df_system_info(void) +{ + if (determine_df_rev()) { + pr_warn("amd_atl: Failed to determine DF Revision"); + df_cfg.rev = UNKNOWN; + return -EINVAL; + } + + apply_node_id_shift(); + + get_num_maps(); + + dump_df_cfg(); + + return 0; +} diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c new file mode 100644 index 000000000000..59b6169093f7 --- /dev/null +++ b/drivers/ras/amd/atl/umc.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * umc.c : 
Unified Memory Controller (UMC) topology helpers + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +/* + * MI300 has a fixed, model-specific mapping between a UMC instance and + * its related Data Fabric Coherent Station instance. + * + * The MCA_IPID_UMC[InstanceId] field holds a unique identifier for the + * UMC instance within a Node. Use this to find the appropriate Coherent + * Station ID. + * + * Redundant bits were removed from the map below. + */ +static const u16 umc_coh_st_map[32] = { + 0x393, 0x293, 0x193, 0x093, + 0x392, 0x292, 0x192, 0x092, + 0x391, 0x291, 0x191, 0x091, + 0x390, 0x290, 0x190, 0x090, + 0x793, 0x693, 0x593, 0x493, + 0x792, 0x692, 0x592, 0x492, + 0x791, 0x691, 0x591, 0x491, + 0x790, 0x690, 0x590, 0x490, +}; + +#define UMC_ID_MI300 GENMASK(23, 12) +static u8 get_coh_st_inst_id_mi300(struct atl_err *err) +{ + u16 umc_id = FIELD_GET(UMC_ID_MI300, err->ipid); + u8 i; + + for (i = 0; i < ARRAY_SIZE(umc_coh_st_map); i++) { + if (umc_id == umc_coh_st_map[i]) + break; + } + + WARN_ON_ONCE(i >= ARRAY_SIZE(umc_coh_st_map)); + + return i; +} + +/* XOR the bits in @val. */ +static u16 bitwise_xor_bits(u16 val) +{ + u16 tmp = 0; + u8 i; + + for (i = 0; i < 16; i++) + tmp ^= (val >> i) & 0x1; + + return tmp; +} + +struct xor_bits { + bool xor_enable; + u16 col_xor; + u32 row_xor; +}; + +#define NUM_BANK_BITS 4 + +static struct { + /* UMC::CH::AddrHashBank */ + struct xor_bits bank[NUM_BANK_BITS]; + + /* UMC::CH::AddrHashPC */ + struct xor_bits pc; + + /* UMC::CH::AddrHashPC2 */ + u8 bank_xor; +} addr_hash; + +#define MI300_UMC_CH_BASE 0x90000 +#define MI300_ADDR_HASH_BANK0 (MI300_UMC_CH_BASE + 0xC8) +#define MI300_ADDR_HASH_PC (MI300_UMC_CH_BASE + 0xE0) +#define MI300_ADDR_HASH_PC2 (MI300_UMC_CH_BASE + 0xE4) + +#define ADDR_HASH_XOR_EN BIT(0) +#define ADDR_HASH_COL_XOR GENMASK(13, 1) +#define ADDR_HASH_ROW_XOR GENMASK(31, 14) +#define ADDR_HASH_BANK_XOR GENMASK(5, 0) + +/* + * Read UMC::CH::AddrHash{Bank,PC,PC2} registers to get XOR bits used + * for hashing. Do this during module init, since the values will not + * change during run time. + * + * These registers are instantiated for each UMC across each AMD Node. + * However, they should be identically programmed due to the fixed hardware + * design of MI300 systems. So read the values from Node 0 UMC 0 and keep a + * single global structure for simplicity. + */ +int get_addr_hash_mi300(void) +{ + u32 temp; + int ret; + u8 i; + + for (i = 0; i < NUM_BANK_BITS; i++) { + ret = amd_smn_read(0, MI300_ADDR_HASH_BANK0 + (i * 4), &temp); + if (ret) + return ret; + + addr_hash.bank[i].xor_enable = FIELD_GET(ADDR_HASH_XOR_EN, temp); + addr_hash.bank[i].col_xor = FIELD_GET(ADDR_HASH_COL_XOR, temp); + addr_hash.bank[i].row_xor = FIELD_GET(ADDR_HASH_ROW_XOR, temp); + } + + ret = amd_smn_read(0, MI300_ADDR_HASH_PC, &temp); + if (ret) + return ret; + + addr_hash.pc.xor_enable = FIELD_GET(ADDR_HASH_XOR_EN, temp); + addr_hash.pc.col_xor = FIELD_GET(ADDR_HASH_COL_XOR, temp); + addr_hash.pc.row_xor = FIELD_GET(ADDR_HASH_ROW_XOR, temp); + + ret = amd_smn_read(0, MI300_ADDR_HASH_PC2, &temp); + if (ret) + return ret; + + addr_hash.bank_xor = FIELD_GET(ADDR_HASH_BANK_XOR, temp); + + return 0; +} + +/* + * MI300 systems report a DRAM address in MCA_ADDR for DRAM ECC errors. This must + * be converted to the intermediate normalized address (NA) before translating to a + * system physical address. + * + * The DRAM address includes bank, row, and column. 
Also included are bits for
+ * pseudochannel (PC) and stack ID (SID).
+ *
+ * Abbreviations: (S)tack ID, (P)seudochannel, (R)ow, (B)ank, (C)olumn, (Z)ero
+ *
+ * The MCA address format is as follows:
+ *      MCA_ADDR[27:0] = {S[1:0], P[0], R[14:0], B[3:0], C[4:0], Z[0]}
+ *
+ * The normalized address format is fixed in hardware and is as follows:
+ *      NA[30:0] = {S[1:0], R[13:0], C4, B[1:0], B[3:2], C[3:2], P, C[1:0], Z[4:0]}
+ *
+ * Additionally, the PC and Bank bits may be hashed. This must be accounted for before
+ * reconstructing the normalized address.
+ */
+#define MI300_UMC_MCA_COL       GENMASK(5, 1)
+#define MI300_UMC_MCA_BANK      GENMASK(9, 6)
+#define MI300_UMC_MCA_ROW       GENMASK(24, 10)
+#define MI300_UMC_MCA_PC        BIT(25)
+#define MI300_UMC_MCA_SID       GENMASK(27, 26)
+
+#define MI300_NA_COL_1_0        GENMASK(6, 5)
+#define MI300_NA_PC             BIT(7)
+#define MI300_NA_COL_3_2        GENMASK(9, 8)
+#define MI300_NA_BANK_3_2       GENMASK(11, 10)
+#define MI300_NA_BANK_1_0       GENMASK(13, 12)
+#define MI300_NA_COL_4          BIT(14)
+#define MI300_NA_ROW            GENMASK(28, 15)
+#define MI300_NA_SID            GENMASK(30, 29)
+
+static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr)
+{
+        u16 i, col, row, bank, pc, sid, temp;
+
+        col = FIELD_GET(MI300_UMC_MCA_COL, addr);
+        bank = FIELD_GET(MI300_UMC_MCA_BANK, addr);
+        row = FIELD_GET(MI300_UMC_MCA_ROW, addr);
+        pc = FIELD_GET(MI300_UMC_MCA_PC, addr);
+        sid = FIELD_GET(MI300_UMC_MCA_SID, addr);
+
+        /* Calculate hash for each Bank bit. */
+        for (i = 0; i < NUM_BANK_BITS; i++) {
+                if (!addr_hash.bank[i].xor_enable)
+                        continue;
+
+                temp = bitwise_xor_bits(col & addr_hash.bank[i].col_xor);
+                temp ^= bitwise_xor_bits(row & addr_hash.bank[i].row_xor);
+                bank ^= temp << i;
+        }
+
+        /* Calculate hash for PC bit. */
+        if (addr_hash.pc.xor_enable) {
+                /* Bits SID[1:0] act as Bank[6:5] for PC hash, so apply them here. */
+                bank |= sid << 5;
+
+                temp = bitwise_xor_bits(col & addr_hash.pc.col_xor);
+                temp ^= bitwise_xor_bits(row & addr_hash.pc.row_xor);
+                temp ^= bitwise_xor_bits(bank & addr_hash.bank_xor);
+                pc ^= temp;
+
+                /* Drop SID bits for the sake of debug printing later. */
+                bank &= 0x1F;
+        }
+
+        /* Reconstruct the normalized address starting with NA[4:0] = 0 */
+        addr = 0;
+
+        /* NA[6:5] = Column[1:0] */
+        temp = col & 0x3;
+        addr |= FIELD_PREP(MI300_NA_COL_1_0, temp);
+
+        /* NA[7] = PC */
+        addr |= FIELD_PREP(MI300_NA_PC, pc);
+
+        /* NA[9:8] = Column[3:2] */
+        temp = (col >> 2) & 0x3;
+        addr |= FIELD_PREP(MI300_NA_COL_3_2, temp);
+
+        /* NA[11:10] = Bank[3:2] */
+        temp = (bank >> 2) & 0x3;
+        addr |= FIELD_PREP(MI300_NA_BANK_3_2, temp);
+
+        /* NA[13:12] = Bank[1:0] */
+        temp = bank & 0x3;
+        addr |= FIELD_PREP(MI300_NA_BANK_1_0, temp);
+
+        /* NA[14] = Column[4] */
+        temp = (col >> 4) & 0x1;
+        addr |= FIELD_PREP(MI300_NA_COL_4, temp);
+
+        /* NA[28:15] = Row[13:0] */
+        addr |= FIELD_PREP(MI300_NA_ROW, row);
+
+        /* NA[30:29] = SID[1:0] */
+        addr |= FIELD_PREP(MI300_NA_SID, sid);
+
+        pr_debug("Addr=0x%016lx", addr);
+        pr_debug("Bank=%u Row=%u Column=%u PC=%u SID=%u", bank, row, col, pc, sid);
+
+        return addr;
+}
+
+/*
+ * When a DRAM ECC error occurs on MI300 systems, it is recommended to retire
+ * all memory within that DRAM row. This applies to the memory within a DRAM
+ * bank.
+ *
+ * To find the memory addresses, loop through permutations of the DRAM column
+ * bits and find the System Physical address of each. The column bits are used
+ * to calculate the intermediate Normalized address, so all permutations should
+ * be checked.
+ * + * See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats. + */ +#define MI300_NUM_COL BIT(HWEIGHT(MI300_UMC_MCA_COL)) +static void retire_row_mi300(struct atl_err *a_err) +{ + unsigned long addr; + struct page *p; + u8 col; + + for (col = 0; col < MI300_NUM_COL; col++) { + a_err->addr &= ~MI300_UMC_MCA_COL; + a_err->addr |= FIELD_PREP(MI300_UMC_MCA_COL, col); + + addr = amd_convert_umc_mca_addr_to_sys_addr(a_err); + if (IS_ERR_VALUE(addr)) + continue; + + addr = PHYS_PFN(addr); + + /* + * Skip invalid or already poisoned pages to avoid unnecessary + * error messages from memory_failure(). + */ + p = pfn_to_online_page(addr); + if (!p) + continue; + + if (PageHWPoison(p)) + continue; + + memory_failure(addr, 0); + } +} + +void amd_retire_dram_row(struct atl_err *a_err) +{ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return retire_row_mi300(a_err); +} +EXPORT_SYMBOL_GPL(amd_retire_dram_row); + +static unsigned long get_addr(unsigned long addr) +{ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return convert_dram_to_norm_addr_mi300(addr); + + return addr; +} + +#define MCA_IPID_INST_ID_HI GENMASK_ULL(47, 44) +static u8 get_die_id(struct atl_err *err) +{ + /* + * AMD Node ID is provided in MCA_IPID[InstanceIdHi], and this + * needs to be divided by 4 to get the internal Die ID. + */ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) { + u8 node_id = FIELD_GET(MCA_IPID_INST_ID_HI, err->ipid); + + return node_id >> 2; + } + + /* + * For CPUs, this is the AMD Node ID modulo the number + * of AMD Nodes per socket. + */ + return topology_amd_node_id(err->cpu) % topology_amd_nodes_per_pkg(); +} + +#define UMC_CHANNEL_NUM GENMASK(31, 20) +static u8 get_coh_st_inst_id(struct atl_err *err) +{ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return get_coh_st_inst_id_mi300(err); + + return FIELD_GET(UMC_CHANNEL_NUM, err->ipid); +} + +unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err) +{ + u8 socket_id = topology_physical_package_id(err->cpu); + u8 coh_st_inst_id = get_coh_st_inst_id(err); + unsigned long addr = get_addr(err->addr); + u8 die_id = get_die_id(err); + + pr_debug("socket_id=0x%x die_id=0x%x coh_st_inst_id=0x%x addr=0x%016lx", + socket_id, die_id, coh_st_inst_id, addr); + + return norm_to_sys_addr(socket_id, die_id, coh_st_inst_id, addr); +} diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c new file mode 100644 index 000000000000..2f4ac9591c8f --- /dev/null +++ b/drivers/ras/amd/fmpm.c @@ -0,0 +1,1013 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * FRU (Field-Replaceable Unit) Memory Poison Manager + * + * Copyright (c) 2024, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Authors: + * Naveen Krishna Chatradhi + * Muralidhara M K + * Yazen Ghannam + * + * Implementation notes, assumptions, and limitations: + * + * - FRU memory poison section and memory poison descriptor definitions are not yet + * included in the UEFI specification. So they are defined here. Afterwards, they + * may be moved to linux/cper.h, if appropriate. + * + * - Platforms based on AMD MI300 systems will be the first to use these structures. + * There are a number of assumptions made here that will need to be generalized + * to support other platforms. + * + * AMD MI300-based platform(s) assumptions: + * - Memory errors are reported through x86 MCA. + * - The entire DRAM row containing a memory error should be retired. + * - There will be (1) FRU memory poison section per CPER. 
+ * - The FRU will be the CPU package (processor socket).
+ * - The default number of memory poison descriptor entries should be (8).
+ * - The platform will use ACPI ERST for persistent storage.
+ * - All FRU records should be saved to persistent storage. Module init will
+ *   fail if any FRU record is not successfully written.
+ *
+ * - Boot time memory retirement may occur later than ideal due to dependencies
+ *   on other libraries and drivers. This leaves a gap where bad memory may be
+ *   accessed during early boot stages.
+ *
+ * - Enough memory should be pre-allocated for each FRU record to be able to hold
+ *   the expected number of descriptor entries. This, mostly empty, record is
+ *   written to storage during init time. Subsequent writes to the same record
+ *   should allow the Platform to update the stored record in-place. Otherwise,
+ *   if the record is extended, then the Platform may need to perform costly memory
+ *   management operations on the storage. For example, the Platform may spend time
+ *   in Firmware copying and invalidating memory on a relatively slow SPI ROM.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cper.h>
+#include <linux/ras.h>
+#include <linux/cpu.h>
+
+#include <acpi/apei.h>
+
+#include <asm/cpu_device_id.h>
+#include <asm/mce.h>
+
+#include "../debugfs.h"
+
+#define INVALID_CPU                     UINT_MAX
+
+/* Validation Bits */
+#define FMP_VALID_ARCH_TYPE             BIT_ULL(0)
+#define FMP_VALID_ARCH                  BIT_ULL(1)
+#define FMP_VALID_ID_TYPE               BIT_ULL(2)
+#define FMP_VALID_ID                    BIT_ULL(3)
+#define FMP_VALID_LIST_ENTRIES          BIT_ULL(4)
+#define FMP_VALID_LIST                  BIT_ULL(5)
+
+/* FRU Architecture Types */
+#define FMP_ARCH_TYPE_X86_CPUID_1_EAX   0
+
+/* FRU ID Types */
+#define FMP_ID_TYPE_X86_PPIN            0
+
+/* FRU Memory Poison Section */
+struct cper_sec_fru_mem_poison {
+        u32 checksum;
+        u64 validation_bits;
+        u32 fru_arch_type;
+        u64 fru_arch;
+        u32 fru_id_type;
+        u64 fru_id;
+        u32 nr_entries;
+} __packed;
+
+/* FRU Descriptor ID Types */
+#define FPD_HW_ID_TYPE_MCA_IPID         0
+
+/* FRU Descriptor Address Types */
+#define FPD_ADDR_TYPE_MCA_ADDR          0
+
+/* Memory Poison Descriptor */
+struct cper_fru_poison_desc {
+        u64 timestamp;
+        u32 hw_id_type;
+        u64 hw_id;
+        u32 addr_type;
+        u64 addr;
+} __packed;
+
+/* Collection of headers and sections for easy pointer use. */
+struct fru_rec {
+        struct cper_record_header       hdr;
+        struct cper_section_descriptor  sec_desc;
+        struct cper_sec_fru_mem_poison  fmp;
+        struct cper_fru_poison_desc     entries[];
+} __packed;
+
+/*
+ * Pointers to the complete CPER record of each FRU.
+ *
+ * Memory allocation will include padded space for descriptor entries.
+ */
+static struct fru_rec **fru_records;
+
+/* system physical addresses array */
+static u64 *spa_entries;
+
+#define INVALID_SPA     ~0ULL
+
+static struct dentry *fmpm_dfs_dir;
+static struct dentry *fmpm_dfs_entries;
+
+#define CPER_CREATOR_FMP                                                \
+        GUID_INIT(0xcd5c2993, 0xf4b2, 0x41b2, 0xb5, 0xd4, 0xf9, 0xc3,   \
+                  0xa0, 0x33, 0x08, 0x75)
+
+#define CPER_SECTION_TYPE_FMP                                           \
+        GUID_INIT(0x5e4706c1, 0x5356, 0x48c6, 0x93, 0x0b, 0x52, 0xf2,   \
+                  0x12, 0x0a, 0x44, 0x58)
+
+/**
+ * DOC: max_nr_entries (byte)
+ * Maximum number of descriptor entries possible for each FRU.
+ *
+ * Values between '1' and '255' are valid.
+ * No input or '0' will default to FMPM_DEFAULT_MAX_NR_ENTRIES.
+ */
+static u8 max_nr_entries;
+module_param(max_nr_entries, byte, 0644);
+MODULE_PARM_DESC(max_nr_entries,
+                 "Maximum number of memory poison descriptor entries per FRU");
+
+#define FMPM_DEFAULT_MAX_NR_ENTRIES     8
+
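Given the packed layouts above, the storage footprint of one record is easy to estimate. A standalone sketch, assuming the UEFI CPER sizes of 128 bytes for the record header and 72 bytes for the section descriptor (the FMP section is 40 bytes and each poison descriptor 32 bytes as defined here):

#include <stdio.h>

int main(void)
{
        unsigned int hdr = 128, sec_desc = 72, fmp = 40, desc = 32;
        unsigned int max_nr_entries = 8;        /* FMPM_DEFAULT_MAX_NR_ENTRIES */

        /* Mirrors: max_rec_len = sizeof(struct fru_rec) + nr * sizeof(desc) */
        printf("max_rec_len = %u bytes\n",
               hdr + sec_desc + fmp + desc * max_nr_entries);
        return 0;
}

For the default eight entries this comes to 496 bytes per FRU record, comfortably within what an ERST backing store can update in place.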
+/* Maximum number of FRUs in the system. */
+#define FMPM_MAX_NR_FRU                 256
+static unsigned int max_nr_fru;
+
+/* Total length of record including headers and list of descriptor entries. */
+static size_t max_rec_len;
+
+/* Total number of SPA entries across all FRUs. */
+static unsigned int spa_nr_entries;
+
+/*
+ * Protect the local records cache in fru_records and prevent concurrent
+ * writes to storage. This is only needed after init once notifier block
+ * registration is done.
+ *
+ * The majority of a record is fixed at module init and will not change
+ * during run time. The entries within a record will be updated as new
+ * errors are reported. The mutex should be held whenever the entries are
+ * accessed during run time.
+ */
+static DEFINE_MUTEX(fmpm_update_mutex);
+
+/* Stop before reading fru_records[max_nr_fru], which is out of bounds. */
+#define for_each_fru(i, rec)                                            \
+        for (i = 0; i < max_nr_fru && (rec = fru_records[i], true); i++)
+
+static inline u32 get_fmp_len(struct fru_rec *rec)
+{
+        return rec->sec_desc.section_length - sizeof(struct cper_section_descriptor);
+}
+
+static struct fru_rec *get_fru_record(u64 fru_id)
+{
+        struct fru_rec *rec;
+        unsigned int i;
+
+        for_each_fru(i, rec) {
+                if (rec->fmp.fru_id == fru_id)
+                        return rec;
+        }
+
+        pr_debug("Record not found for FRU 0x%016llx\n", fru_id);
+
+        return NULL;
+}
+
+/*
+ * Sum up all bytes within the FRU Memory Poison Section including the Memory
+ * Poison Descriptor entries.
+ *
+ * Don't include the old checksum here. It's a u32 value, so summing each of its
+ * bytes will give the wrong total.
+ */
+static u32 do_fmp_checksum(struct cper_sec_fru_mem_poison *fmp, u32 len)
+{
+        u32 checksum = 0;
+        u8 *buf, *end;
+
+        /* Skip old checksum; sum the remaining (len - sizeof(u32)) bytes. */
+        buf = (u8 *)fmp + sizeof(u32);
+        end = (u8 *)fmp + len;
+
+        while (buf < end)
+                checksum += (u8)(*(buf++));
+
+        return checksum;
+}
+
+static int update_record_on_storage(struct fru_rec *rec)
+{
+        u32 len, checksum;
+        int ret;
+
+        /* Calculate a new checksum. */
+        len = get_fmp_len(rec);
+
+        /* Get the current total. */
+        checksum = do_fmp_checksum(&rec->fmp, len);
+
+        /* Use the complement value. */
+        rec->fmp.checksum = -checksum;
+
+        pr_debug("Writing to storage\n");
+
+        ret = erst_write(&rec->hdr);
+        if (ret) {
+                pr_warn("Storage update failed for FRU 0x%016llx\n", rec->fmp.fru_id);
+
+                if (ret == -ENOSPC)
+                        pr_warn("Not enough space on storage\n");
+        }
+
+        return ret;
+}
+
+static bool rec_has_valid_entries(struct fru_rec *rec)
+{
+        if (!(rec->fmp.validation_bits & FMP_VALID_LIST_ENTRIES))
+                return false;
+
+        if (!(rec->fmp.validation_bits & FMP_VALID_LIST))
+                return false;
+
+        return true;
+}
+
+static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_desc *new)
+{
+        /*
+         * Ignore timestamp field.
+         * The same physical error may be reported multiple times due to stuck bits, etc.
+         *
+         * Also, order the checks from most->least likely to fail to shortcut the code.
+ */ + if (old->addr != new->addr) + return false; + + if (old->hw_id != new->hw_id) + return false; + + if (old->addr_type != new->addr_type) + return false; + + if (old->hw_id_type != new->hw_id_type) + return false; + + return true; +} + +static bool rec_has_fpd(struct fru_rec *rec, struct cper_fru_poison_desc *fpd) +{ + unsigned int i; + + for (i = 0; i < rec->fmp.nr_entries; i++) { + struct cper_fru_poison_desc *fpd_i = &rec->entries[i]; + + if (fpds_equal(fpd_i, fpd)) { + pr_debug("Found duplicate record\n"); + return true; + } + } + + return false; +} + +static void save_spa(struct fru_rec *rec, unsigned int entry, + u64 addr, u64 id, unsigned int cpu) +{ + unsigned int i, fru_idx, spa_entry; + struct atl_err a_err; + unsigned long spa; + + if (entry >= max_nr_entries) { + pr_warn_once("FRU descriptor entry %d out-of-bounds (max: %d)\n", + entry, max_nr_entries); + return; + } + + /* spa_nr_entries is always multiple of max_nr_entries */ + for (i = 0; i < spa_nr_entries; i += max_nr_entries) { + fru_idx = i / max_nr_entries; + if (fru_records[fru_idx] == rec) + break; + } + + if (i >= spa_nr_entries) { + pr_warn_once("FRU record %d not found\n", i); + return; + } + + spa_entry = i + entry; + if (spa_entry >= spa_nr_entries) { + pr_warn_once("spa_entries[] index out-of-bounds\n"); + return; + } + + memset(&a_err, 0, sizeof(struct atl_err)); + + a_err.addr = addr; + a_err.ipid = id; + a_err.cpu = cpu; + + spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err); + if (IS_ERR_VALUE(spa)) { + pr_debug("Failed to get system address\n"); + return; + } + + spa_entries[spa_entry] = spa; + pr_debug("fru_idx: %u, entry: %u, spa_entry: %u, spa: 0x%016llx\n", + fru_idx, entry, spa_entry, spa_entries[spa_entry]); +} + +static void update_fru_record(struct fru_rec *rec, struct mce *m) +{ + struct cper_sec_fru_mem_poison *fmp = &rec->fmp; + struct cper_fru_poison_desc fpd, *fpd_dest; + u32 entry = 0; + + mutex_lock(&fmpm_update_mutex); + + memset(&fpd, 0, sizeof(struct cper_fru_poison_desc)); + + fpd.timestamp = m->time; + fpd.hw_id_type = FPD_HW_ID_TYPE_MCA_IPID; + fpd.hw_id = m->ipid; + fpd.addr_type = FPD_ADDR_TYPE_MCA_ADDR; + fpd.addr = m->addr; + + /* This is the first entry, so just save it. */ + if (!rec_has_valid_entries(rec)) + goto save_fpd; + + /* Ignore already recorded errors. */ + if (rec_has_fpd(rec, &fpd)) + goto out_unlock; + + if (rec->fmp.nr_entries >= max_nr_entries) { + pr_warn("Exceeded number of entries for FRU 0x%016llx\n", rec->fmp.fru_id); + goto out_unlock; + } + + entry = fmp->nr_entries; + +save_fpd: + save_spa(rec, entry, m->addr, m->ipid, m->extcpu); + fpd_dest = &rec->entries[entry]; + memcpy(fpd_dest, &fpd, sizeof(struct cper_fru_poison_desc)); + + fmp->nr_entries = entry + 1; + fmp->validation_bits |= FMP_VALID_LIST_ENTRIES; + fmp->validation_bits |= FMP_VALID_LIST; + + pr_debug("Updated FRU 0x%016llx entry #%u\n", fmp->fru_id, entry); + + update_record_on_storage(rec); + +out_unlock: + mutex_unlock(&fmpm_update_mutex); +} + +static void retire_dram_row(u64 addr, u64 id, u32 cpu) +{ + struct atl_err a_err; + + memset(&a_err, 0, sizeof(struct atl_err)); + + a_err.addr = addr; + a_err.ipid = id; + a_err.cpu = cpu; + + amd_retire_dram_row(&a_err); +} + +static int fru_handle_mem_poison(struct notifier_block *nb, unsigned long val, void *data) +{ + struct mce *m = (struct mce *)data; + struct fru_rec *rec; + + if (!mce_is_memory_error(m)) + return NOTIFY_DONE; + + retire_dram_row(m->addr, m->ipid, m->extcpu); + + /* + * An invalid FRU ID should not happen on real errors. 
But it + * could happen from software error injection, etc. + */ + rec = get_fru_record(m->ppin); + if (!rec) + return NOTIFY_DONE; + + update_fru_record(rec, m); + + return NOTIFY_OK; +} + +static struct notifier_block fru_mem_poison_nb = { + .notifier_call = fru_handle_mem_poison, + .priority = MCE_PRIO_LOWEST, +}; + +static void retire_mem_fmp(struct fru_rec *rec) +{ + struct cper_sec_fru_mem_poison *fmp = &rec->fmp; + unsigned int i, cpu; + + for (i = 0; i < fmp->nr_entries; i++) { + struct cper_fru_poison_desc *fpd = &rec->entries[i]; + unsigned int err_cpu = INVALID_CPU; + + if (fpd->hw_id_type != FPD_HW_ID_TYPE_MCA_IPID) + continue; + + if (fpd->addr_type != FPD_ADDR_TYPE_MCA_ADDR) + continue; + + cpus_read_lock(); + for_each_online_cpu(cpu) { + if (topology_ppin(cpu) == fmp->fru_id) { + err_cpu = cpu; + break; + } + } + cpus_read_unlock(); + + if (err_cpu == INVALID_CPU) + continue; + + retire_dram_row(fpd->addr, fpd->hw_id, err_cpu); + save_spa(rec, i, fpd->addr, fpd->hw_id, err_cpu); + } +} + +static void retire_mem_records(void) +{ + struct fru_rec *rec; + unsigned int i; + + for_each_fru(i, rec) { + if (!rec_has_valid_entries(rec)) + continue; + + retire_mem_fmp(rec); + } +} + +/* Set the CPER Record Header and CPER Section Descriptor fields. */ +static void set_rec_fields(struct fru_rec *rec) +{ + struct cper_section_descriptor *sec_desc = &rec->sec_desc; + struct cper_record_header *hdr = &rec->hdr; + + memcpy(hdr->signature, CPER_SIG_RECORD, CPER_SIG_SIZE); + hdr->revision = CPER_RECORD_REV; + hdr->signature_end = CPER_SIG_END; + + /* + * Currently, it is assumed that there is one FRU Memory Poison + * section per CPER. But this may change for other implementations. + */ + hdr->section_count = 1; + + /* The logged errors are recoverable. Otherwise, they'd never make it here. 
 */
+        hdr->error_severity = CPER_SEV_RECOVERABLE;
+
+        hdr->validation_bits = 0;
+        hdr->record_length = max_rec_len;
+        hdr->creator_id = CPER_CREATOR_FMP;
+        hdr->notification_type = CPER_NOTIFY_MCE;
+        hdr->record_id = cper_next_record_id();
+        hdr->flags = CPER_HW_ERROR_FLAGS_PREVERR;
+
+        sec_desc->section_offset = sizeof(struct cper_record_header);
+        sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header);
+        sec_desc->revision = CPER_SEC_REV;
+        sec_desc->validation_bits = 0;
+        sec_desc->flags = CPER_SEC_PRIMARY;
+        sec_desc->section_type = CPER_SECTION_TYPE_FMP;
+        sec_desc->section_severity = CPER_SEV_RECOVERABLE;
+}
+
+static int save_new_records(void)
+{
+        DECLARE_BITMAP(new_records, FMPM_MAX_NR_FRU);
+        struct fru_rec *rec;
+        unsigned int i;
+        int ret = 0;
+
+        /* Clear the on-stack bitmap before tracking which records are new. */
+        bitmap_zero(new_records, FMPM_MAX_NR_FRU);
+
+        for_each_fru(i, rec) {
+                if (rec->hdr.record_length)
+                        continue;
+
+                set_rec_fields(rec);
+
+                ret = update_record_on_storage(rec);
+                if (ret)
+                        goto out_clear;
+
+                set_bit(i, new_records);
+        }
+
+        return ret;
+
+out_clear:
+        for_each_fru(i, rec) {
+                if (!test_bit(i, new_records))
+                        continue;
+
+                erst_clear(rec->hdr.record_id);
+        }
+
+        return ret;
+}
+
+/* Check that the record matches expected types for the current system. */
+static bool fmp_is_usable(struct fru_rec *rec)
+{
+        struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
+        u64 cpuid;
+
+        pr_debug("Validation bits: 0x%016llx\n", fmp->validation_bits);
+
+        if (!(fmp->validation_bits & FMP_VALID_ARCH_TYPE)) {
+                pr_debug("Arch type unknown\n");
+                return false;
+        }
+
+        if (fmp->fru_arch_type != FMP_ARCH_TYPE_X86_CPUID_1_EAX) {
+                pr_debug("Arch type not 'x86 Family/Model/Stepping'\n");
+                return false;
+        }
+
+        if (!(fmp->validation_bits & FMP_VALID_ARCH)) {
+                pr_debug("Arch value unknown\n");
+                return false;
+        }
+
+        cpuid = cpuid_eax(1);
+        if (fmp->fru_arch != cpuid) {
+                pr_debug("Arch value mismatch: record = 0x%016llx, system = 0x%016llx\n",
+                         fmp->fru_arch, cpuid);
+                return false;
+        }
+
+        if (!(fmp->validation_bits & FMP_VALID_ID_TYPE)) {
+                pr_debug("FRU ID type unknown\n");
+                return false;
+        }
+
+        if (fmp->fru_id_type != FMP_ID_TYPE_X86_PPIN) {
+                pr_debug("FRU ID type is not 'x86 PPIN'\n");
+                return false;
+        }
+
+        if (!(fmp->validation_bits & FMP_VALID_ID)) {
+                pr_debug("FRU ID value unknown\n");
+                return false;
+        }
+
+        return true;
+}
+
+static bool fmp_is_valid(struct fru_rec *rec)
+{
+        struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
+        u32 checksum, len;
+
+        len = get_fmp_len(rec);
+        if (len < sizeof(struct cper_sec_fru_mem_poison)) {
+                pr_debug("fmp length is too small\n");
+                return false;
+        }
+
+        /* Checksum must sum to zero for the entire section. */
+        checksum = do_fmp_checksum(fmp, len) + fmp->checksum;
+        if (checksum) {
+                pr_debug("fmp checksum failed: sum = 0x%x\n", checksum);
+                print_hex_dump_debug("fmp record: ", DUMP_PREFIX_NONE, 16, 1, fmp, len, false);
+                return false;
+        }
+
+        if (!fmp_is_usable(rec))
+                return false;
+
+        return true;
+}
+
+static struct fru_rec *get_valid_record(struct fru_rec *old)
+{
+        struct fru_rec *new;
+
+        if (!fmp_is_valid(old)) {
+                pr_debug("Ignoring invalid record\n");
+                return NULL;
+        }
+
+        new = get_fru_record(old->fmp.fru_id);
+        if (!new)
+                pr_debug("Ignoring record for absent FRU\n");
+
+        return new;
+}
+
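The complement-checksum convention shared by update_record_on_storage() and fmp_is_valid() can be exercised in isolation. A standalone sketch with a fabricated 64-byte section (sizes and contents are made up):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Sum the bytes of a buffer, skipping the leading 32-bit checksum field. */
static uint32_t sum_bytes(const uint8_t *buf, size_t len)
{
        uint32_t sum = 0;

        for (size_t i = sizeof(uint32_t); i < len; i++)
                sum += buf[i];
        return sum;
}

int main(void)
{
        uint8_t rec[64] = { 0 };
        uint32_t checksum;

        memset(rec + 4, 0xab, sizeof(rec) - 4);         /* fake section payload */

        checksum = -sum_bytes(rec, sizeof(rec));        /* writer stores complement */
        memcpy(rec, &checksum, sizeof(checksum));

        /* Reader: stored checksum plus recomputed byte sum must be zero. */
        printf("valid=%d\n", sum_bytes(rec, sizeof(rec)) + checksum == 0);
        return 0;
}

Because the writer stores the negated byte sum, a reader that recomputes the sum and adds the stored checksum must land exactly on zero; any single flipped byte in storage breaks that invariant.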
+ */ +static int get_saved_records(void) +{ + struct fru_rec *old, *new; + u64 record_id; + int ret, pos; + ssize_t len; + + /* + * Assume saved records match current max size. + * + * However, this may not be true depending on module parameters. + */ + old = kmalloc(max_rec_len, GFP_KERNEL); + if (!old) { + ret = -ENOMEM; + goto out; + } + + ret = erst_get_record_id_begin(&pos); + if (ret < 0) + goto out_end; + + while (!erst_get_record_id_next(&pos, &record_id)) { + if (record_id == APEI_ERST_INVALID_RECORD_ID) + goto out_end; + /* + * Make sure to clear temporary buffer between reads to avoid + * leftover data from records of various sizes. + */ + memset(old, 0, max_rec_len); + + len = erst_read_record(record_id, &old->hdr, max_rec_len, + sizeof(struct fru_rec), &CPER_CREATOR_FMP); + if (len < 0) + continue; + + if (len > max_rec_len) { + pr_debug("Found record larger than max_rec_len\n"); + continue; + } + + new = get_valid_record(old); + if (!new) + erst_clear(record_id); + + /* Restore the record */ + memcpy(new, old, len); + } + +out_end: + erst_get_record_id_end(); + kfree(old); +out: + return ret; +} + +static void set_fmp_fields(struct fru_rec *rec, unsigned int cpu) +{ + struct cper_sec_fru_mem_poison *fmp = &rec->fmp; + + fmp->fru_arch_type = FMP_ARCH_TYPE_X86_CPUID_1_EAX; + fmp->validation_bits |= FMP_VALID_ARCH_TYPE; + + /* Assume all CPUs in the system have the same value for now. */ + fmp->fru_arch = cpuid_eax(1); + fmp->validation_bits |= FMP_VALID_ARCH; + + fmp->fru_id_type = FMP_ID_TYPE_X86_PPIN; + fmp->validation_bits |= FMP_VALID_ID_TYPE; + + fmp->fru_id = topology_ppin(cpu); + fmp->validation_bits |= FMP_VALID_ID; +} + +static int init_fmps(void) +{ + struct fru_rec *rec; + unsigned int i, cpu; + int ret = 0; + + for_each_fru(i, rec) { + unsigned int fru_cpu = INVALID_CPU; + + cpus_read_lock(); + for_each_online_cpu(cpu) { + if (topology_physical_package_id(cpu) == i) { + fru_cpu = cpu; + break; + } + } + cpus_read_unlock(); + + if (fru_cpu == INVALID_CPU) { + pr_debug("Failed to find matching CPU for FRU #%u\n", i); + ret = -ENODEV; + break; + } + + set_fmp_fields(rec, fru_cpu); + } + + return ret; +} + +static int get_system_info(void) +{ + /* Only load on MI300A systems for now. */ + if (!(boot_cpu_data.x86_model >= 0x90 && + boot_cpu_data.x86_model <= 0x9f)) + return -ENODEV; + + if (!cpu_feature_enabled(X86_FEATURE_AMD_PPIN)) { + pr_debug("PPIN feature not available\n"); + return -ENODEV; + } + + /* Use CPU socket as FRU for MI300 systems. 
*/ + max_nr_fru = topology_max_packages(); + if (!max_nr_fru) + return -ENODEV; + + if (max_nr_fru > FMPM_MAX_NR_FRU) { + pr_warn("Too many FRUs to manage: found: %u, max: %u\n", + max_nr_fru, FMPM_MAX_NR_FRU); + return -ENODEV; + } + + if (!max_nr_entries) + max_nr_entries = FMPM_DEFAULT_MAX_NR_ENTRIES; + + spa_nr_entries = max_nr_fru * max_nr_entries; + + max_rec_len = sizeof(struct fru_rec); + max_rec_len += sizeof(struct cper_fru_poison_desc) * max_nr_entries; + + pr_info("max FRUs: %u, max entries: %u, max record length: %lu\n", + max_nr_fru, max_nr_entries, max_rec_len); + + return 0; +} + +static void free_records(void) +{ + struct fru_rec *rec; + int i; + + for_each_fru(i, rec) + kfree(rec); + + kfree(fru_records); + kfree(spa_entries); +} + +static int allocate_records(void) +{ + int i, ret = 0; + + fru_records = kcalloc(max_nr_fru, sizeof(struct fru_rec *), GFP_KERNEL); + if (!fru_records) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < max_nr_fru; i++) { + fru_records[i] = kzalloc(max_rec_len, GFP_KERNEL); + if (!fru_records[i]) { + ret = -ENOMEM; + goto out_free; + } + } + + spa_entries = kcalloc(spa_nr_entries, sizeof(u64), GFP_KERNEL); + if (!spa_entries) { + ret = -ENOMEM; + goto out_free; + } + + for (i = 0; i < spa_nr_entries; i++) + spa_entries[i] = INVALID_SPA; + + return ret; + +out_free: + while (--i >= 0) + kfree(fru_records[i]); + + kfree(fru_records); +out: + return ret; +} + +static void *fmpm_start(struct seq_file *f, loff_t *pos) +{ + if (*pos >= (spa_nr_entries + 1)) + return NULL; + return pos; +} + +static void *fmpm_next(struct seq_file *f, void *data, loff_t *pos) +{ + if (++(*pos) >= (spa_nr_entries + 1)) + return NULL; + return pos; +} + +static void fmpm_stop(struct seq_file *f, void *data) +{ +} + +#define SHORT_WIDTH 8 +#define U64_WIDTH 18 +#define TIMESTAMP_WIDTH 19 +#define LONG_WIDTH 24 +#define U64_PAD (LONG_WIDTH - U64_WIDTH) +#define TS_PAD (LONG_WIDTH - TIMESTAMP_WIDTH) +static int fmpm_show(struct seq_file *f, void *data) +{ + unsigned int fru_idx, entry, spa_entry, line; + struct cper_fru_poison_desc *fpd; + struct fru_rec *rec; + + line = *(loff_t *)data; + if (line == 0) { + seq_printf(f, "%-*s", SHORT_WIDTH, "fru_idx"); + seq_printf(f, "%-*s", LONG_WIDTH, "fru_id"); + seq_printf(f, "%-*s", SHORT_WIDTH, "entry"); + seq_printf(f, "%-*s", LONG_WIDTH, "timestamp"); + seq_printf(f, "%-*s", LONG_WIDTH, "hw_id"); + seq_printf(f, "%-*s", LONG_WIDTH, "addr"); + seq_printf(f, "%-*s", LONG_WIDTH, "spa"); + goto out_newline; + } + + spa_entry = line - 1; + fru_idx = spa_entry / max_nr_entries; + entry = spa_entry % max_nr_entries; + + rec = fru_records[fru_idx]; + if (!rec) + goto out; + + seq_printf(f, "%-*u", SHORT_WIDTH, fru_idx); + seq_printf(f, "0x%016llx%-*s", rec->fmp.fru_id, U64_PAD, ""); + seq_printf(f, "%-*u", SHORT_WIDTH, entry); + + mutex_lock(&fmpm_update_mutex); + + if (entry >= rec->fmp.nr_entries) { + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + goto out_unlock; + } + + fpd = &rec->entries[entry]; + + seq_printf(f, "%ptT%-*s", &fpd->timestamp, TS_PAD, ""); + seq_printf(f, "0x%016llx%-*s", fpd->hw_id, U64_PAD, ""); + seq_printf(f, "0x%016llx%-*s", fpd->addr, U64_PAD, ""); + + if (spa_entries[spa_entry] == INVALID_SPA) + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + else + seq_printf(f, "0x%016llx%-*s", spa_entries[spa_entry], U64_PAD, ""); + +out_unlock: + mutex_unlock(&fmpm_update_mutex); +out_newline: + 
seq_putc(f, '\n');
+out:
+        return 0;
+}
+
+static const struct seq_operations fmpm_seq_ops = {
+        .start  = fmpm_start,
+        .next   = fmpm_next,
+        .stop   = fmpm_stop,
+        .show   = fmpm_show,
+};
+
+static int fmpm_open(struct inode *inode, struct file *file)
+{
+        return seq_open(file, &fmpm_seq_ops);
+}
+
+static const struct file_operations fmpm_fops = {
+        .open           = fmpm_open,
+        .release        = seq_release,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+};
+
+static void setup_debugfs(void)
+{
+        struct dentry *dfs = ras_get_debugfs_root();
+
+        if (!dfs)
+                return;
+
+        fmpm_dfs_dir = debugfs_create_dir("fmpm", dfs);
+        if (!fmpm_dfs_dir)
+                return;
+
+        fmpm_dfs_entries = debugfs_create_file("entries", 0400, fmpm_dfs_dir, NULL, &fmpm_fops);
+        if (!fmpm_dfs_entries)
+                debugfs_remove(fmpm_dfs_dir);
+}
+
+static const struct x86_cpu_id fmpm_cpuids[] = {
+        X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL),
+        { }
+};
+MODULE_DEVICE_TABLE(x86cpu, fmpm_cpuids);
+
+static int __init fru_mem_poison_init(void)
+{
+        int ret;
+
+        if (!x86_match_cpu(fmpm_cpuids)) {
+                ret = -ENODEV;
+                goto out;
+        }
+
+        if (erst_disable) {
+                pr_debug("ERST not available\n");
+                ret = -ENODEV;
+                goto out;
+        }
+
+        ret = get_system_info();
+        if (ret)
+                goto out;
+
+        ret = allocate_records();
+        if (ret)
+                goto out;
+
+        ret = init_fmps();
+        if (ret)
+                goto out_free;
+
+        ret = get_saved_records();
+        if (ret)
+                goto out_free;
+
+        ret = save_new_records();
+        if (ret)
+                goto out_free;
+
+        setup_debugfs();
+
+        retire_mem_records();
+
+        mce_register_decode_chain(&fru_mem_poison_nb);
+
+        pr_info("FRU Memory Poison Manager initialized\n");
+        return 0;
+
+out_free:
+        free_records();
+out:
+        return ret;
+}
+
+static void __exit fru_mem_poison_exit(void)
+{
+        mce_unregister_decode_chain(&fru_mem_poison_nb);
+        debugfs_remove(fmpm_dfs_dir);
+        free_records();
+}
+
+module_init(fru_mem_poison_init);
+module_exit(fru_mem_poison_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FRU Memory Poison Manager");
diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index 321af498ee11..e440b15fbabc 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -480,9 +480,15 @@ DEFINE_SHOW_ATTRIBUTE(array);
 
 static int __init create_debugfs_nodes(void)
 {
-        struct dentry *d, *pfn, *decay, *count, *array;
+        struct dentry *d, *pfn, *decay, *count, *array, *dfs;
 
-        d = debugfs_create_dir("cec", ras_debugfs_dir);
+        dfs = ras_get_debugfs_root();
+        if (!dfs) {
+                pr_warn("Error getting RAS debugfs root!\n");
+                return -1;
+        }
+
+        d = debugfs_create_dir("cec", dfs);
         if (!d) {
                 pr_warn("Error creating cec debugfs node!\n");
                 return -1;
diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c
index ffb973c328e3..42afd3de68b2 100644
--- a/drivers/ras/debugfs.c
+++ b/drivers/ras/debugfs.c
@@ -3,10 +3,16 @@
 #include <linux/ras.h>
 #include "debugfs.h"
 
-struct dentry *ras_debugfs_dir;
+static struct dentry *ras_debugfs_dir;
 
 static atomic_t trace_count = ATOMIC_INIT(0);
 
+struct dentry *ras_get_debugfs_root(void)
+{
+        return ras_debugfs_dir;
+}
+EXPORT_SYMBOL_GPL(ras_get_debugfs_root);
+
 int ras_userspace_consumers(void)
 {
         return atomic_read(&trace_count);
diff --git a/drivers/ras/debugfs.h b/drivers/ras/debugfs.h
index c07443b462ad..4749ccdeeba1 100644
--- a/drivers/ras/debugfs.h
+++ b/drivers/ras/debugfs.h
@@ -4,6 +4,6 @@
 
 #include <linux/debugfs.h>
 
-extern struct dentry *ras_debugfs_dir;
+struct dentry *ras_get_debugfs_root(void);
 
 #endif /* __RAS_DEBUGFS_H__ */
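The accessor above removes direct linkage against the ras_debugfs_dir variable, so in-tree consumers only need the function. A minimal sketch of a hypothetical consumer module (names and the relative include path are illustrative, not part of this patch):

// Hypothetical consumer of ras_get_debugfs_root(); sketch only.
#include <linux/debugfs.h>
#include <linux/module.h>

#include "../debugfs.h"         /* for ras_get_debugfs_root() */

static struct dentry *mydrv_dir;

static int __init mydrv_init(void)
{
        struct dentry *dfs = ras_get_debugfs_root();

        /* The RAS debugfs root may be absent, e.g. if debugfs is disabled. */
        if (!dfs)
                return -ENODEV;

        mydrv_dir = debugfs_create_dir("mydrv", dfs);
        return 0;
}

static void __exit mydrv_exit(void)
{
        debugfs_remove(mydrv_dir);
}

module_init(mydrv_init);
module_exit(mydrv_exit);
MODULE_LICENSE("GPL");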
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 95540ea8dd9d..a6e4792a1b2e 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -10,6 +10,37 @@
 #include <linux/ras.h>
 #include <linux/uuid.h>
 
+#if IS_ENABLED(CONFIG_AMD_ATL)
+/*
+ * Once set, this function pointer should never be unset.
+ *
+ * The library module will set this pointer if it successfully loads. The module
+ * should not be unloaded except for testing and debug purposes.
+ */
+static unsigned long (*amd_atl_umc_na_to_spa)(struct atl_err *err);
+
+void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *))
+{
+        amd_atl_umc_na_to_spa = f;
+}
+EXPORT_SYMBOL_GPL(amd_atl_register_decoder);
+
+void amd_atl_unregister_decoder(void)
+{
+        amd_atl_umc_na_to_spa = NULL;
+}
+EXPORT_SYMBOL_GPL(amd_atl_unregister_decoder);
+
+unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err)
+{
+        if (!amd_atl_umc_na_to_spa)
+                return -EINVAL;
+
+        return amd_atl_umc_na_to_spa(err);
+}
+EXPORT_SYMBOL_GPL(amd_convert_umc_mca_addr_to_sys_addr);
+#endif /* CONFIG_AMD_ATL */
+
 #define CREATE_TRACE_POINTS
 #define TRACE_INCLUDE_PATH ../../include/ras
 #include <ras/ras_event.h>
diff --git a/include/linux/ras.h b/include/linux/ras.h
index 1f4048bf2674..a64182bc72ad 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -25,6 +25,7 @@ void log_non_standard_event(const guid_t *sec_type,
                             const guid_t *fru_id, const char *fru_text,
                             const u8 sev, const u8 *err, const u32 len);
 void log_arm_hw_error(struct cper_sec_proc_arm *err);
+
 #else
 static inline void
 log_non_standard_event(const guid_t *sec_type,
@@ -35,4 +36,21 @@ static inline void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
 
 #endif
 
+struct atl_err {
+        u64 addr;
+        u64 ipid;
+        u32 cpu;
+};
+
+#if IS_ENABLED(CONFIG_AMD_ATL)
+void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *));
+void amd_atl_unregister_decoder(void);
+void amd_retire_dram_row(struct atl_err *err);
+unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
+#else
+static inline void amd_retire_dram_row(struct atl_err *err) { }
+static inline unsigned long
+amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; }
+#endif /* CONFIG_AMD_ATL */
+
 #endif /* __RAS_H__ */
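For context, the registration half of this indirection lives in the translation library itself. A minimal sketch of how a translator module would attach to the hook above, simplified from the ATL pattern with placeholder names and a placeholder translation (not the literal ATL module source):

// Sketch of the consumer side of amd_atl_register_decoder(); simplified.
#include <linux/module.h>
#include <linux/ras.h>

static unsigned long my_umc_na_to_spa(struct atl_err *err)
{
        /* Translate a normalized address to a system physical address here. */
        return err->addr;       /* placeholder translation */
}

static int __init my_atl_init(void)
{
        /* Make the translator available to amd_convert_umc_mca_addr_to_sys_addr(). */
        amd_atl_register_decoder(my_umc_na_to_spa);
        return 0;
}

static void __exit my_atl_exit(void)
{
        amd_atl_unregister_decoder();
}

module_init(my_atl_init);
module_exit(my_atl_exit);
MODULE_LICENSE("GPL");

Until such a module registers, amd_convert_umc_mca_addr_to_sys_addr() simply returns -EINVAL, which is why callers like FMPM treat translation failures as non-fatal.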