Merge branch 'for-6.11/xor_fixes' into cxl-for-next

Series to fix XOR math for DPA to SPA translation
- Refactor and fold cxl_trace_hpa() into cxl_dpa_to_hpa()
- Complete DPA->HPA->SPA translation and correct XOR translation issue
- Add new method to verify a CXL target position
- Remove old method of CXL target position verifiation
This commit is contained in:
Dave Jiang 2024-07-11 16:30:18 -07:00
commit 5647847556
7 changed files with 77 additions and 113 deletions

View File

@ -22,56 +22,42 @@ static const guid_t acpi_cxl_qtg_id_guid =
GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071,
0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52);
/*
* Find a targets entry (n) in the host bridge interleave list.
* CXL Specification 3.0 Table 9-22
*/
static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw,
int ig)
{
int i = 0, n = 0;
u8 eiw;
/* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */
if (iw != 3) {
for (i = 0; i < cximsd->nr_maps; i++)
n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i;
}
/* IW: 3,6,12 add a modulo calculation to 'n' */
if (!is_power_of_2(iw)) {
if (ways_to_eiw(iw, &eiw))
return -1;
hpa &= GENMASK_ULL(51, eiw + ig);
n |= do_div(hpa, 3) << i;
}
return n;
}
static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos)
static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa)
{
struct cxl_cxims_data *cximsd = cxlrd->platform_data;
struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
struct cxl_decoder *cxld = &cxlsd->cxld;
int ig = cxld->interleave_granularity;
int iw = cxld->interleave_ways;
int n = 0;
u64 hpa;
int hbiw = cxlrd->cxlsd.nr_targets;
u64 val;
int pos;
if (dev_WARN_ONCE(&cxld->dev,
cxld->interleave_ways != cxlsd->nr_targets,
"misconfigured root decoder\n"))
return NULL;
/* No xormaps for host bridge interleave ways of 1 or 3 */
if (hbiw == 1 || hbiw == 3)
return hpa;
hpa = cxlrd->res->start + pos * ig;
/*
* For root decoders using xormaps (hbiw: 2,4,6,8,12,16) restore
* the position bit to its value before the xormap was applied at
* HPA->DPA translation.
*
* pos is the lowest set bit in an XORMAP
* val is the XORALLBITS(HPA & XORMAP)
*
* XORALLBITS: The CXL spec (3.1 Table 9-22) defines XORALLBITS
* as an operation that outputs a single bit by XORing all the
* bits in the input (hpa & xormap). Implement XORALLBITS using
* hweight64(). If the hamming weight is even the XOR of those
* bits results in val==0, if odd the XOR result is val==1.
*/
/* Entry (n) is 0 for no interleave (iw == 1) */
if (iw != 1)
n = cxl_xor_calc_n(hpa, cximsd, iw, ig);
for (int i = 0; i < cximsd->nr_maps; i++) {
if (!cximsd->xormaps[i])
continue;
pos = __ffs(cximsd->xormaps[i]);
val = (hweight64(hpa & cximsd->xormaps[i]) & 1);
hpa = (hpa & ~(1ULL << pos)) | (val << pos);
}
if (n < 0)
return NULL;
return cxlrd->cxlsd.target[n];
return hpa;
}
struct cxl_cxims_context {
@ -361,7 +347,6 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
struct cxl_port *root_port = ctx->root_port;
struct cxl_cxims_context cxims_ctx;
struct device *dev = ctx->dev;
cxl_calc_hb_fn cxl_calc_hb;
struct cxl_decoder *cxld;
unsigned int ways, i, ig;
int rc;
@ -389,13 +374,9 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
if (rc)
return rc;
if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_MODULO)
cxl_calc_hb = cxl_hb_modulo;
else
cxl_calc_hb = cxl_hb_xor;
struct cxl_root_decoder *cxlrd __free(put_cxlrd) =
cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb);
cxl_root_decoder_alloc(root_port, ways);
if (IS_ERR(cxlrd))
return PTR_ERR(cxlrd);
@ -434,6 +415,9 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
cxlrd->qos_class = cfmws->qtg_id;
if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR)
cxlrd->hpa_to_spa = cxl_xor_hpa_to_spa;
rc = cxl_decoder_add(cxld, target_map);
if (rc)
return rc;

View File

@ -28,12 +28,12 @@ int cxl_region_init(void);
void cxl_region_exit(void);
int cxl_get_poison_by_endpoint(struct cxl_port *port);
struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa);
u64 cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
u64 dpa);
u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
u64 dpa);
#else
static inline u64
cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, u64 dpa)
static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
const struct cxl_memdev *cxlmd, u64 dpa)
{
return ULLONG_MAX;
}

View File

@ -878,7 +878,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK;
cxlr = cxl_dpa_to_region(cxlmd, dpa);
if (cxlr)
hpa = cxl_trace_hpa(cxlr, cxlmd, dpa);
hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa);
if (event_type == CXL_CPER_EVENT_GEN_MEDIA)
trace_cxl_general_media(cxlmd, type, cxlr, hpa,

View File

@ -1733,21 +1733,6 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd,
return 0;
}
struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos)
{
struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
struct cxl_decoder *cxld = &cxlsd->cxld;
int iw;
iw = cxld->interleave_ways;
if (dev_WARN_ONCE(&cxld->dev, iw != cxlsd->nr_targets,
"misconfigured root decoder\n"))
return NULL;
return cxlrd->cxlsd.target[pos % iw];
}
EXPORT_SYMBOL_NS_GPL(cxl_hb_modulo, CXL);
static struct lock_class_key cxl_decoder_key;
/**
@ -1807,7 +1792,6 @@ static int cxl_switch_decoder_init(struct cxl_port *port,
* cxl_root_decoder_alloc - Allocate a root level decoder
* @port: owning CXL root of this decoder
* @nr_targets: static number of downstream targets
* @calc_hb: which host bridge covers the n'th position by granularity
*
* Return: A new cxl decoder to be registered by cxl_decoder_add(). A
* 'CXL root' decoder is one that decodes from a top-level / static platform
@ -1815,8 +1799,7 @@ static int cxl_switch_decoder_init(struct cxl_port *port,
* topology.
*/
struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets,
cxl_calc_hb_fn calc_hb)
unsigned int nr_targets)
{
struct cxl_root_decoder *cxlrd;
struct cxl_switch_decoder *cxlsd;
@ -1838,7 +1821,6 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
return ERR_PTR(rc);
}
cxlrd->calc_hb = calc_hb;
mutex_init(&cxlrd->range_lock);
cxld = &cxlsd->cxld;

View File

@ -1560,10 +1560,13 @@ static int cxl_region_attach_position(struct cxl_region *cxlr,
const struct cxl_dport *dport, int pos)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
struct cxl_decoder *cxld = &cxlsd->cxld;
int iw = cxld->interleave_ways;
struct cxl_port *iter;
int rc;
if (cxlrd->calc_hb(cxlrd, pos) != dport) {
if (dport != cxlrd->cxlsd.target[pos % iw]) {
dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
dev_name(&cxlrd->cxlsd.cxld.dev));
@ -2759,20 +2762,13 @@ struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa)
return ctx.cxlr;
}
static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos)
static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos)
{
struct cxl_region_params *p = &cxlr->params;
int gran = p->interleave_granularity;
int ways = p->interleave_ways;
u64 offset;
/* Is the hpa within this region at all */
if (hpa < p->res->start || hpa > p->res->end) {
dev_dbg(&cxlr->dev,
"Addr trans fail: hpa 0x%llx not in region\n", hpa);
return false;
}
/* Is the hpa in an expected chunk for its pos(-ition) */
offset = hpa - p->res->start;
offset = do_div(offset, gran * ways);
@ -2785,15 +2781,26 @@ static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos)
return false;
}
static u64 cxl_dpa_to_hpa(u64 dpa, struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled)
u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
u64 dpa)
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
struct cxl_region_params *p = &cxlr->params;
int pos = cxled->pos;
struct cxl_endpoint_decoder *cxled = NULL;
u16 eig = 0;
u8 eiw = 0;
int pos;
for (int i = 0; i < p->nr_targets; i++) {
cxled = p->targets[i];
if (cxlmd == cxled_to_memdev(cxled))
break;
}
if (!cxled || cxlmd != cxled_to_memdev(cxled))
return ULLONG_MAX;
pos = cxled->pos;
ways_to_eiw(p->interleave_ways, &eiw);
granularity_to_eig(p->interleave_granularity, &eig);
@ -2827,29 +2834,23 @@ static u64 cxl_dpa_to_hpa(u64 dpa, struct cxl_region *cxlr,
/* Apply the hpa_offset to the region base address */
hpa = hpa_offset + p->res->start;
if (!cxl_is_hpa_in_range(hpa, cxlr, cxled->pos))
/* Root decoder translation overrides typical modulo decode */
if (cxlrd->hpa_to_spa)
hpa = cxlrd->hpa_to_spa(cxlrd, hpa);
if (hpa < p->res->start || hpa > p->res->end) {
dev_dbg(&cxlr->dev,
"Addr trans fail: hpa 0x%llx not in region\n", hpa);
return ULLONG_MAX;
}
/* Simple chunk check, by pos & gran, only applies to modulo decodes */
if (!cxlrd->hpa_to_spa && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos)))
return ULLONG_MAX;
return hpa;
}
u64 cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
u64 dpa)
{
struct cxl_region_params *p = &cxlr->params;
struct cxl_endpoint_decoder *cxled = NULL;
for (int i = 0; i < p->nr_targets; i++) {
cxled = p->targets[i];
if (cxlmd == cxled_to_memdev(cxled))
break;
}
if (!cxled || cxlmd != cxled_to_memdev(cxled))
return ULLONG_MAX;
return cxl_dpa_to_hpa(dpa, cxlr, cxled);
}
static struct lock_class_key cxl_pmem_region_key;
static int cxl_pmem_region_alloc(struct cxl_region *cxlr)

View File

@ -704,8 +704,8 @@ TRACE_EVENT(cxl_poison,
if (cxlr) {
__assign_str(region);
memcpy(__entry->uuid, &cxlr->params.uuid, 16);
__entry->hpa = cxl_trace_hpa(cxlr, cxlmd,
__entry->dpa);
__entry->hpa = cxl_dpa_to_hpa(cxlr, cxlmd,
__entry->dpa);
} else {
__assign_str(region);
memset(__entry->uuid, 0, 16);

View File

@ -432,14 +432,13 @@ struct cxl_switch_decoder {
};
struct cxl_root_decoder;
typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd,
int pos);
typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa);
/**
* struct cxl_root_decoder - Static platform CXL address decoder
* @res: host / parent resource for region allocations
* @region_id: region id for next region provisioning event
* @calc_hb: which host bridge covers the n'th position by granularity
* @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address
* @platform_data: platform specific configuration data
* @range_lock: sync region autodiscovery by address range
* @qos_class: QoS performance class cookie
@ -448,7 +447,7 @@ typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd,
struct cxl_root_decoder {
struct resource *res;
atomic_t region_id;
cxl_calc_hb_fn calc_hb;
cxl_hpa_to_spa_fn hpa_to_spa;
void *platform_data;
struct mutex range_lock;
int qos_class;
@ -774,9 +773,7 @@ bool is_root_decoder(struct device *dev);
bool is_switch_decoder(struct device *dev);
bool is_endpoint_decoder(struct device *dev);
struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets,
cxl_calc_hb_fn calc_hb);
struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos);
unsigned int nr_targets);
struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets);
int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map);