diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index fff2581b8033..3f5627a1210a 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -552,3 +552,37 @@ Description: attribute is only visible for devices supporting the capability. The retrieved errors are logged as kernel events when cxl_poison event tracing is enabled. + + +What: /sys/bus/cxl/devices/regionZ/accessY/read_bandwidth + /sys/bus/cxl/devices/regionZ/accessY/write_banwidth +Date: Jan, 2024 +KernelVersion: v6.9 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) The aggregated read or write bandwidth of the region. The + number is the accumulated read or write bandwidth of all CXL memory + devices that contributes to the region in MB/s. It is + identical data that should appear in + /sys/devices/system/node/nodeX/accessY/initiators/read_bandwidth or + /sys/devices/system/node/nodeX/accessY/initiators/write_bandwidth. + See Documentation/ABI/stable/sysfs-devices-node. access0 provides + the number to the closest initiator and access1 provides the + number to the closest CPU. + + +What: /sys/bus/cxl/devices/regionZ/accessY/read_latency + /sys/bus/cxl/devices/regionZ/accessY/write_latency +Date: Jan, 2024 +KernelVersion: v6.9 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) The read or write latency of the region. The number is + the worst read or write latency of all CXL memory devices that + contributes to the region in nanoseconds. It is identical data + that should appear in + /sys/devices/system/node/nodeX/accessY/initiators/read_latency or + /sys/devices/system/node/nodeX/accessY/initiators/write_latency. + See Documentation/ABI/stable/sysfs-devices-node. access0 provides + the number to the closest initiator and access1 provides the + number to the closest CPU. diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index d6b85f0f6082..2c8ccc91ebe6 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -59,9 +59,8 @@ struct target_cache { }; enum { - NODE_ACCESS_CLASS_0 = 0, - NODE_ACCESS_CLASS_1, - NODE_ACCESS_CLASS_GENPORT_SINK, + NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL = ACCESS_COORDINATE_MAX, + NODE_ACCESS_CLASS_GENPORT_SINK_CPU, NODE_ACCESS_CLASS_MAX, }; @@ -75,6 +74,7 @@ struct memory_target { struct node_cache_attrs cache_attrs; u8 gen_port_device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE]; bool registered; + bool ext_updated; /* externally updated */ }; struct memory_initiator { @@ -127,7 +127,8 @@ static struct memory_target *acpi_find_genport_target(u32 uid) /** * acpi_get_genport_coordinates - Retrieve the access coordinates for a generic port * @uid: ACPI unique id - * @coord: The access coordinates written back out for the generic port + * @coord: The access coordinates written back out for the generic port. + * Expect 2 levels array. * * Return: 0 on success. Errno on failure. * @@ -143,7 +144,10 @@ int acpi_get_genport_coordinates(u32 uid, if (!target) return -ENOENT; - *coord = target->coord[NODE_ACCESS_CLASS_GENPORT_SINK]; + coord[ACCESS_COORDINATE_LOCAL] = + target->coord[NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL]; + coord[ACCESS_COORDINATE_CPU] = + target->coord[NODE_ACCESS_CLASS_GENPORT_SINK_CPU]; return 0; } @@ -325,6 +329,35 @@ static void hmat_update_target_access(struct memory_target *target, } } +int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, + enum access_coordinate_class access) +{ + struct memory_target *target; + int pxm; + + if (nid == NUMA_NO_NODE) + return -EINVAL; + + pxm = node_to_pxm(nid); + guard(mutex)(&target_lock); + target = find_mem_target(pxm); + if (!target) + return -ENODEV; + + hmat_update_target_access(target, ACPI_HMAT_READ_LATENCY, + coord->read_latency, access); + hmat_update_target_access(target, ACPI_HMAT_WRITE_LATENCY, + coord->write_latency, access); + hmat_update_target_access(target, ACPI_HMAT_READ_BANDWIDTH, + coord->read_bandwidth, access); + hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH, + coord->write_bandwidth, access); + target->ext_updated = true; + + return 0; +} +EXPORT_SYMBOL_GPL(hmat_update_target_coordinates); + static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc) { struct memory_locality *loc; @@ -374,11 +407,11 @@ static __init void hmat_update_target(unsigned int tgt_pxm, unsigned int init_px if (target && target->processor_pxm == init_pxm) { hmat_update_target_access(target, type, value, - NODE_ACCESS_CLASS_0); + ACCESS_COORDINATE_LOCAL); /* If the node has a CPU, update access 1 */ if (node_state(pxm_to_node(init_pxm), N_CPU)) hmat_update_target_access(target, type, value, - NODE_ACCESS_CLASS_1); + ACCESS_COORDINATE_CPU); } } @@ -696,8 +729,13 @@ static void hmat_update_target_attrs(struct memory_target *target, u32 best = 0; int i; + /* Don't update if an external agent has changed the data. */ + if (target->ext_updated) + return; + /* Don't update for generic port if there's no device handle */ - if (access == NODE_ACCESS_CLASS_GENPORT_SINK && + if ((access == NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL || + access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) && !(*(u16 *)target->gen_port_device_handle)) return; @@ -709,7 +747,8 @@ static void hmat_update_target_attrs(struct memory_target *target, */ if (target->processor_pxm != PXM_INVAL) { cpu_nid = pxm_to_node(target->processor_pxm); - if (access == 0 || node_state(cpu_nid, N_CPU)) { + if (access == ACCESS_COORDINATE_LOCAL || + node_state(cpu_nid, N_CPU)) { set_bit(target->processor_pxm, p_nodes); return; } @@ -737,7 +776,9 @@ static void hmat_update_target_attrs(struct memory_target *target, list_for_each_entry(initiator, &initiators, node) { u32 value; - if (access == 1 && !initiator->has_cpu) { + if ((access == ACCESS_COORDINATE_CPU || + access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) && + !initiator->has_cpu) { clear_bit(initiator->processor_pxm, p_nodes); continue; } @@ -770,20 +811,24 @@ static void __hmat_register_target_initiators(struct memory_target *target, } } -static void hmat_register_generic_target_initiators(struct memory_target *target) +static void hmat_update_generic_target(struct memory_target *target) { static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); - __hmat_register_target_initiators(target, p_nodes, - NODE_ACCESS_CLASS_GENPORT_SINK); + hmat_update_target_attrs(target, p_nodes, + NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL); + hmat_update_target_attrs(target, p_nodes, + NODE_ACCESS_CLASS_GENPORT_SINK_CPU); } static void hmat_register_target_initiators(struct memory_target *target) { static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); - __hmat_register_target_initiators(target, p_nodes, 0); - __hmat_register_target_initiators(target, p_nodes, 1); + __hmat_register_target_initiators(target, p_nodes, + ACCESS_COORDINATE_LOCAL); + __hmat_register_target_initiators(target, p_nodes, + ACCESS_COORDINATE_CPU); } static void hmat_register_target_cache(struct memory_target *target) @@ -835,7 +880,7 @@ static void hmat_register_target(struct memory_target *target) */ mutex_lock(&target_lock); if (*(u16 *)target->gen_port_device_handle) { - hmat_register_generic_target_initiators(target); + hmat_update_generic_target(target); target->registered = true; } mutex_unlock(&target_lock); @@ -854,8 +899,8 @@ static void hmat_register_target(struct memory_target *target) if (!target->registered) { hmat_register_target_initiators(target); hmat_register_target_cache(target); - hmat_register_target_perf(target, NODE_ACCESS_CLASS_0); - hmat_register_target_perf(target, NODE_ACCESS_CLASS_1); + hmat_register_target_perf(target, ACCESS_COORDINATE_LOCAL); + hmat_register_target_perf(target, ACCESS_COORDINATE_CPU); target->registered = true; } mutex_unlock(&target_lock); @@ -927,7 +972,7 @@ static int hmat_calculate_adistance(struct notifier_block *self, return NOTIFY_OK; mutex_lock(&target_lock); - hmat_update_target_attrs(target, p_nodes, 1); + hmat_update_target_attrs(target, p_nodes, ACCESS_COORDINATE_CPU); mutex_unlock(&target_lock); perf = &target->coord[1]; diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index 0214518fc582..e45e64993c50 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -29,6 +29,8 @@ static int node_to_pxm_map[MAX_NUMNODES] unsigned char acpi_srat_revision __initdata; static int acpi_numa __initdata; +static int last_real_pxm; + void __init disable_srat(void) { acpi_numa = -1; @@ -536,6 +538,7 @@ int __init acpi_numa_init(void) if (node_to_pxm_map[i] > fake_pxm) fake_pxm = node_to_pxm_map[i]; } + last_real_pxm = fake_pxm; fake_pxm++; acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws, &fake_pxm); @@ -547,6 +550,14 @@ int __init acpi_numa_init(void) return 0; } +bool acpi_node_backed_by_real_pxm(int nid) +{ + int pxm = node_to_pxm(nid); + + return pxm <= last_real_pxm; +} +EXPORT_SYMBOL_GPL(acpi_node_backed_by_real_pxm); + static int acpi_get_pxm(acpi_handle h) { unsigned long long pxm; diff --git a/drivers/base/node.c b/drivers/base/node.c index 1c05640461dd..eb72580288e6 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -126,7 +126,7 @@ static void node_access_release(struct device *dev) } static struct node_access_nodes *node_init_node_access(struct node *node, - unsigned int access) + enum access_coordinate_class access) { struct node_access_nodes *access_node; struct device *dev; @@ -191,7 +191,7 @@ static struct attribute *access_attrs[] = { * @access: The access class the for the given attributes */ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, - unsigned int access) + enum access_coordinate_class access) { struct node_access_nodes *c; struct node *node; @@ -215,6 +215,7 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, } } } +EXPORT_SYMBOL_GPL(node_set_perf_attrs); /** * struct node_cache_info - Internal tracking for memory node caches @@ -689,7 +690,7 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid) */ int register_memory_node_under_compute_node(unsigned int mem_nid, unsigned int cpu_nid, - unsigned int access) + enum access_coordinate_class access) { struct node *init_node, *targ_node; struct node_access_nodes *initiator, *target; diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 1a3e6aafbdcc..af5cb818f84d 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -530,13 +530,15 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport) if (kstrtou32(acpi_device_uid(hb), 0, &uid)) return -EINVAL; - rc = acpi_get_genport_coordinates(uid, &dport->hb_coord); + rc = acpi_get_genport_coordinates(uid, dport->hb_coord); if (rc < 0) return rc; /* Adjust back to picoseconds from nanoseconds */ - dport->hb_coord.read_latency *= 1000; - dport->hb_coord.write_latency *= 1000; + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + dport->hb_coord[i].read_latency *= 1000; + dport->hb_coord[i].write_latency *= 1000; + } return 0; } diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index c8737e480789..e8c066293b31 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -9,6 +9,7 @@ #include "cxlmem.h" #include "core.h" #include "cxl.h" +#include "core.h" struct dsmas_entry { struct range dpa_range; @@ -162,15 +163,22 @@ static int cxl_cdat_endpoint_process(struct cxl_port *port, static int cxl_port_perf_data_calculate(struct cxl_port *port, struct xarray *dsmas_xa) { - struct access_coordinate c; + struct access_coordinate ep_c; + struct access_coordinate coord[ACCESS_COORDINATE_MAX]; struct dsmas_entry *dent; int valid_entries = 0; unsigned long index; int rc; - rc = cxl_endpoint_get_perf_coordinates(port, &c); + rc = cxl_endpoint_get_perf_coordinates(port, &ep_c); if (rc) { - dev_dbg(&port->dev, "Failed to retrieve perf coordinates.\n"); + dev_dbg(&port->dev, "Failed to retrieve ep perf coordinates.\n"); + return rc; + } + + rc = cxl_hb_get_perf_coordinates(port, coord); + if (rc) { + dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n"); return rc; } @@ -185,18 +193,19 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port, xa_for_each(dsmas_xa, index, dent) { int qos_class; - dent->coord.read_latency = dent->coord.read_latency + - c.read_latency; - dent->coord.write_latency = dent->coord.write_latency + - c.write_latency; - dent->coord.read_bandwidth = min_t(int, c.read_bandwidth, - dent->coord.read_bandwidth); - dent->coord.write_bandwidth = min_t(int, c.write_bandwidth, - dent->coord.write_bandwidth); - + cxl_coordinates_combine(&dent->coord, &dent->coord, &ep_c); + /* + * Keeping the host bridge coordinates separate from the dsmas + * coordinates in order to allow calculation of access class + * 0 and 1 for region later. + */ + cxl_coordinates_combine(&coord[ACCESS_COORDINATE_CPU], + &coord[ACCESS_COORDINATE_CPU], + &dent->coord); dent->entries = 1; - rc = cxl_root->ops->qos_class(cxl_root, &dent->coord, 1, - &qos_class); + rc = cxl_root->ops->qos_class(cxl_root, + &coord[ACCESS_COORDINATE_CPU], + 1, &qos_class); if (rc != 1) continue; @@ -484,4 +493,101 @@ void cxl_switch_parse_cdat(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL); +/** + * cxl_coordinates_combine - Combine the two input coordinates + * + * @out: Output coordinate of c1 and c2 combined + * @c1: input coordinates + * @c2: input coordinates + */ +void cxl_coordinates_combine(struct access_coordinate *out, + struct access_coordinate *c1, + struct access_coordinate *c2) +{ + if (c1->write_bandwidth && c2->write_bandwidth) + out->write_bandwidth = min(c1->write_bandwidth, + c2->write_bandwidth); + out->write_latency = c1->write_latency + c2->write_latency; + + if (c1->read_bandwidth && c2->read_bandwidth) + out->read_bandwidth = min(c1->read_bandwidth, + c2->read_bandwidth); + out->read_latency = c1->read_latency + c2->read_latency; +} + MODULE_IMPORT_NS(CXL); + +void cxl_region_perf_data_calculate(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_port *port = cxlmd->endpoint; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX]; + struct access_coordinate coord; + struct range dpa = { + .start = cxled->dpa_res->start, + .end = cxled->dpa_res->end, + }; + struct cxl_dpa_perf *perf; + int rc; + + switch (cxlr->mode) { + case CXL_DECODER_RAM: + perf = &mds->ram_perf; + break; + case CXL_DECODER_PMEM: + perf = &mds->pmem_perf; + break; + default: + return; + } + + lockdep_assert_held(&cxl_dpa_rwsem); + + if (!range_contains(&perf->dpa_range, &dpa)) + return; + + rc = cxl_hb_get_perf_coordinates(port, hb_coord); + if (rc) { + dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n"); + return; + } + + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + /* Pickup the host bridge coords */ + cxl_coordinates_combine(&coord, &hb_coord[i], &perf->coord); + + /* Get total bandwidth and the worst latency for the cxl region */ + cxlr->coord[i].read_latency = max_t(unsigned int, + cxlr->coord[i].read_latency, + coord.read_latency); + cxlr->coord[i].write_latency = max_t(unsigned int, + cxlr->coord[i].write_latency, + coord.write_latency); + cxlr->coord[i].read_bandwidth += coord.read_bandwidth; + cxlr->coord[i].write_bandwidth += coord.write_bandwidth; + + /* + * Convert latency to nanosec from picosec to be consistent + * with the resulting latency coordinates computed by the + * HMAT_REPORTING code. + */ + cxlr->coord[i].read_latency = + DIV_ROUND_UP(cxlr->coord[i].read_latency, 1000); + cxlr->coord[i].write_latency = + DIV_ROUND_UP(cxlr->coord[i].write_latency, 1000); + } +} + +int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, + enum access_coordinate_class access) +{ + return hmat_update_target_coordinates(nid, &cxlr->coord[access], access); +} + +bool cxl_need_node_perf_attrs_update(int nid) +{ + return !acpi_node_backed_by_real_pxm(nid); +} diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 3b64fb1b9ed0..bc5a95665aa0 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -90,4 +90,8 @@ enum cxl_poison_trace_type { long cxl_pci_get_latency(struct pci_dev *pdev); +int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, + enum access_coordinate_class access); +bool cxl_need_node_perf_attrs_update(int nid); + #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index e59d9d37aa65..9ab542e7af65 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -822,6 +822,7 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host, */ port->reg_map = cxlds->reg_map; port->reg_map.host = &port->dev; + cxlmd->endpoint = port; } else if (parent_dport) { rc = dev_set_name(dev, "port%d", port->id); if (rc) @@ -1374,7 +1375,6 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) get_device(host); get_device(&endpoint->dev); - cxlmd->endpoint = endpoint; cxlmd->depth = endpoint->depth; return devm_add_action_or_reset(dev, delete_endpoint, cxlmd); } @@ -2096,18 +2096,36 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd) } EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL); -static void combine_coordinates(struct access_coordinate *c1, - struct access_coordinate *c2) +/** + * cxl_hb_get_perf_coordinates - Retrieve performance numbers between initiator + * and host bridge + * + * @port: endpoint cxl_port + * @coord: output access coordinates + * + * Return: errno on failure, 0 on success. + */ +int cxl_hb_get_perf_coordinates(struct cxl_port *port, + struct access_coordinate *coord) { - if (c2->write_bandwidth) - c1->write_bandwidth = min(c1->write_bandwidth, - c2->write_bandwidth); - c1->write_latency += c2->write_latency; + struct cxl_port *iter = port; + struct cxl_dport *dport; - if (c2->read_bandwidth) - c1->read_bandwidth = min(c1->read_bandwidth, - c2->read_bandwidth); - c1->read_latency += c2->read_latency; + if (!is_cxl_endpoint(port)) + return -EINVAL; + + dport = iter->parent_dport; + while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) { + iter = to_cxl_port(iter->dev.parent); + dport = iter->parent_dport; + } + + coord[ACCESS_COORDINATE_LOCAL] = + dport->hb_coord[ACCESS_COORDINATE_LOCAL]; + coord[ACCESS_COORDINATE_CPU] = + dport->hb_coord[ACCESS_COORDINATE_CPU]; + + return 0; } /** @@ -2143,7 +2161,7 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, * nothing to gather. */ while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) { - combine_coordinates(&c, &dport->sw_coord); + cxl_coordinates_combine(&c, &c, &dport->sw_coord); c.write_latency += dport->link_latency; c.read_latency += dport->link_latency; @@ -2151,9 +2169,6 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, dport = iter->parent_dport; } - /* Augment with the generic port (host bridge) perf data */ - combine_coordinates(&c, &dport->hb_coord); - /* Get the calculated PCI paths bandwidth */ pdev = to_pci_dev(port->uport_dev->parent); bw = pcie_bandwidth_available(pdev, NULL, NULL, NULL); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 4c7fd2d5cccb..5c186e0a39b9 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,108 @@ static struct cxl_region *to_cxl_region(struct device *dev); +#define __ACCESS_ATTR_RO(_level, _name) { \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .show = _name##_access##_level##_show, \ +} + +#define ACCESS_DEVICE_ATTR_RO(level, name) \ + struct device_attribute dev_attr_access##level##_##name = __ACCESS_ATTR_RO(level, name) + +#define ACCESS_ATTR_RO(level, attrib) \ +static ssize_t attrib##_access##level##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct cxl_region *cxlr = to_cxl_region(dev); \ + \ + if (cxlr->coord[level].attrib == 0) \ + return -ENOENT; \ + \ + return sysfs_emit(buf, "%u\n", cxlr->coord[level].attrib); \ +} \ +static ACCESS_DEVICE_ATTR_RO(level, attrib) + +ACCESS_ATTR_RO(0, read_bandwidth); +ACCESS_ATTR_RO(0, read_latency); +ACCESS_ATTR_RO(0, write_bandwidth); +ACCESS_ATTR_RO(0, write_latency); + +#define ACCESS_ATTR_DECLARE(level, attrib) \ + (&dev_attr_access##level##_##attrib.attr) + +static struct attribute *access0_coordinate_attrs[] = { + ACCESS_ATTR_DECLARE(0, read_bandwidth), + ACCESS_ATTR_DECLARE(0, write_bandwidth), + ACCESS_ATTR_DECLARE(0, read_latency), + ACCESS_ATTR_DECLARE(0, write_latency), + NULL +}; + +ACCESS_ATTR_RO(1, read_bandwidth); +ACCESS_ATTR_RO(1, read_latency); +ACCESS_ATTR_RO(1, write_bandwidth); +ACCESS_ATTR_RO(1, write_latency); + +static struct attribute *access1_coordinate_attrs[] = { + ACCESS_ATTR_DECLARE(1, read_bandwidth), + ACCESS_ATTR_DECLARE(1, write_bandwidth), + ACCESS_ATTR_DECLARE(1, read_latency), + ACCESS_ATTR_DECLARE(1, write_latency), + NULL +}; + +#define ACCESS_VISIBLE(level) \ +static umode_t cxl_region_access##level##_coordinate_visible( \ + struct kobject *kobj, struct attribute *a, int n) \ +{ \ + struct device *dev = kobj_to_dev(kobj); \ + struct cxl_region *cxlr = to_cxl_region(dev); \ + \ + if (a == &dev_attr_access##level##_read_latency.attr && \ + cxlr->coord[level].read_latency == 0) \ + return 0; \ + \ + if (a == &dev_attr_access##level##_write_latency.attr && \ + cxlr->coord[level].write_latency == 0) \ + return 0; \ + \ + if (a == &dev_attr_access##level##_read_bandwidth.attr && \ + cxlr->coord[level].read_bandwidth == 0) \ + return 0; \ + \ + if (a == &dev_attr_access##level##_write_bandwidth.attr && \ + cxlr->coord[level].write_bandwidth == 0) \ + return 0; \ + \ + return a->mode; \ +} + +ACCESS_VISIBLE(0); +ACCESS_VISIBLE(1); + +static const struct attribute_group cxl_region_access0_coordinate_group = { + .name = "access0", + .attrs = access0_coordinate_attrs, + .is_visible = cxl_region_access0_coordinate_visible, +}; + +static const struct attribute_group *get_cxl_region_access0_group(void) +{ + return &cxl_region_access0_coordinate_group; +} + +static const struct attribute_group cxl_region_access1_coordinate_group = { + .name = "access1", + .attrs = access1_coordinate_attrs, + .is_visible = cxl_region_access1_coordinate_visible, +}; + +static const struct attribute_group *get_cxl_region_access1_group(void) +{ + return &cxl_region_access1_coordinate_group; +} + static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1752,6 +1855,8 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -EINVAL; } + cxl_region_perf_data_calculate(cxlr, cxled); + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { int i; @@ -2067,6 +2172,8 @@ static const struct attribute_group *region_groups[] = { &cxl_base_attribute_group, &cxl_region_group, &cxl_region_target_group, + &cxl_region_access0_coordinate_group, + &cxl_region_access1_coordinate_group, NULL, }; @@ -2120,6 +2227,7 @@ static void unregister_region(void *_cxlr) struct cxl_region_params *p = &cxlr->params; int i; + unregister_memory_notifier(&cxlr->memory_notifier); device_del(&cxlr->dev); /* @@ -2164,6 +2272,63 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i return cxlr; } +static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid) +{ + int cset = 0; + int rc; + + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + if (cxlr->coord[i].read_bandwidth) { + rc = 0; + if (cxl_need_node_perf_attrs_update(nid)) + node_set_perf_attrs(nid, &cxlr->coord[i], i); + else + rc = cxl_update_hmat_access_coordinates(nid, cxlr, i); + + if (rc == 0) + cset++; + } + } + + if (!cset) + return false; + + rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access0_group()); + if (rc) + dev_dbg(&cxlr->dev, "Failed to update access0 group\n"); + + rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access1_group()); + if (rc) + dev_dbg(&cxlr->dev, "Failed to update access1 group\n"); + + return true; +} + +static int cxl_region_perf_attrs_callback(struct notifier_block *nb, + unsigned long action, void *arg) +{ + struct cxl_region *cxlr = container_of(nb, struct cxl_region, + memory_notifier); + struct cxl_region_params *p = &cxlr->params; + struct cxl_endpoint_decoder *cxled = p->targets[0]; + struct cxl_decoder *cxld = &cxled->cxld; + struct memory_notify *mnb = arg; + int nid = mnb->status_change_nid; + int region_nid; + + if (nid == NUMA_NO_NODE || action != MEM_ONLINE) + return NOTIFY_DONE; + + region_nid = phys_to_target_node(cxld->hpa_range.start); + if (nid != region_nid) + return NOTIFY_DONE; + + if (!cxl_region_update_coordinates(cxlr, nid)) + return NOTIFY_DONE; + + return NOTIFY_OK; +} + /** * devm_cxl_add_region - Adds a region to a decoder * @cxlrd: root decoder @@ -2211,6 +2376,10 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, if (rc) goto err; + cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback; + cxlr->memory_notifier.priority = CXL_CALLBACK_PRI; + register_memory_notifier(&cxlr->memory_notifier); + rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr); if (rc) return ERR_PTR(rc); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 003feebab79b..534e25e2f0a4 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -517,6 +518,8 @@ struct cxl_region_params { * @cxlr_pmem: (for pmem regions) cached copy of the nvdimm bridge * @flags: Region state flags * @params: active + config params for the region + * @coord: QoS access coordinates for the region + * @memory_notifier: notifier for setting the access coordinates to node */ struct cxl_region { struct device dev; @@ -527,6 +530,8 @@ struct cxl_region { struct cxl_pmem_region *cxlr_pmem; unsigned long flags; struct cxl_region_params params; + struct access_coordinate coord[ACCESS_COORDINATE_MAX]; + struct notifier_block memory_notifier; }; struct cxl_nvdimm_bridge { @@ -671,7 +676,7 @@ struct cxl_dport { struct cxl_port *port; struct cxl_regs regs; struct access_coordinate sw_coord; - struct access_coordinate hb_coord; + struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX]; long link_latency; }; @@ -879,9 +884,17 @@ void cxl_switch_parse_cdat(struct cxl_port *port); int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct access_coordinate *coord); +int cxl_hb_get_perf_coordinates(struct cxl_port *port, + struct access_coordinate *coord); +void cxl_region_perf_data_calculate(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled); void cxl_memdev_update_perf(struct cxl_memdev *cxlmd); +void cxl_coordinates_combine(struct access_coordinate *out, + struct access_coordinate *c1, + struct access_coordinate *c2); + /* * Unit test builds overrides this to __weak, find the 'strong' version * of these symbols in tools/testing/cxl/. diff --git a/include/linux/acpi.h b/include/linux/acpi.h index b7165e52b3c6..2a7c4b90d589 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1547,4 +1547,25 @@ static inline void acpi_use_parent_companion(struct device *dev) ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent)); } +#ifdef CONFIG_ACPI_HMAT +int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, + enum access_coordinate_class access); +#else +static inline int hmat_update_target_coordinates(int nid, + struct access_coordinate *coord, + enum access_coordinate_class access) +{ + return -EOPNOTSUPP; +} +#endif + +#ifdef CONFIG_ACPI_NUMA +bool acpi_node_backed_by_real_pxm(int nid); +#else +static inline bool acpi_node_backed_by_real_pxm(int nid) +{ + return false; +} +#endif + #endif /*_LINUX_ACPI_H*/ diff --git a/include/linux/memory.h b/include/linux/memory.h index f53cfdaaaa41..d8588256578a 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -114,6 +114,7 @@ struct mem_section; #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 #define HMAT_CALLBACK_PRI 2 +#define CXL_CALLBACK_PRI 5 #define MM_COMPUTE_BATCH_PRI 10 #define CPUSET_CALLBACK_PRI 10 #define MEMTIER_HOTPLUG_PRI 100 diff --git a/include/linux/node.h b/include/linux/node.h index 25b66d705ee2..dfc004e4bee7 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -34,6 +34,18 @@ struct access_coordinate { unsigned int write_latency; }; +/* + * ACCESS_COORDINATE_LOCAL correlates to ACCESS CLASS 0 + * - access_coordinate between target node and nearest initiator node + * ACCESS_COORDINATE_CPU correlates to ACCESS CLASS 1 + * - access_coordinate between target node and nearest CPU node + */ +enum access_coordinate_class { + ACCESS_COORDINATE_LOCAL, + ACCESS_COORDINATE_CPU, + ACCESS_COORDINATE_MAX +}; + enum cache_indexing { NODE_CACHE_DIRECT_MAP, NODE_CACHE_INDEXED, @@ -66,7 +78,7 @@ struct node_cache_attrs { #ifdef CONFIG_HMEM_REPORTING void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, - unsigned access); + enum access_coordinate_class access); #else static inline void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) @@ -75,7 +87,7 @@ static inline void node_add_cache(unsigned int nid, static inline void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, - unsigned access) + enum access_coordinate_class access) { } #endif @@ -137,7 +149,7 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk); extern int register_memory_node_under_compute_node(unsigned int mem_nid, unsigned int cpu_nid, - unsigned access); + enum access_coordinate_class access); #else static inline void node_dev_init(void) {