linux/drivers/cxl/core/memdev.c

389 lines
9.3 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. */
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <cxlmem.h>
#include "core.h"
static DECLARE_RWSEM(cxl_memdev_rwsem);
/*
* An entire PCI topology full of devices should be enough for any
* config
*/
#define CXL_MEM_MAX_DEVS 65536
static int cxl_mem_major;
static DEFINE_IDA(cxl_memdev_ida);
static void cxl_memdev_release(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
ida_free(&cxl_memdev_ida, cxlmd->id);
kfree(cxlmd);
}
static char *cxl_memdev_devnode(struct device *dev, umode_t *mode, kuid_t *uid,
kgid_t *gid)
{
return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
}
static ssize_t firmware_version_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
return sysfs_emit(buf, "%.16s\n", cxlds->firmware_version);
}
static DEVICE_ATTR_RO(firmware_version);
static ssize_t payload_max_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
return sysfs_emit(buf, "%zu\n", cxlds->payload_size);
}
static DEVICE_ATTR_RO(payload_max);
static ssize_t label_storage_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
return sysfs_emit(buf, "%zu\n", cxlds->lsa_size);
}
static DEVICE_ATTR_RO(label_storage_size);
static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
unsigned long long len = resource_size(&cxlds->ram_res);
return sysfs_emit(buf, "%#llx\n", len);
}
static struct device_attribute dev_attr_ram_size =
__ATTR(size, 0444, ram_size_show, NULL);
static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
unsigned long long len = resource_size(&cxlds->pmem_res);
return sysfs_emit(buf, "%#llx\n", len);
}
static struct device_attribute dev_attr_pmem_size =
__ATTR(size, 0444, pmem_size_show, NULL);
static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
return sysfs_emit(buf, "%#llx\n", cxlds->serial);
}
static DEVICE_ATTR_RO(serial);
static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
return sprintf(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);
static struct attribute *cxl_memdev_attributes[] = {
&dev_attr_serial.attr,
&dev_attr_firmware_version.attr,
&dev_attr_payload_max.attr,
&dev_attr_label_storage_size.attr,
&dev_attr_numa_node.attr,
NULL,
};
static struct attribute *cxl_memdev_pmem_attributes[] = {
&dev_attr_pmem_size.attr,
NULL,
};
static struct attribute *cxl_memdev_ram_attributes[] = {
&dev_attr_ram_size.attr,
NULL,
};
static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a,
int n)
{
if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr)
return 0;
return a->mode;
}
static struct attribute_group cxl_memdev_attribute_group = {
.attrs = cxl_memdev_attributes,
.is_visible = cxl_memdev_visible,
};
static struct attribute_group cxl_memdev_ram_attribute_group = {
.name = "ram",
.attrs = cxl_memdev_ram_attributes,
};
static struct attribute_group cxl_memdev_pmem_attribute_group = {
.name = "pmem",
.attrs = cxl_memdev_pmem_attributes,
};
static const struct attribute_group *cxl_memdev_attribute_groups[] = {
&cxl_memdev_attribute_group,
&cxl_memdev_ram_attribute_group,
&cxl_memdev_pmem_attribute_group,
NULL,
};
static const struct device_type cxl_memdev_type = {
.name = "cxl_memdev",
.release = cxl_memdev_release,
.devnode = cxl_memdev_devnode,
.groups = cxl_memdev_attribute_groups,
};
cxl/mem: Add the cxl_mem driver At this point the subsystem can enumerate all CXL ports (CXL.mem decode resources in upstream switch ports and host bridges) in a system. The last mile is connecting those ports to endpoints. The cxl_mem driver connects an endpoint device to the platform CXL.mem protoctol decode-topology. At ->probe() time it walks its device-topology-ancestry and adds a CXL Port object at every Upstream Port hop until it gets to CXL root. The CXL root object is only present after a platform firmware driver registers platform CXL resources. For ACPI based platform this is managed by the ACPI0017 device and the cxl_acpi driver. The ports are registered such that disabling a given port automatically unregisters all descendant ports, and the chain can only be registered after the root is established. Given ACPI device scanning may run asynchronously compared to PCI device scanning the root driver is tasked with rescanning the bus after the root successfully probes. Conversely if any ports in a chain between the root and an endpoint becomes disconnected it subsequently triggers the endpoint to unregister. Given lock depenedencies the endpoint unregistration happens in a workqueue asynchronously. If userspace cares about synchronizing delayed work after port events the /sys/bus/cxl/flush attribute is available for that purpose. Reported-by: Randy Dunlap <rdunlap@infradead.org> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> [djbw: clarify changelog, rework hotplug support] Link: https://lore.kernel.org/r/164398782997.903003.9725273241627693186.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2022-02-04 23:18:31 +08:00
bool is_cxl_memdev(struct device *dev)
{
return dev->type == &cxl_memdev_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);
/**
* set_exclusive_cxl_commands() - atomically disable user cxl commands
* @cxlds: The device state to operate on
* @cmds: bitmap of commands to mark exclusive
*
* Grab the cxl_memdev_rwsem in write mode to flush in-flight
* invocations of the ioctl path and then disable future execution of
* commands with the command ids set in @cmds.
*/
void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
{
down_write(&cxl_memdev_rwsem);
bitmap_or(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
CXL_MEM_COMMAND_ID_MAX);
up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, CXL);
/**
* clear_exclusive_cxl_commands() - atomically enable user cxl commands
* @cxlds: The device state to modify
* @cmds: bitmap of commands to mark available for userspace
*/
void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
{
down_write(&cxl_memdev_rwsem);
bitmap_andnot(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
CXL_MEM_COMMAND_ID_MAX);
up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);
static void cxl_memdev_shutdown(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
down_write(&cxl_memdev_rwsem);
cxlmd->cxlds = NULL;
up_write(&cxl_memdev_rwsem);
}
static void cxl_memdev_unregister(void *_cxlmd)
{
struct cxl_memdev *cxlmd = _cxlmd;
struct device *dev = &cxlmd->dev;
cxl_memdev_shutdown(dev);
cdev_device_del(&cxlmd->cdev, dev);
put_device(dev);
}
cxl/mem: Add the cxl_mem driver At this point the subsystem can enumerate all CXL ports (CXL.mem decode resources in upstream switch ports and host bridges) in a system. The last mile is connecting those ports to endpoints. The cxl_mem driver connects an endpoint device to the platform CXL.mem protoctol decode-topology. At ->probe() time it walks its device-topology-ancestry and adds a CXL Port object at every Upstream Port hop until it gets to CXL root. The CXL root object is only present after a platform firmware driver registers platform CXL resources. For ACPI based platform this is managed by the ACPI0017 device and the cxl_acpi driver. The ports are registered such that disabling a given port automatically unregisters all descendant ports, and the chain can only be registered after the root is established. Given ACPI device scanning may run asynchronously compared to PCI device scanning the root driver is tasked with rescanning the bus after the root successfully probes. Conversely if any ports in a chain between the root and an endpoint becomes disconnected it subsequently triggers the endpoint to unregister. Given lock depenedencies the endpoint unregistration happens in a workqueue asynchronously. If userspace cares about synchronizing delayed work after port events the /sys/bus/cxl/flush attribute is available for that purpose. Reported-by: Randy Dunlap <rdunlap@infradead.org> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> [djbw: clarify changelog, rework hotplug support] Link: https://lore.kernel.org/r/164398782997.903003.9725273241627693186.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2022-02-04 23:18:31 +08:00
static void detach_memdev(struct work_struct *work)
{
struct cxl_memdev *cxlmd;
cxlmd = container_of(work, typeof(*cxlmd), detach_work);
device_release_driver(&cxlmd->dev);
put_device(&cxlmd->dev);
}
2022-04-21 23:33:13 +08:00
static struct lock_class_key cxl_memdev_key;
static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
const struct file_operations *fops)
{
struct cxl_memdev *cxlmd;
struct device *dev;
struct cdev *cdev;
int rc;
cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
if (!cxlmd)
return ERR_PTR(-ENOMEM);
rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL);
if (rc < 0)
goto err;
cxlmd->id = rc;
cxl/memdev: Fix endpoint port removal Testing of ram region support [1], stimulates a long standing bug in cxl_detach_ep() where some cxl_ep_remove() cleanup is skipped due to inability to walk ports after dports have been unregistered. That results in a failure to re-register a memdev after the port is re-enabled leading to a crash like the following: cxl_port_setup_targets: cxl region4: cxl_host_bridge.0:port4 iw: 1 ig: 256 general protection fault, ... [..] RIP: 0010:cxl_region_setup_targets+0x897/0x9e0 [cxl_core] dev_name at include/linux/device.h:700 (inlined by) cxl_port_setup_targets at drivers/cxl/core/region.c:1155 (inlined by) cxl_region_setup_targets at drivers/cxl/core/region.c:1249 [..] Call Trace: <TASK> attach_target+0x39a/0x760 [cxl_core] ? __mutex_unlock_slowpath+0x3a/0x290 cxl_add_to_region+0xb8/0x340 [cxl_core] ? lockdep_hardirqs_on+0x7d/0x100 discover_region+0x4b/0x80 [cxl_port] ? __pfx_discover_region+0x10/0x10 [cxl_port] device_for_each_child+0x58/0x90 cxl_port_probe+0x10e/0x130 [cxl_port] cxl_bus_probe+0x17/0x50 [cxl_core] Change the port ancestry walk to be by depth rather than by dport. This ensures that even if a port has unregistered its dports a deferred memdev cleanup will still be able to cleanup the memdev's interest in that port. The parent_port->dev.driver check is only needed for determining if the bottom up removal beat the top-down removal, but cxl_ep_remove() can always proceed given the port is pinned. That is, the two sources of cxl_ep_remove() are in cxl_detach_ep() and cxl_port_release(), and cxl_port_release() can not run if cxl_detach_ep() holds a reference. Fixes: 2703c16c75ae ("cxl/core/port: Add switch port enumeration") Link: http://lore.kernel.org/r/167564534874.847146.5222419648551436750.stgit@dwillia2-xfh.jf.intel.com [1] Reviewed-by: Vishal Verma <vishal.l.verma@intel.com> Link: https://lore.kernel.org/r/167601992789.1924368.8083994227892600608.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-02-11 09:29:09 +08:00
cxlmd->depth = -1;
dev = &cxlmd->dev;
device_initialize(dev);
2022-04-21 23:33:13 +08:00
lockdep_set_class(&dev->mutex, &cxl_memdev_key);
dev->parent = cxlds->dev;
dev->bus = &cxl_bus_type;
dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
dev->type = &cxl_memdev_type;
device_set_pm_not_required(dev);
cxl/mem: Add the cxl_mem driver At this point the subsystem can enumerate all CXL ports (CXL.mem decode resources in upstream switch ports and host bridges) in a system. The last mile is connecting those ports to endpoints. The cxl_mem driver connects an endpoint device to the platform CXL.mem protoctol decode-topology. At ->probe() time it walks its device-topology-ancestry and adds a CXL Port object at every Upstream Port hop until it gets to CXL root. The CXL root object is only present after a platform firmware driver registers platform CXL resources. For ACPI based platform this is managed by the ACPI0017 device and the cxl_acpi driver. The ports are registered such that disabling a given port automatically unregisters all descendant ports, and the chain can only be registered after the root is established. Given ACPI device scanning may run asynchronously compared to PCI device scanning the root driver is tasked with rescanning the bus after the root successfully probes. Conversely if any ports in a chain between the root and an endpoint becomes disconnected it subsequently triggers the endpoint to unregister. Given lock depenedencies the endpoint unregistration happens in a workqueue asynchronously. If userspace cares about synchronizing delayed work after port events the /sys/bus/cxl/flush attribute is available for that purpose. Reported-by: Randy Dunlap <rdunlap@infradead.org> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> [djbw: clarify changelog, rework hotplug support] Link: https://lore.kernel.org/r/164398782997.903003.9725273241627693186.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2022-02-04 23:18:31 +08:00
INIT_WORK(&cxlmd->detach_work, detach_memdev);
cdev = &cxlmd->cdev;
cdev_init(cdev, fops);
return cxlmd;
err:
kfree(cxlmd);
return ERR_PTR(rc);
}
static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
unsigned long arg)
{
switch (cmd) {
case CXL_MEM_QUERY_COMMANDS:
return cxl_query_cmd(cxlmd, (void __user *)arg);
case CXL_MEM_SEND_COMMAND:
return cxl_send_cmd(cxlmd, (void __user *)arg);
default:
return -ENOTTY;
}
}
static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct cxl_memdev *cxlmd = file->private_data;
int rc = -ENXIO;
down_read(&cxl_memdev_rwsem);
if (cxlmd->cxlds)
rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
up_read(&cxl_memdev_rwsem);
return rc;
}
static int cxl_memdev_open(struct inode *inode, struct file *file)
{
struct cxl_memdev *cxlmd =
container_of(inode->i_cdev, typeof(*cxlmd), cdev);
get_device(&cxlmd->dev);
file->private_data = cxlmd;
return 0;
}
static int cxl_memdev_release_file(struct inode *inode, struct file *file)
{
struct cxl_memdev *cxlmd =
container_of(inode->i_cdev, typeof(*cxlmd), cdev);
put_device(&cxlmd->dev);
return 0;
}
static const struct file_operations cxl_memdev_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = cxl_memdev_ioctl,
.open = cxl_memdev_open,
.release = cxl_memdev_release_file,
.compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
{
struct cxl_memdev *cxlmd;
struct device *dev;
struct cdev *cdev;
int rc;
cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
if (IS_ERR(cxlmd))
return cxlmd;
dev = &cxlmd->dev;
rc = dev_set_name(dev, "mem%d", cxlmd->id);
if (rc)
goto err;
/*
* Activate ioctl operations, no cxl_memdev_rwsem manipulation
* needed as this is ordered with cdev_add() publishing the device.
*/
cxlmd->cxlds = cxlds;
cxlds->cxlmd = cxlmd;
cdev = &cxlmd->cdev;
rc = cdev_device_add(cdev, dev);
if (rc)
goto err;
rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
if (rc)
return ERR_PTR(rc);
return cxlmd;
err:
/*
* The cdev was briefly live, shutdown any ioctl operations that
* saw that state.
*/
cxl_memdev_shutdown(dev);
put_device(dev);
return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
__init int cxl_memdev_init(void)
{
dev_t devt;
int rc;
rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
if (rc)
return rc;
cxl_mem_major = MAJOR(devt);
return 0;
}
void cxl_memdev_exit(void)
{
unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
}