linux/drivers/cxl/core/memdev.c
Dan Williams 2345df5424 cxl/memdev: Fix endpoint port removal
Testing of ram region support [1], stimulates a long standing bug in
cxl_detach_ep() where some cxl_ep_remove() cleanup is skipped due to
inability to walk ports after dports have been unregistered. That
results in a failure to re-register a memdev after the port is
re-enabled leading to a crash like the following:

    cxl_port_setup_targets: cxl region4: cxl_host_bridge.0:port4 iw: 1 ig: 256
    general protection fault, ...
    [..]
    RIP: 0010:cxl_region_setup_targets+0x897/0x9e0 [cxl_core]
    dev_name at include/linux/device.h:700
    (inlined by) cxl_port_setup_targets at drivers/cxl/core/region.c:1155
    (inlined by) cxl_region_setup_targets at drivers/cxl/core/region.c:1249
    [..]
    Call Trace:
     <TASK>
     attach_target+0x39a/0x760 [cxl_core]
     ? __mutex_unlock_slowpath+0x3a/0x290
     cxl_add_to_region+0xb8/0x340 [cxl_core]
     ? lockdep_hardirqs_on+0x7d/0x100
     discover_region+0x4b/0x80 [cxl_port]
     ? __pfx_discover_region+0x10/0x10 [cxl_port]
     device_for_each_child+0x58/0x90
     cxl_port_probe+0x10e/0x130 [cxl_port]
     cxl_bus_probe+0x17/0x50 [cxl_core]

Change the port ancestry walk to be by depth rather than by dport. This
ensures that even if a port has unregistered its dports a deferred
memdev cleanup will still be able to cleanup the memdev's interest in
that port.

The parent_port->dev.driver check is only needed for determining if the
bottom up removal beat the top-down removal, but cxl_ep_remove() can
always proceed given the port is pinned. That is, the two sources of
cxl_ep_remove() are in cxl_detach_ep() and cxl_port_release(), and
cxl_port_release() can not run if cxl_detach_ep() holds a reference.

Fixes: 2703c16c75 ("cxl/core/port: Add switch port enumeration")
Link: http://lore.kernel.org/r/167564534874.847146.5222419648551436750.stgit@dwillia2-xfh.jf.intel.com [1]
Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
Link: https://lore.kernel.org/r/167601992789.1924368.8083994227892600608.stgit@dwillia2-xfh.jf.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-02-10 17:29:09 -08:00

389 lines
9.3 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. */
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <cxlmem.h>
#include "core.h"
static DECLARE_RWSEM(cxl_memdev_rwsem);
/*
* An entire PCI topology full of devices should be enough for any
* config
*/
#define CXL_MEM_MAX_DEVS 65536
static int cxl_mem_major;
static DEFINE_IDA(cxl_memdev_ida);
static void cxl_memdev_release(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
ida_free(&cxl_memdev_ida, cxlmd->id);
kfree(cxlmd);
}
static char *cxl_memdev_devnode(struct device *dev, umode_t *mode, kuid_t *uid,
kgid_t *gid)
{
return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
}
static ssize_t firmware_version_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
return sysfs_emit(buf, "%.16s\n", cxlds->firmware_version);
}
static DEVICE_ATTR_RO(firmware_version);
static ssize_t payload_max_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
return sysfs_emit(buf, "%zu\n", cxlds->payload_size);
}
static DEVICE_ATTR_RO(payload_max);
static ssize_t label_storage_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
return sysfs_emit(buf, "%zu\n", cxlds->lsa_size);
}
static DEVICE_ATTR_RO(label_storage_size);
static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
unsigned long long len = resource_size(&cxlds->ram_res);
return sysfs_emit(buf, "%#llx\n", len);
}
static struct device_attribute dev_attr_ram_size =
__ATTR(size, 0444, ram_size_show, NULL);
static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
unsigned long long len = resource_size(&cxlds->pmem_res);
return sysfs_emit(buf, "%#llx\n", len);
}
static struct device_attribute dev_attr_pmem_size =
__ATTR(size, 0444, pmem_size_show, NULL);
static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
return sysfs_emit(buf, "%#llx\n", cxlds->serial);
}
static DEVICE_ATTR_RO(serial);
static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
return sprintf(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);
static struct attribute *cxl_memdev_attributes[] = {
&dev_attr_serial.attr,
&dev_attr_firmware_version.attr,
&dev_attr_payload_max.attr,
&dev_attr_label_storage_size.attr,
&dev_attr_numa_node.attr,
NULL,
};
static struct attribute *cxl_memdev_pmem_attributes[] = {
&dev_attr_pmem_size.attr,
NULL,
};
static struct attribute *cxl_memdev_ram_attributes[] = {
&dev_attr_ram_size.attr,
NULL,
};
static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a,
int n)
{
if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr)
return 0;
return a->mode;
}
static struct attribute_group cxl_memdev_attribute_group = {
.attrs = cxl_memdev_attributes,
.is_visible = cxl_memdev_visible,
};
static struct attribute_group cxl_memdev_ram_attribute_group = {
.name = "ram",
.attrs = cxl_memdev_ram_attributes,
};
static struct attribute_group cxl_memdev_pmem_attribute_group = {
.name = "pmem",
.attrs = cxl_memdev_pmem_attributes,
};
static const struct attribute_group *cxl_memdev_attribute_groups[] = {
&cxl_memdev_attribute_group,
&cxl_memdev_ram_attribute_group,
&cxl_memdev_pmem_attribute_group,
NULL,
};
static const struct device_type cxl_memdev_type = {
.name = "cxl_memdev",
.release = cxl_memdev_release,
.devnode = cxl_memdev_devnode,
.groups = cxl_memdev_attribute_groups,
};
bool is_cxl_memdev(struct device *dev)
{
return dev->type == &cxl_memdev_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);
/**
* set_exclusive_cxl_commands() - atomically disable user cxl commands
* @cxlds: The device state to operate on
* @cmds: bitmap of commands to mark exclusive
*
* Grab the cxl_memdev_rwsem in write mode to flush in-flight
* invocations of the ioctl path and then disable future execution of
* commands with the command ids set in @cmds.
*/
void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
{
down_write(&cxl_memdev_rwsem);
bitmap_or(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
CXL_MEM_COMMAND_ID_MAX);
up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, CXL);
/**
* clear_exclusive_cxl_commands() - atomically enable user cxl commands
* @cxlds: The device state to modify
* @cmds: bitmap of commands to mark available for userspace
*/
void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
{
down_write(&cxl_memdev_rwsem);
bitmap_andnot(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
CXL_MEM_COMMAND_ID_MAX);
up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);
static void cxl_memdev_shutdown(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
down_write(&cxl_memdev_rwsem);
cxlmd->cxlds = NULL;
up_write(&cxl_memdev_rwsem);
}
static void cxl_memdev_unregister(void *_cxlmd)
{
struct cxl_memdev *cxlmd = _cxlmd;
struct device *dev = &cxlmd->dev;
cxl_memdev_shutdown(dev);
cdev_device_del(&cxlmd->cdev, dev);
put_device(dev);
}
static void detach_memdev(struct work_struct *work)
{
struct cxl_memdev *cxlmd;
cxlmd = container_of(work, typeof(*cxlmd), detach_work);
device_release_driver(&cxlmd->dev);
put_device(&cxlmd->dev);
}
static struct lock_class_key cxl_memdev_key;
static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
const struct file_operations *fops)
{
struct cxl_memdev *cxlmd;
struct device *dev;
struct cdev *cdev;
int rc;
cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
if (!cxlmd)
return ERR_PTR(-ENOMEM);
rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL);
if (rc < 0)
goto err;
cxlmd->id = rc;
cxlmd->depth = -1;
dev = &cxlmd->dev;
device_initialize(dev);
lockdep_set_class(&dev->mutex, &cxl_memdev_key);
dev->parent = cxlds->dev;
dev->bus = &cxl_bus_type;
dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
dev->type = &cxl_memdev_type;
device_set_pm_not_required(dev);
INIT_WORK(&cxlmd->detach_work, detach_memdev);
cdev = &cxlmd->cdev;
cdev_init(cdev, fops);
return cxlmd;
err:
kfree(cxlmd);
return ERR_PTR(rc);
}
static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
unsigned long arg)
{
switch (cmd) {
case CXL_MEM_QUERY_COMMANDS:
return cxl_query_cmd(cxlmd, (void __user *)arg);
case CXL_MEM_SEND_COMMAND:
return cxl_send_cmd(cxlmd, (void __user *)arg);
default:
return -ENOTTY;
}
}
static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct cxl_memdev *cxlmd = file->private_data;
int rc = -ENXIO;
down_read(&cxl_memdev_rwsem);
if (cxlmd->cxlds)
rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
up_read(&cxl_memdev_rwsem);
return rc;
}
static int cxl_memdev_open(struct inode *inode, struct file *file)
{
struct cxl_memdev *cxlmd =
container_of(inode->i_cdev, typeof(*cxlmd), cdev);
get_device(&cxlmd->dev);
file->private_data = cxlmd;
return 0;
}
static int cxl_memdev_release_file(struct inode *inode, struct file *file)
{
struct cxl_memdev *cxlmd =
container_of(inode->i_cdev, typeof(*cxlmd), cdev);
put_device(&cxlmd->dev);
return 0;
}
static const struct file_operations cxl_memdev_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = cxl_memdev_ioctl,
.open = cxl_memdev_open,
.release = cxl_memdev_release_file,
.compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
{
struct cxl_memdev *cxlmd;
struct device *dev;
struct cdev *cdev;
int rc;
cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
if (IS_ERR(cxlmd))
return cxlmd;
dev = &cxlmd->dev;
rc = dev_set_name(dev, "mem%d", cxlmd->id);
if (rc)
goto err;
/*
* Activate ioctl operations, no cxl_memdev_rwsem manipulation
* needed as this is ordered with cdev_add() publishing the device.
*/
cxlmd->cxlds = cxlds;
cxlds->cxlmd = cxlmd;
cdev = &cxlmd->cdev;
rc = cdev_device_add(cdev, dev);
if (rc)
goto err;
rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
if (rc)
return ERR_PTR(rc);
return cxlmd;
err:
/*
* The cdev was briefly live, shutdown any ioctl operations that
* saw that state.
*/
cxl_memdev_shutdown(dev);
put_device(dev);
return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
__init int cxl_memdev_init(void)
{
dev_t devt;
int rc;
rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
if (rc)
return rc;
cxl_mem_major = MAJOR(devt);
return 0;
}
void cxl_memdev_exit(void)
{
unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
}