mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-12-14 22:44:27 +08:00
7c3dc440b1
- CXL RAM region enumeration: instantiate 'struct cxl_region' objects for platform firmware created memory regions - CXL RAM region provisioning: complement the existing PMEM region creation support with RAM region support - "Soft Reservation" policy change: Online (memory hot-add) soft-reserved memory (EFI_MEMORY_SP) by default, but still allow for setting aside such memory for dedicated access via device-dax. - CXL Events and Interrupts: Takeover CXL event handling from platform-firmware (ACPI calls this CXL Memory Error Reporting) and export CXL Events via Linux Trace Events. - Convey CXL _OSC results to drivers: Similar to PCI, let the CXL subsystem interrogate the result of CXL _OSC negotiation. - Emulate CXL DVSEC Range Registers as "decoders": Allow for first-generation devices that pre-date the definition of the CXL HDM Decoder Capability to translate the CXL DVSEC Range Registers into 'struct cxl_decoder' objects. - Set timestamp: Per spec, set the device timestamp in case of hotplug, or if platform-firwmare failed to set it. - General fixups: linux-next build issues, non-urgent fixes for pre-production hardware, unit test fixes, spelling and debug message improvements. -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQSbo+XnGs+rwLz9XGXfioYZHlFsZwUCY/WYcgAKCRDfioYZHlFs Z6m3APkBUtiEEm1o8ikdu5llUS1OTLBwqjJDwGMTyf8X/WDXhgD+J2mLsCgARS7X 5IS0RAtefutrW5sQpUucPM7QiLuraAY= =kOXC -----END PGP SIGNATURE----- Merge tag 'cxl-for-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl Pull Compute Express Link (CXL) updates from Dan Williams: "To date Linux has been dependent on platform-firmware to map CXL RAM regions and handle events / errors from devices. With this update we can now parse / update the CXL memory layout, and report events / errors from devices. This is a precursor for the CXL subsystem to handle the end-to-end "RAS" flow for CXL memory. i.e. the flow that for DDR-attached-DRAM is handled by the EDAC driver where it maps system physical address events to a field-replaceable-unit (FRU / endpoint device). In general, CXL has the potential to standardize what has historically been a pile of memory-controller-specific error handling logic. Another change of note is the default policy for handling RAM-backed device-dax instances. Previously the default access mode was "device", mmap(2) a device special file to access memory. The new default is "kmem" where the address range is assigned to the core-mm via add_memory_driver_managed(). This saves typical users from wondering why their platform memory is not visible via free(1) and stuck behind a device-file. At the same time it allows expert users to deploy policy to, for example, get dedicated access to high performance memory, or hide low performance memory from general purpose kernel allocations. This affects not only CXL, but also systems with high-bandwidth-memory that platform-firmware tags with the EFI_MEMORY_SP (special purpose) designation. Summary: - CXL RAM region enumeration: instantiate 'struct cxl_region' objects for platform firmware created memory regions - CXL RAM region provisioning: complement the existing PMEM region creation support with RAM region support - "Soft Reservation" policy change: Online (memory hot-add) soft-reserved memory (EFI_MEMORY_SP) by default, but still allow for setting aside such memory for dedicated access via device-dax. - CXL Events and Interrupts: Takeover CXL event handling from platform-firmware (ACPI calls this CXL Memory Error Reporting) and export CXL Events via Linux Trace Events. - Convey CXL _OSC results to drivers: Similar to PCI, let the CXL subsystem interrogate the result of CXL _OSC negotiation. - Emulate CXL DVSEC Range Registers as "decoders": Allow for first-generation devices that pre-date the definition of the CXL HDM Decoder Capability to translate the CXL DVSEC Range Registers into 'struct cxl_decoder' objects. - Set timestamp: Per spec, set the device timestamp in case of hotplug, or if platform-firwmare failed to set it. - General fixups: linux-next build issues, non-urgent fixes for pre-production hardware, unit test fixes, spelling and debug message improvements" * tag 'cxl-for-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (66 commits) dax/kmem: Fix leak of memory-hotplug resources cxl/mem: Add kdoc param for event log driver state cxl/trace: Add serial number to trace points cxl/trace: Add host output to trace points cxl/trace: Standardize device information output cxl/pci: Remove locked check for dvsec_range_allowed() cxl/hdm: Add emulation when HDM decoders are not committed cxl/hdm: Create emulated cxl_hdm for devices that do not have HDM decoders cxl/hdm: Emulate HDM decoder from DVSEC range registers cxl/pci: Refactor cxl_hdm_decode_init() cxl/port: Export cxl_dvsec_rr_decode() to cxl_port cxl/pci: Break out range register decoding from cxl_hdm_decode_init() cxl: add RAS status unmasking for CXL cxl: remove unnecessary calling of pci_enable_pcie_error_reporting() dax/hmem: build hmem device support as module if possible dax: cxl: add CXL_REGION dependency cxl: avoid returning uninitialized error code cxl/pmem: Fix nvdimm registration races cxl/mem: Fix UAPI command comment cxl/uapi: Tag commands from cxl_query_cmd() ...
1356 lines
31 KiB
C
1356 lines
31 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
|
|
*/
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
#include <linux/libnvdimm.h>
|
|
#include <linux/sched/mm.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/module.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/fcntl.h>
|
|
#include <linux/async.h>
|
|
#include <linux/ndctl.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/io.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/nd.h>
|
|
#include "nd-core.h"
|
|
#include "nd.h"
|
|
#include "pfn.h"
|
|
|
|
int nvdimm_major;
|
|
static int nvdimm_bus_major;
|
|
struct class *nd_class;
|
|
static DEFINE_IDA(nd_ida);
|
|
|
|
static int to_nd_device_type(const struct device *dev)
|
|
{
|
|
if (is_nvdimm(dev))
|
|
return ND_DEVICE_DIMM;
|
|
else if (is_memory(dev))
|
|
return ND_DEVICE_REGION_PMEM;
|
|
else if (is_nd_dax(dev))
|
|
return ND_DEVICE_DAX_PMEM;
|
|
else if (is_nd_region(dev->parent))
|
|
return nd_region_to_nstype(to_nd_region(dev->parent));
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nvdimm_bus_uevent(const struct device *dev, struct kobj_uevent_env *env)
|
|
{
|
|
return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT,
|
|
to_nd_device_type(dev));
|
|
}
|
|
|
|
static struct module *to_bus_provider(struct device *dev)
|
|
{
|
|
/* pin bus providers while regions are enabled */
|
|
if (is_nd_region(dev)) {
|
|
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
|
|
|
|
return nvdimm_bus->nd_desc->module;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static void nvdimm_bus_probe_start(struct nvdimm_bus *nvdimm_bus)
|
|
{
|
|
nvdimm_bus_lock(&nvdimm_bus->dev);
|
|
nvdimm_bus->probe_active++;
|
|
nvdimm_bus_unlock(&nvdimm_bus->dev);
|
|
}
|
|
|
|
static void nvdimm_bus_probe_end(struct nvdimm_bus *nvdimm_bus)
|
|
{
|
|
nvdimm_bus_lock(&nvdimm_bus->dev);
|
|
if (--nvdimm_bus->probe_active == 0)
|
|
wake_up(&nvdimm_bus->wait);
|
|
nvdimm_bus_unlock(&nvdimm_bus->dev);
|
|
}
|
|
|
|
static int nvdimm_bus_probe(struct device *dev)
|
|
{
|
|
struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
|
|
struct module *provider = to_bus_provider(dev);
|
|
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
|
|
int rc;
|
|
|
|
if (!try_module_get(provider))
|
|
return -ENXIO;
|
|
|
|
dev_dbg(&nvdimm_bus->dev, "START: %s.probe(%s)\n",
|
|
dev->driver->name, dev_name(dev));
|
|
|
|
nvdimm_bus_probe_start(nvdimm_bus);
|
|
rc = nd_drv->probe(dev);
|
|
if ((rc == 0 || rc == -EOPNOTSUPP) &&
|
|
dev->parent && is_nd_region(dev->parent))
|
|
nd_region_advance_seeds(to_nd_region(dev->parent), dev);
|
|
nvdimm_bus_probe_end(nvdimm_bus);
|
|
|
|
dev_dbg(&nvdimm_bus->dev, "END: %s.probe(%s) = %d\n", dev->driver->name,
|
|
dev_name(dev), rc);
|
|
|
|
if (rc != 0)
|
|
module_put(provider);
|
|
return rc;
|
|
}
|
|
|
|
static void nvdimm_bus_remove(struct device *dev)
|
|
{
|
|
struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
|
|
struct module *provider = to_bus_provider(dev);
|
|
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
|
|
|
|
if (nd_drv->remove)
|
|
nd_drv->remove(dev);
|
|
|
|
dev_dbg(&nvdimm_bus->dev, "%s.remove(%s)\n", dev->driver->name,
|
|
dev_name(dev));
|
|
module_put(provider);
|
|
}
|
|
|
|
static void nvdimm_bus_shutdown(struct device *dev)
|
|
{
|
|
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
|
|
struct nd_device_driver *nd_drv = NULL;
|
|
|
|
if (dev->driver)
|
|
nd_drv = to_nd_device_driver(dev->driver);
|
|
|
|
if (nd_drv && nd_drv->shutdown) {
|
|
nd_drv->shutdown(dev);
|
|
dev_dbg(&nvdimm_bus->dev, "%s.shutdown(%s)\n",
|
|
dev->driver->name, dev_name(dev));
|
|
}
|
|
}
|
|
|
|
void nd_device_notify(struct device *dev, enum nvdimm_event event)
|
|
{
|
|
device_lock(dev);
|
|
if (dev->driver) {
|
|
struct nd_device_driver *nd_drv;
|
|
|
|
nd_drv = to_nd_device_driver(dev->driver);
|
|
if (nd_drv->notify)
|
|
nd_drv->notify(dev, event);
|
|
}
|
|
device_unlock(dev);
|
|
}
|
|
EXPORT_SYMBOL(nd_device_notify);
|
|
|
|
void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event)
|
|
{
|
|
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
|
|
|
|
if (!nvdimm_bus)
|
|
return;
|
|
|
|
/* caller is responsible for holding a reference on the device */
|
|
nd_device_notify(&nd_region->dev, event);
|
|
}
|
|
EXPORT_SYMBOL_GPL(nvdimm_region_notify);
|
|
|
|
struct clear_badblocks_context {
|
|
resource_size_t phys, cleared;
|
|
};
|
|
|
|
static int nvdimm_clear_badblocks_region(struct device *dev, void *data)
|
|
{
|
|
struct clear_badblocks_context *ctx = data;
|
|
struct nd_region *nd_region;
|
|
resource_size_t ndr_end;
|
|
sector_t sector;
|
|
|
|
/* make sure device is a region */
|
|
if (!is_memory(dev))
|
|
return 0;
|
|
|
|
nd_region = to_nd_region(dev);
|
|
ndr_end = nd_region->ndr_start + nd_region->ndr_size - 1;
|
|
|
|
/* make sure we are in the region */
|
|
if (ctx->phys < nd_region->ndr_start ||
|
|
(ctx->phys + ctx->cleared - 1) > ndr_end)
|
|
return 0;
|
|
|
|
sector = (ctx->phys - nd_region->ndr_start) / 512;
|
|
badblocks_clear(&nd_region->bb, sector, ctx->cleared / 512);
|
|
|
|
if (nd_region->bb_state)
|
|
sysfs_notify_dirent(nd_region->bb_state);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void nvdimm_clear_badblocks_regions(struct nvdimm_bus *nvdimm_bus,
|
|
phys_addr_t phys, u64 cleared)
|
|
{
|
|
struct clear_badblocks_context ctx = {
|
|
.phys = phys,
|
|
.cleared = cleared,
|
|
};
|
|
|
|
device_for_each_child(&nvdimm_bus->dev, &ctx,
|
|
nvdimm_clear_badblocks_region);
|
|
}
|
|
|
|
static void nvdimm_account_cleared_poison(struct nvdimm_bus *nvdimm_bus,
|
|
phys_addr_t phys, u64 cleared)
|
|
{
|
|
if (cleared > 0)
|
|
badrange_forget(&nvdimm_bus->badrange, phys, cleared);
|
|
|
|
if (cleared > 0 && cleared / 512)
|
|
nvdimm_clear_badblocks_regions(nvdimm_bus, phys, cleared);
|
|
}
|
|
|
|
long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
|
|
unsigned int len)
|
|
{
|
|
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
|
|
struct nvdimm_bus_descriptor *nd_desc;
|
|
struct nd_cmd_clear_error clear_err;
|
|
struct nd_cmd_ars_cap ars_cap;
|
|
u32 clear_err_unit, mask;
|
|
unsigned int noio_flag;
|
|
int cmd_rc, rc;
|
|
|
|
if (!nvdimm_bus)
|
|
return -ENXIO;
|
|
|
|
nd_desc = nvdimm_bus->nd_desc;
|
|
/*
|
|
* if ndctl does not exist, it's PMEM_LEGACY and
|
|
* we want to just pretend everything is handled.
|
|
*/
|
|
if (!nd_desc->ndctl)
|
|
return len;
|
|
|
|
memset(&ars_cap, 0, sizeof(ars_cap));
|
|
ars_cap.address = phys;
|
|
ars_cap.length = len;
|
|
noio_flag = memalloc_noio_save();
|
|
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap,
|
|
sizeof(ars_cap), &cmd_rc);
|
|
memalloc_noio_restore(noio_flag);
|
|
if (rc < 0)
|
|
return rc;
|
|
if (cmd_rc < 0)
|
|
return cmd_rc;
|
|
clear_err_unit = ars_cap.clear_err_unit;
|
|
if (!clear_err_unit || !is_power_of_2(clear_err_unit))
|
|
return -ENXIO;
|
|
|
|
mask = clear_err_unit - 1;
|
|
if ((phys | len) & mask)
|
|
return -ENXIO;
|
|
memset(&clear_err, 0, sizeof(clear_err));
|
|
clear_err.address = phys;
|
|
clear_err.length = len;
|
|
noio_flag = memalloc_noio_save();
|
|
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err,
|
|
sizeof(clear_err), &cmd_rc);
|
|
memalloc_noio_restore(noio_flag);
|
|
if (rc < 0)
|
|
return rc;
|
|
if (cmd_rc < 0)
|
|
return cmd_rc;
|
|
|
|
nvdimm_account_cleared_poison(nvdimm_bus, phys, clear_err.cleared);
|
|
|
|
return clear_err.cleared;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
|
|
|
|
static int nvdimm_bus_match(struct device *dev, struct device_driver *drv);
|
|
|
|
static struct bus_type nvdimm_bus_type = {
|
|
.name = "nd",
|
|
.uevent = nvdimm_bus_uevent,
|
|
.match = nvdimm_bus_match,
|
|
.probe = nvdimm_bus_probe,
|
|
.remove = nvdimm_bus_remove,
|
|
.shutdown = nvdimm_bus_shutdown,
|
|
};
|
|
|
|
static void nvdimm_bus_release(struct device *dev)
|
|
{
|
|
struct nvdimm_bus *nvdimm_bus;
|
|
|
|
nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
|
|
ida_simple_remove(&nd_ida, nvdimm_bus->id);
|
|
kfree(nvdimm_bus);
|
|
}
|
|
|
|
static const struct device_type nvdimm_bus_dev_type = {
|
|
.release = nvdimm_bus_release,
|
|
.groups = nvdimm_bus_attribute_groups,
|
|
};
|
|
|
|
bool is_nvdimm_bus(struct device *dev)
|
|
{
|
|
return dev->type == &nvdimm_bus_dev_type;
|
|
}
|
|
|
|
struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
|
|
{
|
|
struct device *dev;
|
|
|
|
for (dev = nd_dev; dev; dev = dev->parent)
|
|
if (is_nvdimm_bus(dev))
|
|
break;
|
|
dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
|
|
if (dev)
|
|
return to_nvdimm_bus(dev);
|
|
return NULL;
|
|
}
|
|
|
|
struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
|
|
{
|
|
struct nvdimm_bus *nvdimm_bus;
|
|
|
|
nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
|
|
WARN_ON(!is_nvdimm_bus(dev));
|
|
return nvdimm_bus;
|
|
}
|
|
EXPORT_SYMBOL_GPL(to_nvdimm_bus);
|
|
|
|
struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm)
|
|
{
|
|
return to_nvdimm_bus(nvdimm->dev.parent);
|
|
}
|
|
EXPORT_SYMBOL_GPL(nvdimm_to_bus);
|
|
|
|
static struct lock_class_key nvdimm_bus_key;
|
|
|
|
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
|
|
struct nvdimm_bus_descriptor *nd_desc)
|
|
{
|
|
struct nvdimm_bus *nvdimm_bus;
|
|
int rc;
|
|
|
|
nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
|
|
if (!nvdimm_bus)
|
|
return NULL;
|
|
INIT_LIST_HEAD(&nvdimm_bus->list);
|
|
INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
|
|
init_waitqueue_head(&nvdimm_bus->wait);
|
|
nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
|
|
if (nvdimm_bus->id < 0) {
|
|
kfree(nvdimm_bus);
|
|
return NULL;
|
|
}
|
|
mutex_init(&nvdimm_bus->reconfig_mutex);
|
|
badrange_init(&nvdimm_bus->badrange);
|
|
nvdimm_bus->nd_desc = nd_desc;
|
|
nvdimm_bus->dev.parent = parent;
|
|
nvdimm_bus->dev.type = &nvdimm_bus_dev_type;
|
|
nvdimm_bus->dev.groups = nd_desc->attr_groups;
|
|
nvdimm_bus->dev.bus = &nvdimm_bus_type;
|
|
nvdimm_bus->dev.of_node = nd_desc->of_node;
|
|
device_initialize(&nvdimm_bus->dev);
|
|
lockdep_set_class(&nvdimm_bus->dev.mutex, &nvdimm_bus_key);
|
|
device_set_pm_not_required(&nvdimm_bus->dev);
|
|
rc = dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
|
|
if (rc)
|
|
goto err;
|
|
|
|
rc = device_add(&nvdimm_bus->dev);
|
|
if (rc) {
|
|
dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
|
|
goto err;
|
|
}
|
|
|
|
return nvdimm_bus;
|
|
err:
|
|
put_device(&nvdimm_bus->dev);
|
|
return NULL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nvdimm_bus_register);
|
|
|
|
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
|
|
{
|
|
if (!nvdimm_bus)
|
|
return;
|
|
device_unregister(&nvdimm_bus->dev);
|
|
}
|
|
EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
|
|
|
|
static int child_unregister(struct device *dev, void *data)
|
|
{
|
|
/*
|
|
* the singular ndctl class device per bus needs to be
|
|
* "device_destroy"ed, so skip it here
|
|
*
|
|
* i.e. remove classless children
|
|
*/
|
|
if (dev->class)
|
|
return 0;
|
|
|
|
if (is_nvdimm(dev))
|
|
nvdimm_delete(to_nvdimm(dev));
|
|
else
|
|
nd_device_unregister(dev, ND_SYNC);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void free_badrange_list(struct list_head *badrange_list)
|
|
{
|
|
struct badrange_entry *bre, *next;
|
|
|
|
list_for_each_entry_safe(bre, next, badrange_list, list) {
|
|
list_del(&bre->list);
|
|
kfree(bre);
|
|
}
|
|
list_del_init(badrange_list);
|
|
}
|
|
|
|
static void nd_bus_remove(struct device *dev)
|
|
{
|
|
struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
|
|
|
|
mutex_lock(&nvdimm_bus_list_mutex);
|
|
list_del_init(&nvdimm_bus->list);
|
|
mutex_unlock(&nvdimm_bus_list_mutex);
|
|
|
|
wait_event(nvdimm_bus->wait,
|
|
atomic_read(&nvdimm_bus->ioctl_active) == 0);
|
|
|
|
nd_synchronize();
|
|
device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
|
|
|
|
spin_lock(&nvdimm_bus->badrange.lock);
|
|
free_badrange_list(&nvdimm_bus->badrange.list);
|
|
spin_unlock(&nvdimm_bus->badrange.lock);
|
|
|
|
nvdimm_bus_destroy_ndctl(nvdimm_bus);
|
|
}
|
|
|
|
static int nd_bus_probe(struct device *dev)
|
|
{
|
|
struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
|
|
int rc;
|
|
|
|
rc = nvdimm_bus_create_ndctl(nvdimm_bus);
|
|
if (rc)
|
|
return rc;
|
|
|
|
mutex_lock(&nvdimm_bus_list_mutex);
|
|
list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
|
|
mutex_unlock(&nvdimm_bus_list_mutex);
|
|
|
|
/* enable bus provider attributes to look up their local context */
|
|
dev_set_drvdata(dev, nvdimm_bus->nd_desc);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct nd_device_driver nd_bus_driver = {
|
|
.probe = nd_bus_probe,
|
|
.remove = nd_bus_remove,
|
|
.drv = {
|
|
.name = "nd_bus",
|
|
.suppress_bind_attrs = true,
|
|
.bus = &nvdimm_bus_type,
|
|
.owner = THIS_MODULE,
|
|
.mod_name = KBUILD_MODNAME,
|
|
},
|
|
};
|
|
|
|
static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
|
|
{
|
|
struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
|
|
|
|
if (is_nvdimm_bus(dev) && nd_drv == &nd_bus_driver)
|
|
return true;
|
|
|
|
return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
|
|
}
|
|
|
|
static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain);
|
|
|
|
void nd_synchronize(void)
|
|
{
|
|
async_synchronize_full_domain(&nd_async_domain);
|
|
}
|
|
EXPORT_SYMBOL_GPL(nd_synchronize);
|
|
|
|
static void nd_async_device_register(void *d, async_cookie_t cookie)
|
|
{
|
|
struct device *dev = d;
|
|
|
|
if (device_add(dev) != 0) {
|
|
dev_err(dev, "%s: failed\n", __func__);
|
|
put_device(dev);
|
|
}
|
|
put_device(dev);
|
|
if (dev->parent)
|
|
put_device(dev->parent);
|
|
}
|
|
|
|
static void nd_async_device_unregister(void *d, async_cookie_t cookie)
|
|
{
|
|
struct device *dev = d;
|
|
|
|
/* flush bus operations before delete */
|
|
nvdimm_bus_lock(dev);
|
|
nvdimm_bus_unlock(dev);
|
|
|
|
device_unregister(dev);
|
|
put_device(dev);
|
|
}
|
|
|
|
static void __nd_device_register(struct device *dev, bool sync)
|
|
{
|
|
if (!dev)
|
|
return;
|
|
|
|
/*
|
|
* Ensure that region devices always have their NUMA node set as
|
|
* early as possible. This way we are able to make certain that
|
|
* any memory associated with the creation and the creation
|
|
* itself of the region is associated with the correct node.
|
|
*/
|
|
if (is_nd_region(dev))
|
|
set_dev_node(dev, to_nd_region(dev)->numa_node);
|
|
|
|
dev->bus = &nvdimm_bus_type;
|
|
device_set_pm_not_required(dev);
|
|
if (dev->parent) {
|
|
get_device(dev->parent);
|
|
if (dev_to_node(dev) == NUMA_NO_NODE)
|
|
set_dev_node(dev, dev_to_node(dev->parent));
|
|
}
|
|
get_device(dev);
|
|
|
|
if (sync)
|
|
nd_async_device_register(dev, 0);
|
|
else
|
|
async_schedule_dev_domain(nd_async_device_register, dev,
|
|
&nd_async_domain);
|
|
}
|
|
|
|
void nd_device_register(struct device *dev)
|
|
{
|
|
__nd_device_register(dev, false);
|
|
}
|
|
EXPORT_SYMBOL(nd_device_register);
|
|
|
|
void nd_device_register_sync(struct device *dev)
|
|
{
|
|
__nd_device_register(dev, true);
|
|
}
|
|
|
|
void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
|
|
{
|
|
bool killed;
|
|
|
|
switch (mode) {
|
|
case ND_ASYNC:
|
|
/*
|
|
* In the async case this is being triggered with the
|
|
* device lock held and the unregistration work needs to
|
|
* be moved out of line iff this is thread has won the
|
|
* race to schedule the deletion.
|
|
*/
|
|
if (!kill_device(dev))
|
|
return;
|
|
|
|
get_device(dev);
|
|
async_schedule_domain(nd_async_device_unregister, dev,
|
|
&nd_async_domain);
|
|
break;
|
|
case ND_SYNC:
|
|
/*
|
|
* In the sync case the device is being unregistered due
|
|
* to a state change of the parent. Claim the kill state
|
|
* to synchronize against other unregistration requests,
|
|
* or otherwise let the async path handle it if the
|
|
* unregistration was already queued.
|
|
*/
|
|
device_lock(dev);
|
|
killed = kill_device(dev);
|
|
device_unlock(dev);
|
|
|
|
if (!killed)
|
|
return;
|
|
|
|
nd_synchronize();
|
|
device_unregister(dev);
|
|
break;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(nd_device_unregister);
|
|
|
|
/**
|
|
* __nd_driver_register() - register a region or a namespace driver
|
|
* @nd_drv: driver to register
|
|
* @owner: automatically set by nd_driver_register() macro
|
|
* @mod_name: automatically set by nd_driver_register() macro
|
|
*/
|
|
int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
|
|
const char *mod_name)
|
|
{
|
|
struct device_driver *drv = &nd_drv->drv;
|
|
|
|
if (!nd_drv->type) {
|
|
pr_debug("driver type bitmask not set (%ps)\n",
|
|
__builtin_return_address(0));
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (!nd_drv->probe) {
|
|
pr_debug("%s ->probe() must be specified\n", mod_name);
|
|
return -EINVAL;
|
|
}
|
|
|
|
drv->bus = &nvdimm_bus_type;
|
|
drv->owner = owner;
|
|
drv->mod_name = mod_name;
|
|
|
|
return driver_register(drv);
|
|
}
|
|
EXPORT_SYMBOL(__nd_driver_register);
|
|
|
|
void nvdimm_check_and_set_ro(struct gendisk *disk)
|
|
{
|
|
struct device *dev = disk_to_dev(disk)->parent;
|
|
struct nd_region *nd_region = to_nd_region(dev->parent);
|
|
int disk_ro = get_disk_ro(disk);
|
|
|
|
/* catch the disk up with the region ro state */
|
|
if (disk_ro == nd_region->ro)
|
|
return;
|
|
|
|
dev_info(dev, "%s read-%s, marking %s read-%s\n",
|
|
dev_name(&nd_region->dev), nd_region->ro ? "only" : "write",
|
|
disk->disk_name, nd_region->ro ? "only" : "write");
|
|
set_disk_ro(disk, nd_region->ro);
|
|
}
|
|
EXPORT_SYMBOL(nvdimm_check_and_set_ro);
|
|
|
|
static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
|
|
char *buf)
|
|
{
|
|
return sprintf(buf, ND_DEVICE_MODALIAS_FMT "\n",
|
|
to_nd_device_type(dev));
|
|
}
|
|
static DEVICE_ATTR_RO(modalias);
|
|
|
|
static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
|
|
char *buf)
|
|
{
|
|
return sprintf(buf, "%s\n", dev->type->name);
|
|
}
|
|
static DEVICE_ATTR_RO(devtype);
|
|
|
|
static struct attribute *nd_device_attributes[] = {
|
|
&dev_attr_modalias.attr,
|
|
&dev_attr_devtype.attr,
|
|
NULL,
|
|
};
|
|
|
|
/*
|
|
* nd_device_attribute_group - generic attributes for all devices on an nd bus
|
|
*/
|
|
const struct attribute_group nd_device_attribute_group = {
|
|
.attrs = nd_device_attributes,
|
|
};
|
|
|
|
static ssize_t numa_node_show(struct device *dev,
|
|
struct device_attribute *attr, char *buf)
|
|
{
|
|
return sprintf(buf, "%d\n", dev_to_node(dev));
|
|
}
|
|
static DEVICE_ATTR_RO(numa_node);
|
|
|
|
static int nvdimm_dev_to_target_node(struct device *dev)
|
|
{
|
|
struct device *parent = dev->parent;
|
|
struct nd_region *nd_region = NULL;
|
|
|
|
if (is_nd_region(dev))
|
|
nd_region = to_nd_region(dev);
|
|
else if (parent && is_nd_region(parent))
|
|
nd_region = to_nd_region(parent);
|
|
|
|
if (!nd_region)
|
|
return NUMA_NO_NODE;
|
|
return nd_region->target_node;
|
|
}
|
|
|
|
static ssize_t target_node_show(struct device *dev,
|
|
struct device_attribute *attr, char *buf)
|
|
{
|
|
return sprintf(buf, "%d\n", nvdimm_dev_to_target_node(dev));
|
|
}
|
|
static DEVICE_ATTR_RO(target_node);
|
|
|
|
static struct attribute *nd_numa_attributes[] = {
|
|
&dev_attr_numa_node.attr,
|
|
&dev_attr_target_node.attr,
|
|
NULL,
|
|
};
|
|
|
|
static umode_t nd_numa_attr_visible(struct kobject *kobj, struct attribute *a,
|
|
int n)
|
|
{
|
|
struct device *dev = container_of(kobj, typeof(*dev), kobj);
|
|
|
|
if (!IS_ENABLED(CONFIG_NUMA))
|
|
return 0;
|
|
|
|
if (a == &dev_attr_target_node.attr &&
|
|
nvdimm_dev_to_target_node(dev) == NUMA_NO_NODE)
|
|
return 0;
|
|
|
|
return a->mode;
|
|
}
|
|
|
|
/*
|
|
* nd_numa_attribute_group - NUMA attributes for all devices on an nd bus
|
|
*/
|
|
const struct attribute_group nd_numa_attribute_group = {
|
|
.attrs = nd_numa_attributes,
|
|
.is_visible = nd_numa_attr_visible,
|
|
};
|
|
|
|
static void ndctl_release(struct device *dev)
|
|
{
|
|
kfree(dev);
|
|
}
|
|
|
|
static struct lock_class_key nvdimm_ndctl_key;
|
|
|
|
int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus)
|
|
{
|
|
dev_t devt = MKDEV(nvdimm_bus_major, nvdimm_bus->id);
|
|
struct device *dev;
|
|
int rc;
|
|
|
|
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
|
|
if (!dev)
|
|
return -ENOMEM;
|
|
device_initialize(dev);
|
|
lockdep_set_class(&dev->mutex, &nvdimm_ndctl_key);
|
|
device_set_pm_not_required(dev);
|
|
dev->class = nd_class;
|
|
dev->parent = &nvdimm_bus->dev;
|
|
dev->devt = devt;
|
|
dev->release = ndctl_release;
|
|
rc = dev_set_name(dev, "ndctl%d", nvdimm_bus->id);
|
|
if (rc)
|
|
goto err;
|
|
|
|
rc = device_add(dev);
|
|
if (rc) {
|
|
dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %d\n",
|
|
nvdimm_bus->id, rc);
|
|
goto err;
|
|
}
|
|
return 0;
|
|
|
|
err:
|
|
put_device(dev);
|
|
return rc;
|
|
}
|
|
|
|
void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus)
|
|
{
|
|
device_destroy(nd_class, MKDEV(nvdimm_bus_major, nvdimm_bus->id));
|
|
}
|
|
|
|
static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = {
|
|
[ND_CMD_IMPLEMENTED] = { },
|
|
[ND_CMD_SMART] = {
|
|
.out_num = 2,
|
|
.out_sizes = { 4, 128, },
|
|
},
|
|
[ND_CMD_SMART_THRESHOLD] = {
|
|
.out_num = 2,
|
|
.out_sizes = { 4, 8, },
|
|
},
|
|
[ND_CMD_DIMM_FLAGS] = {
|
|
.out_num = 2,
|
|
.out_sizes = { 4, 4 },
|
|
},
|
|
[ND_CMD_GET_CONFIG_SIZE] = {
|
|
.out_num = 3,
|
|
.out_sizes = { 4, 4, 4, },
|
|
},
|
|
[ND_CMD_GET_CONFIG_DATA] = {
|
|
.in_num = 2,
|
|
.in_sizes = { 4, 4, },
|
|
.out_num = 2,
|
|
.out_sizes = { 4, UINT_MAX, },
|
|
},
|
|
[ND_CMD_SET_CONFIG_DATA] = {
|
|
.in_num = 3,
|
|
.in_sizes = { 4, 4, UINT_MAX, },
|
|
.out_num = 1,
|
|
.out_sizes = { 4, },
|
|
},
|
|
[ND_CMD_VENDOR] = {
|
|
.in_num = 3,
|
|
.in_sizes = { 4, 4, UINT_MAX, },
|
|
.out_num = 3,
|
|
.out_sizes = { 4, 4, UINT_MAX, },
|
|
},
|
|
[ND_CMD_CALL] = {
|
|
.in_num = 2,
|
|
.in_sizes = { sizeof(struct nd_cmd_pkg), UINT_MAX, },
|
|
.out_num = 1,
|
|
.out_sizes = { UINT_MAX, },
|
|
},
|
|
};
|
|
|
|
const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd)
|
|
{
|
|
if (cmd < ARRAY_SIZE(__nd_cmd_dimm_descs))
|
|
return &__nd_cmd_dimm_descs[cmd];
|
|
return NULL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nd_cmd_dimm_desc);
|
|
|
|
static const struct nd_cmd_desc __nd_cmd_bus_descs[] = {
|
|
[ND_CMD_IMPLEMENTED] = { },
|
|
[ND_CMD_ARS_CAP] = {
|
|
.in_num = 2,
|
|
.in_sizes = { 8, 8, },
|
|
.out_num = 4,
|
|
.out_sizes = { 4, 4, 4, 4, },
|
|
},
|
|
[ND_CMD_ARS_START] = {
|
|
.in_num = 5,
|
|
.in_sizes = { 8, 8, 2, 1, 5, },
|
|
.out_num = 2,
|
|
.out_sizes = { 4, 4, },
|
|
},
|
|
[ND_CMD_ARS_STATUS] = {
|
|
.out_num = 3,
|
|
.out_sizes = { 4, 4, UINT_MAX, },
|
|
},
|
|
[ND_CMD_CLEAR_ERROR] = {
|
|
.in_num = 2,
|
|
.in_sizes = { 8, 8, },
|
|
.out_num = 3,
|
|
.out_sizes = { 4, 4, 8, },
|
|
},
|
|
[ND_CMD_CALL] = {
|
|
.in_num = 2,
|
|
.in_sizes = { sizeof(struct nd_cmd_pkg), UINT_MAX, },
|
|
.out_num = 1,
|
|
.out_sizes = { UINT_MAX, },
|
|
},
|
|
};
|
|
|
|
const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd)
|
|
{
|
|
if (cmd < ARRAY_SIZE(__nd_cmd_bus_descs))
|
|
return &__nd_cmd_bus_descs[cmd];
|
|
return NULL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nd_cmd_bus_desc);
|
|
|
|
u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
|
|
const struct nd_cmd_desc *desc, int idx, void *buf)
|
|
{
|
|
if (idx >= desc->in_num)
|
|
return UINT_MAX;
|
|
|
|
if (desc->in_sizes[idx] < UINT_MAX)
|
|
return desc->in_sizes[idx];
|
|
|
|
if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA && idx == 2) {
|
|
struct nd_cmd_set_config_hdr *hdr = buf;
|
|
|
|
return hdr->in_length;
|
|
} else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) {
|
|
struct nd_cmd_vendor_hdr *hdr = buf;
|
|
|
|
return hdr->in_length;
|
|
} else if (cmd == ND_CMD_CALL) {
|
|
struct nd_cmd_pkg *pkg = buf;
|
|
|
|
return pkg->nd_size_in;
|
|
}
|
|
|
|
return UINT_MAX;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nd_cmd_in_size);
|
|
|
|
u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
|
|
const struct nd_cmd_desc *desc, int idx, const u32 *in_field,
|
|
const u32 *out_field, unsigned long remainder)
|
|
{
|
|
if (idx >= desc->out_num)
|
|
return UINT_MAX;
|
|
|
|
if (desc->out_sizes[idx] < UINT_MAX)
|
|
return desc->out_sizes[idx];
|
|
|
|
if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && idx == 1)
|
|
return in_field[1];
|
|
else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2)
|
|
return out_field[1];
|
|
else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 2) {
|
|
/*
|
|
* Per table 9-276 ARS Data in ACPI 6.1, out_field[1] is
|
|
* "Size of Output Buffer in bytes, including this
|
|
* field."
|
|
*/
|
|
if (out_field[1] < 4)
|
|
return 0;
|
|
/*
|
|
* ACPI 6.1 is ambiguous if 'status' is included in the
|
|
* output size. If we encounter an output size that
|
|
* overshoots the remainder by 4 bytes, assume it was
|
|
* including 'status'.
|
|
*/
|
|
if (out_field[1] - 4 == remainder)
|
|
return remainder;
|
|
return out_field[1] - 8;
|
|
} else if (cmd == ND_CMD_CALL) {
|
|
struct nd_cmd_pkg *pkg = (struct nd_cmd_pkg *) in_field;
|
|
|
|
return pkg->nd_size_out;
|
|
}
|
|
|
|
|
|
return UINT_MAX;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nd_cmd_out_size);
|
|
|
|
void wait_nvdimm_bus_probe_idle(struct device *dev)
|
|
{
|
|
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
|
|
|
|
do {
|
|
if (nvdimm_bus->probe_active == 0)
|
|
break;
|
|
nvdimm_bus_unlock(dev);
|
|
device_unlock(dev);
|
|
wait_event(nvdimm_bus->wait,
|
|
nvdimm_bus->probe_active == 0);
|
|
device_lock(dev);
|
|
nvdimm_bus_lock(dev);
|
|
} while (true);
|
|
}
|
|
|
|
static int nd_pmem_forget_poison_check(struct device *dev, void *data)
|
|
{
|
|
struct nd_cmd_clear_error *clear_err =
|
|
(struct nd_cmd_clear_error *)data;
|
|
struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL;
|
|
struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL;
|
|
struct nd_dax *nd_dax = is_nd_dax(dev) ? to_nd_dax(dev) : NULL;
|
|
struct nd_namespace_common *ndns = NULL;
|
|
struct nd_namespace_io *nsio;
|
|
resource_size_t offset = 0, end_trunc = 0, start, end, pstart, pend;
|
|
|
|
if (nd_dax || !dev->driver)
|
|
return 0;
|
|
|
|
start = clear_err->address;
|
|
end = clear_err->address + clear_err->cleared - 1;
|
|
|
|
if (nd_btt || nd_pfn || nd_dax) {
|
|
if (nd_btt)
|
|
ndns = nd_btt->ndns;
|
|
else if (nd_pfn)
|
|
ndns = nd_pfn->ndns;
|
|
else if (nd_dax)
|
|
ndns = nd_dax->nd_pfn.ndns;
|
|
|
|
if (!ndns)
|
|
return 0;
|
|
} else
|
|
ndns = to_ndns(dev);
|
|
|
|
nsio = to_nd_namespace_io(&ndns->dev);
|
|
pstart = nsio->res.start + offset;
|
|
pend = nsio->res.end - end_trunc;
|
|
|
|
if ((pstart >= start) && (pend <= end))
|
|
return -EBUSY;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
static int nd_ns_forget_poison_check(struct device *dev, void *data)
|
|
{
|
|
return device_for_each_child(dev, data, nd_pmem_forget_poison_check);
|
|
}
|
|
|
|
/* set_config requires an idle interleave set */
|
|
static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus,
|
|
struct nvdimm *nvdimm, unsigned int cmd, void *data)
|
|
{
|
|
struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
|
|
|
|
/* ask the bus provider if it would like to block this request */
|
|
if (nd_desc->clear_to_send) {
|
|
int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd, data);
|
|
|
|
if (rc)
|
|
return rc;
|
|
}
|
|
|
|
/* require clear error to go through the pmem driver */
|
|
if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR)
|
|
return device_for_each_child(&nvdimm_bus->dev, data,
|
|
nd_ns_forget_poison_check);
|
|
|
|
if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA)
|
|
return 0;
|
|
|
|
/* prevent label manipulation while the kernel owns label updates */
|
|
wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev);
|
|
if (atomic_read(&nvdimm->busy))
|
|
return -EBUSY;
|
|
return 0;
|
|
}
|
|
|
|
static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
|
|
int read_only, unsigned int ioctl_cmd, unsigned long arg)
|
|
{
|
|
struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
|
|
const struct nd_cmd_desc *desc = NULL;
|
|
unsigned int cmd = _IOC_NR(ioctl_cmd);
|
|
struct device *dev = &nvdimm_bus->dev;
|
|
void __user *p = (void __user *) arg;
|
|
char *out_env = NULL, *in_env = NULL;
|
|
const char *cmd_name, *dimm_name;
|
|
u32 in_len = 0, out_len = 0;
|
|
unsigned int func = cmd;
|
|
unsigned long cmd_mask;
|
|
struct nd_cmd_pkg pkg;
|
|
int rc, i, cmd_rc;
|
|
void *buf = NULL;
|
|
u64 buf_len = 0;
|
|
|
|
if (nvdimm) {
|
|
desc = nd_cmd_dimm_desc(cmd);
|
|
cmd_name = nvdimm_cmd_name(cmd);
|
|
cmd_mask = nvdimm->cmd_mask;
|
|
dimm_name = dev_name(&nvdimm->dev);
|
|
} else {
|
|
desc = nd_cmd_bus_desc(cmd);
|
|
cmd_name = nvdimm_bus_cmd_name(cmd);
|
|
cmd_mask = nd_desc->cmd_mask;
|
|
dimm_name = "bus";
|
|
}
|
|
|
|
/* Validate command family support against bus declared support */
|
|
if (cmd == ND_CMD_CALL) {
|
|
unsigned long *mask;
|
|
|
|
if (copy_from_user(&pkg, p, sizeof(pkg)))
|
|
return -EFAULT;
|
|
|
|
if (nvdimm) {
|
|
if (pkg.nd_family > NVDIMM_FAMILY_MAX)
|
|
return -EINVAL;
|
|
mask = &nd_desc->dimm_family_mask;
|
|
} else {
|
|
if (pkg.nd_family > NVDIMM_BUS_FAMILY_MAX)
|
|
return -EINVAL;
|
|
mask = &nd_desc->bus_family_mask;
|
|
}
|
|
|
|
if (!test_bit(pkg.nd_family, mask))
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (!desc ||
|
|
(desc->out_num + desc->in_num == 0) ||
|
|
cmd > ND_CMD_CALL ||
|
|
!test_bit(cmd, &cmd_mask))
|
|
return -ENOTTY;
|
|
|
|
/* fail write commands (when read-only) */
|
|
if (read_only)
|
|
switch (cmd) {
|
|
case ND_CMD_VENDOR:
|
|
case ND_CMD_SET_CONFIG_DATA:
|
|
case ND_CMD_ARS_START:
|
|
case ND_CMD_CLEAR_ERROR:
|
|
case ND_CMD_CALL:
|
|
dev_dbg(dev, "'%s' command while read-only.\n",
|
|
nvdimm ? nvdimm_cmd_name(cmd)
|
|
: nvdimm_bus_cmd_name(cmd));
|
|
return -EPERM;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
/* process an input envelope */
|
|
in_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
|
|
if (!in_env)
|
|
return -ENOMEM;
|
|
for (i = 0; i < desc->in_num; i++) {
|
|
u32 in_size, copy;
|
|
|
|
in_size = nd_cmd_in_size(nvdimm, cmd, desc, i, in_env);
|
|
if (in_size == UINT_MAX) {
|
|
dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n",
|
|
__func__, dimm_name, cmd_name, i);
|
|
rc = -ENXIO;
|
|
goto out;
|
|
}
|
|
if (in_len < ND_CMD_MAX_ENVELOPE)
|
|
copy = min_t(u32, ND_CMD_MAX_ENVELOPE - in_len, in_size);
|
|
else
|
|
copy = 0;
|
|
if (copy && copy_from_user(&in_env[in_len], p + in_len, copy)) {
|
|
rc = -EFAULT;
|
|
goto out;
|
|
}
|
|
in_len += in_size;
|
|
}
|
|
|
|
if (cmd == ND_CMD_CALL) {
|
|
func = pkg.nd_command;
|
|
dev_dbg(dev, "%s, idx: %llu, in: %u, out: %u, len %llu\n",
|
|
dimm_name, pkg.nd_command,
|
|
in_len, out_len, buf_len);
|
|
}
|
|
|
|
/* process an output envelope */
|
|
out_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
|
|
if (!out_env) {
|
|
rc = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
for (i = 0; i < desc->out_num; i++) {
|
|
u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i,
|
|
(u32 *) in_env, (u32 *) out_env, 0);
|
|
u32 copy;
|
|
|
|
if (out_size == UINT_MAX) {
|
|
dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n",
|
|
dimm_name, cmd_name, i);
|
|
rc = -EFAULT;
|
|
goto out;
|
|
}
|
|
if (out_len < ND_CMD_MAX_ENVELOPE)
|
|
copy = min_t(u32, ND_CMD_MAX_ENVELOPE - out_len, out_size);
|
|
else
|
|
copy = 0;
|
|
if (copy && copy_from_user(&out_env[out_len],
|
|
p + in_len + out_len, copy)) {
|
|
rc = -EFAULT;
|
|
goto out;
|
|
}
|
|
out_len += out_size;
|
|
}
|
|
|
|
buf_len = (u64) out_len + (u64) in_len;
|
|
if (buf_len > ND_IOCTL_MAX_BUFLEN) {
|
|
dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name,
|
|
cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN);
|
|
rc = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
buf = vmalloc(buf_len);
|
|
if (!buf) {
|
|
rc = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
if (copy_from_user(buf, p, buf_len)) {
|
|
rc = -EFAULT;
|
|
goto out;
|
|
}
|
|
|
|
device_lock(dev);
|
|
nvdimm_bus_lock(dev);
|
|
rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf);
|
|
if (rc)
|
|
goto out_unlock;
|
|
|
|
rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, &cmd_rc);
|
|
if (rc < 0)
|
|
goto out_unlock;
|
|
|
|
if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR && cmd_rc >= 0) {
|
|
struct nd_cmd_clear_error *clear_err = buf;
|
|
|
|
nvdimm_account_cleared_poison(nvdimm_bus, clear_err->address,
|
|
clear_err->cleared);
|
|
}
|
|
|
|
if (copy_to_user(p, buf, buf_len))
|
|
rc = -EFAULT;
|
|
|
|
out_unlock:
|
|
nvdimm_bus_unlock(dev);
|
|
device_unlock(dev);
|
|
out:
|
|
kfree(in_env);
|
|
kfree(out_env);
|
|
vfree(buf);
|
|
return rc;
|
|
}
|
|
|
|
enum nd_ioctl_mode {
|
|
BUS_IOCTL,
|
|
DIMM_IOCTL,
|
|
};
|
|
|
|
static int match_dimm(struct device *dev, void *data)
|
|
{
|
|
long id = (long) data;
|
|
|
|
if (is_nvdimm(dev)) {
|
|
struct nvdimm *nvdimm = to_nvdimm(dev);
|
|
|
|
return nvdimm->id == id;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
|
|
enum nd_ioctl_mode mode)
|
|
|
|
{
|
|
struct nvdimm_bus *nvdimm_bus, *found = NULL;
|
|
long id = (long) file->private_data;
|
|
struct nvdimm *nvdimm = NULL;
|
|
int rc, ro;
|
|
|
|
ro = ((file->f_flags & O_ACCMODE) == O_RDONLY);
|
|
mutex_lock(&nvdimm_bus_list_mutex);
|
|
list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
|
|
if (mode == DIMM_IOCTL) {
|
|
struct device *dev;
|
|
|
|
dev = device_find_child(&nvdimm_bus->dev,
|
|
file->private_data, match_dimm);
|
|
if (!dev)
|
|
continue;
|
|
nvdimm = to_nvdimm(dev);
|
|
found = nvdimm_bus;
|
|
} else if (nvdimm_bus->id == id) {
|
|
found = nvdimm_bus;
|
|
}
|
|
|
|
if (found) {
|
|
atomic_inc(&nvdimm_bus->ioctl_active);
|
|
break;
|
|
}
|
|
}
|
|
mutex_unlock(&nvdimm_bus_list_mutex);
|
|
|
|
if (!found)
|
|
return -ENXIO;
|
|
|
|
nvdimm_bus = found;
|
|
rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg);
|
|
|
|
if (nvdimm)
|
|
put_device(&nvdimm->dev);
|
|
if (atomic_dec_and_test(&nvdimm_bus->ioctl_active))
|
|
wake_up(&nvdimm_bus->wait);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static long bus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
|
{
|
|
return nd_ioctl(file, cmd, arg, BUS_IOCTL);
|
|
}
|
|
|
|
static long dimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
|
{
|
|
return nd_ioctl(file, cmd, arg, DIMM_IOCTL);
|
|
}
|
|
|
|
static int nd_open(struct inode *inode, struct file *file)
|
|
{
|
|
long minor = iminor(inode);
|
|
|
|
file->private_data = (void *) minor;
|
|
return 0;
|
|
}
|
|
|
|
static const struct file_operations nvdimm_bus_fops = {
|
|
.owner = THIS_MODULE,
|
|
.open = nd_open,
|
|
.unlocked_ioctl = bus_ioctl,
|
|
.compat_ioctl = compat_ptr_ioctl,
|
|
.llseek = noop_llseek,
|
|
};
|
|
|
|
static const struct file_operations nvdimm_fops = {
|
|
.owner = THIS_MODULE,
|
|
.open = nd_open,
|
|
.unlocked_ioctl = dimm_ioctl,
|
|
.compat_ioctl = compat_ptr_ioctl,
|
|
.llseek = noop_llseek,
|
|
};
|
|
|
|
int __init nvdimm_bus_init(void)
|
|
{
|
|
int rc;
|
|
|
|
rc = bus_register(&nvdimm_bus_type);
|
|
if (rc)
|
|
return rc;
|
|
|
|
rc = register_chrdev(0, "ndctl", &nvdimm_bus_fops);
|
|
if (rc < 0)
|
|
goto err_bus_chrdev;
|
|
nvdimm_bus_major = rc;
|
|
|
|
rc = register_chrdev(0, "dimmctl", &nvdimm_fops);
|
|
if (rc < 0)
|
|
goto err_dimm_chrdev;
|
|
nvdimm_major = rc;
|
|
|
|
nd_class = class_create(THIS_MODULE, "nd");
|
|
if (IS_ERR(nd_class)) {
|
|
rc = PTR_ERR(nd_class);
|
|
goto err_class;
|
|
}
|
|
|
|
rc = driver_register(&nd_bus_driver.drv);
|
|
if (rc)
|
|
goto err_nd_bus;
|
|
|
|
return 0;
|
|
|
|
err_nd_bus:
|
|
class_destroy(nd_class);
|
|
err_class:
|
|
unregister_chrdev(nvdimm_major, "dimmctl");
|
|
err_dimm_chrdev:
|
|
unregister_chrdev(nvdimm_bus_major, "ndctl");
|
|
err_bus_chrdev:
|
|
bus_unregister(&nvdimm_bus_type);
|
|
|
|
return rc;
|
|
}
|
|
|
|
void nvdimm_bus_exit(void)
|
|
{
|
|
driver_unregister(&nd_bus_driver.drv);
|
|
class_destroy(nd_class);
|
|
unregister_chrdev(nvdimm_bus_major, "ndctl");
|
|
unregister_chrdev(nvdimm_major, "dimmctl");
|
|
bus_unregister(&nvdimm_bus_type);
|
|
ida_destroy(&nd_ida);
|
|
}
|