// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
#include <linux/memremap.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/io.h>
#include "dax-private.h"
#include "bus.h"

static DEFINE_MUTEX(dax_bus_lock);

/*
 * All changes to the dax region configuration occur with this lock held
 * for write.
 */
DECLARE_RWSEM(dax_region_rwsem);

/*
 * All changes to the dax device configuration occur with this lock held
 * for write.
 */
DECLARE_RWSEM(dax_dev_rwsem);

#define DAX_NAME_LEN 30
struct dax_id {
	struct list_head list;
	char dev_name[DAX_NAME_LEN];
};
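
/*
 * Locking summary (restating the declarations above): dax_bus_lock
 * serializes updates to the per-driver dynamic id lists populated via
 * new_id/remove_id, while dax_region_rwsem and dax_dev_rwsem protect the
 * region and device configuration respectively and are taken for write on
 * any change.
 */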

static int dax_bus_uevent(const struct device *dev, struct kobj_uevent_env *env)
{
	/*
	 * We only ever expect to handle device-dax instances, i.e. the
	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
	 */
	return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
}

static struct dax_device_driver *to_dax_drv(struct device_driver *drv)
{
	return container_of(drv, struct dax_device_driver, drv);
}

static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv,
		const char *dev_name)
{
	struct dax_id *dax_id;

	lockdep_assert_held(&dax_bus_lock);

	list_for_each_entry(dax_id, &dax_drv->ids, list)
		if (sysfs_streq(dax_id->dev_name, dev_name))
			return dax_id;
	return NULL;
}

static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
{
	int match;

	mutex_lock(&dax_bus_lock);
	match = !!__dax_match_id(dax_drv, dev_name(dev));
	mutex_unlock(&dax_bus_lock);

	return match;
}
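
/*
 * dax_match_id() consults the driver's dynamic id list, which userspace
 * populates through the new_id/remove_id attributes implemented by
 * do_id_store() below; dax_match_type() supplies the default policy when
 * no explicit id override exists.
 */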
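
/*
 * Default driver selection by region type: regions flagged
 * IORESOURCE_DAX_KMEM (RAM-like memory, e.g. hmem- or CXL-provided) match
 * the dax_kmem driver so the memory is hotplugged by default, while all
 * other regions match the device-dax driver. If CONFIG_DEV_DAX_KMEM is
 * disabled, every device falls back to device mode.
 */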
static int dax_match_type(struct dax_device_driver *dax_drv, struct device *dev)
{
	enum dax_driver_type type = DAXDRV_DEVICE_TYPE;
	struct dev_dax *dev_dax = to_dev_dax(dev);

	if (dev_dax->region->res.flags & IORESOURCE_DAX_KMEM)
		type = DAXDRV_KMEM_TYPE;

	if (dax_drv->type == type)
		return 1;

	/* default to device mode if dax_kmem is disabled */
	if (dax_drv->type == DAXDRV_DEVICE_TYPE &&
	    !IS_ENABLED(CONFIG_DEV_DAX_KMEM))
		return 1;

	return 0;
}

enum id_action {
	ID_REMOVE,
	ID_ADD,
};

static ssize_t do_id_store(struct device_driver *drv, const char *buf,
		size_t count, enum id_action action)
{
	struct dax_device_driver *dax_drv = to_dax_drv(drv);
	unsigned int region_id, id;
	char devname[DAX_NAME_LEN];
	struct dax_id *dax_id;
	ssize_t rc = count;
	int fields;

	fields = sscanf(buf, "dax%d.%d", &region_id, &id);
	if (fields != 2)
		return -EINVAL;
	sprintf(devname, "dax%d.%d", region_id, id);
	if (!sysfs_streq(buf, devname))
		return -EINVAL;

	mutex_lock(&dax_bus_lock);
	dax_id = __dax_match_id(dax_drv, buf);
	if (!dax_id) {
		if (action == ID_ADD) {
			dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
			if (dax_id) {
				strscpy(dax_id->dev_name, buf, DAX_NAME_LEN);
				list_add(&dax_id->list, &dax_drv->ids);
			} else
				rc = -ENOMEM;
		}
	} else if (action == ID_REMOVE) {
		list_del(&dax_id->list);
		kfree(dax_id);
	}
	mutex_unlock(&dax_bus_lock);

	if (rc < 0)
		return rc;
	if (action == ID_ADD)
		rc = driver_attach(drv);
	if (rc)
		return rc;
	return count;
}

static ssize_t new_id_store(struct device_driver *drv, const char *buf,
		size_t count)
{
	return do_id_store(drv, buf, count, ID_ADD);
}
static DRIVER_ATTR_WO(new_id);

static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
		size_t count)
{
	return do_id_store(drv, buf, count, ID_REMOVE);
}
static DRIVER_ATTR_WO(remove_id);

static struct attribute *dax_drv_attrs[] = {
	&driver_attr_new_id.attr,
	&driver_attr_remove_id.attr,
	NULL,
};
ATTRIBUTE_GROUPS(dax_drv);
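
/*
 * Userspace example (illustrative, exact sysfs paths depend on the enabled
 * drivers): reassigning dax0.1 from device-dax to the kmem driver can be
 * done with the id overrides above, roughly:
 *
 *	# echo dax0.1 > /sys/bus/dax/drivers/device_dax/unbind
 *	# echo dax0.1 > /sys/bus/dax/drivers/kmem/new_id
 *
 * Adding to new_id triggers driver_attach(), so the device is picked up
 * immediately; daxctl reconfigure-device performs an equivalent transition.
 */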

static int dax_bus_match(struct device *dev, struct device_driver *drv);

/*
 * Static dax regions are regions created by an external subsystem
 * (nvdimm) where a single range is assigned. Its boundaries are established
 * by the external subsystem and are usually limited to one physical memory
 * range. For example, for PMEM it is usually defined by NVDIMM Namespace
 * boundaries (i.e. a single contiguous range).
 *
 * On dynamic dax regions, the assigned region can be partitioned by dax core
 * into multiple subdivisions. A subdivision is represented as one
 * /dev/daxN.M device composed of one or more potentially discontiguous ranges.
 *
 * When allocating a dax region, drivers must set whether it's static
 * (IORESOURCE_DAX_STATIC). On static dax devices, the @pgmap is pre-assigned
 * to dax core when calling devm_create_dev_dax(), whereas in dynamic dax
 * devices it is NULL but afterwards allocated by dax core on device ->probe().
 * Care is needed to make sure that dynamic dax devices are torn down with a
 * cleared @pgmap field (see kill_dev_dax()).
 */
static bool is_static(struct dax_region *dax_region)
{
	return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
}

bool static_dev_dax(struct dev_dax *dev_dax)
{
	return is_static(dev_dax->region);
}
EXPORT_SYMBOL_GPL(static_dev_dax);
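
/*
 * Illustrative sketch (not part of this file): a static-region provider
 * such as the PMEM driver passes IORESOURCE_DAX_STATIC when creating its
 * region, roughly:
 *
 *	struct range range = { .start = res->start, .end = res->end };
 *
 *	dax_region = alloc_dax_region(dev, region_id, &range, target_node,
 *			align, IORESOURCE_DAX_STATIC);
 *
 * RAM-like providers (hmem, CXL) leave the static flag off, so the region
 * stays dynamic and can be partitioned through the sysfs attributes below.
 */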

static u64 dev_dax_size(struct dev_dax *dev_dax)
{
	u64 size = 0;
	int i;

	lockdep_assert_held(&dax_dev_rwsem);

	for (i = 0; i < dev_dax->nr_range; i++)
		size += range_len(&dev_dax->ranges[i].range);

	return size;
}

static int dax_bus_probe(struct device *dev)
{
	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	int rc;
	u64 size;

	rc = down_read_interruptible(&dax_dev_rwsem);
	if (rc)
		return rc;
	size = dev_dax_size(dev_dax);
	up_read(&dax_dev_rwsem);

	if (size == 0 || dev_dax->id < 0)
		return -ENXIO;

	rc = dax_drv->probe(dev_dax);

	if (rc || is_static(dax_region))
		return rc;

	/*
	 * Track new seed creation only after successful probe of the
	 * previous seed.
	 */
	if (dax_region->seed == dev)
		dax_region->seed = NULL;

	return 0;
}

static void dax_bus_remove(struct device *dev)
{
	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
	struct dev_dax *dev_dax = to_dev_dax(dev);

	if (dax_drv->remove)
		dax_drv->remove(dev_dax);
}

static const struct bus_type dax_bus_type = {
	.name = "dax",
	.uevent = dax_bus_uevent,
	.match = dax_bus_match,
	.probe = dax_bus_probe,
	.remove = dax_bus_remove,
	.drv_groups = dax_drv_groups,
};

static int dax_bus_match(struct device *dev, struct device_driver *drv)
{
	struct dax_device_driver *dax_drv = to_dax_drv(drv);

	if (dax_match_id(dax_drv, dev))
		return 1;
	return dax_match_type(dax_drv, dev);
}

/*
 * Rely on the fact that drvdata is set before the attributes are
 * registered, and that the attributes are unregistered before drvdata
 * is cleared to assume that drvdata is always valid.
 */
static ssize_t id_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%d\n", dax_region->id);
}
static DEVICE_ATTR_RO(id);

static ssize_t region_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%llu\n",
			(unsigned long long)resource_size(&dax_region->res));
}
static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
		region_size_show, NULL);

static ssize_t region_align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%u\n", dax_region->align);
}
static struct device_attribute dev_attr_region_align =
		__ATTR(align, 0400, region_align_show, NULL);

#define for_each_dax_region_resource(dax_region, res) \
	for (res = (dax_region)->res.child; res; res = res->sibling)

static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
{
	resource_size_t size = resource_size(&dax_region->res);
	struct resource *res;

	lockdep_assert_held(&dax_region_rwsem);

	for_each_dax_region_resource(dax_region, res)
		size -= resource_size(res);
	return size;
}

static ssize_t available_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	unsigned long long size;
	int rc;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	size = dax_region_avail_size(dax_region);
	up_read(&dax_region_rwsem);

	return sysfs_emit(buf, "%llu\n", size);
}
static DEVICE_ATTR_RO(available_size);

static ssize_t seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct device *seed;
	ssize_t rc;

	if (is_static(dax_region))
		return -EINVAL;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	seed = dax_region->seed;
	rc = sysfs_emit(buf, "%s\n", seed ? dev_name(seed) : "");
	up_read(&dax_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RO(seed);

static ssize_t create_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct device *youngest;
	ssize_t rc;

	if (is_static(dax_region))
		return -EINVAL;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	youngest = dax_region->youngest;
	rc = sysfs_emit(buf, "%s\n", youngest ? dev_name(youngest) : "");
	up_read(&dax_region_rwsem);

	return rc;
}

static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data);

static ssize_t create_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	unsigned long long avail;
	ssize_t rc;
	int val;

	if (is_static(dax_region))
		return -EINVAL;

	rc = kstrtoint(buf, 0, &val);
	if (rc)
		return rc;
	if (val != 1)
		return -EINVAL;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	avail = dax_region_avail_size(dax_region);
	if (avail == 0)
		rc = -ENOSPC;
	else {
		struct dev_dax_data data = {
			.dax_region = dax_region,
			.size = 0,
			.id = -1,
			.memmap_on_memory = false,
		};
		struct dev_dax *dev_dax = __devm_create_dev_dax(&data);

		if (IS_ERR(dev_dax))
			rc = PTR_ERR(dev_dax);
		else {
			/*
			 * In support of crafting multiple new devices
			 * simultaneously multiple seeds can be created,
			 * but only the first one that has not been
			 * successfully bound is tracked as the region
			 * seed.
			 */
			if (!dax_region->seed)
				dax_region->seed = &dev_dax->dev;
			dax_region->youngest = &dev_dax->dev;
			rc = len;
		}
	}
	up_write(&dax_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RW(create);
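
/*
 * Userspace example (illustrative): on a dynamic region, a new 0-sized
 * seed device is created by writing "1" to the region's create attribute,
 * after which it can be sized and bound, e.g.:
 *
 *	# echo 1 > /sys/bus/dax/devices/dax0.0/../dax_region/create
 *
 * daxctl create-device provides the same operation.
 */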

void kill_dev_dax(struct dev_dax *dev_dax)
{
	struct dax_device *dax_dev = dev_dax->dax_dev;
	struct inode *inode = dax_inode(dax_dev);

	kill_dax(dax_dev);
	unmap_mapping_range(inode->i_mapping, 0, 0, 1);

	/*
	 * Dynamic dax regions have the pgmap allocated via devm_kzalloc()
	 * and thus freed by devm. Clear the pgmap to not have stale pgmap
	 * ranges on probe() from previous reconfigurations of region devices.
	 */
	if (!static_dev_dax(dev_dax))
		dev_dax->pgmap = NULL;
}
EXPORT_SYMBOL_GPL(kill_dev_dax);

static void trim_dev_dax_range(struct dev_dax *dev_dax)
{
	int i = dev_dax->nr_range - 1;
	struct range *range = &dev_dax->ranges[i].range;
	struct dax_region *dax_region = dev_dax->region;

	lockdep_assert_held_write(&dax_region_rwsem);
	dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
		(unsigned long long)range->start,
		(unsigned long long)range->end);

	__release_region(&dax_region->res, range->start, range_len(range));
	if (--dev_dax->nr_range == 0) {
		kfree(dev_dax->ranges);
		dev_dax->ranges = NULL;
	}
}

static void free_dev_dax_ranges(struct dev_dax *dev_dax)
{
	while (dev_dax->nr_range)
		trim_dev_dax_range(dev_dax);
}

static void unregister_dev_dax(void *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	dev_dbg(dev, "%s\n", __func__);

	down_write(&dax_region_rwsem);
	kill_dev_dax(dev_dax);
	device_del(dev);
	free_dev_dax_ranges(dev_dax);
	put_device(dev);
	up_write(&dax_region_rwsem);
}

static void dax_region_free(struct kref *kref)
{
	struct dax_region *dax_region;

	dax_region = container_of(kref, struct dax_region, kref);
	kfree(dax_region);
}

static void dax_region_put(struct dax_region *dax_region)
{
	kref_put(&dax_region->kref, dax_region_free);
}

/* a return value >= 0 indicates this invocation invalidated the id */
static int __free_dev_dax_id(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region;
	int rc = dev_dax->id;

	lockdep_assert_held_write(&dax_dev_rwsem);

	if (!dev_dax->dyn_id || dev_dax->id < 0)
		return -1;
	dax_region = dev_dax->region;
	ida_free(&dax_region->ida, dev_dax->id);
	dax_region_put(dax_region);
	dev_dax->id = -1;
	return rc;
}

static int free_dev_dax_id(struct dev_dax *dev_dax)
{
	int rc;

	rc = down_write_killable(&dax_dev_rwsem);
	if (rc)
		return rc;
	rc = __free_dev_dax_id(dev_dax);
	up_write(&dax_dev_rwsem);
	return rc;
}

static int alloc_dev_dax_id(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region = dev_dax->region;
	int id;

	id = ida_alloc(&dax_region->ida, GFP_KERNEL);
	if (id < 0)
		return id;
	kref_get(&dax_region->kref);
	dev_dax->dyn_id = true;
	dev_dax->id = id;
	return id;
}

static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct dev_dax *dev_dax;
	struct device *victim;
	bool do_del = false;
	int rc;

	if (is_static(dax_region))
		return -EINVAL;

	victim = device_find_child_by_name(dax_region->dev, buf);
	if (!victim)
		return -ENXIO;

	device_lock(dev);
	device_lock(victim);
	dev_dax = to_dev_dax(victim);
	down_write(&dax_dev_rwsem);
	if (victim->driver || dev_dax_size(dev_dax))
		rc = -EBUSY;
	else {
		/*
		 * Invalidate the device so it does not become active
		 * again, but always preserve device-id-0 so that
		 * /sys/bus/dax/ is guaranteed to be populated while any
		 * dax_region is registered.
		 */
		if (dev_dax->id > 0) {
			do_del = __free_dev_dax_id(dev_dax) >= 0;
			rc = len;
			if (dax_region->seed == victim)
				dax_region->seed = NULL;
			if (dax_region->youngest == victim)
				dax_region->youngest = NULL;
		} else
			rc = -EBUSY;
	}
	up_write(&dax_dev_rwsem);
	device_unlock(victim);

	/* won the race to invalidate the device, clean it up */
	if (do_del)
		devm_release_action(dev, unregister_dev_dax, victim);
	device_unlock(dev);
	put_device(victim);

	return rc;
}
static DEVICE_ATTR_WO(delete);

static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a,
		int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct dax_region *dax_region = dev_get_drvdata(dev);

	if (is_static(dax_region))
		if (a == &dev_attr_available_size.attr
				|| a == &dev_attr_create.attr
				|| a == &dev_attr_seed.attr
				|| a == &dev_attr_delete.attr)
			return 0;
	return a->mode;
}

static struct attribute *dax_region_attributes[] = {
	&dev_attr_available_size.attr,
	&dev_attr_region_size.attr,
	&dev_attr_region_align.attr,
	&dev_attr_create.attr,
	&dev_attr_seed.attr,
	&dev_attr_delete.attr,
	&dev_attr_id.attr,
	NULL,
};

static const struct attribute_group dax_region_attribute_group = {
	.name = "dax_region",
	.attrs = dax_region_attributes,
	.is_visible = dax_region_visible,
};

static const struct attribute_group *dax_region_attribute_groups[] = {
	&dax_region_attribute_group,
	NULL,
};

static void dax_region_unregister(void *region)
{
	struct dax_region *dax_region = region;

	sysfs_remove_groups(&dax_region->dev->kobj,
			dax_region_attribute_groups);
	dax_region_put(dax_region);
}

struct dax_region *alloc_dax_region(struct device *parent, int region_id,
		struct range *range, int target_node, unsigned int align,
		unsigned long flags)
{
	struct dax_region *dax_region;

	/*
	 * The DAX core assumes that it can store its private data in
	 * parent->driver_data. This WARN is a reminder / safeguard for
	 * developers of device-dax drivers.
	 */
	if (dev_get_drvdata(parent)) {
		dev_WARN(parent, "dax core failed to setup private data\n");
		return NULL;
	}

	if (!IS_ALIGNED(range->start, align)
			|| !IS_ALIGNED(range_len(range), align))
		return NULL;

	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
	if (!dax_region)
		return NULL;

	dev_set_drvdata(parent, dax_region);
	kref_init(&dax_region->kref);
	dax_region->id = region_id;
	dax_region->align = align;
	dax_region->dev = parent;
	dax_region->target_node = target_node;
	ida_init(&dax_region->ida);
	dax_region->res = (struct resource) {
		.start = range->start,
		.end = range->end,
		.flags = IORESOURCE_MEM | flags,
	};

	if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
		kfree(dax_region);
		return NULL;
	}

	if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
		return NULL;
	return dax_region;
}
EXPORT_SYMBOL_GPL(alloc_dax_region);

static void dax_mapping_release(struct device *dev)
{
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct device *parent = dev->parent;
	struct dev_dax *dev_dax = to_dev_dax(parent);

	ida_free(&dev_dax->ida, mapping->id);
	kfree(mapping);
	put_device(parent);
}

static void unregister_dax_mapping(void *data)
{
	struct device *dev = data;
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);

	dev_dbg(dev, "%s\n", __func__);

	dev_dax->ranges[mapping->range_id].mapping = NULL;
	mapping->range_id = -1;

	device_unregister(dev);
}

static struct dev_dax_range *get_dax_range(struct device *dev)
{
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);
	int rc;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return NULL;
	if (mapping->range_id < 0) {
		up_write(&dax_region_rwsem);
		return NULL;
	}

	return &dev_dax->ranges[mapping->range_id];
}

static void put_dax_range(void)
{
	up_write(&dax_region_rwsem);
}

static ssize_t start_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#llx\n", dax_range->range.start);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(start, 0400, start_show, NULL);

static ssize_t end_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#llx\n", dax_range->range.end);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(end, 0400, end_show, NULL);

static ssize_t pgoff_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#lx\n", dax_range->pgoff);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL);

static struct attribute *dax_mapping_attributes[] = {
	&dev_attr_start.attr,
	&dev_attr_end.attr,
	&dev_attr_page_offset.attr,
	NULL,
};

static const struct attribute_group dax_mapping_attribute_group = {
	.attrs = dax_mapping_attributes,
};

static const struct attribute_group *dax_mapping_attribute_groups[] = {
	&dax_mapping_attribute_group,
	NULL,
};

static const struct device_type dax_mapping_type = {
	.release = dax_mapping_release,
	.groups = dax_mapping_attribute_groups,
};
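
/*
 * Each allocated range of a dax device is published as a "mappingN" child
 * device with read-only (0400) start, end and page_offset attributes, e.g.
 * /sys/bus/dax/devices/dax0.1/mapping0/start (path illustrative).
 * devm_register_dax_mapping() below creates these children once the
 * dev_dax device itself is registered.
 */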

static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
{
	struct dax_region *dax_region = dev_dax->region;
	struct dax_mapping *mapping;
	struct device *dev;
	int rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
				"region disabled\n"))
		return -ENXIO;

	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
	if (!mapping)
		return -ENOMEM;
	mapping->range_id = range_id;
	mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL);
	if (mapping->id < 0) {
		kfree(mapping);
		return -ENOMEM;
	}
	dev_dax->ranges[range_id].mapping = mapping;
	dev = &mapping->dev;
	device_initialize(dev);
	dev->parent = &dev_dax->dev;
	get_device(dev->parent);
	dev->type = &dax_mapping_type;
	dev_set_name(dev, "mapping%d", mapping->id);
	rc = device_add(dev);
	if (rc) {
		put_device(dev);
		return rc;
	}

	rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping,
			dev);
	if (rc)
		return rc;
	return 0;
}

static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
		resource_size_t size)
{
	struct dax_region *dax_region = dev_dax->region;
	struct resource *res = &dax_region->res;
	struct device *dev = &dev_dax->dev;
	struct dev_dax_range *ranges;
	unsigned long pgoff = 0;
	struct resource *alloc;
	int i, rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	/* handle the seed alloc special case */
	if (!size) {
		if (dev_WARN_ONCE(dev, dev_dax->nr_range,
					"0-size allocation must be first\n"))
			return -EBUSY;
		/* nr_range == 0 is elsewhere special cased as 0-size device */
		return 0;
	}

	alloc = __request_region(res, start, size, dev_name(dev), 0);
	if (!alloc)
		return -ENOMEM;

	ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
			* (dev_dax->nr_range + 1), GFP_KERNEL);
	if (!ranges) {
		__release_region(res, alloc->start, resource_size(alloc));
		return -ENOMEM;
	}

	for (i = 0; i < dev_dax->nr_range; i++)
		pgoff += PHYS_PFN(range_len(&ranges[i].range));
	dev_dax->ranges = ranges;
	ranges[dev_dax->nr_range++] = (struct dev_dax_range) {
		.pgoff = pgoff,
		.range = {
			.start = alloc->start,
			.end = alloc->end,
		},
	};

	dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
			&alloc->start, &alloc->end);
	/*
	 * A dev_dax instance must be registered before mapping device
	 * children can be added. Defer to devm_create_dev_dax() to add
	 * the initial mapping device.
	 */
	if (!device_is_registered(&dev_dax->dev))
		return 0;

	rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1);
	if (rc)
		trim_dev_dax_range(dev_dax);

	return rc;
}

static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
{
	int last_range = dev_dax->nr_range - 1;
	struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
	bool is_shrink = resource_size(res) > size;
	struct range *range = &dax_range->range;
	struct device *dev = &dev_dax->dev;
	int rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
		return -EINVAL;

	rc = adjust_resource(res, range->start, size);
	if (rc)
		return rc;

	*range = (struct range) {
		.start = range->start,
		.end = range->start + size - 1,
	};

	dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend",
			last_range, (unsigned long long) range->start,
			(unsigned long long) range->end);

	return 0;
}

static ssize_t size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	unsigned long long size;
	int rc;

	rc = down_read_interruptible(&dax_dev_rwsem);
	if (rc)
		return rc;
	size = dev_dax_size(dev_dax);
	up_read(&dax_dev_rwsem);

	return sysfs_emit(buf, "%llu\n", size);
}

static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size)
{
	/*
	 * The minimum mapping granularity for a device instance is a
	 * single subsection, unless the arch says otherwise.
	 */
	return IS_ALIGNED(size, max_t(unsigned long, dev_dax->align, memremap_compat_align()));
}
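
/*
 * Worked example (illustrative): on x86_64 memremap_compat_align()
 * resolves to the 2 MiB subsection size, so with the default 2 MiB device
 * align a 1 GiB allocation is accepted while 1 GiB + 4 KiB is rejected as
 * misaligned; with a 1 GiB device align, only whole gigabytes pass the
 * check.
 */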

static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t to_shrink = dev_dax_size(dev_dax) - size;
	struct dax_region *dax_region = dev_dax->region;
	struct device *dev = &dev_dax->dev;
	int i;

	for (i = dev_dax->nr_range - 1; i >= 0; i--) {
		struct range *range = &dev_dax->ranges[i].range;
		struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
		struct resource *adjust = NULL, *res;
		resource_size_t shrink;

		shrink = min_t(u64, to_shrink, range_len(range));
		if (shrink >= range_len(range)) {
			devm_release_action(dax_region->dev,
					unregister_dax_mapping, &mapping->dev);
			trim_dev_dax_range(dev_dax);
			to_shrink -= shrink;
			if (!to_shrink)
				break;
			continue;
		}

		for_each_dax_region_resource(dax_region, res)
			if (strcmp(res->name, dev_name(dev)) == 0
					&& res->start == range->start) {
				adjust = res;
				break;
			}

		if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
					"failed to find matching resource\n"))
			return -ENXIO;
		return adjust_dev_dax_range(dev_dax, adjust, range_len(range)
				- shrink);
	}
	return 0;
}

/*
 * Only allow adjustments that preserve the relative pgoff of existing
 * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff.
 */
static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
{
	struct dev_dax_range *last;
	int i;

	if (dev_dax->nr_range == 0)
		return false;
	if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0)
		return false;
	last = &dev_dax->ranges[dev_dax->nr_range - 1];
	if (last->range.start != res->start || last->range.end != res->end)
		return false;
	for (i = 0; i < dev_dax->nr_range - 1; i++) {
		struct dev_dax_range *dax_range = &dev_dax->ranges[i];

		if (dax_range->pgoff > last->pgoff)
			return false;
	}

	return true;
}

static ssize_t dev_dax_resize(struct dax_region *dax_region,
		struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t avail = dax_region_avail_size(dax_region), to_alloc;
	resource_size_t dev_size = dev_dax_size(dev_dax);
	struct resource *region_res = &dax_region->res;
	struct device *dev = &dev_dax->dev;
	struct resource *res, *first;
	resource_size_t alloc = 0;
	int rc;

	if (dev->driver)
		return -EBUSY;
	if (size == dev_size)
		return 0;
	if (size > dev_size && size - dev_size > avail)
		return -ENOSPC;
	if (size < dev_size)
		return dev_dax_shrink(dev_dax, size);

	to_alloc = size - dev_size;
	if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc),
			"resize of %pa misaligned\n", &to_alloc))
		return -ENXIO;

	/*
	 * Expand the device into the unused portion of the region. This
	 * may involve adjusting the end of an existing resource, or
	 * allocating a new resource.
	 */
retry:
	first = region_res->child;
	if (!first)
		return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc);

	rc = -ENOSPC;
	for (res = first; res; res = res->sibling) {
		struct resource *next = res->sibling;

		/* space at the beginning of the region */
		if (res == first && res->start > dax_region->res.start) {
			alloc = min(res->start - dax_region->res.start, to_alloc);
			rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc);
			break;
		}

		alloc = 0;
		/* space between allocations */
		if (next && next->start > res->end + 1)
			alloc = min(next->start - (res->end + 1), to_alloc);

		/* space at the end of the region */
		if (!alloc && !next && res->end < region_res->end)
			alloc = min(region_res->end - res->end, to_alloc);

		if (!alloc)
			continue;

		if (adjust_ok(dev_dax, res)) {
			rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc);
			break;
		}
		rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc);
		break;
	}
	if (rc)
		return rc;
	to_alloc -= alloc;
	if (to_alloc)
		goto retry;
	return 0;
}

static ssize_t size_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	ssize_t rc;
	unsigned long long val;
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;

	rc = kstrtoull(buf, 0, &val);
	if (rc)
		return rc;

	if (!alloc_is_aligned(dev_dax, val)) {
		dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val);
		return -EINVAL;
	}

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	if (!dax_region->dev->driver) {
		rc = -ENXIO;
		goto err_region;
	}
	rc = down_write_killable(&dax_dev_rwsem);
	if (rc)
		goto err_dev;

	rc = dev_dax_resize(dax_region, dev_dax, val);

err_dev:
	up_write(&dax_dev_rwsem);
err_region:
	up_write(&dax_region_rwsem);

	if (rc == 0)
		return len;
	return rc;
}
static DEVICE_ATTR_RW(size);
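
/*
 * Userspace example (illustrative): growing an unbound dynamic device to
 * 16 GiB, assuming the region has space and the size meets the alignment
 * rule above:
 *
 *	# echo dax0.1 > /sys/bus/dax/drivers/device_dax/unbind
 *	# echo $((16 << 30)) > /sys/bus/dax/devices/dax0.1/size
 *
 * Writing a smaller value shrinks the device, trimming or adjusting its
 * trailing ranges; writes fail with -EBUSY while a driver is attached.
 */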

static ssize_t range_parse(const char *opt, size_t len, struct range *range)
{
	unsigned long long addr = 0;
	char *start, *end, *str;
	ssize_t rc = -EINVAL;

	str = kstrdup(opt, GFP_KERNEL);
	if (!str)
		return rc;

	end = str;
	start = strsep(&end, "-");
	if (!start || !end)
		goto err;

	rc = kstrtoull(start, 16, &addr);
	if (rc)
		goto err;
	range->start = addr;

	rc = kstrtoull(end, 16, &addr);
	if (rc)
		goto err;
	range->end = addr;

err:
	kfree(str);
	return rc;
}

static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	size_t to_alloc;
	struct range r;
	ssize_t rc;

	rc = range_parse(buf, len, &r);
	if (rc)
		return rc;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	if (!dax_region->dev->driver) {
		up_write(&dax_region_rwsem);
		return rc;
	}
	rc = down_write_killable(&dax_dev_rwsem);
	if (rc) {
		up_write(&dax_region_rwsem);
		return rc;
	}

	to_alloc = range_len(&r);
	if (alloc_is_aligned(dev_dax, to_alloc))
		rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
	up_write(&dax_dev_rwsem);
	up_write(&dax_region_rwsem);

	return rc == 0 ? len : rc;
}
static DEVICE_ATTR_WO(mapping);
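
/*
 * The mapping attribute takes a hexadecimal "<start>-<end>" physical range
 * as parsed by range_parse() above, e.g. (illustrative addresses):
 *
 *	# echo 0x200000000-0x23fffffff > /sys/bus/dax/devices/dax0.1/mapping
 *
 * which appends that range to the device if it is properly aligned and
 * available in the region.
 */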

static ssize_t align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sysfs_emit(buf, "%d\n", dev_dax->align);
}

static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax)
{
	struct device *dev = &dev_dax->dev;
	int i;

	for (i = 0; i < dev_dax->nr_range; i++) {
		size_t len = range_len(&dev_dax->ranges[i].range);

		if (!alloc_is_aligned(dev_dax, len)) {
			dev_dbg(dev, "%s: align %u invalid for range %d\n",
				__func__, dev_dax->align, i);
			return -EINVAL;
		}
	}

	return 0;
}

static ssize_t align_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	unsigned long val, align_save;
	ssize_t rc;

	rc = kstrtoul(buf, 0, &val);
	if (rc)
		return -ENXIO;

	if (!dax_align_valid(val))
		return -EINVAL;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	if (!dax_region->dev->driver) {
		up_write(&dax_region_rwsem);
		return -ENXIO;
	}

	rc = down_write_killable(&dax_dev_rwsem);
	if (rc) {
		up_write(&dax_region_rwsem);
		return rc;
	}
	if (dev->driver) {
		rc = -EBUSY;
		goto out_unlock;
	}

	align_save = dev_dax->align;
	dev_dax->align = val;
	rc = dev_dax_validate_align(dev_dax);
	if (rc)
		dev_dax->align = align_save;
out_unlock:
	up_write(&dax_dev_rwsem);
	up_write(&dax_region_rwsem);
	return rc == 0 ? len : rc;
}
static DEVICE_ATTR_RW(align);
|
|
|
|
|
2019-02-21 03:39:36 +08:00
|
|
|
static int dev_dax_target_node(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region = dev_dax->region;

	return dax_region->target_node;
}

static ssize_t target_node_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sysfs_emit(buf, "%d\n", dev_dax_target_node(dev_dax));
}
static DEVICE_ATTR_RO(target_node);

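/*
 * Report the base physical address of the instance: the start of its first
 * allocated range, or the region base if nothing has been allocated yet.
 */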
static ssize_t resource_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	unsigned long long start;

	if (dev_dax->nr_range < 1)
		start = dax_region->res.start;
	else
		start = dev_dax->ranges[0].range.start;

	return sysfs_emit(buf, "%#llx\n", start);
}
static DEVICE_ATTR(resource, 0400, resource_show, NULL);

static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	/*
	 * We only ever expect to handle device-dax instances, i.e. the
	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
	 */
	return sysfs_emit(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
}
static DEVICE_ATTR_RO(modalias);

static ssize_t numa_node_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);

static ssize_t memmap_on_memory_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sysfs_emit(buf, "%d\n", dev_dax->memmap_on_memory);
}

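/*
 * Control whether the memory map (struct page array) for this instance is
 * placed in the hotplugged memory itself. The setting cannot be changed
 * while the device is bound to the kmem driver.
 */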
static ssize_t memmap_on_memory_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	bool val;
	int rc;

	rc = kstrtobool(buf, &val);
	if (rc)
		return rc;

	if (val == true && !mhp_supports_memmap_on_memory()) {
		dev_dbg(dev, "memmap_on_memory is not available\n");
		return -EOPNOTSUPP;
	}

	rc = down_write_killable(&dax_dev_rwsem);
	if (rc)
		return rc;

	if (dev_dax->memmap_on_memory != val && dev->driver &&
	    to_dax_drv(dev->driver)->type == DAXDRV_KMEM_TYPE) {
		up_write(&dax_dev_rwsem);
		return -EBUSY;
	}

	dev_dax->memmap_on_memory = val;
	up_write(&dax_dev_rwsem);

	return len;
}
static DEVICE_ATTR_RW(memmap_on_memory);

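/*
 * Tailor attribute visibility to the region type: hide 'target_node' when
 * it is unknown, hide 'numa_node' on !CONFIG_NUMA builds, hide 'mapping'
 * for static regions, and make 'align' and 'size' read-only there as well.
 */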
static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;

	if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
		return 0;
	if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
		return 0;
	if (a == &dev_attr_mapping.attr && is_static(dax_region))
		return 0;
	if ((a == &dev_attr_align.attr ||
	     a == &dev_attr_size.attr) && is_static(dax_region))
		return 0444;
	return a->mode;
}

static struct attribute *dev_dax_attributes[] = {
	&dev_attr_modalias.attr,
	&dev_attr_size.attr,
	&dev_attr_mapping.attr,
	&dev_attr_target_node.attr,
	&dev_attr_align.attr,
	&dev_attr_resource.attr,
	&dev_attr_numa_node.attr,
	&dev_attr_memmap_on_memory.attr,
	NULL,
};

static const struct attribute_group dev_dax_attribute_group = {
	.attrs = dev_dax_attributes,
	.is_visible = dev_dax_visible,
};

static const struct attribute_group *dax_attribute_groups[] = {
	&dev_dax_attribute_group,
	NULL,
};

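/*
 * Final teardown of a dev_dax device: drop the dax_device reference,
 * release the instance id, and free the pagemap and dev_dax allocations.
 */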
static void dev_dax_release(struct device *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_device *dax_dev = dev_dax->dax_dev;

	put_dax(dax_dev);
	free_dev_dax_id(dev_dax);
	kfree(dev_dax->pgmap);
	kfree(dev_dax);
}

static const struct device_type dev_dax_type = {
	.release = dev_dax_release,
	.groups = dax_attribute_groups,
};

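/*
 * Allocate and register a dev_dax instance in @data->dax_region: assign or
 * allocate an instance id, reserve the initial range, create the backing
 * dax_device, and add the device to the dax bus. The caller is expected to
 * hold dax_region_rwsem for write.
 */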
static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data)
{
	struct dax_region *dax_region = data->dax_region;
	struct device *parent = dax_region->dev;
	struct dax_device *dax_dev;
	struct dev_dax *dev_dax;
	struct inode *inode;
	struct device *dev;
	int rc;

	dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
	if (!dev_dax)
		return ERR_PTR(-ENOMEM);

	dev_dax->region = dax_region;
	if (is_static(dax_region)) {
		if (dev_WARN_ONCE(parent, data->id < 0,
				"dynamic id specified to static region\n")) {
			rc = -EINVAL;
			goto err_id;
		}

		dev_dax->id = data->id;
	} else {
		if (dev_WARN_ONCE(parent, data->id >= 0,
				"static id specified to dynamic region\n")) {
			rc = -EINVAL;
			goto err_id;
		}

		rc = alloc_dev_dax_id(dev_dax);
		if (rc < 0)
			goto err_id;
	}

	dev = &dev_dax->dev;
	device_initialize(dev);
	dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);

	rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size);
	if (rc)
		goto err_range;

	if (data->pgmap) {
		dev_WARN_ONCE(parent, !is_static(dax_region),
			"custom dev_pagemap requires a static dax_region\n");

		dev_dax->pgmap = kmemdup(data->pgmap,
				sizeof(struct dev_pagemap), GFP_KERNEL);
		if (!dev_dax->pgmap) {
			rc = -ENOMEM;
			goto err_pgmap;
		}
	}

	/*
	 * No dax_operations since there is no access to this device outside of
	 * mmap of the resulting character device.
	 */
	dax_dev = alloc_dax(dev_dax, NULL);
	if (IS_ERR(dax_dev)) {
		rc = PTR_ERR(dax_dev);
		goto err_alloc_dax;
	}
	set_dax_synchronous(dax_dev);
	set_dax_nocache(dax_dev);
	set_dax_nomc(dax_dev);

	/* a device_dax instance is dead while the driver is not attached */
	kill_dax(dax_dev);

	dev_dax->dax_dev = dax_dev;
	dev_dax->target_node = dax_region->target_node;
	dev_dax->align = dax_region->align;
	ida_init(&dev_dax->ida);

	dev_dax->memmap_on_memory = data->memmap_on_memory;

	inode = dax_inode(dax_dev);
	dev->devt = inode->i_rdev;
	dev->bus = &dax_bus_type;
	dev->parent = parent;
	dev->type = &dev_dax_type;

	rc = device_add(dev);
	if (rc) {
		kill_dev_dax(dev_dax);
		put_device(dev);
		return ERR_PTR(rc);
	}

	rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
	if (rc)
		return ERR_PTR(rc);

	/* register mapping device for the initial allocation range */
	if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) {
		rc = devm_register_dax_mapping(dev_dax, 0);
		if (rc)
			return ERR_PTR(rc);
	}

	return dev_dax;

err_alloc_dax:
	kfree(dev_dax->pgmap);
err_pgmap:
	free_dev_dax_ranges(dev_dax);
err_range:
	free_dev_dax_id(dev_dax);
err_id:
	kfree(dev_dax);

	return ERR_PTR(rc);
}

struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
{
	struct dev_dax *dev_dax;

	down_write(&dax_region_rwsem);
	dev_dax = __devm_create_dev_dax(data);
	up_write(&dax_region_rwsem);

	return dev_dax;
}
EXPORT_SYMBOL_GPL(devm_create_dev_dax);

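/*
 * Register a device-dax driver on the dax bus; typically invoked through
 * the dax_driver_register() wrapper, which supplies the owning module and
 * module name.
 */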
int __dax_driver_register(struct dax_device_driver *dax_drv,
		struct module *module, const char *mod_name)
{
	struct device_driver *drv = &dax_drv->drv;

	/*
	 * dax_bus_probe() calls dax_drv->probe() unconditionally.
	 * So better be safe than sorry and ensure it is provided.
	 */
	if (!dax_drv->probe)
		return -EINVAL;

	INIT_LIST_HEAD(&dax_drv->ids);
	drv->owner = module;
	drv->name = mod_name;
	drv->mod_name = mod_name;
	drv->bus = &dax_bus_type;

	return driver_register(drv);
}
EXPORT_SYMBOL_GPL(__dax_driver_register);

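/*
 * Unregister a device-dax driver and free any device-name ids that were
 * dynamically added to its match list.
 */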
void dax_driver_unregister(struct dax_device_driver *dax_drv)
{
	struct device_driver *drv = &dax_drv->drv;
	struct dax_id *dax_id, *_id;

	mutex_lock(&dax_bus_lock);
	list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
		list_del(&dax_id->list);
		kfree(dax_id);
	}
	mutex_unlock(&dax_bus_lock);
	driver_unregister(drv);
}
EXPORT_SYMBOL_GPL(dax_driver_unregister);

int __init dax_bus_init(void)
{
	return bus_register(&dax_bus_type);
}

void __exit dax_bus_exit(void)
{
	bus_unregister(&dax_bus_type);
}