2019-06-04 16:11:33 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2016-11-17 04:46:13 +08:00
|
|
|
/*
|
|
|
|
* Mediated device Core Driver
|
|
|
|
*
|
|
|
|
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
|
|
|
* Author: Neo Jia <cjia@nvidia.com>
|
|
|
|
* Kirti Wankhede <kwankhede@nvidia.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/sysfs.h>
|
|
|
|
#include <linux/mdev.h>
|
|
|
|
|
|
|
|
#include "mdev_private.h"
|
|
|
|
|
|
|
|
#define DRIVER_VERSION "0.1"
|
|
|
|
#define DRIVER_AUTHOR "NVIDIA Corporation"
|
|
|
|
#define DRIVER_DESC "Mediated device Core Driver"
|
|
|
|
|
|
|
|
static struct class_compat *mdev_bus_compat_class;
|
|
|
|
|
2016-12-30 23:13:33 +08:00
|
|
|
static LIST_HEAD(mdev_list);
|
|
|
|
static DEFINE_MUTEX(mdev_list_lock);
|
|
|
|
|
2019-06-07 00:52:33 +08:00
|
|
|
/* Caller must hold parent unreg_sem read or write lock */
|
|
|
|
static void mdev_device_remove_common(struct mdev_device *mdev)
|
|
|
|
{
|
2021-04-07 03:40:33 +08:00
|
|
|
struct mdev_parent *parent = mdev->type->parent;
|
2019-06-07 00:52:33 +08:00
|
|
|
|
2021-04-07 03:40:28 +08:00
|
|
|
mdev_remove_sysfs_files(mdev);
|
2019-06-07 00:52:33 +08:00
|
|
|
device_del(&mdev->dev);
|
|
|
|
lockdep_assert_held(&parent->unreg_sem);
|
|
|
|
/* Balances with device_initialize() */
|
|
|
|
put_device(&mdev->dev);
|
|
|
|
}
|
|
|
|
|
2016-11-17 04:46:13 +08:00
|
|
|
static int mdev_device_remove_cb(struct device *dev, void *data)
|
|
|
|
{
|
2022-09-23 17:26:44 +08:00
|
|
|
if (dev->bus == &mdev_bus_type)
|
|
|
|
mdev_device_remove_common(to_mdev_device(dev));
|
2019-05-01 06:49:33 +08:00
|
|
|
return 0;
|
2016-11-17 04:46:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2022-09-23 17:26:42 +08:00
|
|
|
* mdev_register_parent: Register a device as parent for mdevs
|
|
|
|
* @parent: parent structure registered
|
2016-11-17 04:46:13 +08:00
|
|
|
* @dev: device structure representing parent device.
|
2022-04-11 22:14:01 +08:00
|
|
|
* @mdev_driver: Device driver to bind to the newly created mdev
|
2022-09-23 17:26:43 +08:00
|
|
|
* @types: Array of supported mdev types
|
|
|
|
* @nr_types: Number of entries in @types
|
2016-11-17 04:46:13 +08:00
|
|
|
*
|
2022-09-23 17:26:42 +08:00
|
|
|
* Registers the @parent stucture as a parent for mdev types and thus mdev
|
|
|
|
* devices. The caller needs to hold a reference on @dev that must not be
|
|
|
|
* released until after the call to mdev_unregister_parent().
|
|
|
|
*
|
2016-11-17 04:46:13 +08:00
|
|
|
* Returns a negative value on error, otherwise 0.
|
|
|
|
*/
|
2022-09-23 17:26:42 +08:00
|
|
|
int mdev_register_parent(struct mdev_parent *parent, struct device *dev,
|
2022-09-23 17:26:43 +08:00
|
|
|
struct mdev_driver *mdev_driver, struct mdev_type **types,
|
|
|
|
unsigned int nr_types)
|
2016-11-17 04:46:13 +08:00
|
|
|
{
|
2019-07-12 03:26:52 +08:00
|
|
|
char *env_string = "MDEV_STATE=registered";
|
|
|
|
char *envp[] = { env_string, NULL };
|
2022-09-23 17:26:42 +08:00
|
|
|
int ret;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
2022-09-23 17:26:42 +08:00
|
|
|
memset(parent, 0, sizeof(*parent));
|
2019-06-07 00:52:33 +08:00
|
|
|
init_rwsem(&parent->unreg_sem);
|
2016-11-17 04:46:13 +08:00
|
|
|
parent->dev = dev;
|
2022-04-11 22:14:01 +08:00
|
|
|
parent->mdev_driver = mdev_driver;
|
2022-09-23 17:26:43 +08:00
|
|
|
parent->types = types;
|
|
|
|
parent->nr_types = nr_types;
|
2022-09-23 17:26:52 +08:00
|
|
|
atomic_set(&parent->available_instances, mdev_driver->max_instances);
|
2016-11-17 04:46:13 +08:00
|
|
|
|
|
|
|
ret = parent_create_sysfs_files(parent);
|
|
|
|
if (ret)
|
2022-09-23 17:26:42 +08:00
|
|
|
return ret;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
|
|
|
ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL);
|
|
|
|
if (ret)
|
|
|
|
dev_warn(dev, "Failed to create compatibility class link\n");
|
|
|
|
|
|
|
|
dev_info(dev, "MDEV: Registered\n");
|
2019-07-12 03:26:52 +08:00
|
|
|
kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);
|
2016-11-17 04:46:13 +08:00
|
|
|
return 0;
|
|
|
|
}
|
2022-09-23 17:26:42 +08:00
|
|
|
EXPORT_SYMBOL(mdev_register_parent);
|
2016-11-17 04:46:13 +08:00
|
|
|
|
|
|
|
/*
|
2022-09-23 17:26:42 +08:00
|
|
|
* mdev_unregister_parent : Unregister a parent device
|
|
|
|
* @parent: parent structure to unregister
|
2016-11-17 04:46:13 +08:00
|
|
|
*/
|
2022-09-23 17:26:42 +08:00
|
|
|
void mdev_unregister_parent(struct mdev_parent *parent)
|
2016-11-17 04:46:13 +08:00
|
|
|
{
|
2019-07-12 03:26:52 +08:00
|
|
|
char *env_string = "MDEV_STATE=unregistered";
|
|
|
|
char *envp[] = { env_string, NULL };
|
2016-11-17 04:46:13 +08:00
|
|
|
|
2022-09-23 17:26:42 +08:00
|
|
|
dev_info(parent->dev, "MDEV: Unregistering\n");
|
2019-06-07 00:52:33 +08:00
|
|
|
|
|
|
|
down_write(&parent->unreg_sem);
|
2022-09-23 17:26:42 +08:00
|
|
|
class_compat_remove_link(mdev_bus_compat_class, parent->dev, NULL);
|
|
|
|
device_for_each_child(parent->dev, NULL, mdev_device_remove_cb);
|
2016-11-17 04:46:13 +08:00
|
|
|
parent_remove_sysfs_files(parent);
|
2019-06-07 00:52:33 +08:00
|
|
|
up_write(&parent->unreg_sem);
|
2016-11-17 04:46:13 +08:00
|
|
|
|
2022-09-23 17:26:42 +08:00
|
|
|
kobject_uevent_env(&parent->dev->kobj, KOBJ_CHANGE, envp);
|
2016-11-17 04:46:13 +08:00
|
|
|
}
|
2022-09-23 17:26:42 +08:00
|
|
|
EXPORT_SYMBOL(mdev_unregister_parent);
|
2016-11-17 04:46:13 +08:00
|
|
|
|
2021-04-07 03:40:31 +08:00
|
|
|
static void mdev_device_release(struct device *dev)
|
2016-11-17 04:46:13 +08:00
|
|
|
{
|
2021-04-07 03:40:31 +08:00
|
|
|
struct mdev_device *mdev = to_mdev_device(dev);
|
2022-09-23 17:26:52 +08:00
|
|
|
struct mdev_parent *parent = mdev->type->parent;
|
2021-04-07 03:40:31 +08:00
|
|
|
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
mutex_lock(&mdev_list_lock);
|
|
|
|
list_del(&mdev->next);
|
2022-09-23 17:26:52 +08:00
|
|
|
if (!parent->mdev_driver->get_available)
|
|
|
|
atomic_inc(&parent->available_instances);
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
mutex_unlock(&mdev_list_lock);
|
|
|
|
|
2022-09-23 17:26:52 +08:00
|
|
|
/* Pairs with the get in mdev_device_create() */
|
|
|
|
kobject_put(&mdev->type->kobj);
|
|
|
|
|
2016-11-17 04:46:13 +08:00
|
|
|
dev_dbg(&mdev->dev, "MDEV: destroying\n");
|
|
|
|
kfree(mdev);
|
|
|
|
}
|
|
|
|
|
2021-04-07 03:40:28 +08:00
|
|
|
int mdev_device_create(struct mdev_type *type, const guid_t *uuid)
|
2016-11-17 04:46:13 +08:00
|
|
|
{
|
|
|
|
int ret;
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
struct mdev_device *mdev, *tmp;
|
2021-04-07 03:40:29 +08:00
|
|
|
struct mdev_parent *parent = type->parent;
|
2022-04-11 22:14:01 +08:00
|
|
|
struct mdev_driver *drv = parent->mdev_driver;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
mutex_lock(&mdev_list_lock);
|
2016-11-17 04:46:13 +08:00
|
|
|
|
|
|
|
/* Check for duplicate */
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
list_for_each_entry(tmp, &mdev_list, next) {
|
2019-01-11 03:00:27 +08:00
|
|
|
if (guid_equal(&tmp->uuid, uuid)) {
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
mutex_unlock(&mdev_list_lock);
|
2021-04-07 03:40:31 +08:00
|
|
|
return -EEXIST;
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
}
|
2016-11-17 04:46:13 +08:00
|
|
|
}
|
|
|
|
|
2022-09-23 17:26:52 +08:00
|
|
|
if (!drv->get_available) {
|
|
|
|
/*
|
|
|
|
* Note: that non-atomic read and dec is fine here because
|
|
|
|
* all modifications are under mdev_list_lock.
|
|
|
|
*/
|
|
|
|
if (!atomic_read(&parent->available_instances)) {
|
|
|
|
mutex_unlock(&mdev_list_lock);
|
|
|
|
return -EUSERS;
|
|
|
|
}
|
|
|
|
atomic_dec(&parent->available_instances);
|
|
|
|
}
|
|
|
|
|
2016-11-17 04:46:13 +08:00
|
|
|
mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
|
|
|
|
if (!mdev) {
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
mutex_unlock(&mdev_list_lock);
|
2021-04-07 03:40:31 +08:00
|
|
|
return -ENOMEM;
|
2016-11-17 04:46:13 +08:00
|
|
|
}
|
|
|
|
|
2021-04-07 03:40:31 +08:00
|
|
|
device_initialize(&mdev->dev);
|
|
|
|
mdev->dev.parent = parent->dev;
|
|
|
|
mdev->dev.bus = &mdev_bus_type;
|
|
|
|
mdev->dev.release = mdev_device_release;
|
2022-04-11 22:14:02 +08:00
|
|
|
mdev->dev.groups = mdev_device_groups;
|
2021-04-07 03:40:31 +08:00
|
|
|
mdev->type = type;
|
|
|
|
/* Pairs with the put in mdev_device_release() */
|
2021-04-07 03:40:33 +08:00
|
|
|
kobject_get(&type->kobj);
|
2021-04-07 03:40:31 +08:00
|
|
|
|
2019-01-11 03:00:27 +08:00
|
|
|
guid_copy(&mdev->uuid, uuid);
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
list_add(&mdev->next, &mdev_list);
|
|
|
|
mutex_unlock(&mdev_list_lock);
|
|
|
|
|
2021-04-07 03:40:32 +08:00
|
|
|
ret = dev_set_name(&mdev->dev, "%pUl", uuid);
|
|
|
|
if (ret)
|
|
|
|
goto out_put_device;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
2019-06-07 00:52:33 +08:00
|
|
|
/* Check if parent unregistration has started */
|
|
|
|
if (!down_read_trylock(&parent->unreg_sem)) {
|
|
|
|
ret = -ENODEV;
|
2021-04-07 03:40:31 +08:00
|
|
|
goto out_put_device;
|
2019-06-07 00:52:33 +08:00
|
|
|
}
|
|
|
|
|
vfio/mdev: Improve the create/remove sequence
This patch addresses below two issues and prepares the code to address
3rd issue listed below.
1. mdev device is placed on the mdev bus before it is created in the
vendor driver. Once a device is placed on the mdev bus without creating
its supporting underlying vendor device, mdev driver's probe() gets
triggered. However there isn't a stable mdev available to work on.
create_store()
mdev_create_device()
device_register()
...
vfio_mdev_probe()
[...]
parent->ops->create()
vfio_ap_mdev_create()
mdev_set_drvdata(mdev, matrix_mdev);
/* Valid pointer set above */
Due to this way of initialization, mdev driver who wants to use the mdev,
doesn't have a valid mdev to work on.
2. Current creation sequence is,
parent->ops_create()
groups_register()
Remove sequence is,
parent->ops->remove()
groups_unregister()
However, remove sequence should be exact mirror of creation sequence.
Once this is achieved, all users of the mdev will be terminated first
before removing underlying vendor device.
(Follow standard linux driver model).
At that point vendor's remove() ops shouldn't fail because taking the
device off the bus should terminate any usage.
3. When remove operation fails, mdev sysfs removal attempts to add the
file back on already removed device. Following call trace [1] is observed.
[1] call trace:
kernel: WARNING: CPU: 2 PID: 9348 at fs/sysfs/file.c:327 sysfs_create_file_ns+0x7f/0x90
kernel: CPU: 2 PID: 9348 Comm: bash Kdump: loaded Not tainted 5.1.0-rc6-vdevbus+ #6
kernel: Hardware name: Supermicro SYS-6028U-TR4+/X10DRU-i+, BIOS 2.0b 08/09/2016
kernel: RIP: 0010:sysfs_create_file_ns+0x7f/0x90
kernel: Call Trace:
kernel: remove_store+0xdc/0x100 [mdev]
kernel: kernfs_fop_write+0x113/0x1a0
kernel: vfs_write+0xad/0x1b0
kernel: ksys_write+0x5a/0xe0
kernel: do_syscall_64+0x5a/0x210
kernel: entry_SYSCALL_64_after_hwframe+0x49/0xbe
Therefore, mdev core is improved in following ways.
1. Split the device registration/deregistration sequence so that some
things can be done between initialization of the device and hooking it
up to the bus respectively after deregistering it from the bus but
before giving up our final reference.
In particular, this means invoking the ->create() and ->remove()
callbacks in those new windows. This gives the vendor driver an
initialized mdev device to work with during creation.
At the same time, a bus driver who wish to bind to mdev driver also
gets initialized mdev device.
This follows standard Linux kernel bus and device model.
2. During remove flow, first remove the device from the bus. This
ensures that any bus specific devices are removed.
Once device is taken off the mdev bus, invoke remove() of mdev
from the vendor driver.
3. The driver core device model provides way to register and auto
unregister the device sysfs attribute groups at dev->groups.
Make use of dev->groups to let core create the groups and eliminate
code to avoid explicit groups creation and removal.
To ensure, that new sequence is solid, a below stack dump of a
process is taken who attempts to remove the device while device is in
use by vfio driver and user application.
This stack dump validates that vfio driver guards against such device
removal when device is in use.
cat /proc/21962/stack
[<0>] vfio_del_group_dev+0x216/0x3c0 [vfio]
[<0>] mdev_remove+0x21/0x40 [mdev]
[<0>] device_release_driver_internal+0xe8/0x1b0
[<0>] bus_remove_device+0xf9/0x170
[<0>] device_del+0x168/0x350
[<0>] mdev_device_remove_common+0x1d/0x50 [mdev]
[<0>] mdev_device_remove+0x8c/0xd0 [mdev]
[<0>] remove_store+0x71/0x90 [mdev]
[<0>] kernfs_fop_write+0x113/0x1a0
[<0>] vfs_write+0xad/0x1b0
[<0>] ksys_write+0x5a/0xe0
[<0>] do_syscall_64+0x5a/0x210
[<0>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[<0>] 0xffffffffffffffff
This prepares the code to eliminate calling device_create_file() in
subsequent patch.
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2019-06-07 00:52:32 +08:00
|
|
|
ret = device_add(&mdev->dev);
|
2016-11-17 04:46:13 +08:00
|
|
|
if (ret)
|
2022-04-11 22:13:59 +08:00
|
|
|
goto out_unlock;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
2021-06-17 22:22:15 +08:00
|
|
|
ret = device_driver_attach(&drv->driver, &mdev->dev);
|
|
|
|
if (ret)
|
|
|
|
goto out_del;
|
|
|
|
|
2021-04-07 03:40:28 +08:00
|
|
|
ret = mdev_create_sysfs_files(mdev);
|
vfio/mdev: Improve the create/remove sequence
This patch addresses below two issues and prepares the code to address
3rd issue listed below.
1. mdev device is placed on the mdev bus before it is created in the
vendor driver. Once a device is placed on the mdev bus without creating
its supporting underlying vendor device, mdev driver's probe() gets
triggered. However there isn't a stable mdev available to work on.
create_store()
mdev_create_device()
device_register()
...
vfio_mdev_probe()
[...]
parent->ops->create()
vfio_ap_mdev_create()
mdev_set_drvdata(mdev, matrix_mdev);
/* Valid pointer set above */
Due to this way of initialization, mdev driver who wants to use the mdev,
doesn't have a valid mdev to work on.
2. Current creation sequence is,
parent->ops_create()
groups_register()
Remove sequence is,
parent->ops->remove()
groups_unregister()
However, remove sequence should be exact mirror of creation sequence.
Once this is achieved, all users of the mdev will be terminated first
before removing underlying vendor device.
(Follow standard linux driver model).
At that point vendor's remove() ops shouldn't fail because taking the
device off the bus should terminate any usage.
3. When remove operation fails, mdev sysfs removal attempts to add the
file back on already removed device. Following call trace [1] is observed.
[1] call trace:
kernel: WARNING: CPU: 2 PID: 9348 at fs/sysfs/file.c:327 sysfs_create_file_ns+0x7f/0x90
kernel: CPU: 2 PID: 9348 Comm: bash Kdump: loaded Not tainted 5.1.0-rc6-vdevbus+ #6
kernel: Hardware name: Supermicro SYS-6028U-TR4+/X10DRU-i+, BIOS 2.0b 08/09/2016
kernel: RIP: 0010:sysfs_create_file_ns+0x7f/0x90
kernel: Call Trace:
kernel: remove_store+0xdc/0x100 [mdev]
kernel: kernfs_fop_write+0x113/0x1a0
kernel: vfs_write+0xad/0x1b0
kernel: ksys_write+0x5a/0xe0
kernel: do_syscall_64+0x5a/0x210
kernel: entry_SYSCALL_64_after_hwframe+0x49/0xbe
Therefore, mdev core is improved in following ways.
1. Split the device registration/deregistration sequence so that some
things can be done between initialization of the device and hooking it
up to the bus respectively after deregistering it from the bus but
before giving up our final reference.
In particular, this means invoking the ->create() and ->remove()
callbacks in those new windows. This gives the vendor driver an
initialized mdev device to work with during creation.
At the same time, a bus driver who wish to bind to mdev driver also
gets initialized mdev device.
This follows standard Linux kernel bus and device model.
2. During remove flow, first remove the device from the bus. This
ensures that any bus specific devices are removed.
Once device is taken off the mdev bus, invoke remove() of mdev
from the vendor driver.
3. The driver core device model provides way to register and auto
unregister the device sysfs attribute groups at dev->groups.
Make use of dev->groups to let core create the groups and eliminate
code to avoid explicit groups creation and removal.
To ensure, that new sequence is solid, a below stack dump of a
process is taken who attempts to remove the device while device is in
use by vfio driver and user application.
This stack dump validates that vfio driver guards against such device
removal when device is in use.
cat /proc/21962/stack
[<0>] vfio_del_group_dev+0x216/0x3c0 [vfio]
[<0>] mdev_remove+0x21/0x40 [mdev]
[<0>] device_release_driver_internal+0xe8/0x1b0
[<0>] bus_remove_device+0xf9/0x170
[<0>] device_del+0x168/0x350
[<0>] mdev_device_remove_common+0x1d/0x50 [mdev]
[<0>] mdev_device_remove+0x8c/0xd0 [mdev]
[<0>] remove_store+0x71/0x90 [mdev]
[<0>] kernfs_fop_write+0x113/0x1a0
[<0>] vfs_write+0xad/0x1b0
[<0>] ksys_write+0x5a/0xe0
[<0>] do_syscall_64+0x5a/0x210
[<0>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[<0>] 0xffffffffffffffff
This prepares the code to eliminate calling device_create_file() in
subsequent patch.
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2019-06-07 00:52:32 +08:00
|
|
|
if (ret)
|
2021-04-07 03:40:31 +08:00
|
|
|
goto out_del;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
mdev->active = true;
|
2016-11-17 04:46:13 +08:00
|
|
|
dev_dbg(&mdev->dev, "MDEV: created\n");
|
2019-06-07 00:52:33 +08:00
|
|
|
up_read(&parent->unreg_sem);
|
2016-11-17 04:46:13 +08:00
|
|
|
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
return 0;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
2021-04-07 03:40:31 +08:00
|
|
|
out_del:
|
vfio/mdev: Improve the create/remove sequence
This patch addresses below two issues and prepares the code to address
3rd issue listed below.
1. mdev device is placed on the mdev bus before it is created in the
vendor driver. Once a device is placed on the mdev bus without creating
its supporting underlying vendor device, mdev driver's probe() gets
triggered. However there isn't a stable mdev available to work on.
create_store()
mdev_create_device()
device_register()
...
vfio_mdev_probe()
[...]
parent->ops->create()
vfio_ap_mdev_create()
mdev_set_drvdata(mdev, matrix_mdev);
/* Valid pointer set above */
Due to this way of initialization, mdev driver who wants to use the mdev,
doesn't have a valid mdev to work on.
2. Current creation sequence is,
parent->ops_create()
groups_register()
Remove sequence is,
parent->ops->remove()
groups_unregister()
However, remove sequence should be exact mirror of creation sequence.
Once this is achieved, all users of the mdev will be terminated first
before removing underlying vendor device.
(Follow standard linux driver model).
At that point vendor's remove() ops shouldn't fail because taking the
device off the bus should terminate any usage.
3. When remove operation fails, mdev sysfs removal attempts to add the
file back on already removed device. Following call trace [1] is observed.
[1] call trace:
kernel: WARNING: CPU: 2 PID: 9348 at fs/sysfs/file.c:327 sysfs_create_file_ns+0x7f/0x90
kernel: CPU: 2 PID: 9348 Comm: bash Kdump: loaded Not tainted 5.1.0-rc6-vdevbus+ #6
kernel: Hardware name: Supermicro SYS-6028U-TR4+/X10DRU-i+, BIOS 2.0b 08/09/2016
kernel: RIP: 0010:sysfs_create_file_ns+0x7f/0x90
kernel: Call Trace:
kernel: remove_store+0xdc/0x100 [mdev]
kernel: kernfs_fop_write+0x113/0x1a0
kernel: vfs_write+0xad/0x1b0
kernel: ksys_write+0x5a/0xe0
kernel: do_syscall_64+0x5a/0x210
kernel: entry_SYSCALL_64_after_hwframe+0x49/0xbe
Therefore, mdev core is improved in following ways.
1. Split the device registration/deregistration sequence so that some
things can be done between initialization of the device and hooking it
up to the bus respectively after deregistering it from the bus but
before giving up our final reference.
In particular, this means invoking the ->create() and ->remove()
callbacks in those new windows. This gives the vendor driver an
initialized mdev device to work with during creation.
At the same time, a bus driver who wish to bind to mdev driver also
gets initialized mdev device.
This follows standard Linux kernel bus and device model.
2. During remove flow, first remove the device from the bus. This
ensures that any bus specific devices are removed.
Once device is taken off the mdev bus, invoke remove() of mdev
from the vendor driver.
3. The driver core device model provides way to register and auto
unregister the device sysfs attribute groups at dev->groups.
Make use of dev->groups to let core create the groups and eliminate
code to avoid explicit groups creation and removal.
To ensure, that new sequence is solid, a below stack dump of a
process is taken who attempts to remove the device while device is in
use by vfio driver and user application.
This stack dump validates that vfio driver guards against such device
removal when device is in use.
cat /proc/21962/stack
[<0>] vfio_del_group_dev+0x216/0x3c0 [vfio]
[<0>] mdev_remove+0x21/0x40 [mdev]
[<0>] device_release_driver_internal+0xe8/0x1b0
[<0>] bus_remove_device+0xf9/0x170
[<0>] device_del+0x168/0x350
[<0>] mdev_device_remove_common+0x1d/0x50 [mdev]
[<0>] mdev_device_remove+0x8c/0xd0 [mdev]
[<0>] remove_store+0x71/0x90 [mdev]
[<0>] kernfs_fop_write+0x113/0x1a0
[<0>] vfs_write+0xad/0x1b0
[<0>] ksys_write+0x5a/0xe0
[<0>] do_syscall_64+0x5a/0x210
[<0>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[<0>] 0xffffffffffffffff
This prepares the code to eliminate calling device_create_file() in
subsequent patch.
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2019-06-07 00:52:32 +08:00
|
|
|
device_del(&mdev->dev);
|
2021-04-07 03:40:31 +08:00
|
|
|
out_unlock:
|
2019-06-07 00:52:33 +08:00
|
|
|
up_read(&parent->unreg_sem);
|
2021-04-07 03:40:31 +08:00
|
|
|
out_put_device:
|
vfio/mdev: Improve the create/remove sequence
This patch addresses below two issues and prepares the code to address
3rd issue listed below.
1. mdev device is placed on the mdev bus before it is created in the
vendor driver. Once a device is placed on the mdev bus without creating
its supporting underlying vendor device, mdev driver's probe() gets
triggered. However there isn't a stable mdev available to work on.
create_store()
mdev_create_device()
device_register()
...
vfio_mdev_probe()
[...]
parent->ops->create()
vfio_ap_mdev_create()
mdev_set_drvdata(mdev, matrix_mdev);
/* Valid pointer set above */
Due to this way of initialization, mdev driver who wants to use the mdev,
doesn't have a valid mdev to work on.
2. Current creation sequence is,
parent->ops_create()
groups_register()
Remove sequence is,
parent->ops->remove()
groups_unregister()
However, remove sequence should be exact mirror of creation sequence.
Once this is achieved, all users of the mdev will be terminated first
before removing underlying vendor device.
(Follow standard linux driver model).
At that point vendor's remove() ops shouldn't fail because taking the
device off the bus should terminate any usage.
3. When remove operation fails, mdev sysfs removal attempts to add the
file back on already removed device. Following call trace [1] is observed.
[1] call trace:
kernel: WARNING: CPU: 2 PID: 9348 at fs/sysfs/file.c:327 sysfs_create_file_ns+0x7f/0x90
kernel: CPU: 2 PID: 9348 Comm: bash Kdump: loaded Not tainted 5.1.0-rc6-vdevbus+ #6
kernel: Hardware name: Supermicro SYS-6028U-TR4+/X10DRU-i+, BIOS 2.0b 08/09/2016
kernel: RIP: 0010:sysfs_create_file_ns+0x7f/0x90
kernel: Call Trace:
kernel: remove_store+0xdc/0x100 [mdev]
kernel: kernfs_fop_write+0x113/0x1a0
kernel: vfs_write+0xad/0x1b0
kernel: ksys_write+0x5a/0xe0
kernel: do_syscall_64+0x5a/0x210
kernel: entry_SYSCALL_64_after_hwframe+0x49/0xbe
Therefore, mdev core is improved in following ways.
1. Split the device registration/deregistration sequence so that some
things can be done between initialization of the device and hooking it
up to the bus respectively after deregistering it from the bus but
before giving up our final reference.
In particular, this means invoking the ->create() and ->remove()
callbacks in those new windows. This gives the vendor driver an
initialized mdev device to work with during creation.
At the same time, a bus driver who wish to bind to mdev driver also
gets initialized mdev device.
This follows standard Linux kernel bus and device model.
2. During remove flow, first remove the device from the bus. This
ensures that any bus specific devices are removed.
Once device is taken off the mdev bus, invoke remove() of mdev
from the vendor driver.
3. The driver core device model provides way to register and auto
unregister the device sysfs attribute groups at dev->groups.
Make use of dev->groups to let core create the groups and eliminate
code to avoid explicit groups creation and removal.
To ensure, that new sequence is solid, a below stack dump of a
process is taken who attempts to remove the device while device is in
use by vfio driver and user application.
This stack dump validates that vfio driver guards against such device
removal when device is in use.
cat /proc/21962/stack
[<0>] vfio_del_group_dev+0x216/0x3c0 [vfio]
[<0>] mdev_remove+0x21/0x40 [mdev]
[<0>] device_release_driver_internal+0xe8/0x1b0
[<0>] bus_remove_device+0xf9/0x170
[<0>] device_del+0x168/0x350
[<0>] mdev_device_remove_common+0x1d/0x50 [mdev]
[<0>] mdev_device_remove+0x8c/0xd0 [mdev]
[<0>] remove_store+0x71/0x90 [mdev]
[<0>] kernfs_fop_write+0x113/0x1a0
[<0>] vfs_write+0xad/0x1b0
[<0>] ksys_write+0x5a/0xe0
[<0>] do_syscall_64+0x5a/0x210
[<0>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[<0>] 0xffffffffffffffff
This prepares the code to eliminate calling device_create_file() in
subsequent patch.
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2019-06-07 00:52:32 +08:00
|
|
|
put_device(&mdev->dev);
|
2016-11-17 04:46:13 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2021-04-07 03:40:26 +08:00
|
|
|
int mdev_device_remove(struct mdev_device *mdev)
|
2016-11-17 04:46:13 +08:00
|
|
|
{
|
2021-04-07 03:40:26 +08:00
|
|
|
struct mdev_device *tmp;
|
2021-04-07 03:40:33 +08:00
|
|
|
struct mdev_parent *parent = mdev->type->parent;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
2016-12-30 23:13:33 +08:00
|
|
|
mutex_lock(&mdev_list_lock);
|
|
|
|
list_for_each_entry(tmp, &mdev_list, next) {
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
if (tmp == mdev)
|
2016-12-30 23:13:33 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
if (tmp != mdev) {
|
|
|
|
mutex_unlock(&mdev_list_lock);
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
2016-12-30 23:13:33 +08:00
|
|
|
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
if (!mdev->active) {
|
|
|
|
mutex_unlock(&mdev_list_lock);
|
|
|
|
return -EAGAIN;
|
|
|
|
}
|
2016-12-30 23:13:33 +08:00
|
|
|
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
mdev->active = false;
|
|
|
|
mutex_unlock(&mdev_list_lock);
|
2016-12-30 23:13:33 +08:00
|
|
|
|
2019-06-07 00:52:33 +08:00
|
|
|
/* Check if parent unregistration has started */
|
|
|
|
if (!down_read_trylock(&parent->unreg_sem))
|
|
|
|
return -ENODEV;
|
2016-12-30 23:13:33 +08:00
|
|
|
|
2019-06-07 00:52:33 +08:00
|
|
|
mdev_device_remove_common(mdev);
|
|
|
|
up_read(&parent->unreg_sem);
|
2016-12-30 23:13:33 +08:00
|
|
|
return 0;
|
2016-11-17 04:46:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int __init mdev_init(void)
|
|
|
|
{
|
2023-06-26 21:36:42 +08:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = bus_register(&mdev_bus_type);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
mdev_bus_compat_class = class_compat_register("mdev_bus");
|
|
|
|
if (!mdev_bus_compat_class) {
|
|
|
|
bus_unregister(&mdev_bus_type);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2016-11-17 04:46:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void __exit mdev_exit(void)
|
|
|
|
{
|
2023-06-26 21:36:42 +08:00
|
|
|
class_compat_unregister(mdev_bus_compat_class);
|
2022-04-11 22:13:59 +08:00
|
|
|
bus_unregister(&mdev_bus_type);
|
2016-11-17 04:46:13 +08:00
|
|
|
}
|
|
|
|
|
2021-07-26 22:35:23 +08:00
|
|
|
subsys_initcall(mdev_init)
|
2016-11-17 04:46:13 +08:00
|
|
|
module_exit(mdev_exit)
|
|
|
|
|
|
|
|
MODULE_VERSION(DRIVER_VERSION);
|
|
|
|
MODULE_LICENSE("GPL v2");
|
|
|
|
MODULE_AUTHOR(DRIVER_AUTHOR);
|
|
|
|
MODULE_DESCRIPTION(DRIVER_DESC);
|