mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-21 05:14:52 +08:00
Merge branch 'v5.12/vfio/next-vaddr' into v5.12/vfio/next
This commit is contained in:
commit
76adb20f92
@ -1220,6 +1220,11 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
|
|||||||
static int vfio_fops_release(struct inode *inode, struct file *filep)
|
static int vfio_fops_release(struct inode *inode, struct file *filep)
|
||||||
{
|
{
|
||||||
struct vfio_container *container = filep->private_data;
|
struct vfio_container *container = filep->private_data;
|
||||||
|
struct vfio_iommu_driver *driver = container->iommu_driver;
|
||||||
|
|
||||||
|
if (driver && driver->ops->notify)
|
||||||
|
driver->ops->notify(container->iommu_data,
|
||||||
|
VFIO_IOMMU_CONTAINER_CLOSE);
|
||||||
|
|
||||||
filep->private_data = NULL;
|
filep->private_data = NULL;
|
||||||
|
|
||||||
|
@ -31,6 +31,7 @@
|
|||||||
#include <linux/rbtree.h>
|
#include <linux/rbtree.h>
|
||||||
#include <linux/sched/signal.h>
|
#include <linux/sched/signal.h>
|
||||||
#include <linux/sched/mm.h>
|
#include <linux/sched/mm.h>
|
||||||
|
#include <linux/kthread.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/uaccess.h>
|
#include <linux/uaccess.h>
|
||||||
#include <linux/vfio.h>
|
#include <linux/vfio.h>
|
||||||
@ -69,11 +70,15 @@ struct vfio_iommu {
|
|||||||
struct rb_root dma_list;
|
struct rb_root dma_list;
|
||||||
struct blocking_notifier_head notifier;
|
struct blocking_notifier_head notifier;
|
||||||
unsigned int dma_avail;
|
unsigned int dma_avail;
|
||||||
|
unsigned int vaddr_invalid_count;
|
||||||
uint64_t pgsize_bitmap;
|
uint64_t pgsize_bitmap;
|
||||||
uint64_t num_non_pinned_groups;
|
uint64_t num_non_pinned_groups;
|
||||||
|
wait_queue_head_t vaddr_wait;
|
||||||
bool v2;
|
bool v2;
|
||||||
bool nesting;
|
bool nesting;
|
||||||
bool dirty_page_tracking;
|
bool dirty_page_tracking;
|
||||||
|
bool pinned_page_dirty_scope;
|
||||||
|
bool container_open;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct vfio_domain {
|
struct vfio_domain {
|
||||||
@ -92,6 +97,7 @@ struct vfio_dma {
|
|||||||
int prot; /* IOMMU_READ/WRITE */
|
int prot; /* IOMMU_READ/WRITE */
|
||||||
bool iommu_mapped;
|
bool iommu_mapped;
|
||||||
bool lock_cap; /* capable(CAP_IPC_LOCK) */
|
bool lock_cap; /* capable(CAP_IPC_LOCK) */
|
||||||
|
bool vaddr_invalid;
|
||||||
struct task_struct *task;
|
struct task_struct *task;
|
||||||
struct rb_root pfn_list; /* Ex-user pinned pfn list */
|
struct rb_root pfn_list; /* Ex-user pinned pfn list */
|
||||||
unsigned long *bitmap;
|
unsigned long *bitmap;
|
||||||
@ -143,6 +149,8 @@ struct vfio_regions {
|
|||||||
#define DIRTY_BITMAP_PAGES_MAX ((u64)INT_MAX)
|
#define DIRTY_BITMAP_PAGES_MAX ((u64)INT_MAX)
|
||||||
#define DIRTY_BITMAP_SIZE_MAX DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX)
|
#define DIRTY_BITMAP_SIZE_MAX DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX)
|
||||||
|
|
||||||
|
#define WAITED 1
|
||||||
|
|
||||||
static int put_pfn(unsigned long pfn, int prot);
|
static int put_pfn(unsigned long pfn, int prot);
|
||||||
|
|
||||||
static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
|
static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
|
||||||
@ -172,6 +180,31 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu,
|
||||||
|
dma_addr_t start, size_t size)
|
||||||
|
{
|
||||||
|
struct rb_node *res = NULL;
|
||||||
|
struct rb_node *node = iommu->dma_list.rb_node;
|
||||||
|
struct vfio_dma *dma_res = NULL;
|
||||||
|
|
||||||
|
while (node) {
|
||||||
|
struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
|
||||||
|
|
||||||
|
if (start < dma->iova + dma->size) {
|
||||||
|
res = node;
|
||||||
|
dma_res = dma;
|
||||||
|
if (start >= dma->iova)
|
||||||
|
break;
|
||||||
|
node = node->rb_left;
|
||||||
|
} else {
|
||||||
|
node = node->rb_right;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (res && size && dma_res->iova >= start + size)
|
||||||
|
res = NULL;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
|
static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
|
||||||
{
|
{
|
||||||
struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
|
struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
|
||||||
@ -490,6 +523,61 @@ done:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int vfio_wait(struct vfio_iommu *iommu)
|
||||||
|
{
|
||||||
|
DEFINE_WAIT(wait);
|
||||||
|
|
||||||
|
prepare_to_wait(&iommu->vaddr_wait, &wait, TASK_KILLABLE);
|
||||||
|
mutex_unlock(&iommu->lock);
|
||||||
|
schedule();
|
||||||
|
mutex_lock(&iommu->lock);
|
||||||
|
finish_wait(&iommu->vaddr_wait, &wait);
|
||||||
|
if (kthread_should_stop() || !iommu->container_open ||
|
||||||
|
fatal_signal_pending(current)) {
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
return WAITED;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find dma struct and wait for its vaddr to be valid. iommu lock is dropped
|
||||||
|
* if the task waits, but is re-locked on return. Return result in *dma_p.
|
||||||
|
* Return 0 on success with no waiting, WAITED on success if waited, and -errno
|
||||||
|
* on error.
|
||||||
|
*/
|
||||||
|
static int vfio_find_dma_valid(struct vfio_iommu *iommu, dma_addr_t start,
|
||||||
|
size_t size, struct vfio_dma **dma_p)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
do {
|
||||||
|
*dma_p = vfio_find_dma(iommu, start, size);
|
||||||
|
if (!*dma_p)
|
||||||
|
ret = -EINVAL;
|
||||||
|
else if (!(*dma_p)->vaddr_invalid)
|
||||||
|
ret = 0;
|
||||||
|
else
|
||||||
|
ret = vfio_wait(iommu);
|
||||||
|
} while (ret > 0);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Wait for all vaddr in the dma_list to become valid. iommu lock is dropped
|
||||||
|
* if the task waits, but is re-locked on return. Return 0 on success with no
|
||||||
|
* waiting, WAITED on success if waited, and -errno on error.
|
||||||
|
*/
|
||||||
|
static int vfio_wait_all_valid(struct vfio_iommu *iommu)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
while (iommu->vaddr_invalid_count && ret >= 0)
|
||||||
|
ret = vfio_wait(iommu);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Attempt to pin pages. We really don't want to track all the pfns and
|
* Attempt to pin pages. We really don't want to track all the pfns and
|
||||||
* the iommu can only map chunks of consecutive pfns anyway, so get the
|
* the iommu can only map chunks of consecutive pfns anyway, so get the
|
||||||
@ -651,6 +739,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
|
|||||||
unsigned long remote_vaddr;
|
unsigned long remote_vaddr;
|
||||||
struct vfio_dma *dma;
|
struct vfio_dma *dma;
|
||||||
bool do_accounting;
|
bool do_accounting;
|
||||||
|
dma_addr_t iova;
|
||||||
|
|
||||||
if (!iommu || !user_pfn || !phys_pfn)
|
if (!iommu || !user_pfn || !phys_pfn)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
@ -661,6 +750,22 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
|
|||||||
|
|
||||||
mutex_lock(&iommu->lock);
|
mutex_lock(&iommu->lock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Wait for all necessary vaddr's to be valid so they can be used in
|
||||||
|
* the main loop without dropping the lock, to avoid racing vs unmap.
|
||||||
|
*/
|
||||||
|
again:
|
||||||
|
if (iommu->vaddr_invalid_count) {
|
||||||
|
for (i = 0; i < npage; i++) {
|
||||||
|
iova = user_pfn[i] << PAGE_SHIFT;
|
||||||
|
ret = vfio_find_dma_valid(iommu, iova, PAGE_SIZE, &dma);
|
||||||
|
if (ret < 0)
|
||||||
|
goto pin_done;
|
||||||
|
if (ret == WAITED)
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Fail if notifier list is empty */
|
/* Fail if notifier list is empty */
|
||||||
if (!iommu->notifier.head) {
|
if (!iommu->notifier.head) {
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
@ -675,7 +780,6 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
|
|||||||
do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);
|
do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);
|
||||||
|
|
||||||
for (i = 0; i < npage; i++) {
|
for (i = 0; i < npage; i++) {
|
||||||
dma_addr_t iova;
|
|
||||||
struct vfio_pfn *vpfn;
|
struct vfio_pfn *vpfn;
|
||||||
|
|
||||||
iova = user_pfn[i] << PAGE_SHIFT;
|
iova = user_pfn[i] << PAGE_SHIFT;
|
||||||
@ -961,6 +1065,10 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
|
|||||||
vfio_unlink_dma(iommu, dma);
|
vfio_unlink_dma(iommu, dma);
|
||||||
put_task_struct(dma->task);
|
put_task_struct(dma->task);
|
||||||
vfio_dma_bitmap_free(dma);
|
vfio_dma_bitmap_free(dma);
|
||||||
|
if (dma->vaddr_invalid) {
|
||||||
|
iommu->vaddr_invalid_count--;
|
||||||
|
wake_up_all(&iommu->vaddr_wait);
|
||||||
|
}
|
||||||
kfree(dma);
|
kfree(dma);
|
||||||
iommu->dma_avail++;
|
iommu->dma_avail++;
|
||||||
}
|
}
|
||||||
@ -1086,34 +1194,36 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
|
|||||||
{
|
{
|
||||||
struct vfio_dma *dma, *dma_last = NULL;
|
struct vfio_dma *dma, *dma_last = NULL;
|
||||||
size_t unmapped = 0, pgsize;
|
size_t unmapped = 0, pgsize;
|
||||||
int ret = 0, retries = 0;
|
int ret = -EINVAL, retries = 0;
|
||||||
unsigned long pgshift;
|
unsigned long pgshift;
|
||||||
|
dma_addr_t iova = unmap->iova;
|
||||||
|
unsigned long size = unmap->size;
|
||||||
|
bool unmap_all = unmap->flags & VFIO_DMA_UNMAP_FLAG_ALL;
|
||||||
|
bool invalidate_vaddr = unmap->flags & VFIO_DMA_UNMAP_FLAG_VADDR;
|
||||||
|
struct rb_node *n, *first_n;
|
||||||
|
|
||||||
mutex_lock(&iommu->lock);
|
mutex_lock(&iommu->lock);
|
||||||
|
|
||||||
pgshift = __ffs(iommu->pgsize_bitmap);
|
pgshift = __ffs(iommu->pgsize_bitmap);
|
||||||
pgsize = (size_t)1 << pgshift;
|
pgsize = (size_t)1 << pgshift;
|
||||||
|
|
||||||
if (unmap->iova & (pgsize - 1)) {
|
if (iova & (pgsize - 1))
|
||||||
ret = -EINVAL;
|
goto unlock;
|
||||||
|
|
||||||
|
if (unmap_all) {
|
||||||
|
if (iova || size)
|
||||||
|
goto unlock;
|
||||||
|
size = SIZE_MAX;
|
||||||
|
} else if (!size || size & (pgsize - 1)) {
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!unmap->size || unmap->size & (pgsize - 1)) {
|
if (iova + size - 1 < iova || size > SIZE_MAX)
|
||||||
ret = -EINVAL;
|
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
|
||||||
|
|
||||||
if (unmap->iova + unmap->size - 1 < unmap->iova ||
|
|
||||||
unmap->size > SIZE_MAX) {
|
|
||||||
ret = -EINVAL;
|
|
||||||
goto unlock;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* When dirty tracking is enabled, allow only min supported pgsize */
|
/* When dirty tracking is enabled, allow only min supported pgsize */
|
||||||
if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
|
if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
|
||||||
(!iommu->dirty_page_tracking || (bitmap->pgsize != pgsize))) {
|
(!iommu->dirty_page_tracking || (bitmap->pgsize != pgsize))) {
|
||||||
ret = -EINVAL;
|
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1150,21 +1260,25 @@ again:
|
|||||||
* will only return success and a size of zero if there were no
|
* will only return success and a size of zero if there were no
|
||||||
* mappings within the range.
|
* mappings within the range.
|
||||||
*/
|
*/
|
||||||
if (iommu->v2) {
|
if (iommu->v2 && !unmap_all) {
|
||||||
dma = vfio_find_dma(iommu, unmap->iova, 1);
|
dma = vfio_find_dma(iommu, iova, 1);
|
||||||
if (dma && dma->iova != unmap->iova) {
|
if (dma && dma->iova != iova)
|
||||||
ret = -EINVAL;
|
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
|
||||||
dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0);
|
dma = vfio_find_dma(iommu, iova + size - 1, 0);
|
||||||
if (dma && dma->iova + dma->size != unmap->iova + unmap->size) {
|
if (dma && dma->iova + dma->size != iova + size)
|
||||||
ret = -EINVAL;
|
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
|
ret = 0;
|
||||||
if (!iommu->v2 && unmap->iova > dma->iova)
|
n = first_n = vfio_find_dma_first_node(iommu, iova, size);
|
||||||
|
|
||||||
|
while (n) {
|
||||||
|
dma = rb_entry(n, struct vfio_dma, node);
|
||||||
|
if (dma->iova >= iova + size)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (!iommu->v2 && iova > dma->iova)
|
||||||
break;
|
break;
|
||||||
/*
|
/*
|
||||||
* Task with same address space who mapped this iova range is
|
* Task with same address space who mapped this iova range is
|
||||||
@ -1173,6 +1287,27 @@ again:
|
|||||||
if (dma->task->mm != current->mm)
|
if (dma->task->mm != current->mm)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
if (invalidate_vaddr) {
|
||||||
|
if (dma->vaddr_invalid) {
|
||||||
|
struct rb_node *last_n = n;
|
||||||
|
|
||||||
|
for (n = first_n; n != last_n; n = rb_next(n)) {
|
||||||
|
dma = rb_entry(n,
|
||||||
|
struct vfio_dma, node);
|
||||||
|
dma->vaddr_invalid = false;
|
||||||
|
iommu->vaddr_invalid_count--;
|
||||||
|
}
|
||||||
|
ret = -EINVAL;
|
||||||
|
unmapped = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
dma->vaddr_invalid = true;
|
||||||
|
iommu->vaddr_invalid_count++;
|
||||||
|
unmapped += dma->size;
|
||||||
|
n = rb_next(n);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
|
if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
|
||||||
struct vfio_iommu_type1_dma_unmap nb_unmap;
|
struct vfio_iommu_type1_dma_unmap nb_unmap;
|
||||||
|
|
||||||
@ -1202,12 +1337,13 @@ again:
|
|||||||
|
|
||||||
if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
|
if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
|
||||||
ret = update_user_bitmap(bitmap->data, iommu, dma,
|
ret = update_user_bitmap(bitmap->data, iommu, dma,
|
||||||
unmap->iova, pgsize);
|
iova, pgsize);
|
||||||
if (ret)
|
if (ret)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
unmapped += dma->size;
|
unmapped += dma->size;
|
||||||
|
n = rb_next(n);
|
||||||
vfio_remove_dma(iommu, dma);
|
vfio_remove_dma(iommu, dma);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1311,6 +1447,7 @@ static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
|
|||||||
static int vfio_dma_do_map(struct vfio_iommu *iommu,
|
static int vfio_dma_do_map(struct vfio_iommu *iommu,
|
||||||
struct vfio_iommu_type1_dma_map *map)
|
struct vfio_iommu_type1_dma_map *map)
|
||||||
{
|
{
|
||||||
|
bool set_vaddr = map->flags & VFIO_DMA_MAP_FLAG_VADDR;
|
||||||
dma_addr_t iova = map->iova;
|
dma_addr_t iova = map->iova;
|
||||||
unsigned long vaddr = map->vaddr;
|
unsigned long vaddr = map->vaddr;
|
||||||
size_t size = map->size;
|
size_t size = map->size;
|
||||||
@ -1328,13 +1465,16 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
|
|||||||
if (map->flags & VFIO_DMA_MAP_FLAG_READ)
|
if (map->flags & VFIO_DMA_MAP_FLAG_READ)
|
||||||
prot |= IOMMU_READ;
|
prot |= IOMMU_READ;
|
||||||
|
|
||||||
|
if ((prot && set_vaddr) || (!prot && !set_vaddr))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
mutex_lock(&iommu->lock);
|
mutex_lock(&iommu->lock);
|
||||||
|
|
||||||
pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap);
|
pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap);
|
||||||
|
|
||||||
WARN_ON((pgsize - 1) & PAGE_MASK);
|
WARN_ON((pgsize - 1) & PAGE_MASK);
|
||||||
|
|
||||||
if (!prot || !size || (size | iova | vaddr) & (pgsize - 1)) {
|
if (!size || (size | iova | vaddr) & (pgsize - 1)) {
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
@ -1345,7 +1485,21 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
|
|||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vfio_find_dma(iommu, iova, size)) {
|
dma = vfio_find_dma(iommu, iova, size);
|
||||||
|
if (set_vaddr) {
|
||||||
|
if (!dma) {
|
||||||
|
ret = -ENOENT;
|
||||||
|
} else if (!dma->vaddr_invalid || dma->iova != iova ||
|
||||||
|
dma->size != size) {
|
||||||
|
ret = -EINVAL;
|
||||||
|
} else {
|
||||||
|
dma->vaddr = vaddr;
|
||||||
|
dma->vaddr_invalid = false;
|
||||||
|
iommu->vaddr_invalid_count--;
|
||||||
|
wake_up_all(&iommu->vaddr_wait);
|
||||||
|
}
|
||||||
|
goto out_unlock;
|
||||||
|
} else if (dma) {
|
||||||
ret = -EEXIST;
|
ret = -EEXIST;
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
@ -1442,6 +1596,10 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
|
|||||||
unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
ret = vfio_wait_all_valid(iommu);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
/* Arbitrarily pick the first domain in the list for lookups */
|
/* Arbitrarily pick the first domain in the list for lookups */
|
||||||
if (!list_empty(&iommu->domain_list))
|
if (!list_empty(&iommu->domain_list))
|
||||||
d = list_first_entry(&iommu->domain_list,
|
d = list_first_entry(&iommu->domain_list,
|
||||||
@ -2417,8 +2575,10 @@ static void *vfio_iommu_type1_open(unsigned long arg)
|
|||||||
INIT_LIST_HEAD(&iommu->iova_list);
|
INIT_LIST_HEAD(&iommu->iova_list);
|
||||||
iommu->dma_list = RB_ROOT;
|
iommu->dma_list = RB_ROOT;
|
||||||
iommu->dma_avail = dma_entry_limit;
|
iommu->dma_avail = dma_entry_limit;
|
||||||
|
iommu->container_open = true;
|
||||||
mutex_init(&iommu->lock);
|
mutex_init(&iommu->lock);
|
||||||
BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
|
BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
|
||||||
|
init_waitqueue_head(&iommu->vaddr_wait);
|
||||||
|
|
||||||
return iommu;
|
return iommu;
|
||||||
}
|
}
|
||||||
@ -2487,6 +2647,8 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
|
|||||||
case VFIO_TYPE1_IOMMU:
|
case VFIO_TYPE1_IOMMU:
|
||||||
case VFIO_TYPE1v2_IOMMU:
|
case VFIO_TYPE1v2_IOMMU:
|
||||||
case VFIO_TYPE1_NESTING_IOMMU:
|
case VFIO_TYPE1_NESTING_IOMMU:
|
||||||
|
case VFIO_UNMAP_ALL:
|
||||||
|
case VFIO_UPDATE_VADDR:
|
||||||
return 1;
|
return 1;
|
||||||
case VFIO_DMA_CC_IOMMU:
|
case VFIO_DMA_CC_IOMMU:
|
||||||
if (!iommu)
|
if (!iommu)
|
||||||
@ -2658,7 +2820,8 @@ static int vfio_iommu_type1_map_dma(struct vfio_iommu *iommu,
|
|||||||
{
|
{
|
||||||
struct vfio_iommu_type1_dma_map map;
|
struct vfio_iommu_type1_dma_map map;
|
||||||
unsigned long minsz;
|
unsigned long minsz;
|
||||||
uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
|
uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE |
|
||||||
|
VFIO_DMA_MAP_FLAG_VADDR;
|
||||||
|
|
||||||
minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
|
minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
|
||||||
|
|
||||||
@ -2676,6 +2839,9 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
|
|||||||
{
|
{
|
||||||
struct vfio_iommu_type1_dma_unmap unmap;
|
struct vfio_iommu_type1_dma_unmap unmap;
|
||||||
struct vfio_bitmap bitmap = { 0 };
|
struct vfio_bitmap bitmap = { 0 };
|
||||||
|
uint32_t mask = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP |
|
||||||
|
VFIO_DMA_UNMAP_FLAG_VADDR |
|
||||||
|
VFIO_DMA_UNMAP_FLAG_ALL;
|
||||||
unsigned long minsz;
|
unsigned long minsz;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
@ -2684,8 +2850,12 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
|
|||||||
if (copy_from_user(&unmap, (void __user *)arg, minsz))
|
if (copy_from_user(&unmap, (void __user *)arg, minsz))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
|
|
||||||
if (unmap.argsz < minsz ||
|
if (unmap.argsz < minsz || unmap.flags & ~mask)
|
||||||
unmap.flags & ~VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP)
|
return -EINVAL;
|
||||||
|
|
||||||
|
if ((unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
|
||||||
|
(unmap.flags & (VFIO_DMA_UNMAP_FLAG_ALL |
|
||||||
|
VFIO_DMA_UNMAP_FLAG_VADDR)))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
|
if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
|
||||||
@ -2876,12 +3046,13 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
|
|||||||
struct vfio_dma *dma;
|
struct vfio_dma *dma;
|
||||||
bool kthread = current->mm == NULL;
|
bool kthread = current->mm == NULL;
|
||||||
size_t offset;
|
size_t offset;
|
||||||
|
int ret;
|
||||||
|
|
||||||
*copied = 0;
|
*copied = 0;
|
||||||
|
|
||||||
dma = vfio_find_dma(iommu, user_iova, 1);
|
ret = vfio_find_dma_valid(iommu, user_iova, 1, &dma);
|
||||||
if (!dma)
|
if (ret < 0)
|
||||||
return -EINVAL;
|
return ret;
|
||||||
|
|
||||||
if ((write && !(dma->prot & IOMMU_WRITE)) ||
|
if ((write && !(dma->prot & IOMMU_WRITE)) ||
|
||||||
!(dma->prot & IOMMU_READ))
|
!(dma->prot & IOMMU_READ))
|
||||||
@ -2973,6 +3144,19 @@ vfio_iommu_type1_group_iommu_domain(void *iommu_data,
|
|||||||
return domain;
|
return domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void vfio_iommu_type1_notify(void *iommu_data,
|
||||||
|
enum vfio_iommu_notify_type event)
|
||||||
|
{
|
||||||
|
struct vfio_iommu *iommu = iommu_data;
|
||||||
|
|
||||||
|
if (event != VFIO_IOMMU_CONTAINER_CLOSE)
|
||||||
|
return;
|
||||||
|
mutex_lock(&iommu->lock);
|
||||||
|
iommu->container_open = false;
|
||||||
|
mutex_unlock(&iommu->lock);
|
||||||
|
wake_up_all(&iommu->vaddr_wait);
|
||||||
|
}
|
||||||
|
|
||||||
static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
|
static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
|
||||||
.name = "vfio-iommu-type1",
|
.name = "vfio-iommu-type1",
|
||||||
.owner = THIS_MODULE,
|
.owner = THIS_MODULE,
|
||||||
@ -2987,6 +3171,7 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
|
|||||||
.unregister_notifier = vfio_iommu_type1_unregister_notifier,
|
.unregister_notifier = vfio_iommu_type1_unregister_notifier,
|
||||||
.dma_rw = vfio_iommu_type1_dma_rw,
|
.dma_rw = vfio_iommu_type1_dma_rw,
|
||||||
.group_iommu_domain = vfio_iommu_type1_group_iommu_domain,
|
.group_iommu_domain = vfio_iommu_type1_group_iommu_domain,
|
||||||
|
.notify = vfio_iommu_type1_notify,
|
||||||
};
|
};
|
||||||
|
|
||||||
static int __init vfio_iommu_type1_init(void)
|
static int __init vfio_iommu_type1_init(void)
|
||||||
|
@ -57,6 +57,11 @@ extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
|
|||||||
extern void vfio_device_put(struct vfio_device *device);
|
extern void vfio_device_put(struct vfio_device *device);
|
||||||
extern void *vfio_device_data(struct vfio_device *device);
|
extern void *vfio_device_data(struct vfio_device *device);
|
||||||
|
|
||||||
|
/*
 * Event codes delivered to an IOMMU backend driver through its notify
 * callback.
 */
enum vfio_iommu_notify_type {
	VFIO_IOMMU_CONTAINER_CLOSE = 0,
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
|
* struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
|
||||||
*/
|
*/
|
||||||
@ -92,6 +97,8 @@ struct vfio_iommu_driver_ops {
|
|||||||
void *data, size_t count, bool write);
|
void *data, size_t count, bool write);
|
||||||
struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
|
struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
|
||||||
struct iommu_group *group);
|
struct iommu_group *group);
|
||||||
|
void (*notify)(void *iommu_data,
|
||||||
|
enum vfio_iommu_notify_type event);
|
||||||
};
|
};
|
||||||
|
|
||||||
extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
|
extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
|
||||||
|
@ -46,6 +46,12 @@
|
|||||||
*/
|
*/
|
||||||
#define VFIO_NOIOMMU_IOMMU 8
|
#define VFIO_NOIOMMU_IOMMU 8
|
||||||
|
|
||||||
|
/* Supports VFIO_DMA_UNMAP_FLAG_ALL */
|
||||||
|
#define VFIO_UNMAP_ALL 9
|
||||||
|
|
||||||
|
/* Supports the vaddr flag for DMA map and unmap */
|
||||||
|
#define VFIO_UPDATE_VADDR 10
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The IOCTL interface is designed for extensibility by embedding the
|
* The IOCTL interface is designed for extensibility by embedding the
|
||||||
* structure length (argsz) and flags into structures passed between
|
* structure length (argsz) and flags into structures passed between
|
||||||
@ -1074,12 +1080,22 @@ struct vfio_iommu_type1_info_dma_avail {
|
|||||||
*
|
*
|
||||||
* Map process virtual addresses to IO virtual addresses using the
|
* Map process virtual addresses to IO virtual addresses using the
|
||||||
* provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
|
* provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
|
||||||
|
*
|
||||||
|
* If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and
|
||||||
|
* unblock translation of host virtual addresses in the iova range. The vaddr
|
||||||
|
* must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR. To
|
||||||
|
* maintain memory consistency within the user application, the updated vaddr
|
||||||
|
* must address the same memory object as originally mapped. Failure to do so
|
||||||
|
* will result in user memory corruption and/or device misbehavior. iova and
|
||||||
|
* size must match those in the original MAP_DMA call. Protection is not
|
||||||
|
* changed, and the READ & WRITE flags must be 0.
|
||||||
*/
|
*/
|
||||||
struct vfio_iommu_type1_dma_map {
|
struct vfio_iommu_type1_dma_map {
|
||||||
__u32 argsz;
|
__u32 argsz;
|
||||||
__u32 flags;
|
__u32 flags;
|
||||||
#define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */
|
#define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */
|
||||||
#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
|
#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
|
||||||
|
#define VFIO_DMA_MAP_FLAG_VADDR (1 << 2)
|
||||||
__u64 vaddr; /* Process virtual address */
|
__u64 vaddr; /* Process virtual address */
|
||||||
__u64 iova; /* IO virtual address */
|
__u64 iova; /* IO virtual address */
|
||||||
__u64 size; /* Size of mapping (bytes) */
|
__u64 size; /* Size of mapping (bytes) */
|
||||||
@ -1102,6 +1118,7 @@ struct vfio_bitmap {
|
|||||||
* field. No guarantee is made to the user that arbitrary unmaps of iova
|
* field. No guarantee is made to the user that arbitrary unmaps of iova
|
||||||
* or size different from those used in the original mapping call will
|
* or size different from those used in the original mapping call will
|
||||||
* succeed.
|
* succeed.
|
||||||
|
*
|
||||||
* VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
|
* VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
|
||||||
* before unmapping IO virtual addresses. When this flag is set, the user must
|
* before unmapping IO virtual addresses. When this flag is set, the user must
|
||||||
* provide a struct vfio_bitmap in data[]. User must provide zero-allocated
|
* provide a struct vfio_bitmap in data[]. User must provide zero-allocated
|
||||||
@ -1111,11 +1128,21 @@ struct vfio_bitmap {
|
|||||||
* indicates that the page at that offset from iova is dirty. A Bitmap of the
|
* indicates that the page at that offset from iova is dirty. A Bitmap of the
|
||||||
* pages in the range of unmapped size is returned in the user-provided
|
* pages in the range of unmapped size is returned in the user-provided
|
||||||
* vfio_bitmap.data.
|
* vfio_bitmap.data.
|
||||||
|
*
|
||||||
|
* If flags & VFIO_DMA_UNMAP_FLAG_ALL, unmap all addresses. iova and size
|
||||||
|
* must be 0. This cannot be combined with the get-dirty-bitmap flag.
|
||||||
|
*
|
||||||
|
* If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host
|
||||||
|
* virtual addresses in the iova range. Tasks that attempt to translate an
|
||||||
|
* iova's vaddr will block. DMA to already-mapped pages continues. This
|
||||||
|
* cannot be combined with the get-dirty-bitmap flag.
|
||||||
*/
|
*/
|
||||||
struct vfio_iommu_type1_dma_unmap {
|
struct vfio_iommu_type1_dma_unmap {
|
||||||
__u32 argsz;
|
__u32 argsz;
|
||||||
__u32 flags;
|
__u32 flags;
|
||||||
#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
|
#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
|
||||||
|
#define VFIO_DMA_UNMAP_FLAG_ALL (1 << 1)
|
||||||
|
#define VFIO_DMA_UNMAP_FLAG_VADDR (1 << 2)
|
||||||
__u64 iova; /* IO virtual address */
|
__u64 iova; /* IO virtual address */
|
||||||
__u64 size; /* Size of mapping (bytes) */
|
__u64 size; /* Size of mapping (bytes) */
|
||||||
__u8 data[];
|
__u8 data[];
|
||||||
|
Loading…
Reference in New Issue
Block a user