linux/drivers/vdpa/vdpa_user/iova_domain.c
Maxime Coquelin d7b4e3287c vduse: implement DMA sync callbacks
Since commit 295525e29a ("virtio_net: merge dma
operations when filling mergeable buffers"), VDUSE devices
require support for DMA's .sync_single_for_cpu() operation,
as the memory is non-coherent between the device and CPU
because of the use of a bounce buffer.

This patch implements both the .sync_single_for_cpu() and
.sync_single_for_device() callbacks, and also skips bounce
buffer copies during DMA map and unmap operations if the
DMA_ATTR_SKIP_CPU_SYNC attribute is set, to avoid extra
copies of the same buffer.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Message-Id: <20240219170606.587290-1-maxime.coquelin@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2024-03-19 02:45:49 -04:00

646 lines
16 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* MMU-based software IOTLB.
*
* Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
*
* Author: Xie Yongji <xieyongji@bytedance.com>
*
*/
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/vdpa.h>
#include "iova_domain.h"
/*
 * Insert the range [start, last] -> addr into domain->iotlb, attaching a
 * freshly allocated vdpa_map_file (file reference + offset) as the entry's
 * opaque context.
 *
 * The context is allocated with GFP_ATOMIC; every caller in this file
 * invokes this helper with domain->iotlb_lock held.
 *
 * Returns 0 on success, -ENOMEM if the context cannot be allocated, or the
 * error returned by vhost_iotlb_add_range_ctx(). On failure the file
 * reference taken here is dropped and the context is freed.
 */
static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
				 u64 start, u64 last,
				 u64 addr, unsigned int perm,
				 struct file *file, u64 offset)
{
	struct vdpa_map_file *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
	int err;

	if (!ctx)
		return -ENOMEM;

	/* The context owns one reference on the backing file. */
	ctx->file = get_file(file);
	ctx->offset = offset;

	err = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
					addr, perm, ctx);
	if (!err)
		return 0;

	/* Insertion failed: undo the reference and the allocation. */
	fput(ctx->file);
	kfree(ctx);
	return err;
}
/*
 * Remove every entry of domain->iotlb that vhost_iotlb_itree_first() reports
 * for [start, last]. For each entry the attached vdpa_map_file context is
 * released (file reference dropped, context freed) before the entry itself
 * is freed.
 *
 * Every caller in this file holds domain->iotlb_lock.
 */
static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
				  u64 start, u64 last)
{
	for (;;) {
		struct vhost_iotlb_map *entry;
		struct vdpa_map_file *ctx;

		/* Always restart the lookup: the previous hit was just freed. */
		entry = vhost_iotlb_itree_first(domain->iotlb, start, last);
		if (!entry)
			break;

		ctx = (struct vdpa_map_file *)entry->opaque;
		fput(ctx->file);
		kfree(ctx);
		vhost_iotlb_map_free(domain->iotlb, entry);
	}
}
/*
 * Replace the whole content of the domain's IOTLB with a copy of @iotlb.
 *
 * Under domain->iotlb_lock, all existing entries are dropped and each entry
 * of @iotlb is re-added with the file/offset found in its opaque
 * vdpa_map_file. If any insertion fails, everything added so far is removed
 * again, leaving the domain's IOTLB empty.
 *
 * Returns 0 on success or the error from vduse_iotlb_add_range().
 */
int vduse_domain_set_map(struct vduse_iova_domain *domain,
			 struct vhost_iotlb *iotlb)
{
	const u64 first = 0ULL, end = ULLONG_MAX;
	struct vhost_iotlb_map *src;
	int ret = 0;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, first, end);

	src = vhost_iotlb_itree_first(iotlb, first, end);
	while (src) {
		struct vdpa_map_file *ctx =
			(struct vdpa_map_file *)src->opaque;

		ret = vduse_iotlb_add_range(domain, src->start, src->last,
					    src->addr, src->perm,
					    ctx->file, ctx->offset);
		if (ret) {
			/* Do not leave a partially populated table behind. */
			vduse_iotlb_del_range(domain, first, end);
			break;
		}
		src = vhost_iotlb_itree_next(src, first, end);
	}

	spin_unlock(&domain->iotlb_lock);
	return ret;
}
/*
 * For every entry of @iotlb, delete the matching [start, last] range from
 * the domain's own IOTLB. The whole walk runs under domain->iotlb_lock.
 */
void vduse_domain_clear_map(struct vduse_iova_domain *domain,
			    struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *cur;

	spin_lock(&domain->iotlb_lock);

	cur = vhost_iotlb_itree_first(iotlb, 0ULL, ULLONG_MAX);
	while (cur) {
		vduse_iotlb_del_range(domain, cur->start, cur->last);
		cur = vhost_iotlb_itree_next(cur, 0ULL, ULLONG_MAX);
	}

	spin_unlock(&domain->iotlb_lock);
}
/*
 * Bind the bounce-map entries covering [iova, iova + size - 1] to the
 * original physical range starting at @paddr, one entry per PAGE_SIZE step.
 * A bounce page is allocated (GFP_ATOMIC) for any entry that has none yet.
 *
 * Returns 0 on success or -ENOMEM if a bounce page cannot be allocated.
 *
 * On failure, every entry already bound by this call has orig_phys reset to
 * INVALID_PHYS_ADDR. The caller frees the IOVA without unmapping, so without
 * this rollback the entries would stay marked "in use" and later trip the
 * WARN_ON in vduse_domain_free_kernel_bounce_pages(), which then skips (and
 * thus leaks) the page. Bounce pages allocated here are kept for reuse, as
 * the unmap path also keeps them.
 */
static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
					 u64 iova, u64 size, u64 paddr)
{
	struct vduse_bounce_map *map;
	u64 first = iova;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		if (!map->bounce_page) {
			map->bounce_page = alloc_page(GFP_ATOMIC);
			if (!map->bounce_page)
				goto err_rollback;
		}
		map->orig_phys = paddr;
		paddr += PAGE_SIZE;
		iova += PAGE_SIZE;
	}
	return 0;

err_rollback:
	/* Undo the bindings made for [first, iova) by this call. */
	while (first < iova) {
		map = &domain->bounce_maps[first >> PAGE_SHIFT];
		map->orig_phys = INVALID_PHYS_ADDR;
		first += PAGE_SIZE;
	}
	return -ENOMEM;
}
/*
 * Mark the bounce-map entries covering [iova, iova + size - 1] as unused by
 * resetting orig_phys to INVALID_PHYS_ADDR. The bounce pages themselves are
 * left in place.
 */
static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
					    u64 iova, u64 size)
{
	u64 end = iova + size - 1;
	u64 cur;

	for (cur = iova; cur <= end; cur += PAGE_SIZE)
		domain->bounce_maps[cur >> PAGE_SHIFT].orig_phys =
			INVALID_PHYS_ADDR;
}
/*
 * Copy @size bytes between the original buffer at physical address @orig and
 * the kernel-virtual buffer @addr, walking the original one page at a time.
 *
 * @dir == DMA_TO_DEVICE copies from the original pages into @addr; any other
 * direction copies from @addr into the original pages.
 */
static void do_bounce(phys_addr_t orig, void *addr, size_t size,
		      enum dma_data_direction dir)
{
	unsigned long pfn = PFN_DOWN(orig);
	unsigned int offset = offset_in_page(orig);

	/* Only the first page can start at a non-zero offset. */
	for (; size; pfn++, offset = 0) {
		unsigned int chunk = min_t(size_t, PAGE_SIZE - offset, size);
		struct page *page = pfn_to_page(pfn);

		if (dir != DMA_TO_DEVICE)
			memcpy_to_page(page, offset, addr, chunk);
		else
			memcpy_from_page(addr, page, offset, chunk);

		addr += chunk;
		size -= chunk;
	}
}
/*
 * Synchronise [iova, iova + size) between the bounce pages and the original
 * buffers recorded in the bounce map, in direction @dir (see do_bounce()).
 *
 * Nothing is done if @iova does not start inside the bounce region. The copy
 * stops with a warning at the first entry that has no bounce page or no
 * original address recorded.
 *
 * Every caller in this file holds domain->bounce_lock for reading.
 */
static void vduse_domain_bounce(struct vduse_iova_domain *domain,
				dma_addr_t iova, size_t size,
				enum dma_data_direction dir)
{
	if (iova >= domain->bounce_size)
		return;

	while (size) {
		struct vduse_bounce_map *entry =
			&domain->bounce_maps[iova >> PAGE_SHIFT];
		unsigned int in_page = offset_in_page(iova);
		size_t chunk = min_t(size_t, PAGE_SIZE - in_page, size);
		void *vaddr;

		if (WARN_ON(!entry->bounce_page ||
			    entry->orig_phys == INVALID_PHYS_ADDR))
			return;

		/* Map the bounce page only for the duration of the copy. */
		vaddr = kmap_local_page(entry->bounce_page);
		do_bounce(entry->orig_phys + in_page, vaddr + in_page,
			  chunk, dir);
		kunmap_local(vaddr);

		iova += chunk;
		size -= chunk;
	}
}
/*
 * Look up the page backing @iova in the domain's IOTLB.
 *
 * The page-sized window containing @iova is searched under
 * domain->iotlb_lock. On a hit, the page is derived from the entry's address
 * plus the distance of @iova from the entry start, and a reference is taken
 * on it.
 *
 * Returns the referenced page, or NULL if no entry covers the window.
 */
static struct page *
vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
{
	u64 page_start = iova & PAGE_MASK;
	u64 page_last = page_start + PAGE_SIZE - 1;
	struct vhost_iotlb_map *hit;
	struct page *result = NULL;

	spin_lock(&domain->iotlb_lock);
	hit = vhost_iotlb_itree_first(domain->iotlb, page_start, page_last);
	if (hit) {
		result = pfn_to_page((hit->addr + iova - hit->start) >>
				     PAGE_SHIFT);
		get_page(result);
	}
	spin_unlock(&domain->iotlb_lock);

	return result;
}
/*
 * Return the kernel bounce page for @iova with an extra reference taken.
 *
 * Returns NULL when the domain currently uses user-provided bounce pages or
 * when no bounce page exists for that entry. The lookup runs under
 * domain->bounce_lock held for reading.
 */
static struct page *
vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
{
	struct vduse_bounce_map *entry;
	struct page *result = NULL;

	read_lock(&domain->bounce_lock);
	entry = &domain->bounce_maps[iova >> PAGE_SHIFT];
	if (!domain->user_bounce_pages && entry->bounce_page) {
		result = entry->bounce_page;
		get_page(result);
	}
	read_unlock(&domain->bounce_lock);

	return result;
}
/*
 * Free every bounce page of the domain and clear its slot.
 *
 * Entries that still have an original address recorded are unexpected here:
 * they raise a warning and are skipped, so their page is not freed.
 */
static void
vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
{
	unsigned long nr = domain->bounce_size >> PAGE_SHIFT;
	unsigned long idx;

	for (idx = 0; idx < nr; idx++) {
		struct vduse_bounce_map *entry = &domain->bounce_maps[idx];

		if (WARN_ON(entry->orig_phys != INVALID_PHYS_ADDR))
			continue;

		if (entry->bounce_page) {
			__free_page(entry->bounce_page);
			entry->bounce_page = NULL;
		}
	}
}
/*
 * Switch the domain from kernel bounce pages to the caller-supplied @pages.
 *
 * @count must equal the number of pages in the bounce region; partial
 * replacement is not supported. Under the bounce write lock each existing
 * kernel bounce page is freed, after copying its content into the
 * replacement page if the entry is in use, and a reference is taken on every
 * installed page.
 *
 * Returns 0 on success, -EINVAL for a wrong @count, or -EEXIST if user
 * bounce pages are already installed.
 */
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
				       struct page **pages, int count)
{
	int idx, ret = -EEXIST;

	/* Partial mapping is not supported for now */
	if (count != (domain->bounce_size >> PAGE_SHIFT))
		return -EINVAL;

	write_lock(&domain->bounce_lock);
	if (!domain->user_bounce_pages) {
		for (idx = 0; idx < count; idx++) {
			struct vduse_bounce_map *entry =
				&domain->bounce_maps[idx];
			struct page *kpage = entry->bounce_page;

			if (kpage) {
				/* Preserve in-flight data of a mapped entry. */
				if (entry->orig_phys != INVALID_PHYS_ADDR)
					memcpy_to_page(pages[idx], 0,
						       page_address(kpage),
						       PAGE_SIZE);
				__free_page(kpage);
			}
			entry->bounce_page = pages[idx];
			get_page(pages[idx]);
		}
		domain->user_bounce_pages = true;
		ret = 0;
	}
	write_unlock(&domain->bounce_lock);

	return ret;
}
/*
 * Switch the domain back from user-provided bounce pages to kernel pages.
 *
 * Does nothing if no user bounce pages are installed. Otherwise, under the
 * bounce write lock, every entry drops its reference on the user page. An
 * entry that is in use first gets a new kernel page holding a copy of the
 * user page's content; an unused entry ends up with no bounce page.
 *
 * NOTE(review): the replacement page is requested with
 * GFP_ATOMIC | __GFP_NOFAIL and used without a NULL check - confirm the
 * page allocator really guarantees success for this non-blocking request.
 */
void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
{
	unsigned long idx, nr;

	write_lock(&domain->bounce_lock);
	if (domain->user_bounce_pages) {
		nr = domain->bounce_size >> PAGE_SHIFT;
		for (idx = 0; idx < nr; idx++) {
			struct vduse_bounce_map *entry =
				&domain->bounce_maps[idx];
			struct page *kpage = NULL;

			if (WARN_ON(!entry->bounce_page))
				continue;

			/* Preserve in-flight data of a mapped entry. */
			if (entry->orig_phys != INVALID_PHYS_ADDR) {
				kpage = alloc_page(GFP_ATOMIC | __GFP_NOFAIL);
				memcpy_from_page(page_address(kpage),
						 entry->bounce_page, 0,
						 PAGE_SIZE);
			}
			put_page(entry->bounce_page);
			entry->bounce_page = kpage;
		}
		domain->user_bounce_pages = false;
	}
	write_unlock(&domain->bounce_lock);
}
/*
 * Remove the IOTLB range covering the bounce region, if it is installed.
 *
 * domain->bounce_map is tested once without the lock as a fast path and
 * again under domain->iotlb_lock before the range is deleted and the flag
 * cleared.
 */
void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
{
	if (!domain->bounce_map)
		return;

	spin_lock(&domain->iotlb_lock);
	if (domain->bounce_map) {
		vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
		domain->bounce_map = 0;
	}
	spin_unlock(&domain->iotlb_lock);
}
/*
 * Install, once, the IOTLB range [0, bounce_size - 1] backed by the domain
 * file at offset 0 with read/write permission.
 *
 * domain->bounce_map is tested without the lock as a fast path and again
 * under domain->iotlb_lock; it is set only after the range was added.
 *
 * Returns 0 if the range is (already) installed, or the error from
 * vduse_iotlb_add_range().
 */
static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
{
	int err = 0;

	if (domain->bounce_map)
		return 0;

	spin_lock(&domain->iotlb_lock);
	if (!domain->bounce_map) {
		err = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
					    0, VHOST_MAP_RW, domain->file, 0);
		if (!err)
			domain->bounce_map = 1;
	}
	spin_unlock(&domain->iotlb_lock);

	return err;
}
static dma_addr_t
vduse_domain_alloc_iova(struct iova_domain *iovad,
unsigned long size, unsigned long limit)
{
unsigned long shift = iova_shift(iovad);
unsigned long iova_len = iova_align(iovad, size) >> shift;
unsigned long iova_pfn;
iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);
return (dma_addr_t)iova_pfn << shift;
}
/*
 * Return the IOVA range [iova, iova + size) to @iovad. @size is rounded up
 * with iova_align() exactly as in vduse_domain_alloc_iova(), so the same
 * number of granules is released.
 */
static void vduse_domain_free_iova(struct iova_domain *iovad,
				   dma_addr_t iova, size_t size)
{
	unsigned long granule_shift = iova_shift(iovad);

	free_iova_fast(iovad, iova >> granule_shift,
		       iova_align(iovad, size) >> granule_shift);
}
/*
 * Make CPU-side data visible to the device: for DMA_TO_DEVICE and
 * DMA_BIDIRECTIONAL, copy the original buffer into the bounce pages for
 * [dma_addr, dma_addr + size). Other directions do nothing.
 *
 * The bounce lock is taken for reading around the operation.
 */
void vduse_domain_sync_single_for_device(struct vduse_iova_domain *domain,
					 dma_addr_t dma_addr, size_t size,
					 enum dma_data_direction dir)
{
	read_lock(&domain->bounce_lock);
	switch (dir) {
	case DMA_TO_DEVICE:
	case DMA_BIDIRECTIONAL:
		vduse_domain_bounce(domain, dma_addr, size, DMA_TO_DEVICE);
		break;
	default:
		break;
	}
	read_unlock(&domain->bounce_lock);
}
/*
 * Make device-side data visible to the CPU: for DMA_FROM_DEVICE and
 * DMA_BIDIRECTIONAL, copy the bounce pages for [dma_addr, dma_addr + size)
 * back into the original buffer. Other directions do nothing.
 *
 * The bounce lock is taken for reading around the operation.
 */
void vduse_domain_sync_single_for_cpu(struct vduse_iova_domain *domain,
				      dma_addr_t dma_addr, size_t size,
				      enum dma_data_direction dir)
{
	read_lock(&domain->bounce_lock);
	switch (dir) {
	case DMA_FROM_DEVICE:
	case DMA_BIDIRECTIONAL:
		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
		break;
	default:
		break;
	}
	read_unlock(&domain->bounce_lock);
}
/*
 * Create a streaming DMA mapping for @size bytes at @offset inside @page.
 *
 * An IOVA is allocated from the streaming allocator below the bounce region
 * limit, the bounce IOTLB range is installed on first use, and the bounce
 * map entries are bound to the buffer's physical address. Unless
 * DMA_ATTR_SKIP_CPU_SYNC is set, the buffer content is copied into the
 * bounce pages for DMA_TO_DEVICE and DMA_BIDIRECTIONAL mappings.
 *
 * Returns the IOVA, or DMA_MAPPING_ERROR on failure (the IOVA is released
 * again in that case).
 */
dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
				 struct page *page, unsigned long offset,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;
	phys_addr_t phys = page_to_phys(page) + offset;
	dma_addr_t iova;
	int failed;

	iova = vduse_domain_alloc_iova(iovad, size, domain->bounce_size - 1);
	if (!iova)
		return DMA_MAPPING_ERROR;

	if (vduse_domain_init_bounce_map(domain))
		goto free_iova;

	read_lock(&domain->bounce_lock);
	failed = vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size,
					      phys);
	if (!failed && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
		vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);
	read_unlock(&domain->bounce_lock);

	if (!failed)
		return iova;

free_iova:
	vduse_domain_free_iova(iovad, iova, size);
	return DMA_MAPPING_ERROR;
}
/*
 * Tear down a streaming DMA mapping created by vduse_domain_map_page().
 *
 * Unless DMA_ATTR_SKIP_CPU_SYNC is set, the bounce pages are copied back to
 * the original buffer for DMA_FROM_DEVICE and DMA_BIDIRECTIONAL mappings.
 * The bounce map entries are then marked unused and the IOVA is released.
 */
void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
			     dma_addr_t dma_addr, size_t size,
			     enum dma_data_direction dir, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;
	int copy_back = !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
			(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL);

	read_lock(&domain->bounce_lock);
	if (copy_back)
		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
	vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
	read_unlock(&domain->bounce_lock);

	vduse_domain_free_iova(iovad, dma_addr, size);
}
/*
 * Allocate a coherent buffer of @size bytes and map it in the domain.
 *
 * An IOVA is taken from the consistent allocator (bounded by iova_limit),
 * the memory comes from alloc_pages_exact(), and the pair is registered in
 * the IOTLB with read/write permission, backed by the domain file at an
 * offset equal to the IOVA.
 *
 * Returns the buffer's kernel address and stores the IOVA in @dma_addr. On
 * failure returns NULL, stores DMA_MAPPING_ERROR in @dma_addr and releases
 * whatever had been allocated.
 */
void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
				  size_t size, dma_addr_t *dma_addr,
				  gfp_t flag, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	dma_addr_t iova;
	void *buf;
	int err;

	iova = vduse_domain_alloc_iova(iovad, size, domain->iova_limit);
	buf = alloc_pages_exact(size, flag);
	if (!iova || !buf)
		goto fail;

	spin_lock(&domain->iotlb_lock);
	err = vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
				    virt_to_phys(buf), VHOST_MAP_RW,
				    domain->file, (u64)iova);
	spin_unlock(&domain->iotlb_lock);
	if (err)
		goto fail;

	*dma_addr = iova;
	return buf;

fail:
	*dma_addr = DMA_MAPPING_ERROR;
	if (buf)
		free_pages_exact(buf, size);
	if (iova)
		vduse_domain_free_iova(iovad, iova, size);
	return NULL;
}
/*
 * Release a coherent buffer obtained from vduse_domain_alloc_coherent().
 *
 * The IOTLB entry for [dma_addr, dma_addr + size - 1] is looked up under
 * domain->iotlb_lock; if none exists a warning is raised and nothing is
 * freed. Otherwise the entry and its file context are released, the IOVA is
 * returned to the consistent allocator and the pages are freed using the
 * physical address recorded in the entry (@vaddr is not consulted).
 */
void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
				void *vaddr, dma_addr_t dma_addr,
				unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	struct vhost_iotlb_map *entry;
	struct vdpa_map_file *ctx;
	phys_addr_t phys;

	spin_lock(&domain->iotlb_lock);
	entry = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
					(u64)dma_addr + size - 1);
	if (WARN_ON(!entry)) {
		spin_unlock(&domain->iotlb_lock);
		return;
	}

	/* Remember the backing address before the entry goes away. */
	phys = entry->addr;
	ctx = (struct vdpa_map_file *)entry->opaque;
	fput(ctx->file);
	kfree(ctx);
	vhost_iotlb_map_free(domain->iotlb, entry);
	spin_unlock(&domain->iotlb_lock);

	vduse_domain_free_iova(iovad, dma_addr, size);
	free_pages_exact(phys_to_virt(phys), size);
}
/*
 * Page-fault handler for mappings of the domain file.
 *
 * The faulting page offset is converted to an IOVA. Addresses inside the
 * bounce region are resolved to a kernel bounce page, all others through the
 * IOTLB. The page (already referenced by the lookup helper) is handed back
 * in vmf->page.
 *
 * Returns 0 on success or VM_FAULT_SIGBUS if the VMA has no domain attached
 * or no page backs the address.
 */
static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
{
	struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
	unsigned long iova;
	struct page *page;

	if (!domain)
		return VM_FAULT_SIGBUS;

	iova = vmf->pgoff << PAGE_SHIFT;
	page = (iova < domain->bounce_size) ?
		vduse_domain_get_bounce_page(domain, iova) :
		vduse_domain_get_coherent_page(domain, iova);
	if (!page)
		return VM_FAULT_SIGBUS;

	vmf->page = page;
	return 0;
}
/*
 * VMA operations installed by vduse_domain_mmap(): pages of the mapping are
 * provided on demand by vduse_domain_mmap_fault().
 */
static const struct vm_operations_struct vduse_domain_mmap_ops = {
.fault = vduse_domain_mmap_fault,
};
/*
 * mmap handler of the domain file. No pages are mapped here: the VMA is
 * flagged VM_DONTDUMP | VM_DONTEXPAND, tagged with the domain stored in the
 * file's private data, and wired to the fault-driven VMA operations.
 *
 * Always returns 0.
 */
static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
{
	vm_flags_set(vma, VM_DONTDUMP | VM_DONTEXPAND);
	vma->vm_private_data = file->private_data;
	vma->vm_ops = &vduse_domain_mmap_ops;

	return 0;
}
/*
 * Release handler of the domain file: tears down and frees the whole domain
 * stored in file->private_data. Always returns 0.
 */
static int vduse_domain_release(struct inode *inode, struct file *file)
{
struct vduse_iova_domain *domain = file->private_data;
/*
 * With iotlb_lock held: drop every IOTLB range, hand back any user bounce
 * pages, then free the kernel bounce pages.
 * NOTE(review): vduse_domain_remove_user_bounce_pages() takes bounce_lock
 * and allocates pages while this spinlock is held - confirm that is intended.
 */
spin_lock(&domain->iotlb_lock);
vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
vduse_domain_remove_user_bounce_pages(domain);
vduse_domain_free_kernel_bounce_pages(domain);
spin_unlock(&domain->iotlb_lock);
/* Release both IOVA allocators, then the IOTLB, bounce map and domain. */
put_iova_domain(&domain->stream_iovad);
put_iova_domain(&domain->consistent_iovad);
vhost_iotlb_free(domain->iotlb);
vfree(domain->bounce_maps);
kfree(domain);
return 0;
}
/*
 * File operations of the anonymous "[vduse-domain]" file created in
 * vduse_domain_create(): it can be mmap()ed, and its release handler frees
 * the domain.
 */
static const struct file_operations vduse_domain_fops = {
.owner = THIS_MODULE,
.mmap = vduse_domain_mmap,
.release = vduse_domain_release,
};
/*
 * Drop the reference on the domain's file. The domain itself is freed by the
 * file's release handler (vduse_domain_release), not here.
 */
void vduse_domain_destroy(struct vduse_iova_domain *domain)
{
fput(domain->file);
}
/*
 * Allocate and initialise an IOVA domain.
 *
 * @iova_limit:  upper bound handed to the consistent IOVA allocator; must be
 *               greater than @bounce_size.
 * @bounce_size: size of the bounce region in bytes, rounded up to a page
 *               multiple.
 *
 * Sets up the IOTLB, the bounce map (one entry per bounce page, all marked
 * unused), both locks, the streaming and consistent IOVA allocators and,
 * last, the anonymous file that represents the domain.
 *
 * The file is created last on purpose. Once it exists it owns the domain:
 * dropping it runs vduse_domain_release(), which frees the domain and all
 * its members. An error path that called fput() and then freed those members
 * by hand would free them twice, so no step that can fail may follow the
 * file creation.
 *
 * Returns the new domain, or NULL on any failure.
 */
struct vduse_iova_domain *
vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
{
	struct vduse_iova_domain *domain;
	struct file *file;
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;
	int ret;

	bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
	if (iova_limit <= bounce_size)
		return NULL;

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return NULL;

	domain->iotlb = vhost_iotlb_alloc(0, 0);
	if (!domain->iotlb)
		goto err_iotlb;

	domain->iova_limit = iova_limit;
	domain->bounce_size = PAGE_ALIGN(bounce_size);
	domain->bounce_maps = vzalloc(bounce_pfns *
				      sizeof(struct vduse_bounce_map));
	if (!domain->bounce_maps)
		goto err_map;

	/* No bounce entry is bound to an original buffer yet. */
	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		map->orig_phys = INVALID_PHYS_ADDR;
	}

	rwlock_init(&domain->bounce_lock);
	spin_lock_init(&domain->iotlb_lock);

	/* Streaming mappings live inside the bounce region ... */
	init_iova_domain(&domain->stream_iovad,
			 PAGE_SIZE, IOVA_START_PFN);
	ret = iova_domain_init_rcaches(&domain->stream_iovad);
	if (ret)
		goto err_iovad_stream;

	/* ... coherent allocations start right after it. */
	init_iova_domain(&domain->consistent_iovad,
			 PAGE_SIZE, bounce_pfns);
	ret = iova_domain_init_rcaches(&domain->consistent_iovad);
	if (ret)
		goto err_iovad_consistent;

	/* Last fallible step: from here on the file owns the domain. */
	file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
				  domain, O_RDWR);
	if (IS_ERR(file))
		goto err_file;
	domain->file = file;

	return domain;

err_file:
	put_iova_domain(&domain->consistent_iovad);
err_iovad_consistent:
	put_iova_domain(&domain->stream_iovad);
err_iovad_stream:
	vfree(domain->bounce_maps);
err_map:
	vhost_iotlb_free(domain->iotlb);
err_iotlb:
	kfree(domain);
	return NULL;
}
/*
 * Module-level setup for IOVA domains: takes a reference on the IOVA cache
 * and returns the result of iova_cache_get().
 */
int vduse_domain_init(void)
{
return iova_cache_get();
}
/*
 * Module-level teardown: drops the IOVA cache reference taken by
 * vduse_domain_init().
 */
void vduse_domain_exit(void)
{
iova_cache_put();
}