freedreno/drm/virtio: Pre-allocate cmdstream buffers

We know cmdstream buffers are immediately mmap'd, which is both
expensive on the host, and breaks the pipelining as guest is forced
to stall waiting for the host.  So pre-allocate some cmdstream
buffers, so that we have something that is (hopefully) already
allocated and mapped to guest's physical memory before we need it.
The older buffer from the head of the prealloc list replaces the
newly allocated buffer which is pushed to the tail of the prealloc
list.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19656>
This commit is contained in:
Rob Clark 2022-11-10 07:36:22 -08:00 committed by Marge Bot
parent 1bb0cd6682
commit 5a6f95db13
7 changed files with 51 additions and 5 deletions

View File

@ -150,8 +150,7 @@ _fd_bo_set_name(struct fd_bo *bo, const char *fmt, va_list ap)
struct fd_bo *
fd_bo_new_ring(struct fd_device *dev, uint32_t size)
{
uint32_t flags = FD_BO_GPUREADONLY | FD_BO_CACHED_COHERENT;
struct fd_bo *bo = bo_new(dev, size, flags, &dev->ring_cache);
struct fd_bo *bo = bo_new(dev, size, RING_FLAGS, &dev->ring_cache);
if (bo) {
bo->bo_reuse = RING_CACHE;
bo->reloc_flags |= FD_RELOC_DUMP;

View File

@ -108,8 +108,9 @@ struct fd_fence {
/* Hint that the bo will be exported/shared: */
#define FD_BO_SHARED BITSET_BIT(5)
/* backend private bo flags: */
/* internal bo flags: */
#define _FD_BO_VIRTIO_SHM BITSET_BIT(6)
#define _FD_BO_RING BITSET_BIT(7)
/* bo access flags: (keep aligned to MSM_PREP_x) */
#define FD_BO_PREP_READ BITSET_BIT(0)

View File

@ -54,6 +54,7 @@
extern simple_mtx_t table_lock;
#define SUBALLOC_SIZE (32 * 1024)
/*
 * Flag set passed to bo_new() by fd_bo_new_ring().  The virtio backend
 * compares an allocation's flags against this exact value (together with
 * SUBALLOC_SIZE) to decide whether to hand out a pre-allocated buffer.
 */
#define RING_FLAGS (FD_BO_GPUREADONLY | FD_BO_CACHED_COHERENT | _FD_BO_RING)
/*
* Stupid/simple growable array implementation:

View File

@ -372,8 +372,8 @@ fail:
}
/* allocate a buffer object: */
struct fd_bo *
virtio_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags)
static struct fd_bo *
virtio_bo_new_impl(struct fd_device *dev, uint32_t size, uint32_t flags)
{
struct virtio_device *virtio_dev = to_virtio_device(dev);
struct drm_virtgpu_resource_create_blob args = {
@ -449,3 +449,38 @@ fail:
}
return NULL;
}
/*
 * Allocate a buffer object.
 *
 * Every request is first satisfied by virtio_bo_new_impl().  When the
 * request is exactly a SUBALLOC_SIZE buffer with RING_FLAGS, the freshly
 * created bo is parked at the tail of the device's prealloc list and the
 * bo at the head of that list is returned in its place.
 *
 * Returns the bo to use, or NULL if the underlying allocation failed.
 */
struct fd_bo *
virtio_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags)
{
   struct fd_bo *fresh = virtio_bo_new_impl(dev, size, flags);

   /* Failed allocations and non-ring requests are returned untouched: */
   if (!fresh || (flags != RING_FLAGS) || (size != SUBALLOC_SIZE))
      return fresh;

   struct virtio_device *virtio_dev = to_virtio_device(dev);
   struct fd_bo *ready;

   /*
    * We know this buffer will be immediately mmap'd, so trade it for
    * one that was allocated earlier to avoid the latency hit of waiting
    * for the host to catch up.  Queue the new bo first so the list is
    * never empty when we take its head.
    */
   simple_mtx_lock(&virtio_dev->eb_lock);
   list_addtail(&fresh->list, &virtio_dev->prealloc_list);
   ready = list_first_entry(&virtio_dev->prealloc_list, struct fd_bo, list);
   list_delinit(&ready->list);
   simple_mtx_unlock(&virtio_dev->eb_lock);

   return ready;
}
/*
 * Populate the device's prealloc list with up to 16 buffers, each
 * created via virtio_bo_new_impl() with SUBALLOC_SIZE and RING_FLAGS.
 * Stops at the first allocation failure, keeping whatever was already
 * queued.
 */
void virtio_bo_setup_prealloc(struct fd_device *dev)
{
   struct virtio_device *virtio_dev = to_virtio_device(dev);
   int remaining = 16;

   while (remaining-- > 0) {
      struct fd_bo *ring = virtio_bo_new_impl(dev, SUBALLOC_SIZE, RING_FLAGS);

      /* Give up on the first failure rather than retrying: */
      if (!ring)
         return;

      list_addtail(&ring->list, &virtio_dev->prealloc_list);
   }
}

View File

@ -204,6 +204,8 @@ virtio_device_new(int fd, drmVersionPtr version)
set_debuginfo(dev);
list_inithead(&virtio_dev->prealloc_list);
util_vma_heap_init(&virtio_dev->address_space,
caps.u.msm.va_start,
caps.u.msm.va_size);

View File

@ -220,6 +220,8 @@ init_shmem(struct fd_device *dev)
uint32_t offset = virtio_dev->shmem->rsp_mem_offset;
virtio_dev->rsp_mem_len = fd_bo_size(virtio_dev->shmem_bo) - offset;
virtio_dev->rsp_mem = &((uint8_t *)virtio_dev->shmem)[offset];
virtio_bo_setup_prealloc(dev);
}
simple_mtx_unlock(&virtio_dev->rsp_lock);

View File

@ -81,6 +81,11 @@ struct virtio_device {
struct util_vma_heap address_space;
simple_mtx_t address_space_lock;
/**
* Pre-allocated cmdstream buffers to avoid stalling on mmap.
*/
struct list_head prealloc_list;
uint32_t reqbuf_len;
uint32_t reqbuf_cnt;
uint8_t reqbuf[0x4000];
@ -182,6 +187,7 @@ FD_DEFINE_CAST(fd_bo, virtio_bo);
struct fd_bo *virtio_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags);
struct fd_bo *virtio_bo_from_handle(struct fd_device *dev, uint32_t size,
uint32_t handle);
void virtio_bo_setup_prealloc(struct fd_device *dev);
/*
* Internal helpers: