From 5a6f95db135a0f52859088a83a960fde528e0980 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 10 Nov 2022 07:36:22 -0800 Subject: [PATCH] freedreno/drm/virtio: Pre-allocate cmdstream buffers We know cmdstream buffers are immediately mmap'd, which is both expensive on the host, and breaks the pipelining as the guest is forced to stall waiting for the host. So pre-allocate some cmdstream buffers, so that we have something that is (hopefully) already allocated and mapped into the guest's physical memory before we need it. The older buffer from the head of the prealloc list replaces the newly allocated buffer which is pushed to the tail of the prealloc list. Signed-off-by: Rob Clark Part-of: --- src/freedreno/drm/freedreno_bo.c | 3 +- src/freedreno/drm/freedreno_drmif.h | 3 +- src/freedreno/drm/freedreno_priv.h | 1 + src/freedreno/drm/virtio/virtio_bo.c | 39 ++++++++++++++++++++++-- src/freedreno/drm/virtio/virtio_device.c | 2 ++ src/freedreno/drm/virtio/virtio_pipe.c | 2 ++ src/freedreno/drm/virtio/virtio_priv.h | 6 ++++ 7 files changed, 51 insertions(+), 5 deletions(-) diff --git a/src/freedreno/drm/freedreno_bo.c b/src/freedreno/drm/freedreno_bo.c index fa63709e37c..78285108d5a 100644 --- a/src/freedreno/drm/freedreno_bo.c +++ b/src/freedreno/drm/freedreno_bo.c @@ -150,8 +150,7 @@ _fd_bo_set_name(struct fd_bo *bo, const char *fmt, va_list ap) struct fd_bo * fd_bo_new_ring(struct fd_device *dev, uint32_t size) { - uint32_t flags = FD_BO_GPUREADONLY | FD_BO_CACHED_COHERENT; - struct fd_bo *bo = bo_new(dev, size, flags, &dev->ring_cache); + struct fd_bo *bo = bo_new(dev, size, RING_FLAGS, &dev->ring_cache); if (bo) { bo->bo_reuse = RING_CACHE; bo->reloc_flags |= FD_RELOC_DUMP; diff --git a/src/freedreno/drm/freedreno_drmif.h b/src/freedreno/drm/freedreno_drmif.h index 616b5b29d3a..d4dd3f64a05 100644 --- a/src/freedreno/drm/freedreno_drmif.h +++ b/src/freedreno/drm/freedreno_drmif.h @@ -108,8 +108,9 @@ struct fd_fence { /* Hint that the bo will be exported/shared: */ #define
FD_BO_SHARED BITSET_BIT(5) -/* backend private bo flags: */ +/* internal bo flags: */ #define _FD_BO_VIRTIO_SHM BITSET_BIT(6) +#define _FD_BO_RING BITSET_BIT(7) /* bo access flags: (keep aligned to MSM_PREP_x) */ #define FD_BO_PREP_READ BITSET_BIT(0) diff --git a/src/freedreno/drm/freedreno_priv.h b/src/freedreno/drm/freedreno_priv.h index a5184dc22cc..827ed1c3d27 100644 --- a/src/freedreno/drm/freedreno_priv.h +++ b/src/freedreno/drm/freedreno_priv.h @@ -54,6 +54,7 @@ extern simple_mtx_t table_lock; #define SUBALLOC_SIZE (32 * 1024) +#define RING_FLAGS (FD_BO_GPUREADONLY | FD_BO_CACHED_COHERENT | _FD_BO_RING) /* * Stupid/simple growable array implementation: diff --git a/src/freedreno/drm/virtio/virtio_bo.c b/src/freedreno/drm/virtio/virtio_bo.c index 7636f3948d1..52a1f016ff3 100644 --- a/src/freedreno/drm/virtio/virtio_bo.c +++ b/src/freedreno/drm/virtio/virtio_bo.c @@ -372,8 +372,8 @@ fail: } /* allocate a buffer object: */ -struct fd_bo * -virtio_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags) +static struct fd_bo * +virtio_bo_new_impl(struct fd_device *dev, uint32_t size, uint32_t flags) { struct virtio_device *virtio_dev = to_virtio_device(dev); struct drm_virtgpu_resource_create_blob args = { @@ -449,3 +449,38 @@ fail: } return NULL; } + +struct fd_bo * +virtio_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags) +{ + struct fd_bo *bo = virtio_bo_new_impl(dev, size, flags); + + if (bo && (flags == RING_FLAGS) && (size == SUBALLOC_SIZE)) { + struct virtio_device *virtio_dev = to_virtio_device(dev); + + /* + * Swap the bo with an earlier pre-allocated one, since we know + * this one will be immediately mmap'd and want to avoid the + * latency hit of waiting for the host to catch up. 
+ */ + simple_mtx_lock(&virtio_dev->eb_lock); + list_addtail(&bo->list, &virtio_dev->prealloc_list); + bo = list_first_entry(&virtio_dev->prealloc_list, struct fd_bo, list); + list_delinit(&bo->list); + simple_mtx_unlock(&virtio_dev->eb_lock); + } + + return bo; +} + +void virtio_bo_setup_prealloc(struct fd_device *dev) +{ + struct virtio_device *virtio_dev = to_virtio_device(dev); + + for (int i = 0; i < 16; i++) { + struct fd_bo *bo = virtio_bo_new_impl(dev, SUBALLOC_SIZE, RING_FLAGS); + if (!bo) + break; + list_addtail(&bo->list, &virtio_dev->prealloc_list); + } +} diff --git a/src/freedreno/drm/virtio/virtio_device.c b/src/freedreno/drm/virtio/virtio_device.c index 8a09e36cb29..333565016dd 100644 --- a/src/freedreno/drm/virtio/virtio_device.c +++ b/src/freedreno/drm/virtio/virtio_device.c @@ -204,6 +204,8 @@ virtio_device_new(int fd, drmVersionPtr version) set_debuginfo(dev); + list_inithead(&virtio_dev->prealloc_list); + util_vma_heap_init(&virtio_dev->address_space, caps.u.msm.va_start, caps.u.msm.va_size); diff --git a/src/freedreno/drm/virtio/virtio_pipe.c b/src/freedreno/drm/virtio/virtio_pipe.c index bb54442cbfb..f109cfb57d2 100644 --- a/src/freedreno/drm/virtio/virtio_pipe.c +++ b/src/freedreno/drm/virtio/virtio_pipe.c @@ -220,6 +220,8 @@ init_shmem(struct fd_device *dev) uint32_t offset = virtio_dev->shmem->rsp_mem_offset; virtio_dev->rsp_mem_len = fd_bo_size(virtio_dev->shmem_bo) - offset; virtio_dev->rsp_mem = &((uint8_t *)virtio_dev->shmem)[offset]; + + virtio_bo_setup_prealloc(dev); } simple_mtx_unlock(&virtio_dev->rsp_lock); diff --git a/src/freedreno/drm/virtio/virtio_priv.h b/src/freedreno/drm/virtio/virtio_priv.h index c210a2b05b7..d453d15c26c 100644 --- a/src/freedreno/drm/virtio/virtio_priv.h +++ b/src/freedreno/drm/virtio/virtio_priv.h @@ -81,6 +81,11 @@ struct virtio_device { struct util_vma_heap address_space; simple_mtx_t address_space_lock; + /** + * Pre-allocated cmdstream buffers to avoid stalling on mmap. 
+ */ + struct list_head prealloc_list; + uint32_t reqbuf_len; uint32_t reqbuf_cnt; uint8_t reqbuf[0x4000]; @@ -182,6 +187,7 @@ FD_DEFINE_CAST(fd_bo, virtio_bo); struct fd_bo *virtio_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags); struct fd_bo *virtio_bo_from_handle(struct fd_device *dev, uint32_t size, uint32_t handle); +void virtio_bo_setup_prealloc(struct fd_device *dev); /* * Internal helpers: