From 89a9157e1253bb3384a7596cb51bf1ceeac063ed Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:18 +0800 Subject: [PATCH 01/13] virtio: add packed ring types and macros Add types and macros for packed ring. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- include/uapi/linux/virtio_config.h | 3 ++ include/uapi/linux/virtio_ring.h | 52 ++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index 449132c76b1c..1196e1c1d4f6 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -75,6 +75,9 @@ */ #define VIRTIO_F_IOMMU_PLATFORM 33 +/* This feature indicates support for the packed virtqueue layout. */ +#define VIRTIO_F_RING_PACKED 34 + /* * Does the device support Single Root I/O Virtualization? */ diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 6d5d5faa989b..2414f8af26b3 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -44,6 +44,13 @@ /* This means the buffer contains a list of buffer descriptors. */ #define VRING_DESC_F_INDIRECT 4 +/* + * Mark a descriptor as available or used in packed ring. + * Notice: they are defined as shifts instead of shifted values. + */ +#define VRING_PACKED_DESC_F_AVAIL 7 +#define VRING_PACKED_DESC_F_USED 15 + /* The Host uses this in used->flags to advise the Guest: don't kick me when * you add a buffer. It's unreliable, so it's simply an optimization. Guest * will still kick if it's out of buffers. */ @@ -53,6 +60,23 @@ * optimization. */ #define VRING_AVAIL_F_NO_INTERRUPT 1 +/* Enable events in packed ring. */ +#define VRING_PACKED_EVENT_FLAG_ENABLE 0x0 +/* Disable events in packed ring. */ +#define VRING_PACKED_EVENT_FLAG_DISABLE 0x1 +/* + * Enable events for a specific descriptor in packed ring. + * (as specified by Descriptor Ring Change Event Offset/Wrap Counter). + * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated. + */ +#define VRING_PACKED_EVENT_FLAG_DESC 0x2 + +/* + * Wrap counter bit shift in event suppression structure + * of packed ring. + */ +#define VRING_PACKED_EVENT_F_WRAP_CTR 15 + /* We support indirect buffer descriptors */ #define VIRTIO_RING_F_INDIRECT_DESC 28 @@ -171,4 +195,32 @@ static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); } +struct vring_packed_desc_event { + /* Descriptor Ring Change Event Offset/Wrap Counter. */ + __le16 off_wrap; + /* Descriptor Ring Change Event Flags. */ + __le16 flags; +}; + +struct vring_packed_desc { + /* Buffer Address. */ + __le64 addr; + /* Buffer Length. */ + __le32 len; + /* Buffer ID. */ + __le16 id; + /* The flags depending on descriptor type. */ + __le16 flags; +}; + +struct vring_packed { + unsigned int num; + + struct vring_packed_desc *desc; + + struct vring_packed_desc_event *driver; + + struct vring_packed_desc_event *device; +}; + #endif /* _UAPI_LINUX_VIRTIO_RING_H */ From 138fd25148638a93313f8ce703dd4aef2704dcea Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:19 +0800 Subject: [PATCH 02/13] virtio_ring: add _split suffix for split ring functions Add _split suffix for split ring specific functions. This is a preparation for introducing the packed ring support. There is no functional change. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- drivers/virtio/virtio_ring.c | 253 +++++++++++++++++++++-------------- 1 file changed, 156 insertions(+), 97 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 814b395007b2..29fab2fb39cb 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -200,8 +200,8 @@ static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, cpu_addr, size, direction); } -static void vring_unmap_one(const struct vring_virtqueue *vq, - struct vring_desc *desc) +static void vring_unmap_one_split(const struct vring_virtqueue *vq, + struct vring_desc *desc) { u16 flags; @@ -234,8 +234,9 @@ static int vring_mapping_error(const struct vring_virtqueue *vq, return dma_mapping_error(vring_dma_dev(vq), addr); } -static struct vring_desc *alloc_indirect(struct virtqueue *_vq, - unsigned int total_sg, gfp_t gfp) +static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, + unsigned int total_sg, + gfp_t gfp) { struct vring_desc *desc; unsigned int i; @@ -256,14 +257,14 @@ static struct vring_desc *alloc_indirect(struct virtqueue *_vq, return desc; } -static inline int virtqueue_add(struct virtqueue *_vq, - struct scatterlist *sgs[], - unsigned int total_sg, - unsigned int out_sgs, - unsigned int in_sgs, - void *data, - void *ctx, - gfp_t gfp) +static inline int virtqueue_add_split(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + void *ctx, + gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); struct scatterlist *sg; @@ -302,7 +303,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, /* If the host supports indirect descriptor tables, and we have multiple * buffers, then go indirect. FIXME: tune this threshold */ if (vq->indirect && total_sg > 1 && vq->vq.num_free) - desc = alloc_indirect(_vq, total_sg, gfp); + desc = alloc_indirect_split(_vq, total_sg, gfp); else { desc = NULL; WARN_ON_ONCE(total_sg > vq->vring.num && !vq->indirect); @@ -423,7 +424,7 @@ unmap_release: for (n = 0; n < total_sg; n++) { if (i == err_idx) break; - vring_unmap_one(vq, &desc[i]); + vring_unmap_one_split(vq, &desc[i]); i = virtio16_to_cpu(_vq->vdev, vq->vring.desc[i].next); } @@ -434,6 +435,19 @@ unmap_release: return -EIO; } +static inline int virtqueue_add(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + void *ctx, + gfp_t gfp) +{ + return virtqueue_add_split(_vq, sgs, total_sg, + out_sgs, in_sgs, data, ctx, gfp); +} + /** * virtqueue_add_sgs - expose buffers to other end * @vq: the struct virtqueue we're talking about. @@ -536,18 +550,7 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq, } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); -/** - * virtqueue_kick_prepare - first half of split virtqueue_kick call. - * @vq: the struct virtqueue - * - * Instead of virtqueue_kick(), you can do: - * if (virtqueue_kick_prepare(vq)) - * virtqueue_notify(vq); - * - * This is sometimes useful because the virtqueue_kick_prepare() needs - * to be serialized, but the actual virtqueue_notify() call does not. - */ -bool virtqueue_kick_prepare(struct virtqueue *_vq) +static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 new, old; @@ -579,6 +582,22 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq) END_USE(vq); return needs_kick; } + +/** + * virtqueue_kick_prepare - first half of split virtqueue_kick call. + * @vq: the struct virtqueue + * + * Instead of virtqueue_kick(), you can do: + * if (virtqueue_kick_prepare(vq)) + * virtqueue_notify(vq); + * + * This is sometimes useful because the virtqueue_kick_prepare() needs + * to be serialized, but the actual virtqueue_notify() call does not. + */ +bool virtqueue_kick_prepare(struct virtqueue *_vq) +{ + return virtqueue_kick_prepare_split(_vq); +} EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); /** @@ -625,8 +644,8 @@ bool virtqueue_kick(struct virtqueue *vq) } EXPORT_SYMBOL_GPL(virtqueue_kick); -static void detach_buf(struct vring_virtqueue *vq, unsigned int head, - void **ctx) +static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, + void **ctx) { unsigned int i, j; __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); @@ -638,12 +657,12 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head, i = head; while (vq->vring.desc[i].flags & nextflag) { - vring_unmap_one(vq, &vq->vring.desc[i]); + vring_unmap_one_split(vq, &vq->vring.desc[i]); i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next); vq->vq.num_free++; } - vring_unmap_one(vq, &vq->vring.desc[i]); + vring_unmap_one_split(vq, &vq->vring.desc[i]); vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head); vq->free_head = head; @@ -665,7 +684,7 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head, BUG_ON(len == 0 || len % sizeof(struct vring_desc)); for (j = 0; j < len / sizeof(struct vring_desc); j++) - vring_unmap_one(vq, &indir_desc[j]); + vring_unmap_one_split(vq, &indir_desc[j]); kfree(indir_desc); vq->desc_state[head].indir_desc = NULL; @@ -674,29 +693,14 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head, } } -static inline bool more_used(const struct vring_virtqueue *vq) +static inline bool more_used_split(const struct vring_virtqueue *vq) { return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx); } -/** - * virtqueue_get_buf - get the next used buffer - * @vq: the struct virtqueue we're talking about. - * @len: the length written into the buffer - * - * If the device wrote data into the buffer, @len will be set to the - * amount written. This means you don't need to clear the buffer - * beforehand to ensure there's no data leakage in the case of short - * writes. - * - * Caller must ensure we don't call this with other virtqueue - * operations at the same time (except where noted). - * - * Returns NULL if there are no used buffers, or the "data" token - * handed to virtqueue_add_*(). - */ -void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, - void **ctx) +static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, + unsigned int *len, + void **ctx) { struct vring_virtqueue *vq = to_vvq(_vq); void *ret; @@ -710,7 +714,7 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, return NULL; } - if (!more_used(vq)) { + if (!more_used_split(vq)) { pr_debug("No more buffers in queue\n"); END_USE(vq); return NULL; @@ -732,9 +736,9 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, return NULL; } - /* detach_buf clears data, so grab it now. */ + /* detach_buf_split clears data, so grab it now. */ ret = vq->desc_state[i].data; - detach_buf(vq, i, ctx); + detach_buf_split(vq, i, ctx); vq->last_used_idx++; /* If we expect an interrupt for the next entry, tell host * by writing event index and flush out the write before @@ -751,6 +755,28 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, END_USE(vq); return ret; } + +/** + * virtqueue_get_buf - get the next used buffer + * @vq: the struct virtqueue we're talking about. + * @len: the length written into the buffer + * + * If the device wrote data into the buffer, @len will be set to the + * amount written. This means you don't need to clear the buffer + * beforehand to ensure there's no data leakage in the case of short + * writes. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + * + * Returns NULL if there are no used buffers, or the "data" token + * handed to virtqueue_add_*(). + */ +void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, + void **ctx) +{ + return virtqueue_get_buf_ctx_split(_vq, len, ctx); +} EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) @@ -758,6 +784,18 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) return virtqueue_get_buf_ctx(_vq, len, NULL); } EXPORT_SYMBOL_GPL(virtqueue_get_buf); + +static void virtqueue_disable_cb_split(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { + vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; + if (!vq->event) + vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow); + } +} + /** * virtqueue_disable_cb - disable callbacks * @vq: the struct virtqueue we're talking about. @@ -769,30 +807,11 @@ EXPORT_SYMBOL_GPL(virtqueue_get_buf); */ void virtqueue_disable_cb(struct virtqueue *_vq) { - struct vring_virtqueue *vq = to_vvq(_vq); - - if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { - vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; - if (!vq->event) - vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow); - } - + virtqueue_disable_cb_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_disable_cb); -/** - * virtqueue_enable_cb_prepare - restart callbacks after disable_cb - * @vq: the struct virtqueue we're talking about. - * - * This re-enables callbacks; it returns current queue state - * in an opaque unsigned value. This value should be later tested by - * virtqueue_poll, to detect a possible race between the driver checking for - * more work, and enabling callbacks. - * - * Caller must ensure we don't call this with other virtqueue - * operations at the same time (except where noted). - */ -unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) +static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 last_used_idx; @@ -813,8 +832,33 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) END_USE(vq); return last_used_idx; } + +/** + * virtqueue_enable_cb_prepare - restart callbacks after disable_cb + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks; it returns current queue state + * in an opaque unsigned value. This value should be later tested by + * virtqueue_poll, to detect a possible race between the driver checking for + * more work, and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) +{ + return virtqueue_enable_cb_prepare_split(_vq); +} EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); +static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, + vq->vring.used->idx); +} + /** * virtqueue_poll - query pending used buffers * @vq: the struct virtqueue we're talking about. @@ -829,7 +873,7 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) struct vring_virtqueue *vq = to_vvq(_vq); virtio_mb(vq->weak_barriers); - return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->vring.used->idx); + return virtqueue_poll_split(_vq, last_used_idx); } EXPORT_SYMBOL_GPL(virtqueue_poll); @@ -851,20 +895,7 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_enable_cb); -/** - * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. - * @vq: the struct virtqueue we're talking about. - * - * This re-enables callbacks but hints to the other side to delay - * interrupts until most of the available buffers have been processed; - * it returns "false" if there are many pending buffers in the queue, - * to detect a possible race between the driver checking for more work, - * and enabling callbacks. - * - * Caller must ensure we don't call this with other virtqueue - * operations at the same time (except where noted). - */ -bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) +static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 bufs; @@ -896,17 +927,27 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) END_USE(vq); return true; } -EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); /** - * virtqueue_detach_unused_buf - detach first unused buffer + * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. * @vq: the struct virtqueue we're talking about. * - * Returns NULL or the "data" token handed to virtqueue_add_*(). - * This is not valid on an active queue; it is useful only for device - * shutdown. + * This re-enables callbacks but hints to the other side to delay + * interrupts until most of the available buffers have been processed; + * it returns "false" if there are many pending buffers in the queue, + * to detect a possible race between the driver checking for more work, + * and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). */ -void *virtqueue_detach_unused_buf(struct virtqueue *_vq) +bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) +{ + return virtqueue_enable_cb_delayed_split(_vq); +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); + +static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); unsigned int i; @@ -917,9 +958,9 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) for (i = 0; i < vq->vring.num; i++) { if (!vq->desc_state[i].data) continue; - /* detach_buf clears data, so grab it now. */ + /* detach_buf_split clears data, so grab it now. */ buf = vq->desc_state[i].data; - detach_buf(vq, i, NULL); + detach_buf_split(vq, i, NULL); vq->avail_idx_shadow--; vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow); END_USE(vq); @@ -931,8 +972,26 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) END_USE(vq); return NULL; } + +/** + * virtqueue_detach_unused_buf - detach first unused buffer + * @vq: the struct virtqueue we're talking about. + * + * Returns NULL or the "data" token handed to virtqueue_add_*(). + * This is not valid on an active queue; it is useful only for device + * shutdown. + */ +void *virtqueue_detach_unused_buf(struct virtqueue *_vq) +{ + return virtqueue_detach_unused_buf_split(_vq); +} EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); +static inline bool more_used(const struct vring_virtqueue *vq) +{ + return more_used_split(vq); +} + irqreturn_t vring_interrupt(int irq, void *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); From e6f633e5beab659513297f1210b428ce6f46e69c Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:20 +0800 Subject: [PATCH 03/13] virtio_ring: put split ring functions together Put the xxx_split() functions together to make the code more readable and avoid misuse after introducing the packed ring. There is no functional change. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- drivers/virtio/virtio_ring.c | 621 ++++++++++++++++++----------------- 1 file changed, 319 insertions(+), 302 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 29fab2fb39cb..7cd40a2a0d21 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -113,6 +113,11 @@ struct vring_virtqueue { struct vring_desc_state desc_state[]; }; + +/* + * Helpers. + */ + #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) /* @@ -200,6 +205,20 @@ static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, cpu_addr, size, direction); } +static int vring_mapping_error(const struct vring_virtqueue *vq, + dma_addr_t addr) +{ + if (!vring_use_dma_api(vq->vq.vdev)) + return 0; + + return dma_mapping_error(vring_dma_dev(vq), addr); +} + + +/* + * Split ring specific functions - *_split(). + */ + static void vring_unmap_one_split(const struct vring_virtqueue *vq, struct vring_desc *desc) { @@ -225,15 +244,6 @@ static void vring_unmap_one_split(const struct vring_virtqueue *vq, } } -static int vring_mapping_error(const struct vring_virtqueue *vq, - dma_addr_t addr) -{ - if (!vring_use_dma_api(vq->vq.vdev)) - return 0; - - return dma_mapping_error(vring_dma_dev(vq), addr); -} - static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, unsigned int total_sg, gfp_t gfp) @@ -435,121 +445,6 @@ unmap_release: return -EIO; } -static inline int virtqueue_add(struct virtqueue *_vq, - struct scatterlist *sgs[], - unsigned int total_sg, - unsigned int out_sgs, - unsigned int in_sgs, - void *data, - void *ctx, - gfp_t gfp) -{ - return virtqueue_add_split(_vq, sgs, total_sg, - out_sgs, in_sgs, data, ctx, gfp); -} - -/** - * virtqueue_add_sgs - expose buffers to other end - * @vq: the struct virtqueue we're talking about. - * @sgs: array of terminated scatterlists. - * @out_num: the number of scatterlists readable by other side - * @in_num: the number of scatterlists which are writable (after readable ones) - * @data: the token identifying the buffer. - * @gfp: how to do memory allocations (if necessary). - * - * Caller must ensure we don't call this with other virtqueue operations - * at the same time (except where noted). - * - * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). - */ -int virtqueue_add_sgs(struct virtqueue *_vq, - struct scatterlist *sgs[], - unsigned int out_sgs, - unsigned int in_sgs, - void *data, - gfp_t gfp) -{ - unsigned int i, total_sg = 0; - - /* Count them first. */ - for (i = 0; i < out_sgs + in_sgs; i++) { - struct scatterlist *sg; - for (sg = sgs[i]; sg; sg = sg_next(sg)) - total_sg++; - } - return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, - data, NULL, gfp); -} -EXPORT_SYMBOL_GPL(virtqueue_add_sgs); - -/** - * virtqueue_add_outbuf - expose output buffers to other end - * @vq: the struct virtqueue we're talking about. - * @sg: scatterlist (must be well-formed and terminated!) - * @num: the number of entries in @sg readable by other side - * @data: the token identifying the buffer. - * @gfp: how to do memory allocations (if necessary). - * - * Caller must ensure we don't call this with other virtqueue operations - * at the same time (except where noted). - * - * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). - */ -int virtqueue_add_outbuf(struct virtqueue *vq, - struct scatterlist *sg, unsigned int num, - void *data, - gfp_t gfp) -{ - return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); -} -EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); - -/** - * virtqueue_add_inbuf - expose input buffers to other end - * @vq: the struct virtqueue we're talking about. - * @sg: scatterlist (must be well-formed and terminated!) - * @num: the number of entries in @sg writable by other side - * @data: the token identifying the buffer. - * @gfp: how to do memory allocations (if necessary). - * - * Caller must ensure we don't call this with other virtqueue operations - * at the same time (except where noted). - * - * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). - */ -int virtqueue_add_inbuf(struct virtqueue *vq, - struct scatterlist *sg, unsigned int num, - void *data, - gfp_t gfp) -{ - return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); -} -EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); - -/** - * virtqueue_add_inbuf_ctx - expose input buffers to other end - * @vq: the struct virtqueue we're talking about. - * @sg: scatterlist (must be well-formed and terminated!) - * @num: the number of entries in @sg writable by other side - * @data: the token identifying the buffer. - * @ctx: extra context for the token - * @gfp: how to do memory allocations (if necessary). - * - * Caller must ensure we don't call this with other virtqueue operations - * at the same time (except where noted). - * - * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). - */ -int virtqueue_add_inbuf_ctx(struct virtqueue *vq, - struct scatterlist *sg, unsigned int num, - void *data, - void *ctx, - gfp_t gfp) -{ - return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); -} -EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); - static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -583,67 +478,6 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) return needs_kick; } -/** - * virtqueue_kick_prepare - first half of split virtqueue_kick call. - * @vq: the struct virtqueue - * - * Instead of virtqueue_kick(), you can do: - * if (virtqueue_kick_prepare(vq)) - * virtqueue_notify(vq); - * - * This is sometimes useful because the virtqueue_kick_prepare() needs - * to be serialized, but the actual virtqueue_notify() call does not. - */ -bool virtqueue_kick_prepare(struct virtqueue *_vq) -{ - return virtqueue_kick_prepare_split(_vq); -} -EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); - -/** - * virtqueue_notify - second half of split virtqueue_kick call. - * @vq: the struct virtqueue - * - * This does not need to be serialized. - * - * Returns false if host notify failed or queue is broken, otherwise true. - */ -bool virtqueue_notify(struct virtqueue *_vq) -{ - struct vring_virtqueue *vq = to_vvq(_vq); - - if (unlikely(vq->broken)) - return false; - - /* Prod other side to tell it about changes. */ - if (!vq->notify(_vq)) { - vq->broken = true; - return false; - } - return true; -} -EXPORT_SYMBOL_GPL(virtqueue_notify); - -/** - * virtqueue_kick - update after add_buf - * @vq: the struct virtqueue - * - * After one or more virtqueue_add_* calls, invoke this to kick - * the other side. - * - * Caller must ensure we don't call this with other virtqueue - * operations at the same time (except where noted). - * - * Returns false if kick failed, otherwise true. - */ -bool virtqueue_kick(struct virtqueue *vq) -{ - if (virtqueue_kick_prepare(vq)) - return virtqueue_notify(vq); - return true; -} -EXPORT_SYMBOL_GPL(virtqueue_kick); - static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, void **ctx) { @@ -756,35 +590,6 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, return ret; } -/** - * virtqueue_get_buf - get the next used buffer - * @vq: the struct virtqueue we're talking about. - * @len: the length written into the buffer - * - * If the device wrote data into the buffer, @len will be set to the - * amount written. This means you don't need to clear the buffer - * beforehand to ensure there's no data leakage in the case of short - * writes. - * - * Caller must ensure we don't call this with other virtqueue - * operations at the same time (except where noted). - * - * Returns NULL if there are no used buffers, or the "data" token - * handed to virtqueue_add_*(). - */ -void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, - void **ctx) -{ - return virtqueue_get_buf_ctx_split(_vq, len, ctx); -} -EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); - -void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) -{ - return virtqueue_get_buf_ctx(_vq, len, NULL); -} -EXPORT_SYMBOL_GPL(virtqueue_get_buf); - static void virtqueue_disable_cb_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -796,21 +601,6 @@ static void virtqueue_disable_cb_split(struct virtqueue *_vq) } } -/** - * virtqueue_disable_cb - disable callbacks - * @vq: the struct virtqueue we're talking about. - * - * Note that this is not necessarily synchronous, hence unreliable and only - * useful as an optimization. - * - * Unlike other operations, this need not be serialized. - */ -void virtqueue_disable_cb(struct virtqueue *_vq) -{ - virtqueue_disable_cb_split(_vq); -} -EXPORT_SYMBOL_GPL(virtqueue_disable_cb); - static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -833,24 +623,6 @@ static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) return last_used_idx; } -/** - * virtqueue_enable_cb_prepare - restart callbacks after disable_cb - * @vq: the struct virtqueue we're talking about. - * - * This re-enables callbacks; it returns current queue state - * in an opaque unsigned value. This value should be later tested by - * virtqueue_poll, to detect a possible race between the driver checking for - * more work, and enabling callbacks. - * - * Caller must ensure we don't call this with other virtqueue - * operations at the same time (except where noted). - */ -unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) -{ - return virtqueue_enable_cb_prepare_split(_vq); -} -EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); - static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -859,42 +631,6 @@ static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) vq->vring.used->idx); } -/** - * virtqueue_poll - query pending used buffers - * @vq: the struct virtqueue we're talking about. - * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). - * - * Returns "true" if there are pending used buffers in the queue. - * - * This does not need to be serialized. - */ -bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) -{ - struct vring_virtqueue *vq = to_vvq(_vq); - - virtio_mb(vq->weak_barriers); - return virtqueue_poll_split(_vq, last_used_idx); -} -EXPORT_SYMBOL_GPL(virtqueue_poll); - -/** - * virtqueue_enable_cb - restart callbacks after disable_cb. - * @vq: the struct virtqueue we're talking about. - * - * This re-enables callbacks; it returns "false" if there are pending - * buffers in the queue, to detect a possible race between the driver - * checking for more work, and enabling callbacks. - * - * Caller must ensure we don't call this with other virtqueue - * operations at the same time (except where noted). - */ -bool virtqueue_enable_cb(struct virtqueue *_vq) -{ - unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); - return !virtqueue_poll(_vq, last_used_idx); -} -EXPORT_SYMBOL_GPL(virtqueue_enable_cb); - static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -928,25 +664,6 @@ static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) return true; } -/** - * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. - * @vq: the struct virtqueue we're talking about. - * - * This re-enables callbacks but hints to the other side to delay - * interrupts until most of the available buffers have been processed; - * it returns "false" if there are many pending buffers in the queue, - * to detect a possible race between the driver checking for more work, - * and enabling callbacks. - * - * Caller must ensure we don't call this with other virtqueue - * operations at the same time (except where noted). - */ -bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) -{ - return virtqueue_enable_cb_delayed_split(_vq); -} -EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); - static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -973,6 +690,306 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) return NULL; } + +/* + * Generic functions and exported symbols. + */ + +static inline int virtqueue_add(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + void *ctx, + gfp_t gfp) +{ + return virtqueue_add_split(_vq, sgs, total_sg, + out_sgs, in_sgs, data, ctx, gfp); +} + +/** + * virtqueue_add_sgs - expose buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sgs: array of terminated scatterlists. + * @out_num: the number of scatterlists readable by other side + * @in_num: the number of scatterlists which are writable (after readable ones) + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_sgs(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + gfp_t gfp) +{ + unsigned int i, total_sg = 0; + + /* Count them first. */ + for (i = 0; i < out_sgs + in_sgs; i++) { + struct scatterlist *sg; + + for (sg = sgs[i]; sg; sg = sg_next(sg)) + total_sg++; + } + return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, + data, NULL, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_sgs); + +/** + * virtqueue_add_outbuf - expose output buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sg: scatterlist (must be well-formed and terminated!) + * @num: the number of entries in @sg readable by other side + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_outbuf(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + gfp_t gfp) +{ + return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); + +/** + * virtqueue_add_inbuf - expose input buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sg: scatterlist (must be well-formed and terminated!) + * @num: the number of entries in @sg writable by other side + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_inbuf(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + gfp_t gfp) +{ + return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); + +/** + * virtqueue_add_inbuf_ctx - expose input buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sg: scatterlist (must be well-formed and terminated!) + * @num: the number of entries in @sg writable by other side + * @data: the token identifying the buffer. + * @ctx: extra context for the token + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_inbuf_ctx(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + void *ctx, + gfp_t gfp) +{ + return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); + +/** + * virtqueue_kick_prepare - first half of split virtqueue_kick call. + * @vq: the struct virtqueue + * + * Instead of virtqueue_kick(), you can do: + * if (virtqueue_kick_prepare(vq)) + * virtqueue_notify(vq); + * + * This is sometimes useful because the virtqueue_kick_prepare() needs + * to be serialized, but the actual virtqueue_notify() call does not. + */ +bool virtqueue_kick_prepare(struct virtqueue *_vq) +{ + return virtqueue_kick_prepare_split(_vq); +} +EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); + +/** + * virtqueue_notify - second half of split virtqueue_kick call. + * @vq: the struct virtqueue + * + * This does not need to be serialized. + * + * Returns false if host notify failed or queue is broken, otherwise true. + */ +bool virtqueue_notify(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (unlikely(vq->broken)) + return false; + + /* Prod other side to tell it about changes. */ + if (!vq->notify(_vq)) { + vq->broken = true; + return false; + } + return true; +} +EXPORT_SYMBOL_GPL(virtqueue_notify); + +/** + * virtqueue_kick - update after add_buf + * @vq: the struct virtqueue + * + * After one or more virtqueue_add_* calls, invoke this to kick + * the other side. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + * + * Returns false if kick failed, otherwise true. + */ +bool virtqueue_kick(struct virtqueue *vq) +{ + if (virtqueue_kick_prepare(vq)) + return virtqueue_notify(vq); + return true; +} +EXPORT_SYMBOL_GPL(virtqueue_kick); + +/** + * virtqueue_get_buf - get the next used buffer + * @vq: the struct virtqueue we're talking about. + * @len: the length written into the buffer + * + * If the device wrote data into the buffer, @len will be set to the + * amount written. This means you don't need to clear the buffer + * beforehand to ensure there's no data leakage in the case of short + * writes. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + * + * Returns NULL if there are no used buffers, or the "data" token + * handed to virtqueue_add_*(). + */ +void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, + void **ctx) +{ + return virtqueue_get_buf_ctx_split(_vq, len, ctx); +} +EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); + +void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) +{ + return virtqueue_get_buf_ctx(_vq, len, NULL); +} +EXPORT_SYMBOL_GPL(virtqueue_get_buf); + +/** + * virtqueue_disable_cb - disable callbacks + * @vq: the struct virtqueue we're talking about. + * + * Note that this is not necessarily synchronous, hence unreliable and only + * useful as an optimization. + * + * Unlike other operations, this need not be serialized. + */ +void virtqueue_disable_cb(struct virtqueue *_vq) +{ + virtqueue_disable_cb_split(_vq); +} +EXPORT_SYMBOL_GPL(virtqueue_disable_cb); + +/** + * virtqueue_enable_cb_prepare - restart callbacks after disable_cb + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks; it returns current queue state + * in an opaque unsigned value. This value should be later tested by + * virtqueue_poll, to detect a possible race between the driver checking for + * more work, and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) +{ + return virtqueue_enable_cb_prepare_split(_vq); +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); + +/** + * virtqueue_poll - query pending used buffers + * @vq: the struct virtqueue we're talking about. + * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). + * + * Returns "true" if there are pending used buffers in the queue. + * + * This does not need to be serialized. + */ +bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + virtio_mb(vq->weak_barriers); + return virtqueue_poll_split(_vq, last_used_idx); +} +EXPORT_SYMBOL_GPL(virtqueue_poll); + +/** + * virtqueue_enable_cb - restart callbacks after disable_cb. + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks; it returns "false" if there are pending + * buffers in the queue, to detect a possible race between the driver + * checking for more work, and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +bool virtqueue_enable_cb(struct virtqueue *_vq) +{ + unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); + + return !virtqueue_poll(_vq, last_used_idx); +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb); + +/** + * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks but hints to the other side to delay + * interrupts until most of the available buffers have been processed; + * it returns "false" if there are many pending buffers in the queue, + * to detect a possible race between the driver checking for more work, + * and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) +{ + return virtqueue_enable_cb_delayed_split(_vq); +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); + /** * virtqueue_detach_unused_buf - detach first unused buffer * @vq: the struct virtqueue we're talking about. From e593bf9751566e0ba5a055a976a4abd54101d572 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:21 +0800 Subject: [PATCH 04/13] virtio_ring: put split ring fields in a sub struct Put the split ring specific fields in a sub-struct named as "split" to avoid misuse after introducing packed ring. There is no functional change. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- drivers/virtio/virtio_ring.c | 156 ++++++++++++++++++++--------------- 1 file changed, 91 insertions(+), 65 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 7cd40a2a0d21..0b97e5c79654 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -63,9 +63,6 @@ struct vring_desc_state { struct vring_virtqueue { struct virtqueue vq; - /* Actual memory layout for this queue */ - struct vring vring; - /* Can we use weak barriers? */ bool weak_barriers; @@ -86,11 +83,16 @@ struct vring_virtqueue { /* Last used index we've seen. */ u16 last_used_idx; - /* Last written value to avail->flags */ - u16 avail_flags_shadow; + struct { + /* Actual memory layout for this queue */ + struct vring vring; - /* Last written value to avail->idx in guest byte order */ - u16 avail_idx_shadow; + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + } split; /* How to notify other side. FIXME: commonalize hcalls! */ bool (*notify)(struct virtqueue *vq); @@ -316,7 +318,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, desc = alloc_indirect_split(_vq, total_sg, gfp); else { desc = NULL; - WARN_ON_ONCE(total_sg > vq->vring.num && !vq->indirect); + WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); } if (desc) { @@ -327,7 +329,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, descs_used = 1; } else { indirect = false; - desc = vq->vring.desc; + desc = vq->split.vring.desc; i = head; descs_used = total_sg; } @@ -383,10 +385,13 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, if (vring_mapping_error(vq, addr)) goto unmap_release; - vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT); - vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr); + vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, + VRING_DESC_F_INDIRECT); + vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, + addr); - vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc)); + vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev, + total_sg * sizeof(struct vring_desc)); } /* We're using some buffers from the free list. */ @@ -394,7 +399,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, /* Update free pointer */ if (indirect) - vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next); + vq->free_head = virtio16_to_cpu(_vq->vdev, + vq->split.vring.desc[head].next); else vq->free_head = i; @@ -407,14 +413,15 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, /* Put entry in available array (but don't update avail->idx until they * do sync). */ - avail = vq->avail_idx_shadow & (vq->vring.num - 1); - vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); + avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); + vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); /* Descriptors and available array need to be set before we expose the * new available array entries. */ virtio_wmb(vq->weak_barriers); - vq->avail_idx_shadow++; - vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow); + vq->split.avail_idx_shadow++; + vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, + vq->split.avail_idx_shadow); vq->num_added++; pr_debug("Added buffer head %i to %p\n", head, vq); @@ -435,7 +442,7 @@ unmap_release: if (i == err_idx) break; vring_unmap_one_split(vq, &desc[i]); - i = virtio16_to_cpu(_vq->vdev, vq->vring.desc[i].next); + i = virtio16_to_cpu(_vq->vdev, vq->split.vring.desc[i].next); } if (indirect) @@ -456,8 +463,8 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) * event. */ virtio_mb(vq->weak_barriers); - old = vq->avail_idx_shadow - vq->num_added; - new = vq->avail_idx_shadow; + old = vq->split.avail_idx_shadow - vq->num_added; + new = vq->split.avail_idx_shadow; vq->num_added = 0; #ifdef DEBUG @@ -469,10 +476,13 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) #endif if (vq->event) { - needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, vring_avail_event(&vq->vring)), + needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, + vring_avail_event(&vq->split.vring)), new, old); } else { - needs_kick = !(vq->vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY)); + needs_kick = !(vq->split.vring.used->flags & + cpu_to_virtio16(_vq->vdev, + VRING_USED_F_NO_NOTIFY)); } END_USE(vq); return needs_kick; @@ -490,14 +500,15 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, /* Put back on free list: unmap first-level descriptors and find end */ i = head; - while (vq->vring.desc[i].flags & nextflag) { - vring_unmap_one_split(vq, &vq->vring.desc[i]); - i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next); + while (vq->split.vring.desc[i].flags & nextflag) { + vring_unmap_one_split(vq, &vq->split.vring.desc[i]); + i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next); vq->vq.num_free++; } - vring_unmap_one_split(vq, &vq->vring.desc[i]); - vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head); + vring_unmap_one_split(vq, &vq->split.vring.desc[i]); + vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, + vq->free_head); vq->free_head = head; /* Plus final descriptor */ @@ -511,9 +522,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, if (!indir_desc) return; - len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len); + len = virtio32_to_cpu(vq->vq.vdev, + vq->split.vring.desc[head].len); - BUG_ON(!(vq->vring.desc[head].flags & + BUG_ON(!(vq->split.vring.desc[head].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); BUG_ON(len == 0 || len % sizeof(struct vring_desc)); @@ -529,7 +541,8 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, static inline bool more_used_split(const struct vring_virtqueue *vq) { - return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx); + return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, + vq->split.vring.used->idx); } static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, @@ -557,11 +570,13 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, /* Only get used array entries after they have been exposed by host. */ virtio_rmb(vq->weak_barriers); - last_used = (vq->last_used_idx & (vq->vring.num - 1)); - i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id); - *len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len); + last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); + i = virtio32_to_cpu(_vq->vdev, + vq->split.vring.used->ring[last_used].id); + *len = virtio32_to_cpu(_vq->vdev, + vq->split.vring.used->ring[last_used].len); - if (unlikely(i >= vq->vring.num)) { + if (unlikely(i >= vq->split.vring.num)) { BAD_RING(vq, "id %u out of range\n", i); return NULL; } @@ -577,9 +592,9 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, /* If we expect an interrupt for the next entry, tell host * by writing event index and flush out the write before * the read in the next get_buf call. */ - if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) + if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) virtio_store_mb(vq->weak_barriers, - &vring_used_event(&vq->vring), + &vring_used_event(&vq->split.vring), cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); #ifdef DEBUG @@ -594,10 +609,12 @@ static void virtqueue_disable_cb_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { - vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; + if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { + vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; if (!vq->event) - vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow); + vq->split.vring.avail->flags = + cpu_to_virtio16(_vq->vdev, + vq->split.avail_flags_shadow); } } @@ -613,12 +630,15 @@ static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to * either clear the flags bit or point the event index at the next * entry. Always do both to keep code simple. */ - if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { - vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; + if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { + vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; if (!vq->event) - vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow); + vq->split.vring.avail->flags = + cpu_to_virtio16(_vq->vdev, + vq->split.avail_flags_shadow); } - vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx); + vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, + last_used_idx = vq->last_used_idx); END_USE(vq); return last_used_idx; } @@ -628,7 +648,7 @@ static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) struct vring_virtqueue *vq = to_vvq(_vq); return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, - vq->vring.used->idx); + vq->split.vring.used->idx); } static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) @@ -643,19 +663,22 @@ static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to * either clear the flags bit or point the event index at the next * entry. Always update the event index to keep code simple. */ - if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { - vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; + if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { + vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; if (!vq->event) - vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow); + vq->split.vring.avail->flags = + cpu_to_virtio16(_vq->vdev, + vq->split.avail_flags_shadow); } /* TODO: tune this threshold */ - bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4; + bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; virtio_store_mb(vq->weak_barriers, - &vring_used_event(&vq->vring), + &vring_used_event(&vq->split.vring), cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); - if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) { + if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) + - vq->last_used_idx) > bufs)) { END_USE(vq); return false; } @@ -672,19 +695,20 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) START_USE(vq); - for (i = 0; i < vq->vring.num; i++) { + for (i = 0; i < vq->split.vring.num; i++) { if (!vq->desc_state[i].data) continue; /* detach_buf_split clears data, so grab it now. */ buf = vq->desc_state[i].data; detach_buf_split(vq, i, NULL); - vq->avail_idx_shadow--; - vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow); + vq->split.avail_idx_shadow--; + vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, + vq->split.avail_idx_shadow); END_USE(vq); return buf; } /* That should have freed everything. */ - BUG_ON(vq->vq.num_free != vq->vring.num); + BUG_ON(vq->vq.num_free != vq->split.vring.num); END_USE(vq); return NULL; @@ -1046,7 +1070,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, if (!vq) return NULL; - vq->vring = vring; vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.name = name; @@ -1059,8 +1082,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->weak_barriers = weak_barriers; vq->broken = false; vq->last_used_idx = 0; - vq->avail_flags_shadow = 0; - vq->avail_idx_shadow = 0; vq->num_added = 0; list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG @@ -1072,17 +1093,22 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, !context; vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); + vq->split.vring = vring; + vq->split.avail_flags_shadow = 0; + vq->split.avail_idx_shadow = 0; + /* No callback? Tell other side not to bother us. */ if (!callback) { - vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; + vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; if (!vq->event) - vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow); + vq->split.vring.avail->flags = cpu_to_virtio16(vdev, + vq->split.avail_flags_shadow); } /* Put everything in free lists. */ vq->free_head = 0; for (i = 0; i < vring.num-1; i++) - vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); + vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state)); return &vq->vq; @@ -1218,7 +1244,7 @@ void vring_del_virtqueue(struct virtqueue *_vq) if (vq->we_own_ring) { vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes, - vq->vring.desc, vq->queue_dma_addr); + vq->split.vring.desc, vq->queue_dma_addr); } list_del(&_vq->list); kfree(vq); @@ -1260,7 +1286,7 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) struct vring_virtqueue *vq = to_vvq(_vq); - return vq->vring.num; + return vq->split.vring.num; } EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); @@ -1304,7 +1330,7 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) BUG_ON(!vq->we_own_ring); return vq->queue_dma_addr + - ((char *)vq->vring.avail - (char *)vq->vring.desc); + ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); } EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); @@ -1315,13 +1341,13 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) BUG_ON(!vq->we_own_ring); return vq->queue_dma_addr + - ((char *)vq->vring.used - (char *)vq->vring.desc); + ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); } EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); const struct vring *virtqueue_get_vring(struct virtqueue *vq) { - return &to_vvq(vq)->vring; + return &to_vvq(vq)->split.vring; } EXPORT_SYMBOL_GPL(virtqueue_get_vring); From 4d6a105eb534b8f1a5ef9215deb1932cd8abacc2 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:22 +0800 Subject: [PATCH 05/13] virtio_ring: introduce debug helpers Introduce debug helpers for last_add_time update, check and invalid. They will be used by packed ring too. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- drivers/virtio/virtio_ring.c | 49 ++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 0b97e5c79654..10d407910aa2 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -44,6 +44,26 @@ } while (0) #define END_USE(_vq) \ do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) +#define LAST_ADD_TIME_UPDATE(_vq) \ + do { \ + ktime_t now = ktime_get(); \ + \ + /* No kick or get, with .1 second between? Warn. */ \ + if ((_vq)->last_add_time_valid) \ + WARN_ON(ktime_to_ms(ktime_sub(now, \ + (_vq)->last_add_time)) > 100); \ + (_vq)->last_add_time = now; \ + (_vq)->last_add_time_valid = true; \ + } while (0) +#define LAST_ADD_TIME_CHECK(_vq) \ + do { \ + if ((_vq)->last_add_time_valid) { \ + WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ + (_vq)->last_add_time)) > 100); \ + } \ + } while (0) +#define LAST_ADD_TIME_INVALID(_vq) \ + ((_vq)->last_add_time_valid = false) #else #define BAD_RING(_vq, fmt, args...) \ do { \ @@ -53,6 +73,9 @@ } while (0) #define START_USE(vq) #define END_USE(vq) +#define LAST_ADD_TIME_UPDATE(vq) +#define LAST_ADD_TIME_CHECK(vq) +#define LAST_ADD_TIME_INVALID(vq) #endif struct vring_desc_state { @@ -295,18 +318,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, return -EIO; } -#ifdef DEBUG - { - ktime_t now = ktime_get(); - - /* No kick or get, with .1 second between? Warn. */ - if (vq->last_add_time_valid) - WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time)) - > 100); - vq->last_add_time = now; - vq->last_add_time_valid = true; - } -#endif + LAST_ADD_TIME_UPDATE(vq); BUG_ON(total_sg == 0); @@ -467,13 +479,8 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) new = vq->split.avail_idx_shadow; vq->num_added = 0; -#ifdef DEBUG - if (vq->last_add_time_valid) { - WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), - vq->last_add_time)) > 100); - } - vq->last_add_time_valid = false; -#endif + LAST_ADD_TIME_CHECK(vq); + LAST_ADD_TIME_INVALID(vq); if (vq->event) { needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, @@ -597,9 +604,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, &vring_used_event(&vq->split.vring), cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); -#ifdef DEBUG - vq->last_add_time_valid = false; -#endif + LAST_ADD_TIME_INVALID(vq); END_USE(vq); return ret; From 2f18c2d153fb6fc889665688f266c780353002ec Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:23 +0800 Subject: [PATCH 06/13] virtio_ring: introduce helper for indirect feature Introduce a helper to check whether we will use indirect feature. It will be used by packed ring too. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- drivers/virtio/virtio_ring.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 10d407910aa2..d1076f28c7e9 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -145,6 +145,18 @@ struct vring_virtqueue { #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) +static inline bool virtqueue_use_indirect(struct virtqueue *_vq, + unsigned int total_sg) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + /* + * If the host supports indirect descriptor tables, and we have multiple + * buffers, then go indirect. FIXME: tune this threshold + */ + return (vq->indirect && total_sg > 1 && vq->vq.num_free); +} + /* * Modern virtio devices have feature bits to specify whether they need a * quirk and bypass the IOMMU. If not there, just use the DMA API. @@ -324,9 +336,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, head = vq->free_head; - /* If the host supports indirect descriptor tables, and we have multiple - * buffers, then go indirect. FIXME: tune this threshold */ - if (vq->indirect && total_sg > 1 && vq->vq.num_free) + if (virtqueue_use_indirect(_vq, total_sg)) desc = alloc_indirect_split(_vq, total_sg, gfp); else { desc = NULL; From cbeedb72b97ad826e31e68e0717b763e2db0806d Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:24 +0800 Subject: [PATCH 07/13] virtio_ring: allocate desc state for split ring separately Put the split ring's desc state into the .split sub-structure, and allocate desc state for split ring separately, this makes the code more readable and more consistent with what we will do for packed ring. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- drivers/virtio/virtio_ring.c | 45 +++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index d1076f28c7e9..acd851f3105c 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -78,7 +78,7 @@ #define LAST_ADD_TIME_INVALID(vq) #endif -struct vring_desc_state { +struct vring_desc_state_split { void *data; /* Data for callback. */ struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ }; @@ -115,6 +115,9 @@ struct vring_virtqueue { /* Last written value to avail->idx in guest byte order */ u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; } split; /* How to notify other side. FIXME: commonalize hcalls! */ @@ -133,9 +136,6 @@ struct vring_virtqueue { bool last_add_time_valid; ktime_t last_add_time; #endif - - /* Per-descriptor state. */ - struct vring_desc_state desc_state[]; }; @@ -427,11 +427,11 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, vq->free_head = i; /* Store token and indirect buffer state. */ - vq->desc_state[head].data = data; + vq->split.desc_state[head].data = data; if (indirect) - vq->desc_state[head].indir_desc = desc; + vq->split.desc_state[head].indir_desc = desc; else - vq->desc_state[head].indir_desc = ctx; + vq->split.desc_state[head].indir_desc = ctx; /* Put entry in available array (but don't update avail->idx until they * do sync). */ @@ -512,7 +512,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); /* Clear data ptr. */ - vq->desc_state[head].data = NULL; + vq->split.desc_state[head].data = NULL; /* Put back on free list: unmap first-level descriptors and find end */ i = head; @@ -532,7 +532,8 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, vq->vq.num_free++; if (vq->indirect) { - struct vring_desc *indir_desc = vq->desc_state[head].indir_desc; + struct vring_desc *indir_desc = + vq->split.desc_state[head].indir_desc; u32 len; /* Free the indirect table, if any, now that it's unmapped. */ @@ -550,9 +551,9 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, vring_unmap_one_split(vq, &indir_desc[j]); kfree(indir_desc); - vq->desc_state[head].indir_desc = NULL; + vq->split.desc_state[head].indir_desc = NULL; } else if (ctx) { - *ctx = vq->desc_state[head].indir_desc; + *ctx = vq->split.desc_state[head].indir_desc; } } @@ -597,13 +598,13 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, BAD_RING(vq, "id %u out of range\n", i); return NULL; } - if (unlikely(!vq->desc_state[i].data)) { + if (unlikely(!vq->split.desc_state[i].data)) { BAD_RING(vq, "id %u is not a head!\n", i); return NULL; } /* detach_buf_split clears data, so grab it now. */ - ret = vq->desc_state[i].data; + ret = vq->split.desc_state[i].data; detach_buf_split(vq, i, ctx); vq->last_used_idx++; /* If we expect an interrupt for the next entry, tell host @@ -711,10 +712,10 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) START_USE(vq); for (i = 0; i < vq->split.vring.num; i++) { - if (!vq->desc_state[i].data) + if (!vq->split.desc_state[i].data) continue; /* detach_buf_split clears data, so grab it now. */ - buf = vq->desc_state[i].data; + buf = vq->split.desc_state[i].data; detach_buf_split(vq, i, NULL); vq->split.avail_idx_shadow--; vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, @@ -1080,8 +1081,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, unsigned int i; struct vring_virtqueue *vq; - vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state), - GFP_KERNEL); + vq = kmalloc(sizeof(*vq), GFP_KERNEL); if (!vq) return NULL; @@ -1120,11 +1120,19 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->split.avail_flags_shadow); } + vq->split.desc_state = kmalloc_array(vring.num, + sizeof(struct vring_desc_state_split), GFP_KERNEL); + if (!vq->split.desc_state) { + kfree(vq); + return NULL; + } + /* Put everything in free lists. */ vq->free_head = 0; for (i = 0; i < vring.num-1; i++) vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); - memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state)); + memset(vq->split.desc_state, 0, vring.num * + sizeof(struct vring_desc_state_split)); return &vq->vq; } @@ -1260,6 +1268,7 @@ void vring_del_virtqueue(struct virtqueue *_vq) if (vq->we_own_ring) { vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes, vq->split.vring.desc, vq->queue_dma_addr); + kfree(vq->split.desc_state); } list_del(&_vq->list); kfree(vq); From d79dca75c79680f52a27a7ee1b6ae75066f36b3e Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:25 +0800 Subject: [PATCH 08/13] virtio_ring: extract split ring handling from ring creation Introduce a specific function to create the split ring. And also move the DMA allocation and size information to the .split sub-structure. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- drivers/virtio/virtio_ring.c | 220 +++++++++++++++++++---------------- 1 file changed, 121 insertions(+), 99 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index acd851f3105c..d00a87909a7e 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -118,6 +118,10 @@ struct vring_virtqueue { /* Per-descriptor state. */ struct vring_desc_state_split *desc_state; + + /* DMA, allocation, and size information */ + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; } split; /* How to notify other side. FIXME: commonalize hcalls! */ @@ -125,8 +129,6 @@ struct vring_virtqueue { /* DMA, allocation, and size information */ bool we_own_ring; - size_t queue_size_in_bytes; - dma_addr_t queue_dma_addr; #ifdef DEBUG /* They're supposed to lock for us. */ @@ -203,6 +205,48 @@ static bool vring_use_dma_api(struct virtio_device *vdev) return false; } +static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + if (vring_use_dma_api(vdev)) { + return dma_alloc_coherent(vdev->dev.parent, size, + dma_handle, flag); + } else { + void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); + + if (queue) { + phys_addr_t phys_addr = virt_to_phys(queue); + *dma_handle = (dma_addr_t)phys_addr; + + /* + * Sanity check: make sure we dind't truncate + * the address. The only arches I can find that + * have 64-bit phys_addr_t but 32-bit dma_addr_t + * are certain non-highmem MIPS and x86 + * configurations, but these configurations + * should never allocate physical pages above 32 + * bits, so this is fine. Just in case, throw a + * warning and abort if we end up with an + * unrepresentable address. + */ + if (WARN_ON_ONCE(*dma_handle != phys_addr)) { + free_pages_exact(queue, PAGE_ALIGN(size)); + return NULL; + } + } + return queue; + } +} + +static void vring_free_queue(struct virtio_device *vdev, size_t size, + void *queue, dma_addr_t dma_handle) +{ + if (vring_use_dma_api(vdev)) + dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); + else + free_pages_exact(queue, PAGE_ALIGN(size)); +} + /* * The DMA ops on various arches are rather gnarly right now, and * making all of the arch DMA ops work on the vring device itself @@ -730,6 +774,68 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) return NULL; } +static struct virtqueue *vring_create_virtqueue_split( + unsigned int index, + unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + bool weak_barriers, + bool may_reduce_num, + bool context, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name) +{ + struct virtqueue *vq; + void *queue = NULL; + dma_addr_t dma_addr; + size_t queue_size_in_bytes; + struct vring vring; + + /* We assume num is a power of 2. */ + if (num & (num - 1)) { + dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); + return NULL; + } + + /* TODO: allocate each queue chunk individually */ + for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { + queue = vring_alloc_queue(vdev, vring_size(num, vring_align), + &dma_addr, + GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); + if (queue) + break; + } + + if (!num) + return NULL; + + if (!queue) { + /* Try to get a single page. You are my only hope! */ + queue = vring_alloc_queue(vdev, vring_size(num, vring_align), + &dma_addr, GFP_KERNEL|__GFP_ZERO); + } + if (!queue) + return NULL; + + queue_size_in_bytes = vring_size(num, vring_align); + vring_init(&vring, num, queue, vring_align); + + vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, + notify, callback, name); + if (!vq) { + vring_free_queue(vdev, queue_size_in_bytes, queue, + dma_addr); + return NULL; + } + + to_vvq(vq)->split.queue_dma_addr = dma_addr; + to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; + to_vvq(vq)->we_own_ring = true; + + return vq; +} + /* * Generic functions and exported symbols. @@ -1091,8 +1197,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->vq.num_free = vring.num; vq->vq.index = index; vq->we_own_ring = false; - vq->queue_dma_addr = 0; - vq->queue_size_in_bytes = 0; vq->notify = notify; vq->weak_barriers = weak_barriers; vq->broken = false; @@ -1108,6 +1212,9 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, !context; vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); + vq->split.queue_dma_addr = 0; + vq->split.queue_size_in_bytes = 0; + vq->split.vring = vring; vq->split.avail_flags_shadow = 0; vq->split.avail_idx_shadow = 0; @@ -1138,48 +1245,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, } EXPORT_SYMBOL_GPL(__vring_new_virtqueue); -static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) -{ - if (vring_use_dma_api(vdev)) { - return dma_alloc_coherent(vdev->dev.parent, size, - dma_handle, flag); - } else { - void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); - if (queue) { - phys_addr_t phys_addr = virt_to_phys(queue); - *dma_handle = (dma_addr_t)phys_addr; - - /* - * Sanity check: make sure we dind't truncate - * the address. The only arches I can find that - * have 64-bit phys_addr_t but 32-bit dma_addr_t - * are certain non-highmem MIPS and x86 - * configurations, but these configurations - * should never allocate physical pages above 32 - * bits, so this is fine. Just in case, throw a - * warning and abort if we end up with an - * unrepresentable address. - */ - if (WARN_ON_ONCE(*dma_handle != phys_addr)) { - free_pages_exact(queue, PAGE_ALIGN(size)); - return NULL; - } - } - return queue; - } -} - -static void vring_free_queue(struct virtio_device *vdev, size_t size, - void *queue, dma_addr_t dma_handle) -{ - if (vring_use_dma_api(vdev)) { - dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); - } else { - free_pages_exact(queue, PAGE_ALIGN(size)); - } -} - struct virtqueue *vring_create_virtqueue( unsigned int index, unsigned int num, @@ -1192,54 +1257,9 @@ struct virtqueue *vring_create_virtqueue( void (*callback)(struct virtqueue *), const char *name) { - struct virtqueue *vq; - void *queue = NULL; - dma_addr_t dma_addr; - size_t queue_size_in_bytes; - struct vring vring; - - /* We assume num is a power of 2. */ - if (num & (num - 1)) { - dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); - return NULL; - } - - /* TODO: allocate each queue chunk individually */ - for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { - queue = vring_alloc_queue(vdev, vring_size(num, vring_align), - &dma_addr, - GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); - if (queue) - break; - } - - if (!num) - return NULL; - - if (!queue) { - /* Try to get a single page. You are my only hope! */ - queue = vring_alloc_queue(vdev, vring_size(num, vring_align), - &dma_addr, GFP_KERNEL|__GFP_ZERO); - } - if (!queue) - return NULL; - - queue_size_in_bytes = vring_size(num, vring_align); - vring_init(&vring, num, queue, vring_align); - - vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, - notify, callback, name); - if (!vq) { - vring_free_queue(vdev, queue_size_in_bytes, queue, - dma_addr); - return NULL; - } - - to_vvq(vq)->queue_dma_addr = dma_addr; - to_vvq(vq)->queue_size_in_bytes = queue_size_in_bytes; - to_vvq(vq)->we_own_ring = true; - - return vq; + return vring_create_virtqueue_split(index, num, vring_align, + vdev, weak_barriers, may_reduce_num, + context, notify, callback, name); } EXPORT_SYMBOL_GPL(vring_create_virtqueue); @@ -1266,8 +1286,10 @@ void vring_del_virtqueue(struct virtqueue *_vq) struct vring_virtqueue *vq = to_vvq(_vq); if (vq->we_own_ring) { - vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes, - vq->split.vring.desc, vq->queue_dma_addr); + vring_free_queue(vq->vq.vdev, + vq->split.queue_size_in_bytes, + vq->split.vring.desc, + vq->split.queue_dma_addr); kfree(vq->split.desc_state); } list_del(&_vq->list); @@ -1343,7 +1365,7 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) BUG_ON(!vq->we_own_ring); - return vq->queue_dma_addr; + return vq->split.queue_dma_addr; } EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); @@ -1353,7 +1375,7 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) BUG_ON(!vq->we_own_ring); - return vq->queue_dma_addr + + return vq->split.queue_dma_addr + ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); } EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); @@ -1364,7 +1386,7 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) BUG_ON(!vq->we_own_ring); - return vq->queue_dma_addr + + return vq->split.queue_dma_addr + ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); } EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); From fb3fba6b162aaa42aeba6e9034f3e92716c2a749 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:26 +0800 Subject: [PATCH 09/13] virtio_ring: cache whether we will use DMA API Cache whether we will use DMA API, instead of doing the check every time. We are going to check whether DMA API is used more often in packed ring. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- drivers/virtio/virtio_ring.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index d00a87909a7e..aafe1969b45e 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -86,6 +86,9 @@ struct vring_desc_state_split { struct vring_virtqueue { struct virtqueue vq; + /* Is DMA API used? */ + bool use_dma_api; + /* Can we use weak barriers? */ bool weak_barriers; @@ -262,7 +265,7 @@ static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg, enum dma_data_direction direction) { - if (!vring_use_dma_api(vq->vq.vdev)) + if (!vq->use_dma_api) return (dma_addr_t)sg_phys(sg); /* @@ -279,7 +282,7 @@ static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, void *cpu_addr, size_t size, enum dma_data_direction direction) { - if (!vring_use_dma_api(vq->vq.vdev)) + if (!vq->use_dma_api) return (dma_addr_t)virt_to_phys(cpu_addr); return dma_map_single(vring_dma_dev(vq), @@ -289,7 +292,7 @@ static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, static int vring_mapping_error(const struct vring_virtqueue *vq, dma_addr_t addr) { - if (!vring_use_dma_api(vq->vq.vdev)) + if (!vq->use_dma_api) return 0; return dma_mapping_error(vring_dma_dev(vq), addr); @@ -305,7 +308,7 @@ static void vring_unmap_one_split(const struct vring_virtqueue *vq, { u16 flags; - if (!vring_use_dma_api(vq->vq.vdev)) + if (!vq->use_dma_api) return; flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); @@ -1202,6 +1205,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->broken = false; vq->last_used_idx = 0; vq->num_added = 0; + vq->use_dma_api = vring_use_dma_api(vdev); list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; From 1ce9e6055fa0a9043405c5604cf19169ec5379ff Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:27 +0800 Subject: [PATCH 10/13] virtio_ring: introduce packed ring support Introduce the packed ring support. Packed ring can only be created by vring_create_virtqueue() and each chunk of packed ring will be allocated individually. Packed ring can not be created on preallocated memory by vring_new_virtqueue() or the likes currently. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- drivers/virtio/virtio_ring.c | 900 +++++++++++++++++++++++++++++++++-- 1 file changed, 870 insertions(+), 30 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index aafe1969b45e..b63eee2034e7 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -83,9 +83,26 @@ struct vring_desc_state_split { struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ }; +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + struct vring_virtqueue { struct virtqueue vq; + /* Is this a packed ring? */ + bool packed_ring; + /* Is DMA API used? */ bool use_dma_api; @@ -109,23 +126,64 @@ struct vring_virtqueue { /* Last used index we've seen. */ u16 last_used_idx; - struct { - /* Actual memory layout for this queue */ - struct vring vring; + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; - /* Last written value to avail->flags */ - u16 avail_flags_shadow; + /* Last written value to avail->flags */ + u16 avail_flags_shadow; - /* Last written value to avail->idx in guest byte order */ - u16 avail_idx_shadow; + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; - /* Per-descriptor state. */ - struct vring_desc_state_split *desc_state; + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; - /* DMA, allocation, and size information */ - size_t queue_size_in_bytes; - dma_addr_t queue_dma_addr; - } split; + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring_packed vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_extra_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; /* How to notify other side. FIXME: commonalize hcalls! */ bool (*notify)(struct virtqueue *vq); @@ -840,6 +898,717 @@ static struct virtqueue *vring_create_virtqueue_split( } +/* + * Packed ring specific functions - *_packed(). + */ + +static void vring_unmap_state_packed(const struct vring_virtqueue *vq, + struct vring_desc_extra_packed *state) +{ + u16 flags; + + if (!vq->use_dma_api) + return; + + flags = state->flags; + + if (flags & VRING_DESC_F_INDIRECT) { + dma_unmap_single(vring_dma_dev(vq), + state->addr, state->len, + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } else { + dma_unmap_page(vring_dma_dev(vq), + state->addr, state->len, + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } +} + +static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, + struct vring_packed_desc *desc) +{ + u16 flags; + + if (!vq->use_dma_api) + return; + + flags = le16_to_cpu(desc->flags); + + if (flags & VRING_DESC_F_INDIRECT) { + dma_unmap_single(vring_dma_dev(vq), + le64_to_cpu(desc->addr), + le32_to_cpu(desc->len), + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } else { + dma_unmap_page(vring_dma_dev(vq), + le64_to_cpu(desc->addr), + le32_to_cpu(desc->len), + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } +} + +static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, + gfp_t gfp) +{ + struct vring_packed_desc *desc; + + /* + * We require lowmem mappings for the descriptors because + * otherwise virt_to_phys will give us bogus addresses in the + * virtqueue. + */ + gfp &= ~__GFP_HIGHMEM; + + desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); + + return desc; +} + +static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + gfp_t gfp) +{ + struct vring_packed_desc *desc; + struct scatterlist *sg; + unsigned int i, n, err_idx; + u16 head, id; + dma_addr_t addr; + + head = vq->packed.next_avail_idx; + desc = alloc_indirect_packed(total_sg, gfp); + + if (unlikely(vq->vq.num_free < 1)) { + pr_debug("Can't add buf len 1 - avail = 0\n"); + END_USE(vq); + return -ENOSPC; + } + + i = 0; + id = vq->free_head; + BUG_ON(id == vq->packed.vring.num); + + for (n = 0; n < out_sgs + in_sgs; n++) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { + addr = vring_map_one_sg(vq, sg, n < out_sgs ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); + if (vring_mapping_error(vq, addr)) + goto unmap_release; + + desc[i].flags = cpu_to_le16(n < out_sgs ? + 0 : VRING_DESC_F_WRITE); + desc[i].addr = cpu_to_le64(addr); + desc[i].len = cpu_to_le32(sg->length); + i++; + } + } + + /* Now that the indirect table is filled in, map it. */ + addr = vring_map_single(vq, desc, + total_sg * sizeof(struct vring_packed_desc), + DMA_TO_DEVICE); + if (vring_mapping_error(vq, addr)) + goto unmap_release; + + vq->packed.vring.desc[head].addr = cpu_to_le64(addr); + vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * + sizeof(struct vring_packed_desc)); + vq->packed.vring.desc[head].id = cpu_to_le16(id); + + if (vq->use_dma_api) { + vq->packed.desc_extra[id].addr = addr; + vq->packed.desc_extra[id].len = total_sg * + sizeof(struct vring_packed_desc); + vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | + vq->packed.avail_used_flags; + } + + /* + * A driver MUST NOT make the first descriptor in the list + * available before all subsequent descriptors comprising + * the list are made available. + */ + virtio_wmb(vq->weak_barriers); + vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | + vq->packed.avail_used_flags); + + /* We're using some buffers from the free list. */ + vq->vq.num_free -= 1; + + /* Update free pointer */ + n = head + 1; + if (n >= vq->packed.vring.num) { + n = 0; + vq->packed.avail_wrap_counter ^= 1; + vq->packed.avail_used_flags ^= + 1 << VRING_PACKED_DESC_F_AVAIL | + 1 << VRING_PACKED_DESC_F_USED; + } + vq->packed.next_avail_idx = n; + vq->free_head = vq->packed.desc_state[id].next; + + /* Store token and indirect buffer state. */ + vq->packed.desc_state[id].num = 1; + vq->packed.desc_state[id].data = data; + vq->packed.desc_state[id].indir_desc = desc; + vq->packed.desc_state[id].last = id; + + vq->num_added += 1; + + pr_debug("Added buffer head %i to %p\n", head, vq); + END_USE(vq); + + return 0; + +unmap_release: + err_idx = i; + + for (i = 0; i < err_idx; i++) + vring_unmap_desc_packed(vq, &desc[i]); + + kfree(desc); + + END_USE(vq); + return -EIO; +} + +static inline int virtqueue_add_packed(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + void *ctx, + gfp_t gfp) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct vring_packed_desc *desc; + struct scatterlist *sg; + unsigned int i, n, c, descs_used, err_idx; + __le16 uninitialized_var(head_flags), flags; + u16 head, id, uninitialized_var(prev), curr, avail_used_flags; + + START_USE(vq); + + BUG_ON(data == NULL); + BUG_ON(ctx && vq->indirect); + + if (unlikely(vq->broken)) { + END_USE(vq); + return -EIO; + } + + LAST_ADD_TIME_UPDATE(vq); + + BUG_ON(total_sg == 0); + + if (virtqueue_use_indirect(_vq, total_sg)) + return virtqueue_add_indirect_packed(vq, sgs, total_sg, + out_sgs, in_sgs, data, gfp); + + head = vq->packed.next_avail_idx; + avail_used_flags = vq->packed.avail_used_flags; + + WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); + + desc = vq->packed.vring.desc; + i = head; + descs_used = total_sg; + + if (unlikely(vq->vq.num_free < descs_used)) { + pr_debug("Can't add buf len %i - avail = %i\n", + descs_used, vq->vq.num_free); + END_USE(vq); + return -ENOSPC; + } + + id = vq->free_head; + BUG_ON(id == vq->packed.vring.num); + + curr = id; + c = 0; + for (n = 0; n < out_sgs + in_sgs; n++) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { + dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); + if (vring_mapping_error(vq, addr)) + goto unmap_release; + + flags = cpu_to_le16(vq->packed.avail_used_flags | + (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | + (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); + if (i == head) + head_flags = flags; + else + desc[i].flags = flags; + + desc[i].addr = cpu_to_le64(addr); + desc[i].len = cpu_to_le32(sg->length); + desc[i].id = cpu_to_le16(id); + + if (unlikely(vq->use_dma_api)) { + vq->packed.desc_extra[curr].addr = addr; + vq->packed.desc_extra[curr].len = sg->length; + vq->packed.desc_extra[curr].flags = + le16_to_cpu(flags); + } + prev = curr; + curr = vq->packed.desc_state[curr].next; + + if ((unlikely(++i >= vq->packed.vring.num))) { + i = 0; + vq->packed.avail_used_flags ^= + 1 << VRING_PACKED_DESC_F_AVAIL | + 1 << VRING_PACKED_DESC_F_USED; + } + } + } + + if (i < head) + vq->packed.avail_wrap_counter ^= 1; + + /* We're using some buffers from the free list. */ + vq->vq.num_free -= descs_used; + + /* Update free pointer */ + vq->packed.next_avail_idx = i; + vq->free_head = curr; + + /* Store token. */ + vq->packed.desc_state[id].num = descs_used; + vq->packed.desc_state[id].data = data; + vq->packed.desc_state[id].indir_desc = ctx; + vq->packed.desc_state[id].last = prev; + + /* + * A driver MUST NOT make the first descriptor in the list + * available before all subsequent descriptors comprising + * the list are made available. + */ + virtio_wmb(vq->weak_barriers); + vq->packed.vring.desc[head].flags = head_flags; + vq->num_added += descs_used; + + pr_debug("Added buffer head %i to %p\n", head, vq); + END_USE(vq); + + return 0; + +unmap_release: + err_idx = i; + i = head; + + vq->packed.avail_used_flags = avail_used_flags; + + for (n = 0; n < total_sg; n++) { + if (i == err_idx) + break; + vring_unmap_desc_packed(vq, &desc[i]); + i++; + if (i >= vq->packed.vring.num) + i = 0; + } + + END_USE(vq); + return -EIO; +} + +static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u16 flags; + bool needs_kick; + union { + struct { + __le16 off_wrap; + __le16 flags; + }; + u32 u32; + } snapshot; + + START_USE(vq); + + /* + * We need to expose the new flags value before checking notification + * suppressions. + */ + virtio_mb(vq->weak_barriers); + + vq->num_added = 0; + + snapshot.u32 = *(u32 *)vq->packed.vring.device; + flags = le16_to_cpu(snapshot.flags); + + LAST_ADD_TIME_CHECK(vq); + LAST_ADD_TIME_INVALID(vq); + + needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); + END_USE(vq); + return needs_kick; +} + +static void detach_buf_packed(struct vring_virtqueue *vq, + unsigned int id, void **ctx) +{ + struct vring_desc_state_packed *state = NULL; + struct vring_packed_desc *desc; + unsigned int i, curr; + + state = &vq->packed.desc_state[id]; + + /* Clear data ptr. */ + state->data = NULL; + + vq->packed.desc_state[state->last].next = vq->free_head; + vq->free_head = id; + vq->vq.num_free += state->num; + + if (unlikely(vq->use_dma_api)) { + curr = id; + for (i = 0; i < state->num; i++) { + vring_unmap_state_packed(vq, + &vq->packed.desc_extra[curr]); + curr = vq->packed.desc_state[curr].next; + } + } + + if (vq->indirect) { + u32 len; + + /* Free the indirect table, if any, now that it's unmapped. */ + desc = state->indir_desc; + if (!desc) + return; + + if (vq->use_dma_api) { + len = vq->packed.desc_extra[id].len; + for (i = 0; i < len / sizeof(struct vring_packed_desc); + i++) + vring_unmap_desc_packed(vq, &desc[i]); + } + kfree(desc); + state->indir_desc = NULL; + } else if (ctx) { + *ctx = state->indir_desc; + } +} + +static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, + u16 idx, bool used_wrap_counter) +{ + bool avail, used; + u16 flags; + + flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); + avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); + used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); + + return avail == used && used == used_wrap_counter; +} + +static inline bool more_used_packed(const struct vring_virtqueue *vq) +{ + return is_used_desc_packed(vq, vq->last_used_idx, + vq->packed.used_wrap_counter); +} + +static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, + unsigned int *len, + void **ctx) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u16 last_used, id; + void *ret; + + START_USE(vq); + + if (unlikely(vq->broken)) { + END_USE(vq); + return NULL; + } + + if (!more_used_packed(vq)) { + pr_debug("No more buffers in queue\n"); + END_USE(vq); + return NULL; + } + + /* Only get used elements after they have been exposed by host. */ + virtio_rmb(vq->weak_barriers); + + last_used = vq->last_used_idx; + id = le16_to_cpu(vq->packed.vring.desc[last_used].id); + *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); + + if (unlikely(id >= vq->packed.vring.num)) { + BAD_RING(vq, "id %u out of range\n", id); + return NULL; + } + if (unlikely(!vq->packed.desc_state[id].data)) { + BAD_RING(vq, "id %u is not a head!\n", id); + return NULL; + } + + /* detach_buf_packed clears data, so grab it now. */ + ret = vq->packed.desc_state[id].data; + detach_buf_packed(vq, id, ctx); + + vq->last_used_idx += vq->packed.desc_state[id].num; + if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { + vq->last_used_idx -= vq->packed.vring.num; + vq->packed.used_wrap_counter ^= 1; + } + + LAST_ADD_TIME_INVALID(vq); + + END_USE(vq); + return ret; +} + +static void virtqueue_disable_cb_packed(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { + vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; + vq->packed.vring.driver->flags = + cpu_to_le16(vq->packed.event_flags_shadow); + } +} + +static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + START_USE(vq); + + /* + * We optimistically turn back on interrupts, then check if there was + * more to do. + */ + + if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { + vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_ENABLE; + vq->packed.vring.driver->flags = + cpu_to_le16(vq->packed.event_flags_shadow); + } + + END_USE(vq); + return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << + VRING_PACKED_EVENT_F_WRAP_CTR); +} + +static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + bool wrap_counter; + u16 used_idx; + + wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; + used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); + + return is_used_desc_packed(vq, used_idx, wrap_counter); +} + +static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u16 used_idx, wrap_counter; + + START_USE(vq); + + /* + * We optimistically turn back on interrupts, then check if there was + * more to do. + */ + + used_idx = vq->last_used_idx; + wrap_counter = vq->packed.used_wrap_counter; + + if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { + vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_ENABLE; + vq->packed.vring.driver->flags = + cpu_to_le16(vq->packed.event_flags_shadow); + } + + /* + * We need to update event suppression structure first + * before re-checking for more used buffers. + */ + virtio_mb(vq->weak_barriers); + + if (is_used_desc_packed(vq, used_idx, wrap_counter)) { + END_USE(vq); + return false; + } + + END_USE(vq); + return true; +} + +static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + unsigned int i; + void *buf; + + START_USE(vq); + + for (i = 0; i < vq->packed.vring.num; i++) { + if (!vq->packed.desc_state[i].data) + continue; + /* detach_buf clears data, so grab it now. */ + buf = vq->packed.desc_state[i].data; + detach_buf_packed(vq, i, NULL); + END_USE(vq); + return buf; + } + /* That should have freed everything. */ + BUG_ON(vq->vq.num_free != vq->packed.vring.num); + + END_USE(vq); + return NULL; +} + +static struct virtqueue *vring_create_virtqueue_packed( + unsigned int index, + unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + bool weak_barriers, + bool may_reduce_num, + bool context, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name) +{ + struct vring_virtqueue *vq; + struct vring_packed_desc *ring; + struct vring_packed_desc_event *driver, *device; + dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; + size_t ring_size_in_bytes, event_size_in_bytes; + unsigned int i; + + ring_size_in_bytes = num * sizeof(struct vring_packed_desc); + + ring = vring_alloc_queue(vdev, ring_size_in_bytes, + &ring_dma_addr, + GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); + if (!ring) + goto err_ring; + + event_size_in_bytes = sizeof(struct vring_packed_desc_event); + + driver = vring_alloc_queue(vdev, event_size_in_bytes, + &driver_event_dma_addr, + GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); + if (!driver) + goto err_driver; + + device = vring_alloc_queue(vdev, event_size_in_bytes, + &device_event_dma_addr, + GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); + if (!device) + goto err_device; + + vq = kmalloc(sizeof(*vq), GFP_KERNEL); + if (!vq) + goto err_vq; + + vq->vq.callback = callback; + vq->vq.vdev = vdev; + vq->vq.name = name; + vq->vq.num_free = num; + vq->vq.index = index; + vq->we_own_ring = true; + vq->notify = notify; + vq->weak_barriers = weak_barriers; + vq->broken = false; + vq->last_used_idx = 0; + vq->num_added = 0; + vq->packed_ring = true; + vq->use_dma_api = vring_use_dma_api(vdev); + list_add_tail(&vq->vq.list, &vdev->vqs); +#ifdef DEBUG + vq->in_use = false; + vq->last_add_time_valid = false; +#endif + + vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && + !context; + vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); + + vq->packed.ring_dma_addr = ring_dma_addr; + vq->packed.driver_event_dma_addr = driver_event_dma_addr; + vq->packed.device_event_dma_addr = device_event_dma_addr; + + vq->packed.ring_size_in_bytes = ring_size_in_bytes; + vq->packed.event_size_in_bytes = event_size_in_bytes; + + vq->packed.vring.num = num; + vq->packed.vring.desc = ring; + vq->packed.vring.driver = driver; + vq->packed.vring.device = device; + + vq->packed.next_avail_idx = 0; + vq->packed.avail_wrap_counter = 1; + vq->packed.used_wrap_counter = 1; + vq->packed.event_flags_shadow = 0; + vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; + + vq->packed.desc_state = kmalloc_array(num, + sizeof(struct vring_desc_state_packed), + GFP_KERNEL); + if (!vq->packed.desc_state) + goto err_desc_state; + + memset(vq->packed.desc_state, 0, + num * sizeof(struct vring_desc_state_packed)); + + /* Put everything in free lists. */ + vq->free_head = 0; + for (i = 0; i < num-1; i++) + vq->packed.desc_state[i].next = i + 1; + + vq->packed.desc_extra = kmalloc_array(num, + sizeof(struct vring_desc_extra_packed), + GFP_KERNEL); + if (!vq->packed.desc_extra) + goto err_desc_extra; + + memset(vq->packed.desc_extra, 0, + num * sizeof(struct vring_desc_extra_packed)); + + /* No callback? Tell other side not to bother us. */ + if (!callback) { + vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; + vq->packed.vring.driver->flags = + cpu_to_le16(vq->packed.event_flags_shadow); + } + + return &vq->vq; + +err_desc_extra: + kfree(vq->packed.desc_state); +err_desc_state: + kfree(vq); +err_vq: + vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr); +err_device: + vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr); +err_driver: + vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); +err_ring: + return NULL; +} + + /* * Generic functions and exported symbols. */ @@ -853,8 +1622,12 @@ static inline int virtqueue_add(struct virtqueue *_vq, void *ctx, gfp_t gfp) { - return virtqueue_add_split(_vq, sgs, total_sg, - out_sgs, in_sgs, data, ctx, gfp); + struct vring_virtqueue *vq = to_vvq(_vq); + + return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, + out_sgs, in_sgs, data, ctx, gfp) : + virtqueue_add_split(_vq, sgs, total_sg, + out_sgs, in_sgs, data, ctx, gfp); } /** @@ -973,7 +1746,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); */ bool virtqueue_kick_prepare(struct virtqueue *_vq) { - return virtqueue_kick_prepare_split(_vq); + struct vring_virtqueue *vq = to_vvq(_vq); + + return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : + virtqueue_kick_prepare_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); @@ -1040,7 +1816,10 @@ EXPORT_SYMBOL_GPL(virtqueue_kick); void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, void **ctx) { - return virtqueue_get_buf_ctx_split(_vq, len, ctx); + struct vring_virtqueue *vq = to_vvq(_vq); + + return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : + virtqueue_get_buf_ctx_split(_vq, len, ctx); } EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); @@ -1049,7 +1828,6 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) return virtqueue_get_buf_ctx(_vq, len, NULL); } EXPORT_SYMBOL_GPL(virtqueue_get_buf); - /** * virtqueue_disable_cb - disable callbacks * @vq: the struct virtqueue we're talking about. @@ -1061,7 +1839,12 @@ EXPORT_SYMBOL_GPL(virtqueue_get_buf); */ void virtqueue_disable_cb(struct virtqueue *_vq) { - virtqueue_disable_cb_split(_vq); + struct vring_virtqueue *vq = to_vvq(_vq); + + if (vq->packed_ring) + virtqueue_disable_cb_packed(_vq); + else + virtqueue_disable_cb_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_disable_cb); @@ -1079,7 +1862,10 @@ EXPORT_SYMBOL_GPL(virtqueue_disable_cb); */ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) { - return virtqueue_enable_cb_prepare_split(_vq); + struct vring_virtqueue *vq = to_vvq(_vq); + + return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : + virtqueue_enable_cb_prepare_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); @@ -1097,7 +1883,8 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) struct vring_virtqueue *vq = to_vvq(_vq); virtio_mb(vq->weak_barriers); - return virtqueue_poll_split(_vq, last_used_idx); + return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : + virtqueue_poll_split(_vq, last_used_idx); } EXPORT_SYMBOL_GPL(virtqueue_poll); @@ -1135,7 +1922,10 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb); */ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) { - return virtqueue_enable_cb_delayed_split(_vq); + struct vring_virtqueue *vq = to_vvq(_vq); + + return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : + virtqueue_enable_cb_delayed_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); @@ -1149,13 +1939,16 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); */ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) { - return virtqueue_detach_unused_buf_split(_vq); + struct vring_virtqueue *vq = to_vvq(_vq); + + return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) : + virtqueue_detach_unused_buf_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); static inline bool more_used(const struct vring_virtqueue *vq) { - return more_used_split(vq); + return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq); } irqreturn_t vring_interrupt(int irq, void *_vq) @@ -1178,6 +1971,7 @@ irqreturn_t vring_interrupt(int irq, void *_vq) } EXPORT_SYMBOL_GPL(vring_interrupt); +/* Only available for split ring */ struct virtqueue *__vring_new_virtqueue(unsigned int index, struct vring vring, struct virtio_device *vdev, @@ -1190,10 +1984,14 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, unsigned int i; struct vring_virtqueue *vq; + if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) + return NULL; + vq = kmalloc(sizeof(*vq), GFP_KERNEL); if (!vq) return NULL; + vq->packed_ring = false; vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.name = name; @@ -1261,12 +2059,19 @@ struct virtqueue *vring_create_virtqueue( void (*callback)(struct virtqueue *), const char *name) { + + if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) + return vring_create_virtqueue_packed(index, num, vring_align, + vdev, weak_barriers, may_reduce_num, + context, notify, callback, name); + return vring_create_virtqueue_split(index, num, vring_align, vdev, weak_barriers, may_reduce_num, context, notify, callback, name); } EXPORT_SYMBOL_GPL(vring_create_virtqueue); +/* Only available for split ring */ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int num, unsigned int vring_align, @@ -1279,6 +2084,10 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, const char *name) { struct vring vring; + + if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) + return NULL; + vring_init(&vring, num, pages, vring_align); return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, notify, callback, name); @@ -1290,11 +2099,32 @@ void vring_del_virtqueue(struct virtqueue *_vq) struct vring_virtqueue *vq = to_vvq(_vq); if (vq->we_own_ring) { - vring_free_queue(vq->vq.vdev, - vq->split.queue_size_in_bytes, - vq->split.vring.desc, - vq->split.queue_dma_addr); - kfree(vq->split.desc_state); + if (vq->packed_ring) { + vring_free_queue(vq->vq.vdev, + vq->packed.ring_size_in_bytes, + vq->packed.vring.desc, + vq->packed.ring_dma_addr); + + vring_free_queue(vq->vq.vdev, + vq->packed.event_size_in_bytes, + vq->packed.vring.driver, + vq->packed.driver_event_dma_addr); + + vring_free_queue(vq->vq.vdev, + vq->packed.event_size_in_bytes, + vq->packed.vring.device, + vq->packed.device_event_dma_addr); + + kfree(vq->packed.desc_state); + kfree(vq->packed.desc_extra); + } else { + vring_free_queue(vq->vq.vdev, + vq->split.queue_size_in_bytes, + vq->split.vring.desc, + vq->split.queue_dma_addr); + + kfree(vq->split.desc_state); + } } list_del(&_vq->list); kfree(vq); @@ -1336,7 +2166,7 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) struct vring_virtqueue *vq = to_vvq(_vq); - return vq->split.vring.num; + return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; } EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); @@ -1369,6 +2199,9 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) BUG_ON(!vq->we_own_ring); + if (vq->packed_ring) + return vq->packed.ring_dma_addr; + return vq->split.queue_dma_addr; } EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); @@ -1379,6 +2212,9 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) BUG_ON(!vq->we_own_ring); + if (vq->packed_ring) + return vq->packed.driver_event_dma_addr; + return vq->split.queue_dma_addr + ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); } @@ -1390,11 +2226,15 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) BUG_ON(!vq->we_own_ring); + if (vq->packed_ring) + return vq->packed.device_event_dma_addr; + return vq->split.queue_dma_addr + ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); } EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); +/* Only available for split ring */ const struct vring *virtqueue_get_vring(struct virtqueue *vq) { return &to_vvq(vq)->split.vring; From f51f982682e2a612fa642dfee47cc8da63677148 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:28 +0800 Subject: [PATCH 11/13] virtio_ring: leverage event idx in packed ring Leverage the EVENT_IDX feature in packed ring to suppress events when it's available. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- drivers/virtio/virtio_ring.c | 77 +++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 6 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index b63eee2034e7..40e4d3798d16 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1222,7 +1222,7 @@ unmap_release: static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - u16 flags; + u16 new, old, off_wrap, flags, wrap_counter, event_idx; bool needs_kick; union { struct { @@ -1240,6 +1240,8 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) */ virtio_mb(vq->weak_barriers); + old = vq->packed.next_avail_idx - vq->num_added; + new = vq->packed.next_avail_idx; vq->num_added = 0; snapshot.u32 = *(u32 *)vq->packed.vring.device; @@ -1248,7 +1250,20 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) LAST_ADD_TIME_CHECK(vq); LAST_ADD_TIME_INVALID(vq); - needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); + if (flags != VRING_PACKED_EVENT_FLAG_DESC) { + needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); + goto out; + } + + off_wrap = le16_to_cpu(snapshot.off_wrap); + + wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; + event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); + if (wrap_counter != vq->packed.avail_wrap_counter) + event_idx -= vq->packed.vring.num; + + needs_kick = vring_need_event(event_idx, new, old); +out: END_USE(vq); return needs_kick; } @@ -1365,6 +1380,18 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, vq->packed.used_wrap_counter ^= 1; } + /* + * If we expect an interrupt for the next entry, tell host + * by writing event index and flush out the write before + * the read in the next get_buf call. + */ + if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) + virtio_store_mb(vq->weak_barriers, + &vq->packed.vring.driver->off_wrap, + cpu_to_le16(vq->last_used_idx | + (vq->packed.used_wrap_counter << + VRING_PACKED_EVENT_F_WRAP_CTR))); + LAST_ADD_TIME_INVALID(vq); END_USE(vq); @@ -1393,8 +1420,22 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) * more to do. */ + if (vq->event) { + vq->packed.vring.driver->off_wrap = + cpu_to_le16(vq->last_used_idx | + (vq->packed.used_wrap_counter << + VRING_PACKED_EVENT_F_WRAP_CTR)); + /* + * We need to update event offset and event wrap + * counter first before updating event flags. + */ + virtio_wmb(vq->weak_barriers); + } + if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { - vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_ENABLE; + vq->packed.event_flags_shadow = vq->event ? + VRING_PACKED_EVENT_FLAG_DESC : + VRING_PACKED_EVENT_FLAG_ENABLE; vq->packed.vring.driver->flags = cpu_to_le16(vq->packed.event_flags_shadow); } @@ -1420,6 +1461,7 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 used_idx, wrap_counter; + u16 bufs; START_USE(vq); @@ -1428,11 +1470,34 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) * more to do. */ - used_idx = vq->last_used_idx; - wrap_counter = vq->packed.used_wrap_counter; + if (vq->event) { + /* TODO: tune this threshold */ + bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; + wrap_counter = vq->packed.used_wrap_counter; + + used_idx = vq->last_used_idx + bufs; + if (used_idx >= vq->packed.vring.num) { + used_idx -= vq->packed.vring.num; + wrap_counter ^= 1; + } + + vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | + (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); + + /* + * We need to update event offset and event wrap + * counter first before updating event flags. + */ + virtio_wmb(vq->weak_barriers); + } else { + used_idx = vq->last_used_idx; + wrap_counter = vq->packed.used_wrap_counter; + } if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { - vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_ENABLE; + vq->packed.event_flags_shadow = vq->event ? + VRING_PACKED_EVENT_FLAG_DESC : + VRING_PACKED_EVENT_FLAG_ENABLE; vq->packed.vring.driver->flags = cpu_to_le16(vq->packed.event_flags_shadow); } From 3a814fdf271552a5c4641f812ff7e2dd8ad09ee4 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:29 +0800 Subject: [PATCH 12/13] virtio_ring: disable packed ring on unsupported transports Currently, ccw, vop and remoteproc need some legacy virtio APIs to create or access virtio rings, which are not supported by packed ring. So disable packed ring on these transports for now. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- drivers/misc/mic/vop/vop_main.c | 13 +++++++++++++ drivers/remoteproc/remoteproc_virtio.c | 13 +++++++++++++ drivers/s390/virtio/virtio_ccw.c | 14 ++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c index 3633202e18f4..6b212c8b78e7 100644 --- a/drivers/misc/mic/vop/vop_main.c +++ b/drivers/misc/mic/vop/vop_main.c @@ -129,6 +129,16 @@ static u64 vop_get_features(struct virtio_device *vdev) return features; } +static void vop_transport_features(struct virtio_device *vdev) +{ + /* + * Packed ring isn't enabled on virtio_vop for now, + * because virtio_vop uses vring_new_virtqueue() which + * creates virtio rings on preallocated memory. + */ + __virtio_clear_bit(vdev, VIRTIO_F_RING_PACKED); +} + static int vop_finalize_features(struct virtio_device *vdev) { unsigned int i, bits; @@ -141,6 +151,9 @@ static int vop_finalize_features(struct virtio_device *vdev) /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); + /* Give virtio_vop a chance to accept features. */ + vop_transport_features(vdev); + memset_io(out_features, 0, feature_len); bits = min_t(unsigned, feature_len, sizeof(vdev->features)) * 8; diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index de21f620b882..183fc42a510a 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -214,6 +214,16 @@ static u64 rproc_virtio_get_features(struct virtio_device *vdev) return rsc->dfeatures; } +static void rproc_transport_features(struct virtio_device *vdev) +{ + /* + * Packed ring isn't enabled on remoteproc for now, + * because remoteproc uses vring_new_virtqueue() which + * creates virtio rings on preallocated memory. + */ + __virtio_clear_bit(vdev, VIRTIO_F_RING_PACKED); +} + static int rproc_virtio_finalize_features(struct virtio_device *vdev) { struct rproc_vdev *rvdev = vdev_to_rvdev(vdev); @@ -224,6 +234,9 @@ static int rproc_virtio_finalize_features(struct virtio_device *vdev) /* Give virtio_ring a chance to accept features */ vring_transport_features(vdev); + /* Give virtio_rproc a chance to accept features. */ + rproc_transport_features(vdev); + /* Make sure we don't have any features > 32 bits! */ BUG_ON((u32)vdev->features != vdev->features); diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 97b6f197f007..406d1f64ad65 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -765,6 +765,17 @@ out_free: return rc; } +static void ccw_transport_features(struct virtio_device *vdev) +{ + /* + * Packed ring isn't enabled on virtio_ccw for now, + * because virtio_ccw uses some legacy accessors, + * e.g. virtqueue_get_avail() and virtqueue_get_used() + * which aren't available in packed ring currently. + */ + __virtio_clear_bit(vdev, VIRTIO_F_RING_PACKED); +} + static int virtio_ccw_finalize_features(struct virtio_device *vdev) { struct virtio_ccw_device *vcdev = to_vc_device(vdev); @@ -791,6 +802,9 @@ static int virtio_ccw_finalize_features(struct virtio_device *vdev) /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); + /* Give virtio_ccw a chance to accept features. */ + ccw_transport_features(vdev); + features->index = 0; features->features = cpu_to_le32((u32)vdev->features); /* Write the first half of the feature bits to the host. */ From f959a128fe83090981add69aadc87a4e496e9369 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:30 +0800 Subject: [PATCH 13/13] virtio_ring: advertize packed ring layout Advertize the packed ring layout support. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- drivers/virtio/virtio_ring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 40e4d3798d16..cd7e755484e3 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2211,6 +2211,8 @@ void vring_transport_features(struct virtio_device *vdev) break; case VIRTIO_F_IOMMU_PLATFORM: break; + case VIRTIO_F_RING_PACKED: + break; default: /* We don't understand this bit. */ __virtio_clear_bit(vdev, i);