From c011641ec4fcb61d1335f61b413117c1b7d83e5e Mon Sep 17 00:00:00 2001 From: Tuomas Tynkkynen Date: Mon, 15 Oct 2018 02:21:01 -0700 Subject: [PATCH] virtio: Add codes for virtual queue/ring management This adds support for managing virtual queue/ring, the channel for high performance I/O between host and guest. Signed-off-by: Tuomas Tynkkynen Signed-off-by: Bin Meng Reviewed-by: Simon Glass --- drivers/virtio/Makefile | 2 +- drivers/virtio/virtio_ring.c | 358 +++++++++++++++++++++++++++++++++++ include/virtio_ring.h | 320 +++++++++++++++++++++++++++++++ 3 files changed, 679 insertions(+), 1 deletion(-) create mode 100644 drivers/virtio/virtio_ring.c create mode 100644 include/virtio_ring.h diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 23e7be7165..17d264a771 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -3,4 +3,4 @@ # Copyright (C) 2018, Tuomas Tynkkynen # Copyright (C) 2018, Bin Meng -obj-y += virtio-uclass.o +obj-y += virtio-uclass.o virtio_ring.o diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c new file mode 100644 index 0000000000..0eeb3501c2 --- /dev/null +++ b/drivers/virtio/virtio_ring.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2018, Tuomas Tynkkynen + * Copyright (C) 2018, Bin Meng + * + * virtio ring implementation + */ + +#include +#include +#include +#include +#include +#include + +int virtqueue_add(struct virtqueue *vq, struct virtio_sg *sgs[], + unsigned int out_sgs, unsigned int in_sgs) +{ + struct vring_desc *desc; + unsigned int total_sg = out_sgs + in_sgs; + unsigned int i, n, avail, descs_used, uninitialized_var(prev); + int head; + + WARN_ON(total_sg == 0); + + head = vq->free_head; + + desc = vq->vring.desc; + i = head; + descs_used = total_sg; + + if (vq->num_free < descs_used) { + debug("Can't add buf len %i - avail = %i\n", + descs_used, vq->num_free); + /* + * FIXME: for historical reasons, we force a notify here if + * there are outgoing parts to the buffer. Presumably the + * host should service the ring ASAP. + */ + if (out_sgs) + virtio_notify(vq->vdev, vq); + return -ENOSPC; + } + + for (n = 0; n < out_sgs; n++) { + struct virtio_sg *sg = sgs[n]; + + desc[i].flags = cpu_to_virtio16(vq->vdev, VRING_DESC_F_NEXT); + desc[i].addr = cpu_to_virtio64(vq->vdev, (u64)(size_t)sg->addr); + desc[i].len = cpu_to_virtio32(vq->vdev, sg->length); + + prev = i; + i = virtio16_to_cpu(vq->vdev, desc[i].next); + } + for (; n < (out_sgs + in_sgs); n++) { + struct virtio_sg *sg = sgs[n]; + + desc[i].flags = cpu_to_virtio16(vq->vdev, VRING_DESC_F_NEXT | + VRING_DESC_F_WRITE); + desc[i].addr = cpu_to_virtio64(vq->vdev, + (u64)(uintptr_t)sg->addr); + desc[i].len = cpu_to_virtio32(vq->vdev, sg->length); + + prev = i; + i = virtio16_to_cpu(vq->vdev, desc[i].next); + } + /* Last one doesn't continue */ + desc[prev].flags &= cpu_to_virtio16(vq->vdev, ~VRING_DESC_F_NEXT); + + /* We're using some buffers from the free list. */ + vq->num_free -= descs_used; + + /* Update free pointer */ + vq->free_head = i; + + /* + * Put entry in available array (but don't update avail->idx + * until they do sync). + */ + avail = vq->avail_idx_shadow & (vq->vring.num - 1); + vq->vring.avail->ring[avail] = cpu_to_virtio16(vq->vdev, head); + + /* + * Descriptors and available array need to be set before we expose the + * new available array entries. + */ + virtio_wmb(); + vq->avail_idx_shadow++; + vq->vring.avail->idx = cpu_to_virtio16(vq->vdev, vq->avail_idx_shadow); + vq->num_added++; + + /* + * This is very unlikely, but theoretically possible. + * Kick just in case. + */ + if (unlikely(vq->num_added == (1 << 16) - 1)) + virtqueue_kick(vq); + + return 0; +} + +static bool virtqueue_kick_prepare(struct virtqueue *vq) +{ + u16 new, old; + bool needs_kick; + + /* + * We need to expose available array entries before checking + * avail event. + */ + virtio_mb(); + + old = vq->avail_idx_shadow - vq->num_added; + new = vq->avail_idx_shadow; + vq->num_added = 0; + + if (vq->event) { + needs_kick = vring_need_event(virtio16_to_cpu(vq->vdev, + vring_avail_event(&vq->vring)), new, old); + } else { + needs_kick = !(vq->vring.used->flags & cpu_to_virtio16(vq->vdev, + VRING_USED_F_NO_NOTIFY)); + } + + return needs_kick; +} + +void virtqueue_kick(struct virtqueue *vq) +{ + if (virtqueue_kick_prepare(vq)) + virtio_notify(vq->vdev, vq); +} + +static void detach_buf(struct virtqueue *vq, unsigned int head) +{ + unsigned int i; + __virtio16 nextflag = cpu_to_virtio16(vq->vdev, VRING_DESC_F_NEXT); + + /* Put back on free list: unmap first-level descriptors and find end */ + i = head; + + while (vq->vring.desc[i].flags & nextflag) { + i = virtio16_to_cpu(vq->vdev, vq->vring.desc[i].next); + vq->num_free++; + } + + vq->vring.desc[i].next = cpu_to_virtio16(vq->vdev, vq->free_head); + vq->free_head = head; + + /* Plus final descriptor */ + vq->num_free++; +} + +static inline bool more_used(const struct virtqueue *vq) +{ + return vq->last_used_idx != virtio16_to_cpu(vq->vdev, + vq->vring.used->idx); +} + +void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len) +{ + unsigned int i; + u16 last_used; + + if (!more_used(vq)) { + debug("(%s.%d): No more buffers in queue\n", + vq->vdev->name, vq->index); + return NULL; + } + + /* Only get used array entries after they have been exposed by host */ + virtio_rmb(); + + last_used = (vq->last_used_idx & (vq->vring.num - 1)); + i = virtio32_to_cpu(vq->vdev, vq->vring.used->ring[last_used].id); + if (len) { + *len = virtio32_to_cpu(vq->vdev, + vq->vring.used->ring[last_used].len); + debug("(%s.%d): last used idx %u with len %u\n", + vq->vdev->name, vq->index, i, *len); + } + + if (unlikely(i >= vq->vring.num)) { + printf("(%s.%d): id %u out of range\n", + vq->vdev->name, vq->index, i); + return NULL; + } + + detach_buf(vq, i); + vq->last_used_idx++; + /* + * If we expect an interrupt for the next entry, tell host + * by writing event index and flush out the write before + * the read in the next get_buf call. + */ + if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) + virtio_store_mb(&vring_used_event(&vq->vring), + cpu_to_virtio16(vq->vdev, vq->last_used_idx)); + + return (void *)(uintptr_t)virtio64_to_cpu(vq->vdev, + vq->vring.desc[i].addr); +} + +static struct virtqueue *__vring_new_virtqueue(unsigned int index, + struct vring vring, + struct udevice *udev) +{ + unsigned int i; + struct virtqueue *vq; + struct virtio_dev_priv *uc_priv = dev_get_uclass_priv(udev); + struct udevice *vdev = uc_priv->vdev; + + vq = malloc(sizeof(*vq)); + if (!vq) + return NULL; + + vq->vdev = vdev; + vq->index = index; + vq->num_free = vring.num; + vq->vring = vring; + vq->last_used_idx = 0; + vq->avail_flags_shadow = 0; + vq->avail_idx_shadow = 0; + vq->num_added = 0; + list_add_tail(&vq->list, &uc_priv->vqs); + + vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); + + /* Tell other side not to bother us */ + vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; + if (!vq->event) + vq->vring.avail->flags = cpu_to_virtio16(vdev, + vq->avail_flags_shadow); + + /* Put everything in free lists */ + vq->free_head = 0; + for (i = 0; i < vring.num - 1; i++) + vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); + + return vq; +} + +struct virtqueue *vring_create_virtqueue(unsigned int index, unsigned int num, + unsigned int vring_align, + struct udevice *udev) +{ + struct virtqueue *vq; + void *queue = NULL; + struct vring vring; + + /* We assume num is a power of 2 */ + if (num & (num - 1)) { + printf("Bad virtqueue length %u\n", num); + return NULL; + } + + /* TODO: allocate each queue chunk individually */ + for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { + queue = memalign(PAGE_SIZE, vring_size(num, vring_align)); + if (queue) + break; + } + + if (!num) + return NULL; + + if (!queue) { + /* Try to get a single page. You are my only hope! */ + queue = memalign(PAGE_SIZE, vring_size(num, vring_align)); + } + if (!queue) + return NULL; + + memset(queue, 0, vring_size(num, vring_align)); + vring_init(&vring, num, queue, vring_align); + + vq = __vring_new_virtqueue(index, vring, udev); + if (!vq) { + free(queue); + return NULL; + } + debug("(%s): created vring @ %p for vq @ %p with num %u\n", udev->name, + queue, vq, num); + + return vq; +} + +void vring_del_virtqueue(struct virtqueue *vq) +{ + free(vq->vring.desc); + list_del(&vq->list); + free(vq); +} + +unsigned int virtqueue_get_vring_size(struct virtqueue *vq) +{ + return vq->vring.num; +} + +ulong virtqueue_get_desc_addr(struct virtqueue *vq) +{ + return (ulong)vq->vring.desc; +} + +ulong virtqueue_get_avail_addr(struct virtqueue *vq) +{ + return (ulong)vq->vring.desc + + ((char *)vq->vring.avail - (char *)vq->vring.desc); +} + +ulong virtqueue_get_used_addr(struct virtqueue *vq) +{ + return (ulong)vq->vring.desc + + ((char *)vq->vring.used - (char *)vq->vring.desc); +} + +bool virtqueue_poll(struct virtqueue *vq, u16 last_used_idx) +{ + virtio_mb(); + + return last_used_idx != virtio16_to_cpu(vq->vdev, vq->vring.used->idx); +} + +void virtqueue_dump(struct virtqueue *vq) +{ + unsigned int i; + + printf("virtqueue %p for dev %s:\n", vq, vq->vdev->name); + printf("\tindex %u, phys addr %p num %u\n", + vq->index, vq->vring.desc, vq->vring.num); + printf("\tfree_head %u, num_added %u, num_free %u\n", + vq->free_head, vq->num_added, vq->num_free); + printf("\tlast_used_idx %u, avail_flags_shadow %u, avail_idx_shadow %u\n", + vq->last_used_idx, vq->avail_flags_shadow, vq->avail_idx_shadow); + + printf("Descriptor dump:\n"); + for (i = 0; i < vq->vring.num; i++) { + printf("\tdesc[%u] = { 0x%llx, len %u, flags %u, next %u }\n", + i, vq->vring.desc[i].addr, vq->vring.desc[i].len, + vq->vring.desc[i].flags, vq->vring.desc[i].next); + } + + printf("Avail ring dump:\n"); + printf("\tflags %u, idx %u\n", + vq->vring.avail->flags, vq->vring.avail->idx); + for (i = 0; i < vq->vring.num; i++) { + printf("\tavail[%u] = %u\n", + i, vq->vring.avail->ring[i]); + } + + printf("Used ring dump:\n"); + printf("\tflags %u, idx %u\n", + vq->vring.used->flags, vq->vring.used->idx); + for (i = 0; i < vq->vring.num; i++) { + printf("\tused[%u] = { %u, %u }\n", i, + vq->vring.used->ring[i].id, vq->vring.used->ring[i].len); + } +} diff --git a/include/virtio_ring.h b/include/virtio_ring.h new file mode 100644 index 0000000000..6fc0593b14 --- /dev/null +++ b/include/virtio_ring.h @@ -0,0 +1,320 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (C) 2018, Tuomas Tynkkynen + * Copyright (C) 2018, Bin Meng + * + * From Linux kernel include/uapi/linux/virtio_ring.h + */ + +#ifndef _LINUX_VIRTIO_RING_H +#define _LINUX_VIRTIO_RING_H + +#include + +/* This marks a buffer as continuing via the next field */ +#define VRING_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only) */ +#define VRING_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors */ +#define VRING_DESC_F_INDIRECT 4 + +/* + * The Host uses this in used->flags to advise the Guest: don't kick me when + * you add a buffer. It's unreliable, so it's simply an optimization. Guest + * will still kick if it's out of buffers. + */ +#define VRING_USED_F_NO_NOTIFY 1 + +/* + * The Guest uses this in avail->flags to advise the Host: don't interrupt me + * when you consume a buffer. It's unreliable, so it's simply an optimization. + */ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +/* We support indirect buffer descriptors */ +#define VIRTIO_RING_F_INDIRECT_DESC 28 + +/* + * The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. + * + * The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. + */ +#define VIRTIO_RING_F_EVENT_IDX 29 + +/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ +struct vring_desc { + /* Address (guest-physical) */ + __virtio64 addr; + /* Length */ + __virtio32 len; + /* The flags as indicated above */ + __virtio16 flags; + /* We chain unused descriptors via this, too */ + __virtio16 next; +}; + +struct vring_avail { + __virtio16 flags; + __virtio16 idx; + __virtio16 ring[]; +}; + +struct vring_used_elem { + /* Index of start of used descriptor chain */ + __virtio32 id; + /* Total length of the descriptor chain which was used (written to) */ + __virtio32 len; +}; + +struct vring_used { + __virtio16 flags; + __virtio16 idx; + struct vring_used_elem ring[]; +}; + +struct vring { + unsigned int num; + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; +}; + +/** + * virtqueue - a queue to register buffers for sending or receiving. + * + * @list: the chain of virtqueues for this device + * @vdev: the virtio device this queue was created for + * @index: the zero-based ordinal number for this queue + * @num_free: number of elements we expect to be able to fit + * @vring: actual memory layout for this queue + * @event: host publishes avail event idx + * @free_head: head of free buffer list + * @num_added: number we've added since last sync + * @last_used_idx: last used index we've seen + * @avail_flags_shadow: last written value to avail->flags + * @avail_idx_shadow: last written value to avail->idx in guest byte order + */ +struct virtqueue { + struct list_head list; + struct udevice *vdev; + unsigned int index; + unsigned int num_free; + struct vring vring; + bool event; + unsigned int free_head; + unsigned int num_added; + u16 last_used_idx; + u16 avail_flags_shadow; + u16 avail_idx_shadow; +}; + +/* + * Alignment requirements for vring elements. + * When using pre-virtio 1.0 layout, these fall out naturally. + */ +#define VRING_AVAIL_ALIGN_SIZE 2 +#define VRING_USED_ALIGN_SIZE 4 +#define VRING_DESC_ALIGN_SIZE 16 + +/* + * We publish the used event index at the end of the available ring, + * and vice versa. They are at the end for backwards compatibility. + */ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(__virtio16 *)&(vr)->used->ring[(vr)->num]) + +static inline void vring_init(struct vring *vr, unsigned int num, void *p, + unsigned long align) +{ + vr->num = num; + vr->desc = p; + vr->avail = p + num * sizeof(struct vring_desc); + vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + + sizeof(__virtio16) + align - 1) & ~(align - 1)); +} + +static inline unsigned int vring_size(unsigned int num, unsigned long align) +{ + return ((sizeof(struct vring_desc) * num + + sizeof(__virtio16) * (3 + num) + align - 1) & ~(align - 1)) + + sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num; +} + +/* + * The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX. + * Assuming a given event_idx value from the other side, if we have just + * incremented index from old to new_idx, should we trigger an event? + */ +static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) +{ + /* + * Note: Xen has similar logic for notification hold-off + * in include/xen/interface/io/ring.h with req_event and req_prod + * corresponding to event_idx + 1 and new_idx respectively. + * Note also that req_event and req_prod in Xen start at 1, + * event indexes in virtio start at 0. + */ + return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); +} + +struct virtio_sg; + +/** + * virtqueue_add - expose buffers to other end + * + * @vq: the struct virtqueue we're talking about + * @sgs: array of terminated scatterlists + * @out_sgs: the number of scatterlists readable by other side + * @in_sgs: the number of scatterlists which are writable + * (after readable ones) + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add(struct virtqueue *vq, struct virtio_sg *sgs[], + unsigned int out_sgs, unsigned int in_sgs); + +/** + * virtqueue_kick - update after add_buf + * + * @vq: the struct virtqueue + * + * After one or more virtqueue_add() calls, invoke this to kick + * the other side. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +void virtqueue_kick(struct virtqueue *vq); + +/** + * virtqueue_get_buf - get the next used buffer + * + * @vq: the struct virtqueue we're talking about + * @len: the length written into the buffer + * + * If the device wrote data into the buffer, @len will be set to the + * amount written. This means you don't need to clear the buffer + * beforehand to ensure there's no data leakage in the case of short + * writes. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + * + * Returns NULL if there are no used buffers, or the memory buffer + * handed to virtqueue_add_*(). + */ +void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); + +/** + * vring_create_virtqueue - create a virtqueue for a virtio device + * + * @index: the index of the queue + * @num: number of elements of the queue + * @vring_align:the alignment requirement of the descriptor ring + * @udev: the virtio transport udevice + * @return: the virtqueue pointer or NULL if failed + * + * This creates a virtqueue and allocates the descriptor ring for a virtio + * device. The caller should query virtqueue_get_ring_size() to learn the + * actual size of the ring. + * + * This API is supposed to be called by the virtio transport driver in the + * virtio find_vqs() uclass method. + */ +struct virtqueue *vring_create_virtqueue(unsigned int index, unsigned int num, + unsigned int vring_align, + struct udevice *udev); + +/** + * vring_del_virtqueue - destroy a virtqueue + * + * @vq: the struct virtqueue we're talking about + * + * This destroys a virtqueue. If created with vring_create_virtqueue(), + * this also frees the descriptor ring. + * + * This API is supposed to be called by the virtio transport driver in the + * virtio del_vqs() uclass method. + */ +void vring_del_virtqueue(struct virtqueue *vq); + +/** + * virtqueue_get_vring_size - get the size of the virtqueue's vring + * + * @vq: the struct virtqueue containing the vring of interest + * @return: the size of the vring in a virtqueue. + */ +unsigned int virtqueue_get_vring_size(struct virtqueue *vq); + +/** + * virtqueue_get_desc_addr - get the vring descriptor table address + * + * @vq: the struct virtqueue containing the vring of interest + * @return: the descriptor table address of the vring in a virtqueue. + */ +ulong virtqueue_get_desc_addr(struct virtqueue *vq); + +/** + * virtqueue_get_avail_addr - get the vring available ring address + * + * @vq: the struct virtqueue containing the vring of interest + * @return: the available ring address of the vring in a virtqueue. + */ +ulong virtqueue_get_avail_addr(struct virtqueue *vq); + +/** + * virtqueue_get_used_addr - get the vring used ring address + * + * @vq: the struct virtqueue containing the vring of interest + * @return: the used ring address of the vring in a virtqueue. + */ +ulong virtqueue_get_used_addr(struct virtqueue *vq); + +/** + * virtqueue_poll - query pending used buffers + * + * @vq: the struct virtqueue we're talking about + * @last_used_idx: virtqueue last used index + * + * Returns "true" if there are pending used buffers in the queue. + */ +bool virtqueue_poll(struct virtqueue *vq, u16 last_used_idx); + +/** + * virtqueue_dump - dump the virtqueue for debugging + * + * @vq: the struct virtqueue we're talking about + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + */ +void virtqueue_dump(struct virtqueue *vq); + +/* + * Barriers in virtio are tricky. Since we are not in a hyperviosr/guest + * scenario, having these as nops is enough to work as expected. + */ + +static inline void virtio_mb(void) +{ +} + +static inline void virtio_rmb(void) +{ +} + +static inline void virtio_wmb(void) +{ +} + +static inline void virtio_store_mb(__virtio16 *p, __virtio16 v) +{ + WRITE_ONCE(*p, v); +} + +#endif /* _LINUX_VIRTIO_RING_H */ -- 2.39.5