mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-11 04:18:39 +08:00
virtio: fixes, vdpa
Some bug fixes. The new vdpa subsystem with two first drivers. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> -----BEGIN PGP SIGNATURE----- iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAl6MS7wPHG1zdEByZWRo YXQuY29tAAoJECgfDbjSjVRpGp8H/2H49Gya1cfVbGU13qgmBSQqQXC8hS3iNLuG ltRgU+jafJT//kvkdm3/DUzfK3eRUWUfqZLKEbAQDtMY0OGHi/KGEBYVLDde7Zxt Lg4VnwBhkYDR/f01ZZDbHxzj9JAr83i28nILjLIqf3a1BX4zf203+ZE0/JM8a7wL dOPoH7NAfyz5ul2F67bR1IOF8vC6TidpavzR2+HC/MocHYXb6Bgfvt+i4EcrfuMf 9lnBfajgklKr9sNJniwvvR1pWVg+YyG3VeC6T8tIC/xzbCmIoNT+5b3q2XPSIHq1 EuQTeXH9CBFXS0qcFlq2ktR1xd1Lx95hKwZpqLwLFDmfgjhV2QU= =/84P -----END PGP SIGNATURE----- Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost Pull virtio updates from Michael Tsirkin: - Some bug fixes - The new vdpa subsystem with two first drivers * tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: virtio-balloon: Revert "virtio-balloon: Switch back to OOM handler for VIRTIO_BALLOON_F_DEFLATE_ON_OOM" vdpa: move to drivers/vdpa virtio: Intel IFC VF driver for VDPA vdpasim: vDPA device simulator vhost: introduce vDPA-based backend virtio: introduce a vDPA based transport vDPA: introduce vDPA bus vringh: IOTLB support vhost: factor out IOTLB vhost: allow per device message handler vhost: refine vhost and vringh kconfig virtio-balloon: Switch back to OOM handler for VIRTIO_BALLOON_F_DEFLATE_ON_OOM virtio-net: Introduce hash report feature virtio-net: Introduce RSS receive steering feature virtio-net: Introduce extended RSC feature tools/virtio: option to build an out of tree module
This commit is contained in:
commit
9bb715260e
@ -17870,10 +17870,12 @@ L: virtualization@lists.linux-foundation.org
|
||||
S: Maintained
|
||||
F: Documentation/devicetree/bindings/virtio/
|
||||
F: drivers/virtio/
|
||||
F: drivers/vdpa/
|
||||
F: tools/virtio/
|
||||
F: drivers/net/virtio_net.c
|
||||
F: drivers/block/virtio_blk.c
|
||||
F: include/linux/virtio*.h
|
||||
F: include/linux/vdpa.h
|
||||
F: include/uapi/linux/virtio_*.h
|
||||
F: drivers/crypto/virtio/
|
||||
F: mm/balloon_compaction.c
|
||||
@ -17941,6 +17943,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git
|
||||
S: Maintained
|
||||
F: drivers/vhost/
|
||||
F: include/uapi/linux/vhost.h
|
||||
F: include/linux/vhost_iotlb.h
|
||||
|
||||
VIRTIO INPUT DRIVER
|
||||
M: Gerd Hoffmann <kraxel@redhat.com>
|
||||
|
@ -64,6 +64,4 @@ config KVM_ARM_PMU
|
||||
config KVM_INDIRECT_VECTORS
|
||||
def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS)
|
||||
|
||||
source "drivers/vhost/Kconfig"
|
||||
|
||||
endif # VIRTUALIZATION
|
||||
|
@ -72,6 +72,4 @@ config KVM_MIPS_DEBUG_COP0_COUNTERS
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
source "drivers/vhost/Kconfig"
|
||||
|
||||
endif # VIRTUALIZATION
|
||||
|
@ -204,6 +204,4 @@ config KVM_XIVE
|
||||
default y
|
||||
depends on KVM_XICS && PPC_XIVE_NATIVE && KVM_BOOK3S_HV_POSSIBLE
|
||||
|
||||
source "drivers/vhost/Kconfig"
|
||||
|
||||
endif # VIRTUALIZATION
|
||||
|
@ -55,8 +55,4 @@ config KVM_S390_UCONTROL
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
|
||||
# the virtualization menu.
|
||||
source "drivers/vhost/Kconfig"
|
||||
|
||||
endif # VIRTUALIZATION
|
||||
|
@ -107,8 +107,4 @@ config KVM_MMU_AUDIT
|
||||
This option adds a R/W kVM module parameter 'mmu_audit', which allows
|
||||
auditing of KVM MMU events at runtime.
|
||||
|
||||
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
|
||||
# the virtualization menu.
|
||||
source "drivers/vhost/Kconfig"
|
||||
|
||||
endif # VIRTUALIZATION
|
||||
|
@ -138,6 +138,10 @@ source "drivers/virt/Kconfig"
|
||||
|
||||
source "drivers/virtio/Kconfig"
|
||||
|
||||
source "drivers/vdpa/Kconfig"
|
||||
|
||||
source "drivers/vhost/Kconfig"
|
||||
|
||||
source "drivers/hv/Kconfig"
|
||||
|
||||
source "drivers/xen/Kconfig"
|
||||
|
@ -42,6 +42,7 @@ obj-$(CONFIG_DMADEVICES) += dma/
|
||||
obj-y += soc/
|
||||
|
||||
obj-$(CONFIG_VIRTIO) += virtio/
|
||||
obj-$(CONFIG_VDPA) += vdpa/
|
||||
obj-$(CONFIG_XEN) += xen/
|
||||
|
||||
# regulators early, since some subsystems rely on them to initialize
|
||||
|
@ -133,8 +133,4 @@ config VOP
|
||||
OS and tools for MIC to use with this driver are available from
|
||||
<http://software.intel.com/en-us/mic-developer>.
|
||||
|
||||
if VOP
|
||||
source "drivers/vhost/Kconfig.vringh"
|
||||
endif
|
||||
|
||||
endmenu
|
||||
|
@ -58,8 +58,4 @@ config CAIF_VIRTIO
|
||||
---help---
|
||||
The CAIF driver for CAIF over Virtio.
|
||||
|
||||
if CAIF_VIRTIO
|
||||
source "drivers/vhost/Kconfig.vringh"
|
||||
endif
|
||||
|
||||
endif # CAIF_DRIVERS
|
||||
|
37
drivers/vdpa/Kconfig
Normal file
37
drivers/vdpa/Kconfig
Normal file
@ -0,0 +1,37 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config VDPA
|
||||
tristate
|
||||
help
|
||||
Enable this module to support vDPA devices that use a
|
||||
datapath which complies with virtio specifications with
|
||||
vendor specific control path.
|
||||
|
||||
menuconfig VDPA_MENU
|
||||
bool "VDPA drivers"
|
||||
default n
|
||||
|
||||
if VDPA_MENU
|
||||
|
||||
config VDPA_SIM
|
||||
tristate "vDPA device simulator"
|
||||
depends on RUNTIME_TESTING_MENU
|
||||
select VDPA
|
||||
select VHOST_RING
|
||||
default n
|
||||
help
|
||||
vDPA networking device simulator which loops TX traffic back
|
||||
to RX. This device is used for testing, prototyping and
|
||||
development of vDPA.
|
||||
|
||||
config IFCVF
|
||||
tristate "Intel IFC VF VDPA driver"
|
||||
depends on PCI_MSI
|
||||
select VDPA
|
||||
default n
|
||||
help
|
||||
This kernel module can drive Intel IFC VF NIC to offload
|
||||
virtio dataplane traffic to hardware.
|
||||
To compile this driver as a module, choose M here: the module will
|
||||
be called ifcvf.
|
||||
|
||||
endif # VDPA_MENU
|
4
drivers/vdpa/Makefile
Normal file
4
drivers/vdpa/Makefile
Normal file
@ -0,0 +1,4 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
obj-$(CONFIG_VDPA) += vdpa.o
|
||||
obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
|
||||
obj-$(CONFIG_IFCVF) += ifcvf/
|
3
drivers/vdpa/ifcvf/Makefile
Normal file
3
drivers/vdpa/ifcvf/Makefile
Normal file
@ -0,0 +1,3 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
obj-$(CONFIG_IFCVF) += ifcvf.o
|
||||
ifcvf-$(CONFIG_IFCVF) += ifcvf_main.o ifcvf_base.o
|
389
drivers/vdpa/ifcvf/ifcvf_base.c
Normal file
389
drivers/vdpa/ifcvf/ifcvf_base.c
Normal file
@ -0,0 +1,389 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Intel IFC VF NIC driver for virtio dataplane offloading
|
||||
*
|
||||
* Copyright (C) 2020 Intel Corporation.
|
||||
*
|
||||
* Author: Zhu Lingshan <lingshan.zhu@intel.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#include "ifcvf_base.h"
|
||||
|
||||
/* Read a single byte from the device register at @addr. */
static inline u8 ifc_ioread8(u8 __iomem *addr)
{
	u8 val = ioread8(addr);

	return val;
}
|
||||
/* Read a 16-bit value from the device register at @addr. */
static inline u16 ifc_ioread16(__le16 __iomem *addr)
{
	u16 val = ioread16(addr);

	return val;
}
|
||||
|
||||
/* Read a 32-bit value from the device register at @addr. */
static inline u32 ifc_ioread32(__le32 __iomem *addr)
{
	u32 val = ioread32(addr);

	return val;
}
|
||||
|
||||
/* Write the byte @value to the device register at @addr. */
static inline void ifc_iowrite8(u8 value, u8 __iomem *addr)
{
	iowrite8(value, addr);
}
|
||||
|
||||
/* Write the 16-bit @value to the device register at @addr. */
static inline void ifc_iowrite16(u16 value, __le16 __iomem *addr)
{
	iowrite16(value, addr);
}
|
||||
|
||||
/* Write the 32-bit @value to the device register at @addr. */
static inline void ifc_iowrite32(u32 value, __le32 __iomem *addr)
{
	iowrite32(value, addr);
}
|
||||
|
||||
/*
 * Write a 64-bit value as two 32-bit register writes: the low half goes
 * to @lo first, then the high half goes to @hi.
 */
static void ifc_iowrite64_twopart(u64 val,
				  __le32 __iomem *lo, __le32 __iomem *hi)
{
	u32 low_half = (u32)val;
	u32 high_half = val >> 32;

	ifc_iowrite32(low_half, lo);
	ifc_iowrite32(high_half, hi);
}
|
||||
|
||||
struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw)
|
||||
{
|
||||
return container_of(hw, struct ifcvf_adapter, vf);
|
||||
}
|
||||
|
||||
/*
 * Translate a virtio PCI capability into an address inside the mapped BARs.
 *
 * @hw:  device state; hw->base[] is indexed by BAR number
 * @cap: capability as read from PCI config space (offset and length are
 *       stored little-endian)
 *
 * Returns hw->base[bar] + offset, or NULL when the BAR number is out of
 * range, the BAR has no mapping in hw->base[], or the window described by
 * the capability does not fit inside the BAR.
 */
static void __iomem *get_cap_addr(struct ifcvf_hw *hw,
				  struct virtio_pci_cap *cap)
{
	struct ifcvf_adapter *ifcvf;
	struct pci_dev *pdev;
	u32 length, offset;
	u8 bar;

	length = le32_to_cpu(cap->length);
	offset = le32_to_cpu(cap->offset);
	bar = cap->bar;

	ifcvf = vf_to_adapter(hw);
	pdev = ifcvf->pdev;

	if (bar >= IFCVF_PCI_MAX_RESOURCE) {
		IFCVF_DBG(pdev,
			  "Invalid bar number %u to get capabilities\n", bar);
		return NULL;
	}

	/* An unmapped BAR would otherwise produce a bogus NULL + offset. */
	if (!hw->base[bar]) {
		IFCVF_DBG(pdev, "bar%u is not mapped\n", bar);
		return NULL;
	}

	/* Widen to 64 bits so that offset + length cannot wrap around. */
	if ((u64)offset + length > pci_resource_len(pdev, bar)) {
		IFCVF_DBG(pdev,
			  "offset(%u) + len(%u) overflows bar%u's capability\n",
			  offset, length, bar);
		return NULL;
	}

	return hw->base[bar] + offset;
}
|
||||
|
||||
static int ifcvf_read_config_range(struct pci_dev *dev,
|
||||
uint32_t *val, int size, int where)
|
||||
{
|
||||
int ret, i;
|
||||
|
||||
for (i = 0; i < size; i += 4) {
|
||||
ret = pci_read_config_dword(dev, where + i, val + i / 4);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
|
||||
{
|
||||
struct virtio_pci_cap cap;
|
||||
u16 notify_off;
|
||||
int ret;
|
||||
u8 pos;
|
||||
u32 i;
|
||||
|
||||
ret = pci_read_config_byte(pdev, PCI_CAPABILITY_LIST, &pos);
|
||||
if (ret < 0) {
|
||||
IFCVF_ERR(pdev, "Failed to read PCI capability list\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
while (pos) {
|
||||
ret = ifcvf_read_config_range(pdev, (u32 *)&cap,
|
||||
sizeof(cap), pos);
|
||||
if (ret < 0) {
|
||||
IFCVF_ERR(pdev,
|
||||
"Failed to get PCI capability at %x\n", pos);
|
||||
break;
|
||||
}
|
||||
|
||||
if (cap.cap_vndr != PCI_CAP_ID_VNDR)
|
||||
goto next;
|
||||
|
||||
switch (cap.cfg_type) {
|
||||
case VIRTIO_PCI_CAP_COMMON_CFG:
|
||||
hw->common_cfg = get_cap_addr(hw, &cap);
|
||||
IFCVF_DBG(pdev, "hw->common_cfg = %p\n",
|
||||
hw->common_cfg);
|
||||
break;
|
||||
case VIRTIO_PCI_CAP_NOTIFY_CFG:
|
||||
pci_read_config_dword(pdev, pos + sizeof(cap),
|
||||
&hw->notify_off_multiplier);
|
||||
hw->notify_bar = cap.bar;
|
||||
hw->notify_base = get_cap_addr(hw, &cap);
|
||||
IFCVF_DBG(pdev, "hw->notify_base = %p\n",
|
||||
hw->notify_base);
|
||||
break;
|
||||
case VIRTIO_PCI_CAP_ISR_CFG:
|
||||
hw->isr = get_cap_addr(hw, &cap);
|
||||
IFCVF_DBG(pdev, "hw->isr = %p\n", hw->isr);
|
||||
break;
|
||||
case VIRTIO_PCI_CAP_DEVICE_CFG:
|
||||
hw->net_cfg = get_cap_addr(hw, &cap);
|
||||
IFCVF_DBG(pdev, "hw->net_cfg = %p\n", hw->net_cfg);
|
||||
break;
|
||||
}
|
||||
|
||||
next:
|
||||
pos = cap.cap_next;
|
||||
}
|
||||
|
||||
if (hw->common_cfg == NULL || hw->notify_base == NULL ||
|
||||
hw->isr == NULL || hw->net_cfg == NULL) {
|
||||
IFCVF_ERR(pdev, "Incomplete PCI capabilities\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
|
||||
ifc_iowrite16(i, &hw->common_cfg->queue_select);
|
||||
notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off);
|
||||
hw->vring[i].notify_addr = hw->notify_base +
|
||||
notify_off * hw->notify_off_multiplier;
|
||||
}
|
||||
|
||||
hw->lm_cfg = hw->base[IFCVF_LM_BAR];
|
||||
|
||||
IFCVF_DBG(pdev,
|
||||
"PCI capability mapping: common cfg: %p, notify base: %p\n, isr cfg: %p, device cfg: %p, multiplier: %u\n",
|
||||
hw->common_cfg, hw->notify_base, hw->isr,
|
||||
hw->net_cfg, hw->notify_off_multiplier);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
u8 ifcvf_get_status(struct ifcvf_hw *hw)
|
||||
{
|
||||
return ifc_ioread8(&hw->common_cfg->device_status);
|
||||
}
|
||||
|
||||
/* Write @status to the device_status register of the common config. */
void ifcvf_set_status(struct ifcvf_hw *hw, u8 status)
{
	struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;

	ifc_iowrite8(status, &cfg->device_status);
}
|
||||
|
||||
/* Reset the device by writing a status of 0. */
void ifcvf_reset(struct ifcvf_hw *hw)
{
	ifcvf_set_status(hw, 0);

	/* Read the status back to flush the write: VF is stopped and reset. */
	(void)ifcvf_get_status(hw);
}
|
||||
|
||||
/*
 * OR @status into the current device status. A @status of 0 is written
 * as-is, without reading the current value first. The status register is
 * read back after the write.
 */
static void ifcvf_add_status(struct ifcvf_hw *hw, u8 status)
{
	u8 new_status = status;

	if (new_status)
		new_status |= ifcvf_get_status(hw);

	ifcvf_set_status(hw, new_status);
	(void)ifcvf_get_status(hw);
}
|
||||
|
||||
u64 ifcvf_get_features(struct ifcvf_hw *hw)
|
||||
{
|
||||
struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
|
||||
u32 features_lo, features_hi;
|
||||
|
||||
ifc_iowrite32(0, &cfg->device_feature_select);
|
||||
features_lo = ifc_ioread32(&cfg->device_feature);
|
||||
|
||||
ifc_iowrite32(1, &cfg->device_feature_select);
|
||||
features_hi = ifc_ioread32(&cfg->device_feature);
|
||||
|
||||
return ((u64)features_hi << 32) | features_lo;
|
||||
}
|
||||
|
||||
/*
 * Copy @length bytes of the device config space, starting at @offset,
 * into @dst. The copy is repeated until config_generation reads the same
 * before and after it. Warns if the range exceeds struct
 * virtio_net_config.
 */
void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset,
			   void *dst, int length)
{
	u8 gen_before, gen_after;
	u8 *out;
	int idx;

	WARN_ON(offset + length > sizeof(struct virtio_net_config));
	do {
		gen_before = ifc_ioread8(&hw->common_cfg->config_generation);

		out = dst;
		for (idx = 0; idx < length; idx++)
			out[idx] = ifc_ioread8(hw->net_cfg + offset + idx);

		gen_after = ifc_ioread8(&hw->common_cfg->config_generation);
	} while (gen_before != gen_after);
}
|
||||
|
||||
/*
 * Write @length bytes from @src into the device config space starting at
 * @offset, one byte at a time. Warns if the range exceeds struct
 * virtio_net_config.
 */
void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset,
			    const void *src, int length)
{
	const u8 *bytes = src;
	int idx;

	WARN_ON(offset + length > sizeof(struct virtio_net_config));
	for (idx = 0; idx < length; idx++)
		ifc_iowrite8(bytes[idx], hw->net_cfg + offset + idx);
}
|
||||
|
||||
/*
 * Program the 64-bit driver feature set, 32 bits at a time: selector 0
 * receives the low word, selector 1 the high word.
 */
static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features)
{
	struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
	u32 low_word = (u32)features;
	u32 high_word = features >> 32;

	ifc_iowrite32(0, &cfg->guest_feature_select);
	ifc_iowrite32(low_word, &cfg->guest_feature);

	ifc_iowrite32(1, &cfg->guest_feature_select);
	ifc_iowrite32(high_word, &cfg->guest_feature);
}
|
||||
|
||||
static int ifcvf_config_features(struct ifcvf_hw *hw)
|
||||
{
|
||||
struct ifcvf_adapter *ifcvf;
|
||||
|
||||
ifcvf = vf_to_adapter(hw);
|
||||
ifcvf_set_features(hw, hw->req_features);
|
||||
ifcvf_add_status(hw, VIRTIO_CONFIG_S_FEATURES_OK);
|
||||
|
||||
if (!(ifcvf_get_status(hw) & VIRTIO_CONFIG_S_FEATURES_OK)) {
|
||||
IFCVF_ERR(ifcvf->pdev, "Failed to set FEATURES_OK status\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Read the index stored for virtqueue @qid in the live-migration config
 * area.
 *
 * Each vring_lm_cfg[] entry carries two idx_addr slots, so the entry
 * index is qid / 2 and the slot within it is qid % 2. Dividing by the
 * total number of vrings, as was done before, only gives the same result
 * while there is a single queue pair.
 */
u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid)
{
	struct ifcvf_lm_cfg __iomem *ifcvf_lm;
	void __iomem *avail_idx_addr;
	u16 last_avail_idx;
	u32 q_pair_id;

	ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
	q_pair_id = qid / 2;
	avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
	last_avail_idx = ifc_ioread16(avail_idx_addr);

	return last_avail_idx;
}
|
||||
|
||||
/*
 * Record @num as the last available index of virtqueue @qid, both in the
 * software vring state and in the live-migration config area.
 *
 * Each vring_lm_cfg[] entry carries two idx_addr slots, so the entry
 * index is qid / 2 and the slot within it is qid % 2. Dividing by the
 * total number of vrings, as was done before, only gives the same result
 * while there is a single queue pair.
 *
 * Always returns 0.
 */
int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num)
{
	struct ifcvf_lm_cfg __iomem *ifcvf_lm;
	void __iomem *avail_idx_addr;
	u32 q_pair_id;

	ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
	q_pair_id = qid / 2;
	avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
	hw->vring[qid].last_avail_idx = num;
	ifc_iowrite16(num, avail_idx_addr);

	return 0;
}
|
||||
|
||||
static int ifcvf_hw_enable(struct ifcvf_hw *hw)
|
||||
{
|
||||
struct ifcvf_lm_cfg __iomem *ifcvf_lm;
|
||||
struct virtio_pci_common_cfg __iomem *cfg;
|
||||
struct ifcvf_adapter *ifcvf;
|
||||
u32 i;
|
||||
|
||||
ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
|
||||
ifcvf = vf_to_adapter(hw);
|
||||
cfg = hw->common_cfg;
|
||||
ifc_iowrite16(IFCVF_MSI_CONFIG_OFF, &cfg->msix_config);
|
||||
|
||||
if (ifc_ioread16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) {
|
||||
IFCVF_ERR(ifcvf->pdev, "No msix vector for device config\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
for (i = 0; i < hw->nr_vring; i++) {
|
||||
if (!hw->vring[i].ready)
|
||||
break;
|
||||
|
||||
ifc_iowrite16(i, &cfg->queue_select);
|
||||
ifc_iowrite64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
|
||||
&cfg->queue_desc_hi);
|
||||
ifc_iowrite64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo,
|
||||
&cfg->queue_avail_hi);
|
||||
ifc_iowrite64_twopart(hw->vring[i].used, &cfg->queue_used_lo,
|
||||
&cfg->queue_used_hi);
|
||||
ifc_iowrite16(hw->vring[i].size, &cfg->queue_size);
|
||||
ifc_iowrite16(i + IFCVF_MSI_QUEUE_OFF, &cfg->queue_msix_vector);
|
||||
|
||||
if (ifc_ioread16(&cfg->queue_msix_vector) ==
|
||||
VIRTIO_MSI_NO_VECTOR) {
|
||||
IFCVF_ERR(ifcvf->pdev,
|
||||
"No msix vector for queue %u\n", i);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ifcvf_set_vq_state(hw, i, hw->vring[i].last_avail_idx);
|
||||
ifc_iowrite16(1, &cfg->queue_enable);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ifcvf_hw_disable(struct ifcvf_hw *hw)
|
||||
{
|
||||
struct virtio_pci_common_cfg __iomem *cfg;
|
||||
u32 i;
|
||||
|
||||
cfg = hw->common_cfg;
|
||||
ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->msix_config);
|
||||
|
||||
for (i = 0; i < hw->nr_vring; i++) {
|
||||
ifc_iowrite16(i, &cfg->queue_select);
|
||||
ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->queue_msix_vector);
|
||||
}
|
||||
|
||||
ifc_ioread16(&cfg->queue_msix_vector);
|
||||
}
|
||||
|
||||
int ifcvf_start_hw(struct ifcvf_hw *hw)
|
||||
{
|
||||
ifcvf_reset(hw);
|
||||
ifcvf_add_status(hw, VIRTIO_CONFIG_S_ACKNOWLEDGE);
|
||||
ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER);
|
||||
|
||||
if (ifcvf_config_features(hw) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (ifcvf_hw_enable(hw) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER_OK);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Stop the device: detach its MSI-X vectors first, then reset it. */
void ifcvf_stop_hw(struct ifcvf_hw *hw)
{
	ifcvf_hw_disable(hw);
	ifcvf_reset(hw);
}
|
||||
|
||||
/* Kick virtqueue @qid by writing its index to the queue's notify address. */
void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid)
{
	void __iomem *doorbell = hw->vring[qid].notify_addr;

	ifc_iowrite16(qid, doorbell);
}
|
118
drivers/vdpa/ifcvf/ifcvf_base.h
Normal file
118
drivers/vdpa/ifcvf/ifcvf_base.h
Normal file
@ -0,0 +1,118 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Intel IFC VF NIC driver for virtio dataplane offloading
|
||||
*
|
||||
* Copyright (C) 2020 Intel Corporation.
|
||||
*
|
||||
* Author: Zhu Lingshan <lingshan.zhu@intel.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _IFCVF_H_
|
||||
#define _IFCVF_H_
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/pci_regs.h>
|
||||
#include <linux/vdpa.h>
|
||||
#include <uapi/linux/virtio_net.h>
|
||||
#include <uapi/linux/virtio_config.h>
|
||||
#include <uapi/linux/virtio_pci.h>
|
||||
|
||||
#define IFCVF_VENDOR_ID 0x1AF4
|
||||
#define IFCVF_DEVICE_ID 0x1041
|
||||
#define IFCVF_SUBSYS_VENDOR_ID 0x8086
|
||||
#define IFCVF_SUBSYS_DEVICE_ID 0x001A
|
||||
|
||||
#define IFCVF_SUPPORTED_FEATURES \
|
||||
((1ULL << VIRTIO_NET_F_MAC) | \
|
||||
(1ULL << VIRTIO_F_ANY_LAYOUT) | \
|
||||
(1ULL << VIRTIO_F_VERSION_1) | \
|
||||
(1ULL << VIRTIO_F_ORDER_PLATFORM) | \
|
||||
(1ULL << VIRTIO_F_IOMMU_PLATFORM) | \
|
||||
(1ULL << VIRTIO_NET_F_MRG_RXBUF))
|
||||
|
||||
/* Only one queue pair for now. */
|
||||
#define IFCVF_MAX_QUEUE_PAIRS 1
|
||||
|
||||
#define IFCVF_QUEUE_ALIGNMENT PAGE_SIZE
|
||||
#define IFCVF_QUEUE_MAX 32768
|
||||
#define IFCVF_MSI_CONFIG_OFF 0
|
||||
#define IFCVF_MSI_QUEUE_OFF 1
|
||||
#define IFCVF_PCI_MAX_RESOURCE 6
|
||||
|
||||
#define IFCVF_LM_CFG_SIZE 0x40
|
||||
#define IFCVF_LM_RING_STATE_OFFSET 0x20
|
||||
#define IFCVF_LM_BAR 4
|
||||
|
||||
#define IFCVF_ERR(pdev, fmt, ...) dev_err(&pdev->dev, fmt, ##__VA_ARGS__)
|
||||
#define IFCVF_DBG(pdev, fmt, ...) dev_dbg(&pdev->dev, fmt, ##__VA_ARGS__)
|
||||
#define IFCVF_INFO(pdev, fmt, ...) dev_info(&pdev->dev, fmt, ##__VA_ARGS__)
|
||||
|
||||
#define ifcvf_private_to_vf(adapter) \
|
||||
(&((struct ifcvf_adapter *)adapter)->vf)
|
||||
|
||||
#define IFCVF_MAX_INTR (IFCVF_MAX_QUEUE_PAIRS * 2 + 1)
|
||||
|
||||
struct vring_info {
|
||||
u64 desc;
|
||||
u64 avail;
|
||||
u64 used;
|
||||
u16 size;
|
||||
u16 last_avail_idx;
|
||||
bool ready;
|
||||
void __iomem *notify_addr;
|
||||
u32 irq;
|
||||
struct vdpa_callback cb;
|
||||
char msix_name[256];
|
||||
};
|
||||
|
||||
struct ifcvf_hw {
|
||||
u8 __iomem *isr;
|
||||
/* Live migration */
|
||||
u8 __iomem *lm_cfg;
|
||||
u16 nr_vring;
|
||||
/* Notification bar number */
|
||||
u8 notify_bar;
|
||||
/* Notificaiton bar address */
|
||||
void __iomem *notify_base;
|
||||
u32 notify_off_multiplier;
|
||||
u64 req_features;
|
||||
struct virtio_pci_common_cfg __iomem *common_cfg;
|
||||
void __iomem *net_cfg;
|
||||
struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2];
|
||||
void __iomem * const *base;
|
||||
};
|
||||
|
||||
struct ifcvf_adapter {
|
||||
struct vdpa_device vdpa;
|
||||
struct pci_dev *pdev;
|
||||
struct ifcvf_hw vf;
|
||||
};
|
||||
|
||||
struct ifcvf_vring_lm_cfg {
|
||||
u32 idx_addr[2];
|
||||
u8 reserved[IFCVF_LM_CFG_SIZE - 8];
|
||||
};
|
||||
|
||||
struct ifcvf_lm_cfg {
|
||||
u8 reserved[IFCVF_LM_RING_STATE_OFFSET];
|
||||
struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUE_PAIRS];
|
||||
};
|
||||
|
||||
int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev);
|
||||
int ifcvf_start_hw(struct ifcvf_hw *hw);
|
||||
void ifcvf_stop_hw(struct ifcvf_hw *hw);
|
||||
void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid);
|
||||
void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset,
|
||||
void *dst, int length);
|
||||
void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset,
|
||||
const void *src, int length);
|
||||
u8 ifcvf_get_status(struct ifcvf_hw *hw);
|
||||
void ifcvf_set_status(struct ifcvf_hw *hw, u8 status);
|
||||
void io_write64_twopart(u64 val, u32 *lo, u32 *hi);
|
||||
void ifcvf_reset(struct ifcvf_hw *hw);
|
||||
u64 ifcvf_get_features(struct ifcvf_hw *hw);
|
||||
u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid);
|
||||
int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num);
|
||||
struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw);
|
||||
#endif /* _IFCVF_H_ */
|
435
drivers/vdpa/ifcvf/ifcvf_main.c
Normal file
435
drivers/vdpa/ifcvf/ifcvf_main.c
Normal file
@ -0,0 +1,435 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Intel IFC VF NIC driver for virtio dataplane offloading
|
||||
*
|
||||
* Copyright (C) 2020 Intel Corporation.
|
||||
*
|
||||
* Author: Zhu Lingshan <lingshan.zhu@intel.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include "ifcvf_base.h"
|
||||
|
||||
#define VERSION_STRING "0.1"
|
||||
#define DRIVER_AUTHOR "Intel Corporation"
|
||||
#define IFCVF_DRIVER_NAME "ifcvf"
|
||||
|
||||
static irqreturn_t ifcvf_intr_handler(int irq, void *arg)
|
||||
{
|
||||
struct vring_info *vring = arg;
|
||||
|
||||
if (vring->cb.callback)
|
||||
return vring->cb.callback(vring->cb.private);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static int ifcvf_start_datapath(void *private)
|
||||
{
|
||||
struct ifcvf_hw *vf = ifcvf_private_to_vf(private);
|
||||
struct ifcvf_adapter *ifcvf;
|
||||
u8 status;
|
||||
int ret;
|
||||
|
||||
ifcvf = vf_to_adapter(vf);
|
||||
vf->nr_vring = IFCVF_MAX_QUEUE_PAIRS * 2;
|
||||
ret = ifcvf_start_hw(vf);
|
||||
if (ret < 0) {
|
||||
status = ifcvf_get_status(vf);
|
||||
status |= VIRTIO_CONFIG_S_FAILED;
|
||||
ifcvf_set_status(vf, status);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ifcvf_stop_datapath(void *private)
|
||||
{
|
||||
struct ifcvf_hw *vf = ifcvf_private_to_vf(private);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++)
|
||||
vf->vring[i].cb.callback = NULL;
|
||||
|
||||
ifcvf_stop_hw(vf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
|
||||
{
|
||||
struct ifcvf_hw *vf = ifcvf_private_to_vf(adapter);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
|
||||
vf->vring[i].last_avail_idx = 0;
|
||||
vf->vring[i].desc = 0;
|
||||
vf->vring[i].avail = 0;
|
||||
vf->vring[i].used = 0;
|
||||
vf->vring[i].ready = 0;
|
||||
vf->vring[i].cb.callback = NULL;
|
||||
vf->vring[i].cb.private = NULL;
|
||||
}
|
||||
|
||||
ifcvf_reset(vf);
|
||||
}
|
||||
|
||||
static struct ifcvf_adapter *vdpa_to_adapter(struct vdpa_device *vdpa_dev)
|
||||
{
|
||||
return container_of(vdpa_dev, struct ifcvf_adapter, vdpa);
|
||||
}
|
||||
|
||||
static struct ifcvf_hw *vdpa_to_vf(struct vdpa_device *vdpa_dev)
|
||||
{
|
||||
struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
|
||||
|
||||
return &adapter->vf;
|
||||
}
|
||||
|
||||
static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev)
|
||||
{
|
||||
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
|
||||
u64 features;
|
||||
|
||||
features = ifcvf_get_features(vf) & IFCVF_SUPPORTED_FEATURES;
|
||||
|
||||
return features;
|
||||
}
|
||||
|
||||
/*
 * vDPA set_features op: only stores @features in req_features; nothing
 * is written to the device here. Always returns 0.
 */
static int ifcvf_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features)
{
	vdpa_to_vf(vdpa_dev)->req_features = features;

	return 0;
}
|
||||
|
||||
/* vDPA get_status op: read the device status register. */
static u8 ifcvf_vdpa_get_status(struct vdpa_device *vdpa_dev)
{
	return ifcvf_get_status(vdpa_to_vf(vdpa_dev));
}
|
||||
|
||||
/*
 * vDPA set_status op.
 *
 * A status of 0 stops the datapath and resets all vring state. Otherwise,
 * when DRIVER_OK is requested the datapath is started first (a failure is
 * only logged), and @status is then written to the device.
 */
static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
{
	struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
	struct ifcvf_adapter *adapter = dev_get_drvdata(vdpa_dev->dev.parent);

	if (!status) {
		ifcvf_stop_datapath(adapter);
		ifcvf_reset_vring(adapter);
		return;
	}

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
	    ifcvf_start_datapath(adapter) < 0)
		IFCVF_ERR(adapter->pdev,
			  "Failed to set ifcvf vdpa status %u\n",
			  status);

	ifcvf_set_status(vf, status);
}
|
||||
|
||||
static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev)
|
||||
{
|
||||
return IFCVF_QUEUE_MAX;
|
||||
}
|
||||
|
||||
/* vDPA get_vq_state op: forwards to ifcvf_get_vq_state() for @qid. */
static u64 ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid)
{
	return ifcvf_get_vq_state(vdpa_to_vf(vdpa_dev), qid);
}
|
||||
|
||||
static int ifcvf_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid,
|
||||
u64 num)
|
||||
{
|
||||
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
|
||||
|
||||
return ifcvf_set_vq_state(vf, qid, num);
|
||||
}
|
||||
|
||||
/* vDPA set_vq_cb op: copy @cb into the state of virtqueue @qid. */
static void ifcvf_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid,
				 struct vdpa_callback *cb)
{
	struct vring_info *ring = &vdpa_to_vf(vdpa_dev)->vring[qid];

	ring->cb = *cb;
}
|
||||
|
||||
static void ifcvf_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev,
|
||||
u16 qid, bool ready)
|
||||
{
|
||||
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
|
||||
|
||||
vf->vring[qid].ready = ready;
|
||||
}
|
||||
|
||||
/* vDPA get_vq_ready op: return the recorded ready flag of virtqueue @qid. */
static bool ifcvf_vdpa_get_vq_ready(struct vdpa_device *vdpa_dev, u16 qid)
{
	struct vring_info *ring = &vdpa_to_vf(vdpa_dev)->vring[qid];

	return ring->ready;
}
|
||||
|
||||
static void ifcvf_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid,
|
||||
u32 num)
|
||||
{
|
||||
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
|
||||
|
||||
vf->vring[qid].size = num;
|
||||
}
|
||||
|
||||
/*
 * vDPA set_vq_address op: record the descriptor, driver (avail) and
 * device (used) area addresses of virtqueue @qid. Always returns 0.
 */
static int ifcvf_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid,
				     u64 desc_area, u64 driver_area,
				     u64 device_area)
{
	struct vring_info *ring = &vdpa_to_vf(vdpa_dev)->vring[qid];

	ring->desc = desc_area;
	ring->avail = driver_area;
	ring->used = device_area;

	return 0;
}
|
||||
|
||||
/* vDPA kick_vq op: notify the device about virtqueue @qid. */
static void ifcvf_vdpa_kick_vq(struct vdpa_device *vdpa_dev, u16 qid)
{
	ifcvf_notify_queue(vdpa_to_vf(vdpa_dev), qid);
}
|
||||
|
||||
static u32 ifcvf_vdpa_get_generation(struct vdpa_device *vdpa_dev)
|
||||
{
|
||||
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
|
||||
|
||||
return ioread8(&vf->common_cfg->config_generation);
|
||||
}
|
||||
|
||||
static u32 ifcvf_vdpa_get_device_id(struct vdpa_device *vdpa_dev)
|
||||
{
|
||||
return VIRTIO_ID_NET;
|
||||
}
|
||||
|
||||
static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev)
|
||||
{
|
||||
return IFCVF_SUBSYS_VENDOR_ID;
|
||||
}
|
||||
|
||||
static u16 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev)
|
||||
{
|
||||
return IFCVF_QUEUE_ALIGNMENT;
|
||||
}
|
||||
|
||||
static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev,
|
||||
unsigned int offset,
|
||||
void *buf, unsigned int len)
|
||||
{
|
||||
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
|
||||
|
||||
WARN_ON(offset + len > sizeof(struct virtio_net_config));
|
||||
ifcvf_read_net_config(vf, offset, buf, len);
|
||||
}
|
||||
|
||||
static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev,
|
||||
unsigned int offset, const void *buf,
|
||||
unsigned int len)
|
||||
{
|
||||
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
|
||||
|
||||
WARN_ON(offset + len > sizeof(struct virtio_net_config));
|
||||
ifcvf_write_net_config(vf, offset, buf, len);
|
||||
}
|
||||
|
||||
/* vDPA set_config_cb op: intentionally empty, @cb is ignored. */
static void ifcvf_vdpa_set_config_cb(struct vdpa_device *vdpa_dev,
				     struct vdpa_callback *cb)
{
	/* We don't support config interrupt */
}
|
||||
|
||||
/*
|
||||
 * IFCVF currently doesn't have on-chip IOMMU, so not
|
||||
* implemented set_map()/dma_map()/dma_unmap()
|
||||
*/
|
||||
static const struct vdpa_config_ops ifc_vdpa_ops = {
|
||||
.get_features = ifcvf_vdpa_get_features,
|
||||
.set_features = ifcvf_vdpa_set_features,
|
||||
.get_status = ifcvf_vdpa_get_status,
|
||||
.set_status = ifcvf_vdpa_set_status,
|
||||
.get_vq_num_max = ifcvf_vdpa_get_vq_num_max,
|
||||
.get_vq_state = ifcvf_vdpa_get_vq_state,
|
||||
.set_vq_state = ifcvf_vdpa_set_vq_state,
|
||||
.set_vq_cb = ifcvf_vdpa_set_vq_cb,
|
||||
.set_vq_ready = ifcvf_vdpa_set_vq_ready,
|
||||
.get_vq_ready = ifcvf_vdpa_get_vq_ready,
|
||||
.set_vq_num = ifcvf_vdpa_set_vq_num,
|
||||
.set_vq_address = ifcvf_vdpa_set_vq_address,
|
||||
.kick_vq = ifcvf_vdpa_kick_vq,
|
||||
.get_generation = ifcvf_vdpa_get_generation,
|
||||
.get_device_id = ifcvf_vdpa_get_device_id,
|
||||
.get_vendor_id = ifcvf_vdpa_get_vendor_id,
|
||||
.get_vq_align = ifcvf_vdpa_get_vq_align,
|
||||
.get_config = ifcvf_vdpa_get_config,
|
||||
.set_config = ifcvf_vdpa_set_config,
|
||||
.set_config_cb = ifcvf_vdpa_set_config_cb,
|
||||
};
|
||||
|
||||
static int ifcvf_request_irq(struct ifcvf_adapter *adapter)
|
||||
{
|
||||
struct pci_dev *pdev = adapter->pdev;
|
||||
struct ifcvf_hw *vf = &adapter->vf;
|
||||
int vector, i, ret, irq;
|
||||
|
||||
|
||||
for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
|
||||
snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n",
|
||||
pci_name(pdev), i);
|
||||
vector = i + IFCVF_MSI_QUEUE_OFF;
|
||||
irq = pci_irq_vector(pdev, vector);
|
||||
ret = devm_request_irq(&pdev->dev, irq,
|
||||
ifcvf_intr_handler, 0,
|
||||
vf->vring[i].msix_name,
|
||||
&vf->vring[i]);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev,
|
||||
"Failed to request irq for vq %d\n", i);
|
||||
return ret;
|
||||
}
|
||||
vf->vring[i].irq = irq;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Devres action: free the IRQ vectors of the PCI device passed as @data. */
static void ifcvf_free_irq_vectors(void *data)
{
	struct pci_dev *pdev = data;

	pci_free_irq_vectors(pdev);
}
|
||||
|
||||
static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
struct ifcvf_adapter *adapter;
|
||||
struct ifcvf_hw *vf;
|
||||
int ret;
|
||||
|
||||
ret = pcim_enable_device(pdev);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev, "Failed to enable device\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4),
|
||||
IFCVF_DRIVER_NAME);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev, "Failed to request MMIO region\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev, "No usable DMA confiugration\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev,
|
||||
"No usable coherent DMA confiugration\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR,
|
||||
IFCVF_MAX_INTR, PCI_IRQ_MSIX);
|
||||
if (ret < 0) {
|
||||
IFCVF_ERR(pdev, "Failed to alloc irq vectors\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev,
|
||||
"Failed for adding devres for freeing irq vectors\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
|
||||
dev, &ifc_vdpa_ops);
|
||||
if (adapter == NULL) {
|
||||
IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
pci_set_master(pdev);
|
||||
pci_set_drvdata(pdev, adapter);
|
||||
|
||||
vf = &adapter->vf;
|
||||
vf->base = pcim_iomap_table(pdev);
|
||||
|
||||
adapter->pdev = pdev;
|
||||
adapter->vdpa.dma_dev = &pdev->dev;
|
||||
|
||||
ret = ifcvf_request_irq(adapter);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev, "Failed to request MSI-X irq\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = ifcvf_init_hw(vf, pdev);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev, "Failed to init IFCVF hw\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = vdpa_register_device(&adapter->vdpa);
|
||||
if (ret) {
|
||||
IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus");
|
||||
goto err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
put_device(&adapter->vdpa.dev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void ifcvf_remove(struct pci_dev *pdev)
|
||||
{
|
||||
struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
|
||||
|
||||
vdpa_unregister_device(&adapter->vdpa);
|
||||
}
|
||||
|
||||
static struct pci_device_id ifcvf_pci_ids[] = {
|
||||
{ PCI_DEVICE_SUB(IFCVF_VENDOR_ID,
|
||||
IFCVF_DEVICE_ID,
|
||||
IFCVF_SUBSYS_VENDOR_ID,
|
||||
IFCVF_SUBSYS_DEVICE_ID) },
|
||||
{ 0 },
|
||||
};
|
||||
MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids);
|
||||
|
||||
static struct pci_driver ifcvf_driver = {
|
||||
.name = IFCVF_DRIVER_NAME,
|
||||
.id_table = ifcvf_pci_ids,
|
||||
.probe = ifcvf_probe,
|
||||
.remove = ifcvf_remove,
|
||||
};
|
||||
|
||||
module_pci_driver(ifcvf_driver);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_VERSION(VERSION_STRING);
|
180
drivers/vdpa/vdpa.c
Normal file
180
drivers/vdpa/vdpa.c
Normal file
@ -0,0 +1,180 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* vDPA bus.
|
||||
*
|
||||
* Copyright (c) 2020, Red Hat. All rights reserved.
|
||||
* Author: Jason Wang <jasowang@redhat.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vdpa.h>
|
||||
|
||||
static DEFINE_IDA(vdpa_index_ida);
|
||||
|
||||
static int vdpa_dev_probe(struct device *d)
|
||||
{
|
||||
struct vdpa_device *vdev = dev_to_vdpa(d);
|
||||
struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver);
|
||||
int ret = 0;
|
||||
|
||||
if (drv && drv->probe)
|
||||
ret = drv->probe(vdev);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vdpa_dev_remove(struct device *d)
|
||||
{
|
||||
struct vdpa_device *vdev = dev_to_vdpa(d);
|
||||
struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver);
|
||||
|
||||
if (drv && drv->remove)
|
||||
drv->remove(vdev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct bus_type vdpa_bus = {
|
||||
.name = "vdpa",
|
||||
.probe = vdpa_dev_probe,
|
||||
.remove = vdpa_dev_remove,
|
||||
};
|
||||
|
||||
static void vdpa_release_dev(struct device *d)
|
||||
{
|
||||
struct vdpa_device *vdev = dev_to_vdpa(d);
|
||||
const struct vdpa_config_ops *ops = vdev->config;
|
||||
|
||||
if (ops->free)
|
||||
ops->free(vdev);
|
||||
|
||||
ida_simple_remove(&vdpa_index_ida, vdev->index);
|
||||
kfree(vdev);
|
||||
}
|
||||
|
||||
/**
|
||||
* __vdpa_alloc_device - allocate and initilaize a vDPA device
|
||||
* This allows driver to some prepartion after device is
|
||||
* initialized but before registered.
|
||||
* @parent: the parent device
|
||||
* @config: the bus operations that is supported by this device
|
||||
* @size: size of the parent structure that contains private data
|
||||
*
|
||||
* Drvier should use vdap_alloc_device() wrapper macro instead of
|
||||
* using this directly.
|
||||
*
|
||||
* Returns an error when parent/config/dma_dev is not set or fail to get
|
||||
* ida.
|
||||
*/
|
||||
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
|
||||
const struct vdpa_config_ops *config,
|
||||
size_t size)
|
||||
{
|
||||
struct vdpa_device *vdev;
|
||||
int err = -EINVAL;
|
||||
|
||||
if (!config)
|
||||
goto err;
|
||||
|
||||
if (!!config->dma_map != !!config->dma_unmap)
|
||||
goto err;
|
||||
|
||||
err = -ENOMEM;
|
||||
vdev = kzalloc(size, GFP_KERNEL);
|
||||
if (!vdev)
|
||||
goto err;
|
||||
|
||||
err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL);
|
||||
if (err < 0)
|
||||
goto err_ida;
|
||||
|
||||
vdev->dev.bus = &vdpa_bus;
|
||||
vdev->dev.parent = parent;
|
||||
vdev->dev.release = vdpa_release_dev;
|
||||
vdev->index = err;
|
||||
vdev->config = config;
|
||||
|
||||
err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index);
|
||||
if (err)
|
||||
goto err_name;
|
||||
|
||||
device_initialize(&vdev->dev);
|
||||
|
||||
return vdev;
|
||||
|
||||
err_name:
|
||||
ida_simple_remove(&vdpa_index_ida, vdev->index);
|
||||
err_ida:
|
||||
kfree(vdev);
|
||||
err:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__vdpa_alloc_device);
|
||||
|
||||
/**
|
||||
* vdpa_register_device - register a vDPA device
|
||||
* Callers must have a succeed call of vdpa_init_device() before.
|
||||
* @vdev: the vdpa device to be registered to vDPA bus
|
||||
*
|
||||
* Returns an error when fail to add to vDPA bus
|
||||
*/
|
||||
int vdpa_register_device(struct vdpa_device *vdev)
|
||||
{
|
||||
return device_add(&vdev->dev);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vdpa_register_device);
|
||||
|
||||
/**
|
||||
* vdpa_unregister_device - unregister a vDPA device
|
||||
 * @vdev: the vdpa device to be unregistered from vDPA bus
|
||||
*/
|
||||
void vdpa_unregister_device(struct vdpa_device *vdev)
|
||||
{
|
||||
device_unregister(&vdev->dev);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vdpa_unregister_device);
|
||||
|
||||
/**
|
||||
* __vdpa_register_driver - register a vDPA device driver
|
||||
* @drv: the vdpa device driver to be registered
|
||||
* @owner: module owner of the driver
|
||||
*
|
||||
* Returns an err when fail to do the registration
|
||||
*/
|
||||
int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner)
|
||||
{
|
||||
drv->driver.bus = &vdpa_bus;
|
||||
drv->driver.owner = owner;
|
||||
|
||||
return driver_register(&drv->driver);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__vdpa_register_driver);
|
||||
|
||||
/**
|
||||
* vdpa_unregister_driver - unregister a vDPA device driver
|
||||
* @drv: the vdpa device driver to be unregistered
|
||||
*/
|
||||
void vdpa_unregister_driver(struct vdpa_driver *drv)
|
||||
{
|
||||
driver_unregister(&drv->driver);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vdpa_unregister_driver);
|
||||
|
||||
static int vdpa_init(void)
|
||||
{
|
||||
return bus_register(&vdpa_bus);
|
||||
}
|
||||
|
||||
static void __exit vdpa_exit(void)
|
||||
{
|
||||
bus_unregister(&vdpa_bus);
|
||||
ida_destroy(&vdpa_index_ida);
|
||||
}
|
||||
core_initcall(vdpa_init);
|
||||
module_exit(vdpa_exit);
|
||||
|
||||
MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>");
|
||||
MODULE_LICENSE("GPL v2");
|
2
drivers/vdpa/vdpa_sim/Makefile
Normal file
2
drivers/vdpa/vdpa_sim/Makefile
Normal file
@ -0,0 +1,2 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o
|
629
drivers/vdpa/vdpa_sim/vdpa_sim.c
Normal file
629
drivers/vdpa/vdpa_sim/vdpa_sim.c
Normal file
@ -0,0 +1,629 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* VDPA networking device simulator.
|
||||
*
|
||||
* Copyright (c) 2020, Red Hat Inc. All rights reserved.
|
||||
* Author: Jason Wang <jasowang@redhat.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/poll.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/uuid.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/etherdevice.h>
|
||||
#include <linux/vringh.h>
|
||||
#include <linux/vdpa.h>
|
||||
#include <linux/vhost_iotlb.h>
|
||||
#include <uapi/linux/virtio_config.h>
|
||||
#include <uapi/linux/virtio_net.h>
|
||||
|
||||
#define DRV_VERSION "0.1"
|
||||
#define DRV_AUTHOR "Jason Wang <jasowang@redhat.com>"
|
||||
#define DRV_DESC "vDPA Device Simulator"
|
||||
#define DRV_LICENSE "GPL v2"
|
||||
|
||||
struct vdpasim_virtqueue {
|
||||
struct vringh vring;
|
||||
struct vringh_kiov iov;
|
||||
unsigned short head;
|
||||
bool ready;
|
||||
u64 desc_addr;
|
||||
u64 device_addr;
|
||||
u64 driver_addr;
|
||||
u32 num;
|
||||
void *private;
|
||||
irqreturn_t (*cb)(void *data);
|
||||
};
|
||||
|
||||
#define VDPASIM_QUEUE_ALIGN PAGE_SIZE
|
||||
#define VDPASIM_QUEUE_MAX 256
|
||||
#define VDPASIM_DEVICE_ID 0x1
|
||||
#define VDPASIM_VENDOR_ID 0
|
||||
#define VDPASIM_VQ_NUM 0x2
|
||||
#define VDPASIM_NAME "vdpasim-netdev"
|
||||
|
||||
static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) |
|
||||
(1ULL << VIRTIO_F_VERSION_1) |
|
||||
(1ULL << VIRTIO_F_IOMMU_PLATFORM);
|
||||
|
||||
/* State of each vdpasim device */
|
||||
struct vdpasim {
|
||||
struct vdpa_device vdpa;
|
||||
struct vdpasim_virtqueue vqs[2];
|
||||
struct work_struct work;
|
||||
/* spinlock to synchronize virtqueue state */
|
||||
spinlock_t lock;
|
||||
struct virtio_net_config config;
|
||||
struct vhost_iotlb *iommu;
|
||||
void *buffer;
|
||||
u32 status;
|
||||
u32 generation;
|
||||
u64 features;
|
||||
};
|
||||
|
||||
static struct vdpasim *vdpasim_dev;
|
||||
|
||||
static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
|
||||
{
|
||||
return container_of(vdpa, struct vdpasim, vdpa);
|
||||
}
|
||||
|
||||
static struct vdpasim *dev_to_sim(struct device *dev)
|
||||
{
|
||||
struct vdpa_device *vdpa = dev_to_vdpa(dev);
|
||||
|
||||
return vdpa_to_sim(vdpa);
|
||||
}
|
||||
|
||||
static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
|
||||
{
|
||||
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
|
||||
int ret;
|
||||
|
||||
ret = vringh_init_iotlb(&vq->vring, vdpasim_features,
|
||||
VDPASIM_QUEUE_MAX, false,
|
||||
(struct vring_desc *)(uintptr_t)vq->desc_addr,
|
||||
(struct vring_avail *)
|
||||
(uintptr_t)vq->driver_addr,
|
||||
(struct vring_used *)
|
||||
(uintptr_t)vq->device_addr);
|
||||
}
|
||||
|
||||
static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq)
|
||||
{
|
||||
vq->ready = 0;
|
||||
vq->desc_addr = 0;
|
||||
vq->driver_addr = 0;
|
||||
vq->device_addr = 0;
|
||||
vq->cb = NULL;
|
||||
vq->private = NULL;
|
||||
vringh_init_iotlb(&vq->vring, vdpasim_features, VDPASIM_QUEUE_MAX,
|
||||
false, NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
static void vdpasim_reset(struct vdpasim *vdpasim)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < VDPASIM_VQ_NUM; i++)
|
||||
vdpasim_vq_reset(&vdpasim->vqs[i]);
|
||||
|
||||
vhost_iotlb_reset(vdpasim->iommu);
|
||||
|
||||
vdpasim->features = 0;
|
||||
vdpasim->status = 0;
|
||||
++vdpasim->generation;
|
||||
}
|
||||
|
||||
static void vdpasim_work(struct work_struct *work)
|
||||
{
|
||||
struct vdpasim *vdpasim = container_of(work, struct
|
||||
vdpasim, work);
|
||||
struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
|
||||
struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
|
||||
size_t read, write, total_write;
|
||||
int err;
|
||||
int pkts = 0;
|
||||
|
||||
spin_lock(&vdpasim->lock);
|
||||
|
||||
if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
|
||||
goto out;
|
||||
|
||||
if (!txq->ready || !rxq->ready)
|
||||
goto out;
|
||||
|
||||
while (true) {
|
||||
total_write = 0;
|
||||
err = vringh_getdesc_iotlb(&txq->vring, &txq->iov, NULL,
|
||||
&txq->head, GFP_ATOMIC);
|
||||
if (err <= 0)
|
||||
break;
|
||||
|
||||
err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->iov,
|
||||
&rxq->head, GFP_ATOMIC);
|
||||
if (err <= 0) {
|
||||
vringh_complete_iotlb(&txq->vring, txq->head, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
read = vringh_iov_pull_iotlb(&txq->vring, &txq->iov,
|
||||
vdpasim->buffer,
|
||||
PAGE_SIZE);
|
||||
if (read <= 0)
|
||||
break;
|
||||
|
||||
write = vringh_iov_push_iotlb(&rxq->vring, &rxq->iov,
|
||||
vdpasim->buffer, read);
|
||||
if (write <= 0)
|
||||
break;
|
||||
|
||||
total_write += write;
|
||||
}
|
||||
|
||||
/* Make sure data is wrote before advancing index */
|
||||
smp_wmb();
|
||||
|
||||
vringh_complete_iotlb(&txq->vring, txq->head, 0);
|
||||
vringh_complete_iotlb(&rxq->vring, rxq->head, total_write);
|
||||
|
||||
/* Make sure used is visible before rasing the interrupt. */
|
||||
smp_wmb();
|
||||
|
||||
local_bh_disable();
|
||||
if (txq->cb)
|
||||
txq->cb(txq->private);
|
||||
if (rxq->cb)
|
||||
rxq->cb(rxq->private);
|
||||
local_bh_enable();
|
||||
|
||||
if (++pkts > 4) {
|
||||
schedule_work(&vdpasim->work);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
spin_unlock(&vdpasim->lock);
|
||||
}
|
||||
|
||||
static int dir_to_perm(enum dma_data_direction dir)
|
||||
{
|
||||
int perm = -EFAULT;
|
||||
|
||||
switch (dir) {
|
||||
case DMA_FROM_DEVICE:
|
||||
perm = VHOST_MAP_WO;
|
||||
break;
|
||||
case DMA_TO_DEVICE:
|
||||
perm = VHOST_MAP_RO;
|
||||
break;
|
||||
case DMA_BIDIRECTIONAL:
|
||||
perm = VHOST_MAP_RW;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return perm;
|
||||
}
|
||||
|
||||
/*
 * dma_map_ops.map_page: add an identity (IOVA == physical address) mapping
 * for the given page range to the simulator's IOTLB.
 *
 * Returns the physical address as the DMA address, or DMA_MAPPING_ERROR
 * when @dir has no matching permission or the IOTLB insertion fails.
 */
static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	struct vdpasim *vdpasim = dev_to_sim(dev);
	struct vhost_iotlb *iommu = vdpasim->iommu;
	/* Widen before shifting so the address cannot be truncated. */
	u64 pa = ((u64)page_to_pfn(page) << PAGE_SHIFT) + offset;
	int ret, perm = dir_to_perm(dir);

	if (perm < 0)
		return DMA_MAPPING_ERROR;

	/* For simplicity, use identical mapping to avoid e.g iova
	 * allocator.
	 */
	ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1,
				    pa, perm);
	if (ret)
		return DMA_MAPPING_ERROR;

	return (dma_addr_t)(pa);
}
|
||||
|
||||
static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr,
|
||||
size_t size, enum dma_data_direction dir,
|
||||
unsigned long attrs)
|
||||
{
|
||||
struct vdpasim *vdpasim = dev_to_sim(dev);
|
||||
struct vhost_iotlb *iommu = vdpasim->iommu;
|
||||
|
||||
vhost_iotlb_del_range(iommu, (u64)dma_addr,
|
||||
(u64)dma_addr + size - 1);
|
||||
}
|
||||
|
||||
static void *vdpasim_alloc_coherent(struct device *dev, size_t size,
|
||||
dma_addr_t *dma_addr, gfp_t flag,
|
||||
unsigned long attrs)
|
||||
{
|
||||
struct vdpasim *vdpasim = dev_to_sim(dev);
|
||||
struct vhost_iotlb *iommu = vdpasim->iommu;
|
||||
void *addr = kmalloc(size, flag);
|
||||
int ret;
|
||||
|
||||
if (!addr)
|
||||
*dma_addr = DMA_MAPPING_ERROR;
|
||||
else {
|
||||
u64 pa = virt_to_phys(addr);
|
||||
|
||||
ret = vhost_iotlb_add_range(iommu, (u64)pa,
|
||||
(u64)pa + size - 1,
|
||||
pa, VHOST_MAP_RW);
|
||||
if (ret) {
|
||||
*dma_addr = DMA_MAPPING_ERROR;
|
||||
kfree(addr);
|
||||
addr = NULL;
|
||||
} else
|
||||
*dma_addr = (dma_addr_t)pa;
|
||||
}
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
/*
 * dma_map_ops.free: remove the buffer's IOTLB mapping, then free the
 * memory, which is located through its DMA (physical) address.
 */
static void vdpasim_free_coherent(struct device *dev, size_t size,
				  void *vaddr, dma_addr_t dma_addr,
				  unsigned long attrs)
{
	struct vdpasim *vdpasim = dev_to_sim(dev);
	u64 first = (u64)dma_addr;
	u64 last = first + size - 1;

	vhost_iotlb_del_range(vdpasim->iommu, first, last);
	kfree(phys_to_virt((uintptr_t)dma_addr));
}
|
||||
|
||||
static const struct dma_map_ops vdpasim_dma_ops = {
|
||||
.map_page = vdpasim_map_page,
|
||||
.unmap_page = vdpasim_unmap_page,
|
||||
.alloc = vdpasim_alloc_coherent,
|
||||
.free = vdpasim_free_coherent,
|
||||
};
|
||||
|
||||
static const struct vdpa_config_ops vdpasim_net_config_ops;
|
||||
|
||||
static struct vdpasim *vdpasim_create(void)
|
||||
{
|
||||
struct virtio_net_config *config;
|
||||
struct vdpasim *vdpasim;
|
||||
struct device *dev;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL,
|
||||
&vdpasim_net_config_ops);
|
||||
if (!vdpasim)
|
||||
goto err_alloc;
|
||||
|
||||
INIT_WORK(&vdpasim->work, vdpasim_work);
|
||||
spin_lock_init(&vdpasim->lock);
|
||||
|
||||
dev = &vdpasim->vdpa.dev;
|
||||
dev->coherent_dma_mask = DMA_BIT_MASK(64);
|
||||
set_dma_ops(dev, &vdpasim_dma_ops);
|
||||
|
||||
vdpasim->iommu = vhost_iotlb_alloc(2048, 0);
|
||||
if (!vdpasim->iommu)
|
||||
goto err_iommu;
|
||||
|
||||
vdpasim->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
|
||||
if (!vdpasim->buffer)
|
||||
goto err_iommu;
|
||||
|
||||
config = &vdpasim->config;
|
||||
config->mtu = 1500;
|
||||
config->status = VIRTIO_NET_S_LINK_UP;
|
||||
eth_random_addr(config->mac);
|
||||
|
||||
vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu);
|
||||
vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu);
|
||||
|
||||
vdpasim->vdpa.dma_dev = dev;
|
||||
ret = vdpa_register_device(&vdpasim->vdpa);
|
||||
if (ret)
|
||||
goto err_iommu;
|
||||
|
||||
return vdpasim;
|
||||
|
||||
err_iommu:
|
||||
put_device(dev);
|
||||
err_alloc:
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx,
|
||||
u64 desc_area, u64 driver_area,
|
||||
u64 device_area)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
|
||||
|
||||
vq->desc_addr = desc_area;
|
||||
vq->driver_addr = driver_area;
|
||||
vq->device_addr = device_area;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vdpasim_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
|
||||
|
||||
vq->num = num;
|
||||
}
|
||||
|
||||
/* Kick: schedule the datapath work, but only when the queue is ready. */
static void vdpasim_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);

	if (!vdpasim->vqs[idx].ready)
		return;

	schedule_work(&vdpasim->work);
}
|
||||
|
||||
static void vdpasim_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
|
||||
struct vdpa_callback *cb)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
|
||||
|
||||
vq->cb = cb->callback;
|
||||
vq->private = cb->private;
|
||||
}
|
||||
|
||||
/*
 * Record the ready state of virtqueue @idx under the device lock. When the
 * queue becomes ready, its vringh is initialised.
 */
static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready)
{
	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);

	spin_lock(&vdpasim->lock);

	vdpasim->vqs[idx].ready = ready;
	if (ready)
		vdpasim_queue_ready(vdpasim, idx);

	spin_unlock(&vdpasim->lock);
}
|
||||
|
||||
static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
|
||||
|
||||
return vq->ready;
|
||||
}
|
||||
|
||||
static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, u64 state)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
|
||||
struct vringh *vrh = &vq->vring;
|
||||
|
||||
spin_lock(&vdpasim->lock);
|
||||
vrh->last_avail_idx = state;
|
||||
spin_unlock(&vdpasim->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
|
||||
struct vringh *vrh = &vq->vring;
|
||||
|
||||
return vrh->last_avail_idx;
|
||||
}
|
||||
|
||||
static u16 vdpasim_get_vq_align(struct vdpa_device *vdpa)
|
||||
{
|
||||
return VDPASIM_QUEUE_ALIGN;
|
||||
}
|
||||
|
||||
static u64 vdpasim_get_features(struct vdpa_device *vdpa)
|
||||
{
|
||||
return vdpasim_features;
|
||||
}
|
||||
|
||||
/*
 * Store the driver's features, masked by what the simulator offers.
 * Returns -EINVAL unless VIRTIO_F_IOMMU_PLATFORM is among them.
 */
static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features)
{
	const u64 iommu_platform = 1ULL << VIRTIO_F_IOMMU_PLATFORM;
	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);

	/* DMA mapping must be done by driver */
	if ((features & iommu_platform) == 0)
		return -EINVAL;

	vdpasim->features = vdpasim_features & features;

	return 0;
}
|
||||
|
||||
static void vdpasim_set_config_cb(struct vdpa_device *vdpa,
|
||||
struct vdpa_callback *cb)
|
||||
{
|
||||
/* We don't support config interrupt */
|
||||
}
|
||||
|
||||
static u16 vdpasim_get_vq_num_max(struct vdpa_device *vdpa)
|
||||
{
|
||||
return VDPASIM_QUEUE_MAX;
|
||||
}
|
||||
|
||||
static u32 vdpasim_get_device_id(struct vdpa_device *vdpa)
|
||||
{
|
||||
return VDPASIM_DEVICE_ID;
|
||||
}
|
||||
|
||||
static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa)
|
||||
{
|
||||
return VDPASIM_VENDOR_ID;
|
||||
}
|
||||
|
||||
static u8 vdpasim_get_status(struct vdpa_device *vdpa)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
u8 status;
|
||||
|
||||
spin_lock(&vdpasim->lock);
|
||||
status = vdpasim->status;
|
||||
spin_unlock(&vdpasim->lock);
|
||||
|
||||
return vdpasim->status;
|
||||
}
|
||||
|
||||
/*
 * Store the new device status under the device lock. Writing a status of
 * zero resets the whole device.
 */
static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);

	spin_lock(&vdpasim->lock);

	vdpasim->status = status;
	if (!status)
		vdpasim_reset(vdpasim);

	spin_unlock(&vdpasim->lock);
}
|
||||
|
||||
static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset,
|
||||
void *buf, unsigned int len)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
|
||||
if (offset + len < sizeof(struct virtio_net_config))
|
||||
memcpy(buf, &vdpasim->config + offset, len);
|
||||
}
|
||||
|
||||
static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset,
|
||||
const void *buf, unsigned int len)
|
||||
{
|
||||
/* No writable config is supported by vdpasim */
|
||||
}
|
||||
|
||||
static u32 vdpasim_get_generation(struct vdpa_device *vdpa)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
|
||||
return vdpasim->generation;
|
||||
}
|
||||
|
||||
static int vdpasim_set_map(struct vdpa_device *vdpa,
|
||||
struct vhost_iotlb *iotlb)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
struct vhost_iotlb_map *map;
|
||||
u64 start = 0ULL, last = 0ULL - 1;
|
||||
int ret;
|
||||
|
||||
vhost_iotlb_reset(vdpasim->iommu);
|
||||
|
||||
for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
|
||||
map = vhost_iotlb_itree_next(map, start, last)) {
|
||||
ret = vhost_iotlb_add_range(vdpasim->iommu, map->start,
|
||||
map->last, map->addr, map->perm);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
return 0;
|
||||
|
||||
err:
|
||||
vhost_iotlb_reset(vdpasim->iommu);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size,
|
||||
u64 pa, u32 perm)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
|
||||
return vhost_iotlb_add_range(vdpasim->iommu, iova,
|
||||
iova + size - 1, pa, perm);
|
||||
}
|
||||
|
||||
static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
|
||||
vhost_iotlb_del_range(vdpasim->iommu, iova, iova + size - 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vdpasim_free(struct vdpa_device *vdpa)
|
||||
{
|
||||
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
|
||||
|
||||
cancel_work_sync(&vdpasim->work);
|
||||
kfree(vdpasim->buffer);
|
||||
if (vdpasim->iommu)
|
||||
vhost_iotlb_free(vdpasim->iommu);
|
||||
}
|
||||
|
||||
static const struct vdpa_config_ops vdpasim_net_config_ops = {
|
||||
.set_vq_address = vdpasim_set_vq_address,
|
||||
.set_vq_num = vdpasim_set_vq_num,
|
||||
.kick_vq = vdpasim_kick_vq,
|
||||
.set_vq_cb = vdpasim_set_vq_cb,
|
||||
.set_vq_ready = vdpasim_set_vq_ready,
|
||||
.get_vq_ready = vdpasim_get_vq_ready,
|
||||
.set_vq_state = vdpasim_set_vq_state,
|
||||
.get_vq_state = vdpasim_get_vq_state,
|
||||
.get_vq_align = vdpasim_get_vq_align,
|
||||
.get_features = vdpasim_get_features,
|
||||
.set_features = vdpasim_set_features,
|
||||
.set_config_cb = vdpasim_set_config_cb,
|
||||
.get_vq_num_max = vdpasim_get_vq_num_max,
|
||||
.get_device_id = vdpasim_get_device_id,
|
||||
.get_vendor_id = vdpasim_get_vendor_id,
|
||||
.get_status = vdpasim_get_status,
|
||||
.set_status = vdpasim_set_status,
|
||||
.get_config = vdpasim_get_config,
|
||||
.set_config = vdpasim_set_config,
|
||||
.get_generation = vdpasim_get_generation,
|
||||
.set_map = vdpasim_set_map,
|
||||
.dma_map = vdpasim_dma_map,
|
||||
.dma_unmap = vdpasim_dma_unmap,
|
||||
.free = vdpasim_free,
|
||||
};
|
||||
|
||||
/*
 * Module init: create the single simulated device.
 * Returns 0 on success or the errno carried by the creation error pointer.
 */
static int __init vdpasim_dev_init(void)
{
	vdpasim_dev = vdpasim_create();

	return PTR_ERR_OR_ZERO(vdpasim_dev);
}
|
||||
|
||||
static void __exit vdpasim_dev_exit(void)
|
||||
{
|
||||
struct vdpa_device *vdpa = &vdpasim_dev->vdpa;
|
||||
|
||||
vdpa_unregister_device(vdpa);
|
||||
}
|
||||
|
||||
module_init(vdpasim_dev_init)
|
||||
module_exit(vdpasim_dev_exit)
|
||||
|
||||
MODULE_VERSION(DRV_VERSION);
|
||||
MODULE_LICENSE(DRV_LICENSE);
|
||||
MODULE_AUTHOR(DRV_AUTHOR);
|
||||
MODULE_DESCRIPTION(DRV_DESC);
|
@ -1,4 +1,29 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config VHOST_IOTLB
|
||||
tristate
|
||||
help
|
||||
Generic IOTLB implementation for vhost and vringh.
|
||||
|
||||
config VHOST_RING
|
||||
tristate
|
||||
select VHOST_IOTLB
|
||||
help
|
||||
This option is selected by any driver which needs to access
|
||||
the host side of a virtio ring.
|
||||
|
||||
config VHOST
|
||||
tristate
|
||||
select VHOST_IOTLB
|
||||
help
|
||||
This option is selected by any driver which needs to access
|
||||
the core of vhost.
|
||||
|
||||
menuconfig VHOST_MENU
|
||||
bool "VHOST drivers"
|
||||
default y
|
||||
|
||||
if VHOST_MENU
|
||||
|
||||
config VHOST_NET
|
||||
tristate "Host kernel accelerator for virtio net"
|
||||
depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP)
|
||||
@ -23,8 +48,8 @@ config VHOST_SCSI
|
||||
config VHOST_VSOCK
|
||||
tristate "vhost virtio-vsock driver"
|
||||
depends on VSOCKETS && EVENTFD
|
||||
select VIRTIO_VSOCKETS_COMMON
|
||||
select VHOST
|
||||
select VIRTIO_VSOCKETS_COMMON
|
||||
default n
|
||||
---help---
|
||||
This kernel module can be loaded in the host kernel to provide AF_VSOCK
|
||||
@ -34,11 +59,17 @@ config VHOST_VSOCK
|
||||
To compile this driver as a module, choose M here: the module will be called
|
||||
vhost_vsock.
|
||||
|
||||
config VHOST
|
||||
tristate
|
||||
---help---
|
||||
This option is selected by any driver which needs to access
|
||||
the core of vhost.
|
||||
config VHOST_VDPA
|
||||
tristate "Vhost driver for vDPA-based backend"
|
||||
depends on EVENTFD
|
||||
select VHOST
|
||||
select VDPA
|
||||
help
|
||||
This kernel module can be loaded in host kernel to accelerate
|
||||
guest virtio devices with the vDPA-based backends.
|
||||
|
||||
To compile this driver as a module, choose M here: the module
|
||||
will be called vhost_vdpa.
|
||||
|
||||
config VHOST_CROSS_ENDIAN_LEGACY
|
||||
bool "Cross-endian support for vhost"
|
||||
@ -54,3 +85,5 @@ config VHOST_CROSS_ENDIAN_LEGACY
|
||||
adds some overhead, it is disabled by default.
|
||||
|
||||
If unsure, say "N".
|
||||
|
||||
endif
|
||||
|
@ -1,6 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config VHOST_RING
|
||||
tristate
|
||||
---help---
|
||||
This option is selected by any driver which needs to access
|
||||
the host side of a virtio ring.
|
@ -10,4 +10,10 @@ vhost_vsock-y := vsock.o
|
||||
|
||||
obj-$(CONFIG_VHOST_RING) += vringh.o
|
||||
|
||||
obj-$(CONFIG_VHOST_VDPA) += vhost_vdpa.o
|
||||
vhost_vdpa-y := vdpa.o
|
||||
|
||||
obj-$(CONFIG_VHOST) += vhost.o
|
||||
|
||||
obj-$(CONFIG_VHOST_IOTLB) += vhost_iotlb.o
|
||||
vhost_iotlb-y := iotlb.o
|
||||
|
177
drivers/vhost/iotlb.c
Normal file
177
drivers/vhost/iotlb.c
Normal file
@ -0,0 +1,177 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Copyright (C) 2020 Red Hat, Inc.
|
||||
* Author: Jason Wang <jasowang@redhat.com>
|
||||
*
|
||||
* IOTLB implementation for vhost.
|
||||
*/
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vhost_iotlb.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#define MOD_VERSION "0.1"
|
||||
#define MOD_DESC "VHOST IOTLB"
|
||||
#define MOD_AUTHOR "Jason Wang <jasowang@redhat.com>"
|
||||
#define MOD_LICENSE "GPL v2"
|
||||
|
||||
#define START(map) ((map)->start)
|
||||
#define LAST(map) ((map)->last)
|
||||
|
||||
INTERVAL_TREE_DEFINE(struct vhost_iotlb_map,
|
||||
rb, __u64, __subtree_last,
|
||||
START, LAST, static inline, vhost_iotlb_itree);
|
||||
|
||||
/**
|
||||
* vhost_iotlb_map_free - remove a map node and free it
|
||||
* @iotlb: the IOTLB
|
||||
* @map: the map that want to be remove and freed
|
||||
*/
|
||||
void vhost_iotlb_map_free(struct vhost_iotlb *iotlb,
|
||||
struct vhost_iotlb_map *map)
|
||||
{
|
||||
vhost_iotlb_itree_remove(map, &iotlb->root);
|
||||
list_del(&map->link);
|
||||
kfree(map);
|
||||
iotlb->nmaps--;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_iotlb_map_free);
|
||||
|
||||
/**
|
||||
* vhost_iotlb_add_range - add a new range to vhost IOTLB
|
||||
* @iotlb: the IOTLB
|
||||
* @start: start of the IOVA range
|
||||
* @last: last of IOVA range
|
||||
* @addr: the address that is mapped to @start
|
||||
* @perm: access permission of this range
|
||||
*
|
||||
* Returns an error last is smaller than start or memory allocation
|
||||
* fails
|
||||
*/
|
||||
int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
|
||||
u64 start, u64 last,
|
||||
u64 addr, unsigned int perm)
|
||||
{
|
||||
struct vhost_iotlb_map *map;
|
||||
|
||||
if (last < start)
|
||||
return -EFAULT;
|
||||
|
||||
if (iotlb->limit &&
|
||||
iotlb->nmaps == iotlb->limit &&
|
||||
iotlb->flags & VHOST_IOTLB_FLAG_RETIRE) {
|
||||
map = list_first_entry(&iotlb->list, typeof(*map), link);
|
||||
vhost_iotlb_map_free(iotlb, map);
|
||||
}
|
||||
|
||||
map = kmalloc(sizeof(*map), GFP_ATOMIC);
|
||||
if (!map)
|
||||
return -ENOMEM;
|
||||
|
||||
map->start = start;
|
||||
map->size = last - start + 1;
|
||||
map->last = last;
|
||||
map->addr = addr;
|
||||
map->perm = perm;
|
||||
|
||||
iotlb->nmaps++;
|
||||
vhost_iotlb_itree_insert(map, &iotlb->root);
|
||||
|
||||
INIT_LIST_HEAD(&map->link);
|
||||
list_add_tail(&map->link, &iotlb->list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_iotlb_add_range);
|
||||
|
||||
/**
|
||||
* vring_iotlb_del_range - delete overlapped ranges from vhost IOTLB
|
||||
* @iotlb: the IOTLB
|
||||
* @start: start of the IOVA range
|
||||
* @last: last of IOVA range
|
||||
*/
|
||||
void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last)
|
||||
{
|
||||
struct vhost_iotlb_map *map;
|
||||
|
||||
while ((map = vhost_iotlb_itree_iter_first(&iotlb->root,
|
||||
start, last)))
|
||||
vhost_iotlb_map_free(iotlb, map);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_iotlb_del_range);
|
||||
|
||||
/**
|
||||
* vhost_iotlb_alloc - add a new vhost IOTLB
|
||||
* @limit: maximum number of IOTLB entries
|
||||
* @flags: VHOST_IOTLB_FLAG_XXX
|
||||
*
|
||||
* Returns an error is memory allocation fails
|
||||
*/
|
||||
struct vhost_iotlb *vhost_iotlb_alloc(unsigned int limit, unsigned int flags)
|
||||
{
|
||||
struct vhost_iotlb *iotlb = kzalloc(sizeof(*iotlb), GFP_KERNEL);
|
||||
|
||||
if (!iotlb)
|
||||
return NULL;
|
||||
|
||||
iotlb->root = RB_ROOT_CACHED;
|
||||
iotlb->limit = limit;
|
||||
iotlb->nmaps = 0;
|
||||
iotlb->flags = flags;
|
||||
INIT_LIST_HEAD(&iotlb->list);
|
||||
|
||||
return iotlb;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_iotlb_alloc);
|
||||
|
||||
/**
 * vhost_iotlb_reset - reset vhost IOTLB (free all IOTLB entries)
 * @iotlb: the IOTLB to be reset
 */
void vhost_iotlb_reset(struct vhost_iotlb *iotlb)
{
	/* Deleting the whole 64-bit range removes every entry. */
	vhost_iotlb_del_range(iotlb, 0ULL, ~0ULL);
}
|
||||
EXPORT_SYMBOL_GPL(vhost_iotlb_reset);
|
||||
|
||||
/**
 * vhost_iotlb_free - reset and free vhost IOTLB
 * @iotlb: the IOTLB to be freed; NULL is accepted and ignored
 */
void vhost_iotlb_free(struct vhost_iotlb *iotlb)
{
	if (!iotlb)
		return;

	vhost_iotlb_reset(iotlb);
	kfree(iotlb);
}
|
||||
EXPORT_SYMBOL_GPL(vhost_iotlb_free);
|
||||
|
||||
/**
|
||||
* vhost_iotlb_itree_first - return the first overlapped range
|
||||
* @iotlb: the IOTLB
|
||||
* @start: start of IOVA range
|
||||
* @end: end of IOVA range
|
||||
*/
|
||||
struct vhost_iotlb_map *
|
||||
vhost_iotlb_itree_first(struct vhost_iotlb *iotlb, u64 start, u64 last)
|
||||
{
|
||||
return vhost_iotlb_itree_iter_first(&iotlb->root, start, last);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_iotlb_itree_first);
|
||||
|
||||
/**
|
||||
* vhost_iotlb_itree_first - return the next overlapped range
|
||||
* @iotlb: the IOTLB
|
||||
* @start: start of IOVA range
|
||||
* @end: end of IOVA range
|
||||
*/
|
||||
struct vhost_iotlb_map *
|
||||
vhost_iotlb_itree_next(struct vhost_iotlb_map *map, u64 start, u64 last)
|
||||
{
|
||||
return vhost_iotlb_itree_iter_next(map, start, last);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_iotlb_itree_next);
|
||||
|
||||
MODULE_VERSION(MOD_VERSION);
|
||||
MODULE_DESCRIPTION(MOD_DESC);
|
||||
MODULE_AUTHOR(MOD_AUTHOR);
|
||||
MODULE_LICENSE(MOD_LICENSE);
|
@ -1324,7 +1324,8 @@ static int vhost_net_open(struct inode *inode, struct file *f)
|
||||
}
|
||||
vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
|
||||
UIO_MAXIOV + VHOST_NET_BATCH,
|
||||
VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT);
|
||||
VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT,
|
||||
NULL);
|
||||
|
||||
vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
|
||||
vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);
|
||||
@ -1586,7 +1587,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
|
||||
struct socket *tx_sock = NULL;
|
||||
struct socket *rx_sock = NULL;
|
||||
long err;
|
||||
struct vhost_umem *umem;
|
||||
struct vhost_iotlb *umem;
|
||||
|
||||
mutex_lock(&n->dev.mutex);
|
||||
err = vhost_dev_check_owner(&n->dev);
|
||||
|
@ -1628,7 +1628,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
|
||||
vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
|
||||
}
|
||||
vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV,
|
||||
VHOST_SCSI_WEIGHT, 0);
|
||||
VHOST_SCSI_WEIGHT, 0, NULL);
|
||||
|
||||
vhost_scsi_init_inflight(vs, NULL);
|
||||
|
||||
|
883
drivers/vhost/vdpa.c
Normal file
883
drivers/vhost/vdpa.c
Normal file
@ -0,0 +1,883 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation.
|
||||
* Copyright (C) 2020 Red Hat, Inc.
|
||||
*
|
||||
* Author: Tiwei Bie <tiwei.bie@intel.com>
|
||||
* Jason Wang <jasowang@redhat.com>
|
||||
*
|
||||
* Thanks Michael S. Tsirkin for the valuable comments and
|
||||
* suggestions. And thanks to Cunming Liang and Zhihong Wang for all
|
||||
* their support.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/cdev.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/uuid.h>
|
||||
#include <linux/vdpa.h>
|
||||
#include <linux/nospec.h>
|
||||
#include <linux/vhost.h>
|
||||
#include <linux/virtio_net.h>
|
||||
|
||||
#include "vhost.h"
|
||||
|
||||
enum {
|
||||
VHOST_VDPA_FEATURES =
|
||||
(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |
|
||||
(1ULL << VIRTIO_F_ANY_LAYOUT) |
|
||||
(1ULL << VIRTIO_F_VERSION_1) |
|
||||
(1ULL << VIRTIO_F_IOMMU_PLATFORM) |
|
||||
(1ULL << VIRTIO_F_RING_PACKED) |
|
||||
(1ULL << VIRTIO_F_ORDER_PLATFORM) |
|
||||
(1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
|
||||
(1ULL << VIRTIO_RING_F_EVENT_IDX),
|
||||
|
||||
VHOST_VDPA_NET_FEATURES = VHOST_VDPA_FEATURES |
|
||||
(1ULL << VIRTIO_NET_F_CSUM) |
|
||||
(1ULL << VIRTIO_NET_F_GUEST_CSUM) |
|
||||
(1ULL << VIRTIO_NET_F_MTU) |
|
||||
(1ULL << VIRTIO_NET_F_MAC) |
|
||||
(1ULL << VIRTIO_NET_F_GUEST_TSO4) |
|
||||
(1ULL << VIRTIO_NET_F_GUEST_TSO6) |
|
||||
(1ULL << VIRTIO_NET_F_GUEST_ECN) |
|
||||
(1ULL << VIRTIO_NET_F_GUEST_UFO) |
|
||||
(1ULL << VIRTIO_NET_F_HOST_TSO4) |
|
||||
(1ULL << VIRTIO_NET_F_HOST_TSO6) |
|
||||
(1ULL << VIRTIO_NET_F_HOST_ECN) |
|
||||
(1ULL << VIRTIO_NET_F_HOST_UFO) |
|
||||
(1ULL << VIRTIO_NET_F_MRG_RXBUF) |
|
||||
(1ULL << VIRTIO_NET_F_STATUS) |
|
||||
(1ULL << VIRTIO_NET_F_SPEED_DUPLEX),
|
||||
};
|
||||
|
||||
/* Currently, only network backend w/o multiqueue is supported. */
|
||||
#define VHOST_VDPA_VQ_MAX 2
|
||||
|
||||
#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
|
||||
|
||||
struct vhost_vdpa {
|
||||
struct vhost_dev vdev;
|
||||
struct iommu_domain *domain;
|
||||
struct vhost_virtqueue *vqs;
|
||||
struct completion completion;
|
||||
struct vdpa_device *vdpa;
|
||||
struct device dev;
|
||||
struct cdev cdev;
|
||||
atomic_t opened;
|
||||
int nvqs;
|
||||
int virtio_id;
|
||||
int minor;
|
||||
};
|
||||
|
||||
static DEFINE_IDA(vhost_vdpa_ida);
|
||||
|
||||
static dev_t vhost_vdpa_major;
|
||||
|
||||
static const u64 vhost_vdpa_features[] = {
|
||||
[VIRTIO_ID_NET] = VHOST_VDPA_NET_FEATURES,
|
||||
};
|
||||
|
||||
static void handle_vq_kick(struct vhost_work *work)
|
||||
{
|
||||
struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
|
||||
poll.work);
|
||||
struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
|
||||
const struct vdpa_config_ops *ops = v->vdpa->config;
|
||||
|
||||
ops->kick_vq(v->vdpa, vq - v->vqs);
|
||||
}
|
||||
|
||||
static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
|
||||
{
|
||||
struct vhost_virtqueue *vq = private;
|
||||
struct eventfd_ctx *call_ctx = vq->call_ctx;
|
||||
|
||||
if (call_ctx)
|
||||
eventfd_signal(call_ctx, 1);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static void vhost_vdpa_reset(struct vhost_vdpa *v)
|
||||
{
|
||||
struct vdpa_device *vdpa = v->vdpa;
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
|
||||
ops->set_status(vdpa, 0);
|
||||
}
|
||||
|
||||
/*
 * VHOST_VDPA_GET_DEVICE_ID: copy the 32-bit device id reported by the vDPA
 * device to userspace.  Returns 0, or -EFAULT if the copy fails.
 */
static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	u32 id = vdpa->config->get_device_id(vdpa);

	return copy_to_user(argp, &id, sizeof(id)) ? -EFAULT : 0;
}
|
||||
|
||||
/*
 * VHOST_VDPA_GET_STATUS: copy the device status byte to userspace.
 * Returns 0, or -EFAULT if the copy fails.
 */
static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	u8 cur = vdpa->config->get_status(vdpa);

	return copy_to_user(statusp, &cur, sizeof(cur)) ? -EFAULT : 0;
}
|
||||
|
||||
/*
 * VHOST_VDPA_SET_STATUS: write a status byte supplied by userspace.
 *
 * Returns 0 on success, -EFAULT if the byte cannot be read from userspace,
 * or -EINVAL if a non-zero status would clear a bit that is currently set.
 */
static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 requested;

	if (copy_from_user(&requested, statusp, sizeof(requested)))
		return -EFAULT;

	/*
	 * Userspace shouldn't remove status bits unless it resets the
	 * status to 0.
	 */
	if (requested != 0) {
		if (ops->get_status(vdpa) & ~requested)
			return -EINVAL;
	}

	ops->set_status(vdpa, requested);

	return 0;
}
|
||||
|
||||
static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
|
||||
struct vhost_vdpa_config *c)
|
||||
{
|
||||
long size = 0;
|
||||
|
||||
switch (v->virtio_id) {
|
||||
case VIRTIO_ID_NET:
|
||||
size = sizeof(struct virtio_net_config);
|
||||
break;
|
||||
}
|
||||
|
||||
if (c->len == 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (c->len > size - c->off)
|
||||
return -E2BIG;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long vhost_vdpa_get_config(struct vhost_vdpa *v,
|
||||
struct vhost_vdpa_config __user *c)
|
||||
{
|
||||
struct vdpa_device *vdpa = v->vdpa;
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
struct vhost_vdpa_config config;
|
||||
unsigned long size = offsetof(struct vhost_vdpa_config, buf);
|
||||
u8 *buf;
|
||||
|
||||
if (copy_from_user(&config, c, size))
|
||||
return -EFAULT;
|
||||
if (vhost_vdpa_config_validate(v, &config))
|
||||
return -EINVAL;
|
||||
buf = kvzalloc(config.len, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
ops->get_config(vdpa, config.off, buf, config.len);
|
||||
|
||||
if (copy_to_user(c->buf, buf, config.len)) {
|
||||
kvfree(buf);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
kvfree(buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long vhost_vdpa_set_config(struct vhost_vdpa *v,
|
||||
struct vhost_vdpa_config __user *c)
|
||||
{
|
||||
struct vdpa_device *vdpa = v->vdpa;
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
struct vhost_vdpa_config config;
|
||||
unsigned long size = offsetof(struct vhost_vdpa_config, buf);
|
||||
u8 *buf;
|
||||
|
||||
if (copy_from_user(&config, c, size))
|
||||
return -EFAULT;
|
||||
if (vhost_vdpa_config_validate(v, &config))
|
||||
return -EINVAL;
|
||||
buf = kvzalloc(config.len, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
if (copy_from_user(buf, c->buf, config.len)) {
|
||||
kvfree(buf);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
ops->set_config(vdpa, config.off, buf, config.len);
|
||||
|
||||
kvfree(buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * VHOST_GET_FEATURES: report the device features, masked by the
 * vhost_vdpa_features[] entry for this device type.
 * Returns 0, or -EFAULT if the copy to userspace fails.
 */
static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	u64 offered;

	offered = vdpa->config->get_features(vdpa);
	offered &= vhost_vdpa_features[v->virtio_id];

	return copy_to_user(featurep, &offered, sizeof(offered)) ? -EFAULT : 0;
}
|
||||
|
||||
/*
 * VHOST_SET_FEATURES: pass the feature set chosen by userspace to the device.
 *
 * Returns 0 on success, -EBUSY if FEATURES_OK is already set in the device
 * status, -EFAULT if the value cannot be read from userspace, or -EINVAL
 * if it contains a bit outside vhost_vdpa_features[] or the device rejects
 * it.
 */
static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 wanted;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&wanted, featurep, sizeof(wanted)))
		return -EFAULT;

	/* The device is only asked when the mask check has passed. */
	if ((wanted & ~vhost_vdpa_features[v->virtio_id]) ||
	    ops->set_features(vdpa, wanted))
		return -EINVAL;

	return 0;
}
|
||||
|
||||
/*
 * VHOST_VDPA_GET_VRING_NUM: copy the value of get_vq_num_max() to userspace.
 * Returns 0, or -EFAULT if the copy fails.
 */
static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	u16 max = vdpa->config->get_vq_num_max(vdpa);

	return copy_to_user(argp, &max, sizeof(max)) ? -EFAULT : 0;
}
|
||||
|
||||
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
|
||||
void __user *argp)
|
||||
{
|
||||
struct vdpa_device *vdpa = v->vdpa;
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
struct vdpa_callback cb;
|
||||
struct vhost_virtqueue *vq;
|
||||
struct vhost_vring_state s;
|
||||
u8 status;
|
||||
u32 idx;
|
||||
long r;
|
||||
|
||||
r = get_user(idx, (u32 __user *)argp);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (idx >= v->nvqs)
|
||||
return -ENOBUFS;
|
||||
|
||||
idx = array_index_nospec(idx, v->nvqs);
|
||||
vq = &v->vqs[idx];
|
||||
|
||||
status = ops->get_status(vdpa);
|
||||
|
||||
if (cmd == VHOST_VDPA_SET_VRING_ENABLE) {
|
||||
if (copy_from_user(&s, argp, sizeof(s)))
|
||||
return -EFAULT;
|
||||
ops->set_vq_ready(vdpa, idx, s.num);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (cmd == VHOST_GET_VRING_BASE)
|
||||
vq->last_avail_idx = ops->get_vq_state(v->vdpa, idx);
|
||||
|
||||
r = vhost_vring_ioctl(&v->vdev, cmd, argp);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
switch (cmd) {
|
||||
case VHOST_SET_VRING_ADDR:
|
||||
if (ops->set_vq_address(vdpa, idx,
|
||||
(u64)(uintptr_t)vq->desc,
|
||||
(u64)(uintptr_t)vq->avail,
|
||||
(u64)(uintptr_t)vq->used))
|
||||
r = -EINVAL;
|
||||
break;
|
||||
|
||||
case VHOST_SET_VRING_BASE:
|
||||
if (ops->set_vq_state(vdpa, idx, vq->last_avail_idx))
|
||||
r = -EINVAL;
|
||||
break;
|
||||
|
||||
case VHOST_SET_VRING_CALL:
|
||||
if (vq->call_ctx) {
|
||||
cb.callback = vhost_vdpa_virtqueue_cb;
|
||||
cb.private = vq;
|
||||
} else {
|
||||
cb.callback = NULL;
|
||||
cb.private = NULL;
|
||||
}
|
||||
ops->set_vq_cb(vdpa, idx, &cb);
|
||||
break;
|
||||
|
||||
case VHOST_SET_VRING_NUM:
|
||||
ops->set_vq_num(vdpa, idx, vq->num);
|
||||
break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static long vhost_vdpa_unlocked_ioctl(struct file *filep,
|
||||
unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct vhost_vdpa *v = filep->private_data;
|
||||
struct vhost_dev *d = &v->vdev;
|
||||
void __user *argp = (void __user *)arg;
|
||||
long r;
|
||||
|
||||
mutex_lock(&d->mutex);
|
||||
|
||||
switch (cmd) {
|
||||
case VHOST_VDPA_GET_DEVICE_ID:
|
||||
r = vhost_vdpa_get_device_id(v, argp);
|
||||
break;
|
||||
case VHOST_VDPA_GET_STATUS:
|
||||
r = vhost_vdpa_get_status(v, argp);
|
||||
break;
|
||||
case VHOST_VDPA_SET_STATUS:
|
||||
r = vhost_vdpa_set_status(v, argp);
|
||||
break;
|
||||
case VHOST_VDPA_GET_CONFIG:
|
||||
r = vhost_vdpa_get_config(v, argp);
|
||||
break;
|
||||
case VHOST_VDPA_SET_CONFIG:
|
||||
r = vhost_vdpa_set_config(v, argp);
|
||||
break;
|
||||
case VHOST_GET_FEATURES:
|
||||
r = vhost_vdpa_get_features(v, argp);
|
||||
break;
|
||||
case VHOST_SET_FEATURES:
|
||||
r = vhost_vdpa_set_features(v, argp);
|
||||
break;
|
||||
case VHOST_VDPA_GET_VRING_NUM:
|
||||
r = vhost_vdpa_get_vring_num(v, argp);
|
||||
break;
|
||||
case VHOST_SET_LOG_BASE:
|
||||
case VHOST_SET_LOG_FD:
|
||||
r = -ENOIOCTLCMD;
|
||||
break;
|
||||
default:
|
||||
r = vhost_dev_ioctl(&v->vdev, cmd, argp);
|
||||
if (r == -ENOIOCTLCMD)
|
||||
r = vhost_vdpa_vring_ioctl(v, cmd, argp);
|
||||
break;
|
||||
}
|
||||
|
||||
mutex_unlock(&d->mutex);
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Remove every IOTLB entry overlapping [start, last].  For each entry the
 * pages it covers are unpinned (and marked dirty first if the entry has
 * VHOST_ACCESS_WO set), the pinned_vm counter of the owning mm is reduced,
 * and the entry is freed.
 */
static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct vhost_iotlb_map *map;

	for (map = vhost_iotlb_itree_first(iotlb, start, last);
	     map != NULL;
	     map = vhost_iotlb_itree_first(iotlb, start, last)) {
		unsigned long first = map->addr >> PAGE_SHIFT;
		unsigned long count = map->size >> PAGE_SHIFT;
		unsigned long n;

		for (n = 0; n < count; n++) {
			struct page *page = pfn_to_page(first + n);

			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}

		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
		vhost_iotlb_map_free(iotlb, map);
	}
}
|
||||
|
||||
static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
|
||||
{
|
||||
struct vhost_dev *dev = &v->vdev;
|
||||
|
||||
vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1);
|
||||
kfree(dev->iotlb);
|
||||
dev->iotlb = NULL;
|
||||
}
|
||||
|
||||
/*
 * Translate a VHOST_ACCESS_* permission into IOMMU_* mapping flags.
 * IOMMU_CACHE is always part of the result; an unknown permission triggers
 * a warning and yields IOMMU_CACHE alone.
 */
static int perm_to_iommu_flags(u32 perm)
{
	int flags = IOMMU_CACHE;

	if (perm == VHOST_ACCESS_WO)
		flags |= IOMMU_WRITE;
	else if (perm == VHOST_ACCESS_RO)
		flags |= IOMMU_READ;
	else if (perm == VHOST_ACCESS_RW)
		flags |= (IOMMU_WRITE | IOMMU_READ);
	else
		WARN(1, "invalidate vhost IOTLB permission\n");

	return flags;
}
|
||||
|
||||
/*
 * Record [iova, iova + size) -> pa in the IOTLB and make the mapping known
 * to the device: through dma_map() if the device provides it, else through
 * set_map() with the whole IOTLB, else through the IOMMU domain.
 *
 * Returns 0 on success or the error of the step that failed.  The IOTLB
 * entry is not removed here when the device/IOMMU step fails.
 */
static int vhost_vdpa_map(struct vhost_vdpa *v,
			  u64 iova, u64 size, u64 pa, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int err;

	err = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
				    pa, perm);
	if (err)
		return err;

	if (ops->dma_map)
		return ops->dma_map(vdpa, iova, size, pa, perm);

	if (ops->set_map)
		return ops->set_map(vdpa, dev->iotlb);

	return iommu_map(v->domain, iova, pa, size,
			 perm_to_iommu_flags(perm));
}
|
||||
|
||||
/*
 * Drop the IOTLB entries overlapping [iova, iova + size) and propagate the
 * removal to the device: dma_unmap() when the device provides dma_map,
 * else set_map() with the updated IOTLB, else the IOMMU domain.
 */
static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, iova, size);
		return;
	}

	if (ops->set_map) {
		ops->set_map(vdpa, v->vdev.iotlb);
		return;
	}

	iommu_unmap(v->domain, iova, size);
}
|
||||
|
||||
static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
|
||||
struct vhost_iotlb_msg *msg)
|
||||
{
|
||||
struct vhost_dev *dev = &v->vdev;
|
||||
struct vhost_iotlb *iotlb = dev->iotlb;
|
||||
struct page **page_list;
|
||||
unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
|
||||
unsigned int gup_flags = FOLL_LONGTERM;
|
||||
unsigned long npages, cur_base, map_pfn, last_pfn = 0;
|
||||
unsigned long locked, lock_limit, pinned, i;
|
||||
u64 iova = msg->iova;
|
||||
int ret = 0;
|
||||
|
||||
if (vhost_iotlb_itree_first(iotlb, msg->iova,
|
||||
msg->iova + msg->size - 1))
|
||||
return -EEXIST;
|
||||
|
||||
page_list = (struct page **) __get_free_page(GFP_KERNEL);
|
||||
if (!page_list)
|
||||
return -ENOMEM;
|
||||
|
||||
if (msg->perm & VHOST_ACCESS_WO)
|
||||
gup_flags |= FOLL_WRITE;
|
||||
|
||||
npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
|
||||
if (!npages)
|
||||
return -EINVAL;
|
||||
|
||||
down_read(&dev->mm->mmap_sem);
|
||||
|
||||
locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
|
||||
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
||||
|
||||
if (locked > lock_limit) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
cur_base = msg->uaddr & PAGE_MASK;
|
||||
iova &= PAGE_MASK;
|
||||
|
||||
while (npages) {
|
||||
pinned = min_t(unsigned long, npages, list_size);
|
||||
ret = pin_user_pages(cur_base, pinned,
|
||||
gup_flags, page_list, NULL);
|
||||
if (ret != pinned)
|
||||
goto out;
|
||||
|
||||
if (!last_pfn)
|
||||
map_pfn = page_to_pfn(page_list[0]);
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
unsigned long this_pfn = page_to_pfn(page_list[i]);
|
||||
u64 csize;
|
||||
|
||||
if (last_pfn && (this_pfn != last_pfn + 1)) {
|
||||
/* Pin a contiguous chunk of memory */
|
||||
csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
|
||||
if (vhost_vdpa_map(v, iova, csize,
|
||||
map_pfn << PAGE_SHIFT,
|
||||
msg->perm))
|
||||
goto out;
|
||||
map_pfn = this_pfn;
|
||||
iova += csize;
|
||||
}
|
||||
|
||||
last_pfn = this_pfn;
|
||||
}
|
||||
|
||||
cur_base += ret << PAGE_SHIFT;
|
||||
npages -= ret;
|
||||
}
|
||||
|
||||
/* Pin the rest chunk */
|
||||
ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
|
||||
map_pfn << PAGE_SHIFT, msg->perm);
|
||||
out:
|
||||
if (ret) {
|
||||
vhost_vdpa_unmap(v, msg->iova, msg->size);
|
||||
atomic64_sub(npages, &dev->mm->pinned_vm);
|
||||
}
|
||||
up_read(&dev->mm->mmap_sem);
|
||||
free_page((unsigned long)page_list);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
|
||||
struct vhost_iotlb_msg *msg)
|
||||
{
|
||||
struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
|
||||
int r = 0;
|
||||
|
||||
r = vhost_dev_check_owner(dev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
switch (msg->type) {
|
||||
case VHOST_IOTLB_UPDATE:
|
||||
r = vhost_vdpa_process_iotlb_update(v, msg);
|
||||
break;
|
||||
case VHOST_IOTLB_INVALIDATE:
|
||||
vhost_vdpa_unmap(v, msg->iova, msg->size);
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
|
||||
struct iov_iter *from)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct vhost_vdpa *v = file->private_data;
|
||||
struct vhost_dev *dev = &v->vdev;
|
||||
|
||||
return vhost_chr_write_iter(dev, from);
|
||||
}
|
||||
|
||||
static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
|
||||
{
|
||||
struct vdpa_device *vdpa = v->vdpa;
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
struct device *dma_dev = vdpa_get_dma_dev(vdpa);
|
||||
struct bus_type *bus;
|
||||
int ret;
|
||||
|
||||
/* Device want to do DMA by itself */
|
||||
if (ops->set_map || ops->dma_map)
|
||||
return 0;
|
||||
|
||||
bus = dma_dev->bus;
|
||||
if (!bus)
|
||||
return -EFAULT;
|
||||
|
||||
if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
|
||||
return -ENOTSUPP;
|
||||
|
||||
v->domain = iommu_domain_alloc(bus);
|
||||
if (!v->domain)
|
||||
return -EIO;
|
||||
|
||||
ret = iommu_attach_device(v->domain, dma_dev);
|
||||
if (ret)
|
||||
goto err_attach;
|
||||
|
||||
return 0;
|
||||
|
||||
err_attach:
|
||||
iommu_domain_free(v->domain);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
|
||||
{
|
||||
struct vdpa_device *vdpa = v->vdpa;
|
||||
struct device *dma_dev = vdpa_get_dma_dev(vdpa);
|
||||
|
||||
if (v->domain) {
|
||||
iommu_detach_device(v->domain, dma_dev);
|
||||
iommu_domain_free(v->domain);
|
||||
}
|
||||
|
||||
v->domain = NULL;
|
||||
}
|
||||
|
||||
static int vhost_vdpa_open(struct inode *inode, struct file *filep)
|
||||
{
|
||||
struct vhost_vdpa *v;
|
||||
struct vhost_dev *dev;
|
||||
struct vhost_virtqueue **vqs;
|
||||
int nvqs, i, r, opened;
|
||||
|
||||
v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
|
||||
if (!v)
|
||||
return -ENODEV;
|
||||
|
||||
opened = atomic_cmpxchg(&v->opened, 0, 1);
|
||||
if (opened)
|
||||
return -EBUSY;
|
||||
|
||||
nvqs = v->nvqs;
|
||||
vhost_vdpa_reset(v);
|
||||
|
||||
vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
|
||||
if (!vqs) {
|
||||
r = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
dev = &v->vdev;
|
||||
for (i = 0; i < nvqs; i++) {
|
||||
vqs[i] = &v->vqs[i];
|
||||
vqs[i]->handle_kick = handle_vq_kick;
|
||||
}
|
||||
vhost_dev_init(dev, vqs, nvqs, 0, 0, 0,
|
||||
vhost_vdpa_process_iotlb_msg);
|
||||
|
||||
dev->iotlb = vhost_iotlb_alloc(0, 0);
|
||||
if (!dev->iotlb) {
|
||||
r = -ENOMEM;
|
||||
goto err_init_iotlb;
|
||||
}
|
||||
|
||||
r = vhost_vdpa_alloc_domain(v);
|
||||
if (r)
|
||||
goto err_init_iotlb;
|
||||
|
||||
filep->private_data = v;
|
||||
|
||||
return 0;
|
||||
|
||||
err_init_iotlb:
|
||||
vhost_dev_cleanup(&v->vdev);
|
||||
err:
|
||||
atomic_dec(&v->opened);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int vhost_vdpa_release(struct inode *inode, struct file *filep)
|
||||
{
|
||||
struct vhost_vdpa *v = filep->private_data;
|
||||
struct vhost_dev *d = &v->vdev;
|
||||
|
||||
mutex_lock(&d->mutex);
|
||||
filep->private_data = NULL;
|
||||
vhost_vdpa_reset(v);
|
||||
vhost_dev_stop(&v->vdev);
|
||||
vhost_vdpa_iotlb_free(v);
|
||||
vhost_vdpa_free_domain(v);
|
||||
vhost_dev_cleanup(&v->vdev);
|
||||
kfree(v->vdev.vqs);
|
||||
mutex_unlock(&d->mutex);
|
||||
|
||||
atomic_dec(&v->opened);
|
||||
complete(&v->completion);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations vhost_vdpa_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = vhost_vdpa_open,
|
||||
.release = vhost_vdpa_release,
|
||||
.write_iter = vhost_vdpa_chr_write_iter,
|
||||
.unlocked_ioctl = vhost_vdpa_unlocked_ioctl,
|
||||
.compat_ioctl = compat_ptr_ioctl,
|
||||
};
|
||||
|
||||
static void vhost_vdpa_release_dev(struct device *device)
|
||||
{
|
||||
struct vhost_vdpa *v =
|
||||
container_of(device, struct vhost_vdpa, dev);
|
||||
|
||||
ida_simple_remove(&vhost_vdpa_ida, v->minor);
|
||||
kfree(v->vqs);
|
||||
kfree(v);
|
||||
}
|
||||
|
||||
static int vhost_vdpa_probe(struct vdpa_device *vdpa)
|
||||
{
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
struct vhost_vdpa *v;
|
||||
int minor, nvqs = VHOST_VDPA_VQ_MAX;
|
||||
int r;
|
||||
|
||||
/* Currently, we only accept the network devices. */
|
||||
if (ops->get_device_id(vdpa) != VIRTIO_ID_NET)
|
||||
return -ENOTSUPP;
|
||||
|
||||
v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
|
||||
if (!v)
|
||||
return -ENOMEM;
|
||||
|
||||
minor = ida_simple_get(&vhost_vdpa_ida, 0,
|
||||
VHOST_VDPA_DEV_MAX, GFP_KERNEL);
|
||||
if (minor < 0) {
|
||||
kfree(v);
|
||||
return minor;
|
||||
}
|
||||
|
||||
atomic_set(&v->opened, 0);
|
||||
v->minor = minor;
|
||||
v->vdpa = vdpa;
|
||||
v->nvqs = nvqs;
|
||||
v->virtio_id = ops->get_device_id(vdpa);
|
||||
|
||||
device_initialize(&v->dev);
|
||||
v->dev.release = vhost_vdpa_release_dev;
|
||||
v->dev.parent = &vdpa->dev;
|
||||
v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
|
||||
v->vqs = kmalloc_array(nvqs, sizeof(struct vhost_virtqueue),
|
||||
GFP_KERNEL);
|
||||
if (!v->vqs) {
|
||||
r = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
|
||||
if (r)
|
||||
goto err;
|
||||
|
||||
cdev_init(&v->cdev, &vhost_vdpa_fops);
|
||||
v->cdev.owner = THIS_MODULE;
|
||||
|
||||
r = cdev_device_add(&v->cdev, &v->dev);
|
||||
if (r)
|
||||
goto err;
|
||||
|
||||
init_completion(&v->completion);
|
||||
vdpa_set_drvdata(vdpa, v);
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
put_device(&v->dev);
|
||||
return r;
|
||||
}
|
||||
|
||||
static void vhost_vdpa_remove(struct vdpa_device *vdpa)
|
||||
{
|
||||
struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
|
||||
int opened;
|
||||
|
||||
cdev_device_del(&v->cdev, &v->dev);
|
||||
|
||||
do {
|
||||
opened = atomic_cmpxchg(&v->opened, 0, 1);
|
||||
if (!opened)
|
||||
break;
|
||||
wait_for_completion(&v->completion);
|
||||
} while (1);
|
||||
|
||||
put_device(&v->dev);
|
||||
}
|
||||
|
||||
static struct vdpa_driver vhost_vdpa_driver = {
|
||||
.driver = {
|
||||
.name = "vhost_vdpa",
|
||||
},
|
||||
.probe = vhost_vdpa_probe,
|
||||
.remove = vhost_vdpa_remove,
|
||||
};
|
||||
|
||||
/*
 * Module init: reserve the character device region, then register the vDPA
 * driver.  The region is given back if the registration fails.
 */
static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		return r;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);

	return r;
}
|
||||
module_init(vhost_vdpa_init);
|
||||
|
||||
/* Module exit: undo vhost_vdpa_init() in reverse order. */
static void __exit vhost_vdpa_exit(void)
{
	/* Unregister the driver first, then give back the device numbers. */
	vdpa_unregister_driver(&vhost_vdpa_driver);

	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
|
||||
module_exit(vhost_vdpa_exit);
|
||||
|
||||
MODULE_VERSION("0.0.1");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Intel Corporation");
|
||||
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");
|
@ -50,10 +50,6 @@ enum {
|
||||
#define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num])
|
||||
#define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num])
|
||||
|
||||
INTERVAL_TREE_DEFINE(struct vhost_umem_node,
|
||||
rb, __u64, __subtree_last,
|
||||
START, LAST, static inline, vhost_umem_interval_tree);
|
||||
|
||||
#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
|
||||
static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
|
||||
{
|
||||
@ -457,7 +453,9 @@ static size_t vhost_get_desc_size(struct vhost_virtqueue *vq,
|
||||
|
||||
void vhost_dev_init(struct vhost_dev *dev,
|
||||
struct vhost_virtqueue **vqs, int nvqs,
|
||||
int iov_limit, int weight, int byte_weight)
|
||||
int iov_limit, int weight, int byte_weight,
|
||||
int (*msg_handler)(struct vhost_dev *dev,
|
||||
struct vhost_iotlb_msg *msg))
|
||||
{
|
||||
struct vhost_virtqueue *vq;
|
||||
int i;
|
||||
@ -473,6 +471,7 @@ void vhost_dev_init(struct vhost_dev *dev,
|
||||
dev->iov_limit = iov_limit;
|
||||
dev->weight = weight;
|
||||
dev->byte_weight = byte_weight;
|
||||
dev->msg_handler = msg_handler;
|
||||
init_llist_head(&dev->work_list);
|
||||
init_waitqueue_head(&dev->wait);
|
||||
INIT_LIST_HEAD(&dev->read_list);
|
||||
@ -581,21 +580,25 @@ err_mm:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
|
||||
|
||||
struct vhost_umem *vhost_dev_reset_owner_prepare(void)
|
||||
static struct vhost_iotlb *iotlb_alloc(void)
|
||||
{
|
||||
return kvzalloc(sizeof(struct vhost_umem), GFP_KERNEL);
|
||||
return vhost_iotlb_alloc(max_iotlb_entries,
|
||||
VHOST_IOTLB_FLAG_RETIRE);
|
||||
}
|
||||
|
||||
struct vhost_iotlb *vhost_dev_reset_owner_prepare(void)
|
||||
{
|
||||
return iotlb_alloc();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare);
|
||||
|
||||
/* Caller should have device mutex */
|
||||
void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_umem *umem)
|
||||
void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem)
|
||||
{
|
||||
int i;
|
||||
|
||||
vhost_dev_cleanup(dev);
|
||||
|
||||
/* Restore memory to default empty mapping. */
|
||||
INIT_LIST_HEAD(&umem->umem_list);
|
||||
dev->umem = umem;
|
||||
/* We don't need VQ locks below since vhost_dev_cleanup makes sure
|
||||
* VQs aren't running.
|
||||
@ -618,28 +621,6 @@ void vhost_dev_stop(struct vhost_dev *dev)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_dev_stop);
|
||||
|
||||
static void vhost_umem_free(struct vhost_umem *umem,
|
||||
struct vhost_umem_node *node)
|
||||
{
|
||||
vhost_umem_interval_tree_remove(node, &umem->umem_tree);
|
||||
list_del(&node->link);
|
||||
kfree(node);
|
||||
umem->numem--;
|
||||
}
|
||||
|
||||
static void vhost_umem_clean(struct vhost_umem *umem)
|
||||
{
|
||||
struct vhost_umem_node *node, *tmp;
|
||||
|
||||
if (!umem)
|
||||
return;
|
||||
|
||||
list_for_each_entry_safe(node, tmp, &umem->umem_list, link)
|
||||
vhost_umem_free(umem, node);
|
||||
|
||||
kvfree(umem);
|
||||
}
|
||||
|
||||
static void vhost_clear_msg(struct vhost_dev *dev)
|
||||
{
|
||||
struct vhost_msg_node *node, *n;
|
||||
@ -677,9 +658,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
|
||||
eventfd_ctx_put(dev->log_ctx);
|
||||
dev->log_ctx = NULL;
|
||||
/* No one will access memory at this point */
|
||||
vhost_umem_clean(dev->umem);
|
||||
vhost_iotlb_free(dev->umem);
|
||||
dev->umem = NULL;
|
||||
vhost_umem_clean(dev->iotlb);
|
||||
vhost_iotlb_free(dev->iotlb);
|
||||
dev->iotlb = NULL;
|
||||
vhost_clear_msg(dev);
|
||||
wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
|
||||
@ -715,27 +696,26 @@ static bool vhost_overflow(u64 uaddr, u64 size)
|
||||
}
|
||||
|
||||
/* Caller should have vq mutex and device mutex. */
|
||||
static bool vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
|
||||
static bool vq_memory_access_ok(void __user *log_base, struct vhost_iotlb *umem,
|
||||
int log_all)
|
||||
{
|
||||
struct vhost_umem_node *node;
|
||||
struct vhost_iotlb_map *map;
|
||||
|
||||
if (!umem)
|
||||
return false;
|
||||
|
||||
list_for_each_entry(node, &umem->umem_list, link) {
|
||||
unsigned long a = node->userspace_addr;
|
||||
list_for_each_entry(map, &umem->list, link) {
|
||||
unsigned long a = map->addr;
|
||||
|
||||
if (vhost_overflow(node->userspace_addr, node->size))
|
||||
if (vhost_overflow(map->addr, map->size))
|
||||
return false;
|
||||
|
||||
|
||||
if (!access_ok((void __user *)a,
|
||||
node->size))
|
||||
if (!access_ok((void __user *)a, map->size))
|
||||
return false;
|
||||
else if (log_all && !log_access_ok(log_base,
|
||||
node->start,
|
||||
node->size))
|
||||
map->start,
|
||||
map->size))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -745,17 +725,17 @@ static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
|
||||
u64 addr, unsigned int size,
|
||||
int type)
|
||||
{
|
||||
const struct vhost_umem_node *node = vq->meta_iotlb[type];
|
||||
const struct vhost_iotlb_map *map = vq->meta_iotlb[type];
|
||||
|
||||
if (!node)
|
||||
if (!map)
|
||||
return NULL;
|
||||
|
||||
return (void *)(uintptr_t)(node->userspace_addr + addr - node->start);
|
||||
return (void *)(uintptr_t)(map->addr + addr - map->start);
|
||||
}
|
||||
|
||||
/* Can we switch to this memory table? */
|
||||
/* Caller should have device mutex but not vq mutex */
|
||||
static bool memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
|
||||
static bool memory_access_ok(struct vhost_dev *d, struct vhost_iotlb *umem,
|
||||
int log_all)
|
||||
{
|
||||
int i;
|
||||
@ -1020,47 +1000,6 @@ static inline int vhost_get_desc(struct vhost_virtqueue *vq,
|
||||
return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
|
||||
}
|
||||
|
||||
static int vhost_new_umem_range(struct vhost_umem *umem,
|
||||
u64 start, u64 size, u64 end,
|
||||
u64 userspace_addr, int perm)
|
||||
{
|
||||
struct vhost_umem_node *tmp, *node;
|
||||
|
||||
if (!size)
|
||||
return -EFAULT;
|
||||
|
||||
node = kmalloc(sizeof(*node), GFP_ATOMIC);
|
||||
if (!node)
|
||||
return -ENOMEM;
|
||||
|
||||
if (umem->numem == max_iotlb_entries) {
|
||||
tmp = list_first_entry(&umem->umem_list, typeof(*tmp), link);
|
||||
vhost_umem_free(umem, tmp);
|
||||
}
|
||||
|
||||
node->start = start;
|
||||
node->size = size;
|
||||
node->last = end;
|
||||
node->userspace_addr = userspace_addr;
|
||||
node->perm = perm;
|
||||
INIT_LIST_HEAD(&node->link);
|
||||
list_add_tail(&node->link, &umem->umem_list);
|
||||
vhost_umem_interval_tree_insert(node, &umem->umem_tree);
|
||||
umem->numem++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vhost_del_umem_range(struct vhost_umem *umem,
|
||||
u64 start, u64 end)
|
||||
{
|
||||
struct vhost_umem_node *node;
|
||||
|
||||
while ((node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
|
||||
start, end)))
|
||||
vhost_umem_free(umem, node);
|
||||
}
|
||||
|
||||
static void vhost_iotlb_notify_vq(struct vhost_dev *d,
|
||||
struct vhost_iotlb_msg *msg)
|
||||
{
|
||||
@ -1117,9 +1056,9 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
|
||||
break;
|
||||
}
|
||||
vhost_vq_meta_reset(dev);
|
||||
if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size,
|
||||
msg->iova + msg->size - 1,
|
||||
msg->uaddr, msg->perm)) {
|
||||
if (vhost_iotlb_add_range(dev->iotlb, msg->iova,
|
||||
msg->iova + msg->size - 1,
|
||||
msg->uaddr, msg->perm)) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
@ -1131,8 +1070,8 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
|
||||
break;
|
||||
}
|
||||
vhost_vq_meta_reset(dev);
|
||||
vhost_del_umem_range(dev->iotlb, msg->iova,
|
||||
msg->iova + msg->size - 1);
|
||||
vhost_iotlb_del_range(dev->iotlb, msg->iova,
|
||||
msg->iova + msg->size - 1);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
@ -1178,7 +1117,12 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
if (vhost_process_iotlb_msg(dev, &msg)) {
|
||||
|
||||
if (dev->msg_handler)
|
||||
ret = dev->msg_handler(dev, &msg);
|
||||
else
|
||||
ret = vhost_process_iotlb_msg(dev, &msg);
|
||||
if (ret) {
|
||||
ret = -EFAULT;
|
||||
goto done;
|
||||
}
|
||||
@ -1311,44 +1255,42 @@ static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
|
||||
}
|
||||
|
||||
static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
|
||||
const struct vhost_umem_node *node,
|
||||
const struct vhost_iotlb_map *map,
|
||||
int type)
|
||||
{
|
||||
int access = (type == VHOST_ADDR_USED) ?
|
||||
VHOST_ACCESS_WO : VHOST_ACCESS_RO;
|
||||
|
||||
if (likely(node->perm & access))
|
||||
vq->meta_iotlb[type] = node;
|
||||
if (likely(map->perm & access))
|
||||
vq->meta_iotlb[type] = map;
|
||||
}
|
||||
|
||||
static bool iotlb_access_ok(struct vhost_virtqueue *vq,
|
||||
int access, u64 addr, u64 len, int type)
|
||||
{
|
||||
const struct vhost_umem_node *node;
|
||||
struct vhost_umem *umem = vq->iotlb;
|
||||
const struct vhost_iotlb_map *map;
|
||||
struct vhost_iotlb *umem = vq->iotlb;
|
||||
u64 s = 0, size, orig_addr = addr, last = addr + len - 1;
|
||||
|
||||
if (vhost_vq_meta_fetch(vq, addr, len, type))
|
||||
return true;
|
||||
|
||||
while (len > s) {
|
||||
node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
|
||||
addr,
|
||||
last);
|
||||
if (node == NULL || node->start > addr) {
|
||||
map = vhost_iotlb_itree_first(umem, addr, last);
|
||||
if (map == NULL || map->start > addr) {
|
||||
vhost_iotlb_miss(vq, addr, access);
|
||||
return false;
|
||||
} else if (!(node->perm & access)) {
|
||||
} else if (!(map->perm & access)) {
|
||||
/* Report the possible access violation by
|
||||
* request another translation from userspace.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
size = node->size - addr + node->start;
|
||||
size = map->size - addr + map->start;
|
||||
|
||||
if (orig_addr == addr && size >= len)
|
||||
vhost_vq_meta_update(vq, node, type);
|
||||
vhost_vq_meta_update(vq, map, type);
|
||||
|
||||
s += size;
|
||||
addr += size;
|
||||
@ -1364,12 +1306,12 @@ int vq_meta_prefetch(struct vhost_virtqueue *vq)
|
||||
if (!vq->iotlb)
|
||||
return 1;
|
||||
|
||||
return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
|
||||
return iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->desc,
|
||||
vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) &&
|
||||
iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail,
|
||||
iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->avail,
|
||||
vhost_get_avail_size(vq, num),
|
||||
VHOST_ADDR_AVAIL) &&
|
||||
iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used,
|
||||
iotlb_access_ok(vq, VHOST_MAP_WO, (u64)(uintptr_t)vq->used,
|
||||
vhost_get_used_size(vq, num), VHOST_ADDR_USED);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vq_meta_prefetch);
|
||||
@ -1408,25 +1350,11 @@ bool vhost_vq_access_ok(struct vhost_virtqueue *vq)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_vq_access_ok);
|
||||
|
||||
static struct vhost_umem *vhost_umem_alloc(void)
|
||||
{
|
||||
struct vhost_umem *umem = kvzalloc(sizeof(*umem), GFP_KERNEL);
|
||||
|
||||
if (!umem)
|
||||
return NULL;
|
||||
|
||||
umem->umem_tree = RB_ROOT_CACHED;
|
||||
umem->numem = 0;
|
||||
INIT_LIST_HEAD(&umem->umem_list);
|
||||
|
||||
return umem;
|
||||
}
|
||||
|
||||
static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
|
||||
{
|
||||
struct vhost_memory mem, *newmem;
|
||||
struct vhost_memory_region *region;
|
||||
struct vhost_umem *newumem, *oldumem;
|
||||
struct vhost_iotlb *newumem, *oldumem;
|
||||
unsigned long size = offsetof(struct vhost_memory, regions);
|
||||
int i;
|
||||
|
||||
@ -1448,7 +1376,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
newumem = vhost_umem_alloc();
|
||||
newumem = iotlb_alloc();
|
||||
if (!newumem) {
|
||||
kvfree(newmem);
|
||||
return -ENOMEM;
|
||||
@ -1457,13 +1385,12 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
|
||||
for (region = newmem->regions;
|
||||
region < newmem->regions + mem.nregions;
|
||||
region++) {
|
||||
if (vhost_new_umem_range(newumem,
|
||||
region->guest_phys_addr,
|
||||
region->memory_size,
|
||||
region->guest_phys_addr +
|
||||
region->memory_size - 1,
|
||||
region->userspace_addr,
|
||||
VHOST_ACCESS_RW))
|
||||
if (vhost_iotlb_add_range(newumem,
|
||||
region->guest_phys_addr,
|
||||
region->guest_phys_addr +
|
||||
region->memory_size - 1,
|
||||
region->userspace_addr,
|
||||
VHOST_MAP_RW))
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -1481,11 +1408,11 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
|
||||
}
|
||||
|
||||
kvfree(newmem);
|
||||
vhost_umem_clean(oldumem);
|
||||
vhost_iotlb_free(oldumem);
|
||||
return 0;
|
||||
|
||||
err:
|
||||
vhost_umem_clean(newumem);
|
||||
vhost_iotlb_free(newumem);
|
||||
kvfree(newmem);
|
||||
return -EFAULT;
|
||||
}
|
||||
@ -1726,10 +1653,10 @@ EXPORT_SYMBOL_GPL(vhost_vring_ioctl);
|
||||
|
||||
int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled)
|
||||
{
|
||||
struct vhost_umem *niotlb, *oiotlb;
|
||||
struct vhost_iotlb *niotlb, *oiotlb;
|
||||
int i;
|
||||
|
||||
niotlb = vhost_umem_alloc();
|
||||
niotlb = iotlb_alloc();
|
||||
if (!niotlb)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -1745,7 +1672,7 @@ int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled)
|
||||
mutex_unlock(&vq->mutex);
|
||||
}
|
||||
|
||||
vhost_umem_clean(oiotlb);
|
||||
vhost_iotlb_free(oiotlb);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1875,8 +1802,8 @@ static int log_write(void __user *log_base,
|
||||
|
||||
static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len)
|
||||
{
|
||||
struct vhost_umem *umem = vq->umem;
|
||||
struct vhost_umem_node *u;
|
||||
struct vhost_iotlb *umem = vq->umem;
|
||||
struct vhost_iotlb_map *u;
|
||||
u64 start, end, l, min;
|
||||
int r;
|
||||
bool hit = false;
|
||||
@ -1886,16 +1813,15 @@ static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len)
|
||||
/* More than one GPAs can be mapped into a single HVA. So
|
||||
* iterate all possible umems here to be safe.
|
||||
*/
|
||||
list_for_each_entry(u, &umem->umem_list, link) {
|
||||
if (u->userspace_addr > hva - 1 + len ||
|
||||
u->userspace_addr - 1 + u->size < hva)
|
||||
list_for_each_entry(u, &umem->list, link) {
|
||||
if (u->addr > hva - 1 + len ||
|
||||
u->addr - 1 + u->size < hva)
|
||||
continue;
|
||||
start = max(u->userspace_addr, hva);
|
||||
end = min(u->userspace_addr - 1 + u->size,
|
||||
hva - 1 + len);
|
||||
start = max(u->addr, hva);
|
||||
end = min(u->addr - 1 + u->size, hva - 1 + len);
|
||||
l = end - start + 1;
|
||||
r = log_write(vq->log_base,
|
||||
u->start + start - u->userspace_addr,
|
||||
u->start + start - u->addr,
|
||||
l);
|
||||
if (r < 0)
|
||||
return r;
|
||||
@ -2046,9 +1972,9 @@ EXPORT_SYMBOL_GPL(vhost_vq_init_access);
|
||||
static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
|
||||
struct iovec iov[], int iov_size, int access)
|
||||
{
|
||||
const struct vhost_umem_node *node;
|
||||
const struct vhost_iotlb_map *map;
|
||||
struct vhost_dev *dev = vq->dev;
|
||||
struct vhost_umem *umem = dev->iotlb ? dev->iotlb : dev->umem;
|
||||
struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem;
|
||||
struct iovec *_iov;
|
||||
u64 s = 0;
|
||||
int ret = 0;
|
||||
@ -2060,25 +1986,24 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
|
||||
break;
|
||||
}
|
||||
|
||||
node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
|
||||
addr, addr + len - 1);
|
||||
if (node == NULL || node->start > addr) {
|
||||
map = vhost_iotlb_itree_first(umem, addr, addr + len - 1);
|
||||
if (map == NULL || map->start > addr) {
|
||||
if (umem != dev->iotlb) {
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
}
|
||||
ret = -EAGAIN;
|
||||
break;
|
||||
} else if (!(node->perm & access)) {
|
||||
} else if (!(map->perm & access)) {
|
||||
ret = -EPERM;
|
||||
break;
|
||||
}
|
||||
|
||||
_iov = iov + ret;
|
||||
size = node->size - addr + node->start;
|
||||
size = map->size - addr + map->start;
|
||||
_iov->iov_len = min((u64)len - s, size);
|
||||
_iov->iov_base = (void __user *)(unsigned long)
|
||||
(node->userspace_addr + addr - node->start);
|
||||
(map->addr + addr - map->start);
|
||||
s += size;
|
||||
addr += size;
|
||||
++ret;
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <linux/virtio_config.h>
|
||||
#include <linux/virtio_ring.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/vhost_iotlb.h>
|
||||
|
||||
struct vhost_work;
|
||||
typedef void (*vhost_work_fn_t)(struct vhost_work *work);
|
||||
@ -52,27 +53,6 @@ struct vhost_log {
|
||||
u64 len;
|
||||
};
|
||||
|
||||
#define START(node) ((node)->start)
|
||||
#define LAST(node) ((node)->last)
|
||||
|
||||
struct vhost_umem_node {
|
||||
struct rb_node rb;
|
||||
struct list_head link;
|
||||
__u64 start;
|
||||
__u64 last;
|
||||
__u64 size;
|
||||
__u64 userspace_addr;
|
||||
__u32 perm;
|
||||
__u32 flags_padding;
|
||||
__u64 __subtree_last;
|
||||
};
|
||||
|
||||
struct vhost_umem {
|
||||
struct rb_root_cached umem_tree;
|
||||
struct list_head umem_list;
|
||||
int numem;
|
||||
};
|
||||
|
||||
enum vhost_uaddr_type {
|
||||
VHOST_ADDR_DESC = 0,
|
||||
VHOST_ADDR_AVAIL = 1,
|
||||
@ -90,7 +70,7 @@ struct vhost_virtqueue {
|
||||
struct vring_desc __user *desc;
|
||||
struct vring_avail __user *avail;
|
||||
struct vring_used __user *used;
|
||||
const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
|
||||
const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS];
|
||||
struct file *kick;
|
||||
struct eventfd_ctx *call_ctx;
|
||||
struct eventfd_ctx *error_ctx;
|
||||
@ -128,8 +108,8 @@ struct vhost_virtqueue {
|
||||
struct iovec *indirect;
|
||||
struct vring_used_elem *heads;
|
||||
/* Protected by virtqueue mutex. */
|
||||
struct vhost_umem *umem;
|
||||
struct vhost_umem *iotlb;
|
||||
struct vhost_iotlb *umem;
|
||||
struct vhost_iotlb *iotlb;
|
||||
void *private_data;
|
||||
u64 acked_features;
|
||||
u64 acked_backend_features;
|
||||
@ -164,8 +144,8 @@ struct vhost_dev {
|
||||
struct eventfd_ctx *log_ctx;
|
||||
struct llist_head work_list;
|
||||
struct task_struct *worker;
|
||||
struct vhost_umem *umem;
|
||||
struct vhost_umem *iotlb;
|
||||
struct vhost_iotlb *umem;
|
||||
struct vhost_iotlb *iotlb;
|
||||
spinlock_t iotlb_lock;
|
||||
struct list_head read_list;
|
||||
struct list_head pending_list;
|
||||
@ -174,16 +154,20 @@ struct vhost_dev {
|
||||
int weight;
|
||||
int byte_weight;
|
||||
u64 kcov_handle;
|
||||
int (*msg_handler)(struct vhost_dev *dev,
|
||||
struct vhost_iotlb_msg *msg);
|
||||
};
|
||||
|
||||
bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
|
||||
void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
|
||||
int nvqs, int iov_limit, int weight, int byte_weight);
|
||||
int nvqs, int iov_limit, int weight, int byte_weight,
|
||||
int (*msg_handler)(struct vhost_dev *dev,
|
||||
struct vhost_iotlb_msg *msg));
|
||||
long vhost_dev_set_owner(struct vhost_dev *dev);
|
||||
bool vhost_dev_has_owner(struct vhost_dev *dev);
|
||||
long vhost_dev_check_owner(struct vhost_dev *);
|
||||
struct vhost_umem *vhost_dev_reset_owner_prepare(void);
|
||||
void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *);
|
||||
struct vhost_iotlb *vhost_dev_reset_owner_prepare(void);
|
||||
void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *iotlb);
|
||||
void vhost_dev_cleanup(struct vhost_dev *);
|
||||
void vhost_dev_stop(struct vhost_dev *);
|
||||
long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
|
||||
@ -229,6 +213,9 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
|
||||
struct iov_iter *from);
|
||||
int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
|
||||
|
||||
void vhost_iotlb_map_free(struct vhost_iotlb *iotlb,
|
||||
struct vhost_iotlb_map *map);
|
||||
|
||||
#define vq_err(vq, fmt, ...) do { \
|
||||
pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
|
||||
if ((vq)->error_ctx) \
|
||||
|
@ -13,6 +13,9 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/bvec.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/vhost_iotlb.h>
|
||||
#include <uapi/linux/virtio_config.h>
|
||||
|
||||
static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
|
||||
@ -71,9 +74,11 @@ static inline int __vringh_get_head(const struct vringh *vrh,
|
||||
}
|
||||
|
||||
/* Copy some bytes to/from the iovec. Returns num copied. */
|
||||
static inline ssize_t vringh_iov_xfer(struct vringh_kiov *iov,
|
||||
static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
|
||||
struct vringh_kiov *iov,
|
||||
void *ptr, size_t len,
|
||||
int (*xfer)(void *addr, void *ptr,
|
||||
int (*xfer)(const struct vringh *vrh,
|
||||
void *addr, void *ptr,
|
||||
size_t len))
|
||||
{
|
||||
int err, done = 0;
|
||||
@ -82,7 +87,7 @@ static inline ssize_t vringh_iov_xfer(struct vringh_kiov *iov,
|
||||
size_t partlen;
|
||||
|
||||
partlen = min(iov->iov[iov->i].iov_len, len);
|
||||
err = xfer(iov->iov[iov->i].iov_base, ptr, partlen);
|
||||
err = xfer(vrh, iov->iov[iov->i].iov_base, ptr, partlen);
|
||||
if (err)
|
||||
return err;
|
||||
done += partlen;
|
||||
@ -96,6 +101,7 @@ static inline ssize_t vringh_iov_xfer(struct vringh_kiov *iov,
|
||||
/* Fix up old iov element then increment. */
|
||||
iov->iov[iov->i].iov_len = iov->consumed;
|
||||
iov->iov[iov->i].iov_base -= iov->consumed;
|
||||
|
||||
|
||||
iov->consumed = 0;
|
||||
iov->i++;
|
||||
@ -227,7 +233,8 @@ static int slow_copy(struct vringh *vrh, void *dst, const void *src,
|
||||
u64 addr,
|
||||
struct vringh_range *r),
|
||||
struct vringh_range *range,
|
||||
int (*copy)(void *dst, const void *src, size_t len))
|
||||
int (*copy)(const struct vringh *vrh,
|
||||
void *dst, const void *src, size_t len))
|
||||
{
|
||||
size_t part, len = sizeof(struct vring_desc);
|
||||
|
||||
@ -241,7 +248,7 @@ static int slow_copy(struct vringh *vrh, void *dst, const void *src,
|
||||
if (!rcheck(vrh, addr, &part, range, getrange))
|
||||
return -EINVAL;
|
||||
|
||||
err = copy(dst, src, part);
|
||||
err = copy(vrh, dst, src, part);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@ -262,7 +269,8 @@ __vringh_iov(struct vringh *vrh, u16 i,
|
||||
struct vringh_range *)),
|
||||
bool (*getrange)(struct vringh *, u64, struct vringh_range *),
|
||||
gfp_t gfp,
|
||||
int (*copy)(void *dst, const void *src, size_t len))
|
||||
int (*copy)(const struct vringh *vrh,
|
||||
void *dst, const void *src, size_t len))
|
||||
{
|
||||
int err, count = 0, up_next, desc_max;
|
||||
struct vring_desc desc, *descs;
|
||||
@ -291,7 +299,7 @@ __vringh_iov(struct vringh *vrh, u16 i,
|
||||
err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
|
||||
&slowrange, copy);
|
||||
else
|
||||
err = copy(&desc, &descs[i], sizeof(desc));
|
||||
err = copy(vrh, &desc, &descs[i], sizeof(desc));
|
||||
if (unlikely(err))
|
||||
goto fail;
|
||||
|
||||
@ -404,7 +412,8 @@ static inline int __vringh_complete(struct vringh *vrh,
|
||||
unsigned int num_used,
|
||||
int (*putu16)(const struct vringh *vrh,
|
||||
__virtio16 *p, u16 val),
|
||||
int (*putused)(struct vring_used_elem *dst,
|
||||
int (*putused)(const struct vringh *vrh,
|
||||
struct vring_used_elem *dst,
|
||||
const struct vring_used_elem
|
||||
*src, unsigned num))
|
||||
{
|
||||
@ -420,12 +429,12 @@ static inline int __vringh_complete(struct vringh *vrh,
|
||||
/* Compiler knows num_used == 1 sometimes, hence extra check */
|
||||
if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
|
||||
u16 part = vrh->vring.num - off;
|
||||
err = putused(&used_ring->ring[off], used, part);
|
||||
err = putused(vrh, &used_ring->ring[off], used, part);
|
||||
if (!err)
|
||||
err = putused(&used_ring->ring[0], used + part,
|
||||
err = putused(vrh, &used_ring->ring[0], used + part,
|
||||
num_used - part);
|
||||
} else
|
||||
err = putused(&used_ring->ring[off], used, num_used);
|
||||
err = putused(vrh, &used_ring->ring[off], used, num_used);
|
||||
|
||||
if (err) {
|
||||
vringh_bad("Failed to write %u used entries %u at %p",
|
||||
@ -564,13 +573,15 @@ static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
|
||||
return put_user(v, (__force __virtio16 __user *)p);
|
||||
}
|
||||
|
||||
static inline int copydesc_user(void *dst, const void *src, size_t len)
|
||||
static inline int copydesc_user(const struct vringh *vrh,
|
||||
void *dst, const void *src, size_t len)
|
||||
{
|
||||
return copy_from_user(dst, (__force void __user *)src, len) ?
|
||||
-EFAULT : 0;
|
||||
}
|
||||
|
||||
static inline int putused_user(struct vring_used_elem *dst,
|
||||
static inline int putused_user(const struct vringh *vrh,
|
||||
struct vring_used_elem *dst,
|
||||
const struct vring_used_elem *src,
|
||||
unsigned int num)
|
||||
{
|
||||
@ -578,13 +589,15 @@ static inline int putused_user(struct vring_used_elem *dst,
|
||||
sizeof(*dst) * num) ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static inline int xfer_from_user(void *src, void *dst, size_t len)
|
||||
static inline int xfer_from_user(const struct vringh *vrh, void *src,
|
||||
void *dst, size_t len)
|
||||
{
|
||||
return copy_from_user(dst, (__force void __user *)src, len) ?
|
||||
-EFAULT : 0;
|
||||
}
|
||||
|
||||
static inline int xfer_to_user(void *dst, void *src, size_t len)
|
||||
static inline int xfer_to_user(const struct vringh *vrh,
|
||||
void *dst, void *src, size_t len)
|
||||
{
|
||||
return copy_to_user((__force void __user *)dst, src, len) ?
|
||||
-EFAULT : 0;
|
||||
@ -706,7 +719,7 @@ EXPORT_SYMBOL(vringh_getdesc_user);
|
||||
*/
|
||||
ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
|
||||
{
|
||||
return vringh_iov_xfer((struct vringh_kiov *)riov,
|
||||
return vringh_iov_xfer(NULL, (struct vringh_kiov *)riov,
|
||||
dst, len, xfer_from_user);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_iov_pull_user);
|
||||
@ -722,7 +735,7 @@ EXPORT_SYMBOL(vringh_iov_pull_user);
|
||||
ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
|
||||
const void *src, size_t len)
|
||||
{
|
||||
return vringh_iov_xfer((struct vringh_kiov *)wiov,
|
||||
return vringh_iov_xfer(NULL, (struct vringh_kiov *)wiov,
|
||||
(void *)src, len, xfer_to_user);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_iov_push_user);
|
||||
@ -832,13 +845,15 @@ static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int copydesc_kern(void *dst, const void *src, size_t len)
|
||||
static inline int copydesc_kern(const struct vringh *vrh,
|
||||
void *dst, const void *src, size_t len)
|
||||
{
|
||||
memcpy(dst, src, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int putused_kern(struct vring_used_elem *dst,
|
||||
static inline int putused_kern(const struct vringh *vrh,
|
||||
struct vring_used_elem *dst,
|
||||
const struct vring_used_elem *src,
|
||||
unsigned int num)
|
||||
{
|
||||
@ -846,13 +861,15 @@ static inline int putused_kern(struct vring_used_elem *dst,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int xfer_kern(void *src, void *dst, size_t len)
|
||||
static inline int xfer_kern(const struct vringh *vrh, void *src,
|
||||
void *dst, size_t len)
|
||||
{
|
||||
memcpy(dst, src, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int kern_xfer(void *dst, void *src, size_t len)
|
||||
static inline int kern_xfer(const struct vringh *vrh, void *dst,
|
||||
void *src, size_t len)
|
||||
{
|
||||
memcpy(dst, src, len);
|
||||
return 0;
|
||||
@ -949,7 +966,7 @@ EXPORT_SYMBOL(vringh_getdesc_kern);
|
||||
*/
|
||||
ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
|
||||
{
|
||||
return vringh_iov_xfer(riov, dst, len, xfer_kern);
|
||||
return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_iov_pull_kern);
|
||||
|
||||
@ -964,7 +981,7 @@ EXPORT_SYMBOL(vringh_iov_pull_kern);
|
||||
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
|
||||
const void *src, size_t len)
|
||||
{
|
||||
return vringh_iov_xfer(wiov, (void *)src, len, kern_xfer);
|
||||
return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_iov_push_kern);
|
||||
|
||||
@ -1042,4 +1059,362 @@ int vringh_need_notify_kern(struct vringh *vrh)
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_need_notify_kern);
|
||||
|
||||
static int iotlb_translate(const struct vringh *vrh,
|
||||
u64 addr, u64 len, struct bio_vec iov[],
|
||||
int iov_size, u32 perm)
|
||||
{
|
||||
struct vhost_iotlb_map *map;
|
||||
struct vhost_iotlb *iotlb = vrh->iotlb;
|
||||
int ret = 0;
|
||||
u64 s = 0;
|
||||
|
||||
while (len > s) {
|
||||
u64 size, pa, pfn;
|
||||
|
||||
if (unlikely(ret >= iov_size)) {
|
||||
ret = -ENOBUFS;
|
||||
break;
|
||||
}
|
||||
|
||||
map = vhost_iotlb_itree_first(iotlb, addr,
|
||||
addr + len - 1);
|
||||
if (!map || map->start > addr) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
} else if (!(map->perm & perm)) {
|
||||
ret = -EPERM;
|
||||
break;
|
||||
}
|
||||
|
||||
size = map->size - addr + map->start;
|
||||
pa = map->addr + addr - map->start;
|
||||
pfn = pa >> PAGE_SHIFT;
|
||||
iov[ret].bv_page = pfn_to_page(pfn);
|
||||
iov[ret].bv_len = min(len - s, size);
|
||||
iov[ret].bv_offset = pa & (PAGE_SIZE - 1);
|
||||
s += size;
|
||||
addr += size;
|
||||
++ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
|
||||
void *src, size_t len)
|
||||
{
|
||||
struct iov_iter iter;
|
||||
struct bio_vec iov[16];
|
||||
int ret;
|
||||
|
||||
ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
|
||||
len, iov, 16, VHOST_MAP_RO);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
iov_iter_bvec(&iter, READ, iov, ret, len);
|
||||
|
||||
ret = copy_from_iter(dst, len, &iter);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
|
||||
void *src, size_t len)
|
||||
{
|
||||
struct iov_iter iter;
|
||||
struct bio_vec iov[16];
|
||||
int ret;
|
||||
|
||||
ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
|
||||
len, iov, 16, VHOST_MAP_WO);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
iov_iter_bvec(&iter, WRITE, iov, ret, len);
|
||||
|
||||
return copy_to_iter(src, len, &iter);
|
||||
}
|
||||
|
||||
static inline int getu16_iotlb(const struct vringh *vrh,
|
||||
u16 *val, const __virtio16 *p)
|
||||
{
|
||||
struct bio_vec iov;
|
||||
void *kaddr, *from;
|
||||
int ret;
|
||||
|
||||
/* Atomic read is needed for getu16 */
|
||||
ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
|
||||
&iov, 1, VHOST_MAP_RO);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
kaddr = kmap_atomic(iov.bv_page);
|
||||
from = kaddr + iov.bv_offset;
|
||||
*val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
|
||||
kunmap_atomic(kaddr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int putu16_iotlb(const struct vringh *vrh,
|
||||
__virtio16 *p, u16 val)
|
||||
{
|
||||
struct bio_vec iov;
|
||||
void *kaddr, *to;
|
||||
int ret;
|
||||
|
||||
/* Atomic write is needed for putu16 */
|
||||
ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
|
||||
&iov, 1, VHOST_MAP_WO);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
kaddr = kmap_atomic(iov.bv_page);
|
||||
to = kaddr + iov.bv_offset;
|
||||
WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
|
||||
kunmap_atomic(kaddr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int copydesc_iotlb(const struct vringh *vrh,
|
||||
void *dst, const void *src, size_t len)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = copy_from_iotlb(vrh, dst, (void *)src, len);
|
||||
if (ret != len)
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int xfer_from_iotlb(const struct vringh *vrh, void *src,
|
||||
void *dst, size_t len)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = copy_from_iotlb(vrh, dst, src, len);
|
||||
if (ret != len)
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int xfer_to_iotlb(const struct vringh *vrh,
|
||||
void *dst, void *src, size_t len)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = copy_to_iotlb(vrh, dst, src, len);
|
||||
if (ret != len)
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int putused_iotlb(const struct vringh *vrh,
|
||||
struct vring_used_elem *dst,
|
||||
const struct vring_used_elem *src,
|
||||
unsigned int num)
|
||||
{
|
||||
int size = num * sizeof(*dst);
|
||||
int ret;
|
||||
|
||||
ret = copy_to_iotlb(vrh, dst, (void *)src, num * sizeof(*dst));
|
||||
if (ret != size)
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* vringh_init_iotlb - initialize a vringh for a ring with IOTLB.
|
||||
* @vrh: the vringh to initialize.
|
||||
* @features: the feature bits for this ring.
|
||||
* @num: the number of elements.
|
||||
* @weak_barriers: true if we only need memory barriers, not I/O.
|
||||
* @desc: the userspace descriptor pointer.
|
||||
* @avail: the userspace avail pointer.
|
||||
* @used: the userspace used pointer.
|
||||
*
|
||||
* Returns an error if num is invalid.
|
||||
*/
|
||||
int vringh_init_iotlb(struct vringh *vrh, u64 features,
|
||||
unsigned int num, bool weak_barriers,
|
||||
struct vring_desc *desc,
|
||||
struct vring_avail *avail,
|
||||
struct vring_used *used)
|
||||
{
|
||||
return vringh_init_kern(vrh, features, num, weak_barriers,
|
||||
desc, avail, used);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_init_iotlb);
|
||||
|
||||
/**
|
||||
* vringh_set_iotlb - associate an IOTLB with a vringh.
|
||||
* @vrh: the vring
|
||||
* @iotlb: iotlb associated with this vring
|
||||
*/
|
||||
void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb)
|
||||
{
|
||||
vrh->iotlb = iotlb;
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_set_iotlb);
|
||||
|
||||
/**
|
||||
* vringh_getdesc_iotlb - get next available descriptor from ring with
|
||||
* IOTLB.
|
||||
* @vrh: the kernelspace vring.
|
||||
* @riov: where to put the readable descriptors (or NULL)
|
||||
* @wiov: where to put the writable descriptors (or NULL)
|
||||
* @head: head index we received, for passing to vringh_complete_iotlb().
|
||||
* @gfp: flags for allocating larger riov/wiov.
|
||||
*
|
||||
* Returns 0 if there was no descriptor, 1 if there was, or -errno.
|
||||
*
|
||||
* Note that on error return, you can tell the difference between an
|
||||
* invalid ring and a single invalid descriptor: in the former case,
|
||||
* *head will be vrh->vring.num. You may be able to ignore an invalid
|
||||
* descriptor, but there's not much you can do with an invalid ring.
|
||||
*
|
||||
* Note that you may need to clean up riov and wiov, even on error!
|
||||
*/
|
||||
int vringh_getdesc_iotlb(struct vringh *vrh,
|
||||
struct vringh_kiov *riov,
|
||||
struct vringh_kiov *wiov,
|
||||
u16 *head,
|
||||
gfp_t gfp)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = __vringh_get_head(vrh, getu16_iotlb, &vrh->last_avail_idx);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
/* Empty... */
|
||||
if (err == vrh->vring.num)
|
||||
return 0;
|
||||
|
||||
*head = err;
|
||||
err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
|
||||
gfp, copydesc_iotlb);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return 1;
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_getdesc_iotlb);
|
||||
|
||||
/**
|
||||
* vringh_iov_pull_iotlb - copy bytes from vring_iov.
|
||||
* @vrh: the vring.
|
||||
* @riov: the riov as passed to vringh_getdesc_iotlb() (updated as we consume)
|
||||
* @dst: the place to copy.
|
||||
* @len: the maximum length to copy.
|
||||
*
|
||||
* Returns the bytes copied <= len or a negative errno.
|
||||
*/
|
||||
ssize_t vringh_iov_pull_iotlb(struct vringh *vrh,
|
||||
struct vringh_kiov *riov,
|
||||
void *dst, size_t len)
|
||||
{
|
||||
return vringh_iov_xfer(vrh, riov, dst, len, xfer_from_iotlb);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_iov_pull_iotlb);
|
||||
|
||||
/**
|
||||
* vringh_iov_push_iotlb - copy bytes into vring_iov.
|
||||
* @vrh: the vring.
|
||||
* @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume)
|
||||
* @src: the place to copy from.
|
||||
* @len: the maximum length to copy.
|
||||
*
|
||||
* Returns the bytes copied <= len or a negative errno.
|
||||
*/
|
||||
ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
|
||||
struct vringh_kiov *wiov,
|
||||
const void *src, size_t len)
|
||||
{
|
||||
return vringh_iov_xfer(vrh, wiov, (void *)src, len, xfer_to_iotlb);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_iov_push_iotlb);
|
||||
|
||||
/**
|
||||
* vringh_abandon_iotlb - we've decided not to handle the descriptor(s).
|
||||
* @vrh: the vring.
|
||||
* @num: the number of descriptors to put back (ie. num
|
||||
* vringh_getdesc_iotlb() to undo).
|
||||
*
|
||||
* The next vringh_getdesc_iotlb() will return the old descriptor(s) again.
|
||||
*/
|
||||
void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num)
|
||||
{
|
||||
/* We only update vring_avail_event(vr) when we want to be notified,
|
||||
* so we haven't changed that yet.
|
||||
*/
|
||||
vrh->last_avail_idx -= num;
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_abandon_iotlb);
|
||||
|
||||
/**
|
||||
* vringh_complete_iotlb - we've finished with descriptor, publish it.
|
||||
* @vrh: the vring.
|
||||
* @head: the head as filled in by vringh_getdesc_iotlb.
|
||||
* @len: the length of data we have written.
|
||||
*
|
||||
* You should check vringh_need_notify_iotlb() after one or more calls
|
||||
* to this function.
|
||||
*/
|
||||
int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len)
|
||||
{
|
||||
struct vring_used_elem used;
|
||||
|
||||
used.id = cpu_to_vringh32(vrh, head);
|
||||
used.len = cpu_to_vringh32(vrh, len);
|
||||
|
||||
return __vringh_complete(vrh, &used, 1, putu16_iotlb, putused_iotlb);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_complete_iotlb);
|
||||
|
||||
/**
|
||||
* vringh_notify_enable_iotlb - we want to know if something changes.
|
||||
* @vrh: the vring.
|
||||
*
|
||||
* This always enables notifications, but returns false if there are
|
||||
* now more buffers available in the vring.
|
||||
*/
|
||||
bool vringh_notify_enable_iotlb(struct vringh *vrh)
|
||||
{
|
||||
return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_notify_enable_iotlb);
|
||||
|
||||
/**
|
||||
* vringh_notify_disable_iotlb - don't tell us if something changes.
|
||||
* @vrh: the vring.
|
||||
*
|
||||
* This is our normal running state: we disable and then only enable when
|
||||
* we're going to sleep.
|
||||
*/
|
||||
void vringh_notify_disable_iotlb(struct vringh *vrh)
|
||||
{
|
||||
__vringh_notify_disable(vrh, putu16_iotlb);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_notify_disable_iotlb);
|
||||
|
||||
/**
|
||||
* vringh_need_notify_iotlb - must we tell the other side about used buffers?
|
||||
* @vrh: the vring we've called vringh_complete_iotlb() on.
|
||||
*
|
||||
* Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
|
||||
*/
|
||||
int vringh_need_notify_iotlb(struct vringh *vrh)
|
||||
{
|
||||
return __vringh_need_notify(vrh, getu16_iotlb);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_need_notify_iotlb);
|
||||
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
|
@ -621,7 +621,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
|
||||
|
||||
vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
|
||||
UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
|
||||
VHOST_VSOCK_WEIGHT);
|
||||
VHOST_VSOCK_WEIGHT, NULL);
|
||||
|
||||
file->private_data = vsock;
|
||||
spin_lock_init(&vsock->send_pkt_list_lock);
|
||||
|
@ -43,6 +43,19 @@ config VIRTIO_PCI_LEGACY
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config VIRTIO_VDPA
|
||||
tristate "vDPA driver for virtio devices"
|
||||
select VDPA
|
||||
select VIRTIO
|
||||
help
|
||||
This driver provides support for virtio based paravirtual
|
||||
device driver over vDPA bus. For this to be useful, you need
|
||||
an appropriate vDPA device implementation that operates on a
|
||||
physical device to allow the datapath of virtio to be
|
||||
offloaded to hardware.
|
||||
|
||||
If unsure, say M.
|
||||
|
||||
config VIRTIO_PMEM
|
||||
tristate "Support for virtio pmem driver"
|
||||
depends on VIRTIO
|
||||
|
@ -6,3 +6,4 @@ virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
|
||||
virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o
|
||||
obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
|
||||
obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o
|
||||
obj-$(CONFIG_VIRTIO_VDPA) += virtio_vdpa.o
|
||||
|
396
drivers/virtio/virtio_vdpa.c
Normal file
396
drivers/virtio/virtio_vdpa.c
Normal file
@ -0,0 +1,396 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* VIRTIO based driver for vDPA device
|
||||
*
|
||||
* Copyright (c) 2020, Red Hat. All rights reserved.
|
||||
* Author: Jason Wang <jasowang@redhat.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/uuid.h>
|
||||
#include <linux/virtio.h>
|
||||
#include <linux/vdpa.h>
|
||||
#include <linux/virtio_config.h>
|
||||
#include <linux/virtio_ring.h>
|
||||
|
||||
#define MOD_VERSION "0.1"
|
||||
#define MOD_AUTHOR "Jason Wang <jasowang@redhat.com>"
|
||||
#define MOD_DESC "vDPA bus driver for virtio devices"
|
||||
#define MOD_LICENSE "GPL v2"
|
||||
|
||||
struct virtio_vdpa_device {
|
||||
struct virtio_device vdev;
|
||||
struct vdpa_device *vdpa;
|
||||
u64 features;
|
||||
|
||||
/* The lock to protect virtqueue list */
|
||||
spinlock_t lock;
|
||||
/* List of virtio_vdpa_vq_info */
|
||||
struct list_head virtqueues;
|
||||
};
|
||||
|
||||
struct virtio_vdpa_vq_info {
|
||||
/* the actual virtqueue */
|
||||
struct virtqueue *vq;
|
||||
|
||||
/* the list node for the virtqueues list */
|
||||
struct list_head node;
|
||||
};
|
||||
|
||||
static inline struct virtio_vdpa_device *
|
||||
to_virtio_vdpa_device(struct virtio_device *dev)
|
||||
{
|
||||
return container_of(dev, struct virtio_vdpa_device, vdev);
|
||||
}
|
||||
|
||||
static struct vdpa_device *vd_get_vdpa(struct virtio_device *vdev)
|
||||
{
|
||||
return to_virtio_vdpa_device(vdev)->vdpa;
|
||||
}
|
||||
|
||||
static void virtio_vdpa_get(struct virtio_device *vdev, unsigned offset,
|
||||
void *buf, unsigned len)
|
||||
{
|
||||
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
|
||||
ops->get_config(vdpa, offset, buf, len);
|
||||
}
|
||||
|
||||
static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset,
|
||||
const void *buf, unsigned len)
|
||||
{
|
||||
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
|
||||
ops->set_config(vdpa, offset, buf, len);
|
||||
}
|
||||
|
||||
static u32 virtio_vdpa_generation(struct virtio_device *vdev)
|
||||
{
|
||||
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
|
||||
if (ops->get_generation)
|
||||
return ops->get_generation(vdpa);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u8 virtio_vdpa_get_status(struct virtio_device *vdev)
|
||||
{
|
||||
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
|
||||
return ops->get_status(vdpa);
|
||||
}
|
||||
|
||||
static void virtio_vdpa_set_status(struct virtio_device *vdev, u8 status)
|
||||
{
|
||||
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
|
||||
return ops->set_status(vdpa, status);
|
||||
}
|
||||
|
||||
static void virtio_vdpa_reset(struct virtio_device *vdev)
|
||||
{
|
||||
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
|
||||
return ops->set_status(vdpa, 0);
|
||||
}
|
||||
|
||||
static bool virtio_vdpa_notify(struct virtqueue *vq)
|
||||
{
|
||||
struct vdpa_device *vdpa = vd_get_vdpa(vq->vdev);
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
|
||||
ops->kick_vq(vdpa, vq->index);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static irqreturn_t virtio_vdpa_config_cb(void *private)
|
||||
{
|
||||
struct virtio_vdpa_device *vd_dev = private;
|
||||
|
||||
virtio_config_changed(&vd_dev->vdev);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static irqreturn_t virtio_vdpa_virtqueue_cb(void *private)
|
||||
{
|
||||
struct virtio_vdpa_vq_info *info = private;
|
||||
|
||||
return vring_interrupt(0, info->vq);
|
||||
}
|
||||
|
||||
static struct virtqueue *
|
||||
virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
|
||||
void (*callback)(struct virtqueue *vq),
|
||||
const char *name, bool ctx)
|
||||
{
|
||||
struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev);
|
||||
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
struct virtio_vdpa_vq_info *info;
|
||||
struct vdpa_callback cb;
|
||||
struct virtqueue *vq;
|
||||
u64 desc_addr, driver_addr, device_addr;
|
||||
unsigned long flags;
|
||||
u32 align, num;
|
||||
int err;
|
||||
|
||||
if (!name)
|
||||
return NULL;
|
||||
|
||||
/* Queue shouldn't already be set up. */
|
||||
if (ops->get_vq_ready(vdpa, index))
|
||||
return ERR_PTR(-ENOENT);
|
||||
|
||||
/* Allocate and fill out our active queue description */
|
||||
info = kmalloc(sizeof(*info), GFP_KERNEL);
|
||||
if (!info)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
num = ops->get_vq_num_max(vdpa);
|
||||
if (num == 0) {
|
||||
err = -ENOENT;
|
||||
goto error_new_virtqueue;
|
||||
}
|
||||
|
||||
/* Create the vring */
|
||||
align = ops->get_vq_align(vdpa);
|
||||
vq = vring_create_virtqueue(index, num, align, vdev,
|
||||
true, true, ctx,
|
||||
virtio_vdpa_notify, callback, name);
|
||||
if (!vq) {
|
||||
err = -ENOMEM;
|
||||
goto error_new_virtqueue;
|
||||
}
|
||||
|
||||
/* Setup virtqueue callback */
|
||||
cb.callback = virtio_vdpa_virtqueue_cb;
|
||||
cb.private = info;
|
||||
ops->set_vq_cb(vdpa, index, &cb);
|
||||
ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq));
|
||||
|
||||
desc_addr = virtqueue_get_desc_addr(vq);
|
||||
driver_addr = virtqueue_get_avail_addr(vq);
|
||||
device_addr = virtqueue_get_used_addr(vq);
|
||||
|
||||
if (ops->set_vq_address(vdpa, index,
|
||||
desc_addr, driver_addr,
|
||||
device_addr)) {
|
||||
err = -EINVAL;
|
||||
goto err_vq;
|
||||
}
|
||||
|
||||
ops->set_vq_ready(vdpa, index, 1);
|
||||
|
||||
vq->priv = info;
|
||||
info->vq = vq;
|
||||
|
||||
spin_lock_irqsave(&vd_dev->lock, flags);
|
||||
list_add(&info->node, &vd_dev->virtqueues);
|
||||
spin_unlock_irqrestore(&vd_dev->lock, flags);
|
||||
|
||||
return vq;
|
||||
|
||||
err_vq:
|
||||
vring_del_virtqueue(vq);
|
||||
error_new_virtqueue:
|
||||
ops->set_vq_ready(vdpa, index, 0);
|
||||
/* VDPA driver should make sure vq is stopped here */
|
||||
WARN_ON(ops->get_vq_ready(vdpa, index));
|
||||
kfree(info);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static void virtio_vdpa_del_vq(struct virtqueue *vq)
|
||||
{
|
||||
struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vq->vdev);
|
||||
struct vdpa_device *vdpa = vd_dev->vdpa;
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
struct virtio_vdpa_vq_info *info = vq->priv;
|
||||
unsigned int index = vq->index;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&vd_dev->lock, flags);
|
||||
list_del(&info->node);
|
||||
spin_unlock_irqrestore(&vd_dev->lock, flags);
|
||||
|
||||
/* Deactivate the queue */
|
||||
ops->set_vq_ready(vdpa, index, 0);
|
||||
WARN_ON(ops->get_vq_ready(vdpa, index));
|
||||
|
||||
vring_del_virtqueue(vq);
|
||||
|
||||
kfree(info);
|
||||
}
|
||||
|
||||
static void virtio_vdpa_del_vqs(struct virtio_device *vdev)
|
||||
{
|
||||
struct virtqueue *vq, *n;
|
||||
|
||||
list_for_each_entry_safe(vq, n, &vdev->vqs, list)
|
||||
virtio_vdpa_del_vq(vq);
|
||||
}
|
||||
|
||||
static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned nvqs,
|
||||
struct virtqueue *vqs[],
|
||||
vq_callback_t *callbacks[],
|
||||
const char * const names[],
|
||||
const bool *ctx,
|
||||
struct irq_affinity *desc)
|
||||
{
|
||||
struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev);
|
||||
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
struct vdpa_callback cb;
|
||||
int i, err, queue_idx = 0;
|
||||
|
||||
for (i = 0; i < nvqs; ++i) {
|
||||
if (!names[i]) {
|
||||
vqs[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++,
|
||||
callbacks[i], names[i], ctx ?
|
||||
ctx[i] : false);
|
||||
if (IS_ERR(vqs[i])) {
|
||||
err = PTR_ERR(vqs[i]);
|
||||
goto err_setup_vq;
|
||||
}
|
||||
}
|
||||
|
||||
cb.callback = virtio_vdpa_config_cb;
|
||||
cb.private = vd_dev;
|
||||
ops->set_config_cb(vdpa, &cb);
|
||||
|
||||
return 0;
|
||||
|
||||
err_setup_vq:
|
||||
virtio_vdpa_del_vqs(vdev);
|
||||
return err;
|
||||
}
|
||||
|
||||
static u64 virtio_vdpa_get_features(struct virtio_device *vdev)
|
||||
{
|
||||
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
|
||||
return ops->get_features(vdpa);
|
||||
}
|
||||
|
||||
static int virtio_vdpa_finalize_features(struct virtio_device *vdev)
|
||||
{
|
||||
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
|
||||
/* Give virtio_ring a chance to accept features. */
|
||||
vring_transport_features(vdev);
|
||||
|
||||
return ops->set_features(vdpa, vdev->features);
|
||||
}
|
||||
|
||||
static const char *virtio_vdpa_bus_name(struct virtio_device *vdev)
|
||||
{
|
||||
struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev);
|
||||
struct vdpa_device *vdpa = vd_dev->vdpa;
|
||||
|
||||
return dev_name(&vdpa->dev);
|
||||
}
|
||||
|
||||
static const struct virtio_config_ops virtio_vdpa_config_ops = {
|
||||
.get = virtio_vdpa_get,
|
||||
.set = virtio_vdpa_set,
|
||||
.generation = virtio_vdpa_generation,
|
||||
.get_status = virtio_vdpa_get_status,
|
||||
.set_status = virtio_vdpa_set_status,
|
||||
.reset = virtio_vdpa_reset,
|
||||
.find_vqs = virtio_vdpa_find_vqs,
|
||||
.del_vqs = virtio_vdpa_del_vqs,
|
||||
.get_features = virtio_vdpa_get_features,
|
||||
.finalize_features = virtio_vdpa_finalize_features,
|
||||
.bus_name = virtio_vdpa_bus_name,
|
||||
};
|
||||
|
||||
static void virtio_vdpa_release_dev(struct device *_d)
|
||||
{
|
||||
struct virtio_device *vdev =
|
||||
container_of(_d, struct virtio_device, dev);
|
||||
struct virtio_vdpa_device *vd_dev =
|
||||
container_of(vdev, struct virtio_vdpa_device, vdev);
|
||||
|
||||
kfree(vd_dev);
|
||||
}
|
||||
|
||||
static int virtio_vdpa_probe(struct vdpa_device *vdpa)
|
||||
{
|
||||
const struct vdpa_config_ops *ops = vdpa->config;
|
||||
struct virtio_vdpa_device *vd_dev, *reg_dev = NULL;
|
||||
int ret = -EINVAL;
|
||||
|
||||
vd_dev = kzalloc(sizeof(*vd_dev), GFP_KERNEL);
|
||||
if (!vd_dev)
|
||||
return -ENOMEM;
|
||||
|
||||
vd_dev->vdev.dev.parent = vdpa_get_dma_dev(vdpa);
|
||||
vd_dev->vdev.dev.release = virtio_vdpa_release_dev;
|
||||
vd_dev->vdev.config = &virtio_vdpa_config_ops;
|
||||
vd_dev->vdpa = vdpa;
|
||||
INIT_LIST_HEAD(&vd_dev->virtqueues);
|
||||
spin_lock_init(&vd_dev->lock);
|
||||
|
||||
vd_dev->vdev.id.device = ops->get_device_id(vdpa);
|
||||
if (vd_dev->vdev.id.device == 0)
|
||||
goto err;
|
||||
|
||||
vd_dev->vdev.id.vendor = ops->get_vendor_id(vdpa);
|
||||
ret = register_virtio_device(&vd_dev->vdev);
|
||||
reg_dev = vd_dev;
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
vdpa_set_drvdata(vdpa, vd_dev);
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
if (reg_dev)
|
||||
put_device(&vd_dev->vdev.dev);
|
||||
else
|
||||
kfree(vd_dev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void virtio_vdpa_remove(struct vdpa_device *vdpa)
|
||||
{
|
||||
struct virtio_vdpa_device *vd_dev = vdpa_get_drvdata(vdpa);
|
||||
|
||||
unregister_virtio_device(&vd_dev->vdev);
|
||||
}
|
||||
|
||||
static struct vdpa_driver virtio_vdpa_driver = {
|
||||
.driver = {
|
||||
.name = "virtio_vdpa",
|
||||
},
|
||||
.probe = virtio_vdpa_probe,
|
||||
.remove = virtio_vdpa_remove,
|
||||
};
|
||||
|
||||
module_vdpa_driver(virtio_vdpa_driver);
|
||||
|
||||
MODULE_VERSION(MOD_VERSION);
|
||||
MODULE_LICENSE(MOD_LICENSE);
|
||||
MODULE_AUTHOR(MOD_AUTHOR);
|
||||
MODULE_DESCRIPTION(MOD_DESC);
|
253
include/linux/vdpa.h
Normal file
253
include/linux/vdpa.h
Normal file
@ -0,0 +1,253 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_VDPA_H
|
||||
#define _LINUX_VDPA_H
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/vhost_iotlb.h>
|
||||
|
||||
/**
|
||||
* vDPA callback definition.
|
||||
* @callback: interrupt callback function
|
||||
* @private: the data passed to the callback function
|
||||
*/
|
||||
struct vdpa_callback {
|
||||
irqreturn_t (*callback)(void *data);
|
||||
void *private;
|
||||
};
|
||||
|
||||
/**
|
||||
* vDPA device - representation of a vDPA device
|
||||
* @dev: underlying device
|
||||
* @dma_dev: the actual device that is performing DMA
|
||||
* @config: the configuration ops for this device.
|
||||
* @index: device index
|
||||
*/
|
||||
struct vdpa_device {
|
||||
struct device dev;
|
||||
struct device *dma_dev;
|
||||
const struct vdpa_config_ops *config;
|
||||
unsigned int index;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vdpa_config_ops - operations for configuring a vDPA device.
|
||||
* Note: vDPA device drivers are required to implement all of the
|
||||
* operations unless it is mentioned to be optional in the following
|
||||
* list.
|
||||
*
|
||||
* @set_vq_address: Set the address of virtqueue
|
||||
* @vdev: vdpa device
|
||||
* @idx: virtqueue index
|
||||
* @desc_area: address of desc area
|
||||
* @driver_area: address of driver area
|
||||
* @device_area: address of device area
|
||||
* Returns integer: success (0) or error (< 0)
|
||||
* @set_vq_num: Set the size of virtqueue
|
||||
* @vdev: vdpa device
|
||||
* @idx: virtqueue index
|
||||
* @num: the size of virtqueue
|
||||
* @kick_vq: Kick the virtqueue
|
||||
* @vdev: vdpa device
|
||||
* @idx: virtqueue index
|
||||
* @set_vq_cb: Set the interrupt callback function for
|
||||
* a virtqueue
|
||||
* @vdev: vdpa device
|
||||
* @idx: virtqueue index
|
||||
* @cb: virtio-vdev interrupt callback structure
|
||||
* @set_vq_ready: Set ready status for a virtqueue
|
||||
* @vdev: vdpa device
|
||||
* @idx: virtqueue index
|
||||
* @ready: ready (true) or not ready (false)
|
||||
* @get_vq_ready: Get ready status for a virtqueue
|
||||
* @vdev: vdpa device
|
||||
* @idx: virtqueue index
|
||||
* Returns boolean: ready (true) or not (false)
|
||||
* @set_vq_state: Set the state for a virtqueue
|
||||
* @vdev: vdpa device
|
||||
* @idx: virtqueue index
|
||||
* @state: virtqueue state (last_avail_idx)
|
||||
* Returns integer: success (0) or error (< 0)
|
||||
* @get_vq_state: Get the state for a virtqueue
|
||||
* @vdev: vdpa device
|
||||
* @idx: virtqueue index
|
||||
* Returns virtqueue state (last_avail_idx)
|
||||
* @get_vq_align: Get the virtqueue align requirement
|
||||
* for the device
|
||||
* @vdev: vdpa device
|
||||
* Returns virtqueue align requirement
|
||||
* @get_features: Get virtio features supported by the device
|
||||
* @vdev: vdpa device
|
||||
* Returns the virtio features supported by the
|
||||
* device
|
||||
* @set_features: Set virtio features supported by the driver
|
||||
* @vdev: vdpa device
|
||||
* @features: features supported by the driver
|
||||
* Returns integer: success (0) or error (< 0)
|
||||
* @set_config_cb: Set the config interrupt callback
|
||||
* @vdev: vdpa device
|
||||
* @cb: virtio-vdev interrupt callback structure
|
||||
* @get_vq_num_max: Get the max size of virtqueue
|
||||
* @vdev: vdpa device
|
||||
* Returns u16: max size of virtqueue
|
||||
* @get_device_id: Get virtio device id
|
||||
* @vdev: vdpa device
|
||||
* Returns u32: virtio device id
|
||||
* @get_vendor_id: Get id for the vendor that provides this device
|
||||
* @vdev: vdpa device
|
||||
* Returns u32: virtio vendor id
|
||||
* @get_status: Get the device status
|
||||
* @vdev: vdpa device
|
||||
* Returns u8: virtio device status
|
||||
* @set_status: Set the device status
|
||||
* @vdev: vdpa device
|
||||
* @status: virtio device status
|
||||
* @get_config: Read from device specific configuration space
|
||||
* @vdev: vdpa device
|
||||
* @offset: offset from the beginning of
|
||||
* configuration space
|
||||
* @buf: buffer used to read to
|
||||
* @len: the length to read from
|
||||
* configuration space
|
||||
* @set_config: Write to device specific configuration space
|
||||
* @vdev: vdpa device
|
||||
* @offset: offset from the beginning of
|
||||
* configuration space
|
||||
* @buf: buffer used to write from
|
||||
* @len: the length to write to
|
||||
* configuration space
|
||||
* @get_generation: Get device config generation (optional)
|
||||
* @vdev: vdpa device
|
||||
* Returns u32: device generation
|
||||
* @set_map: Set device memory mapping (optional)
|
||||
* Needed for devices that use device
|
||||
* specific DMA translation (on-chip IOMMU)
|
||||
* @vdev: vdpa device
|
||||
* @iotlb: vhost memory mapping to be
|
||||
* used by the vDPA
|
||||
* Returns integer: success (0) or error (< 0)
|
||||
* @dma_map: Map an area of PA to IOVA (optional)
|
||||
* Needed for devices that use device
|
||||
* specific DMA translation (on-chip IOMMU)
|
||||
* and preferring incremental map.
|
||||
* @vdev: vdpa device
|
||||
* @iova: iova to be mapped
|
||||
* @size: size of the area
|
||||
* @pa: physical address for the map
|
||||
* @perm: device access permission (VHOST_MAP_XX)
|
||||
* Returns integer: success (0) or error (< 0)
|
||||
* @dma_unmap: Unmap an area of IOVA (optional but
|
||||
* must be implemented with dma_map)
|
||||
* Needed for devices that use device
|
||||
* specific DMA translation (on-chip IOMMU)
|
||||
* and preferring incremental unmap.
|
||||
* @vdev: vdpa device
|
||||
* @iova: iova to be unmapped
|
||||
* @size: size of the area
|
||||
* Returns integer: success (0) or error (< 0)
|
||||
* @free: Free resources that belong to vDPA (optional)
|
||||
* @vdev: vdpa device
|
||||
*/
|
||||
struct vdpa_config_ops {
|
||||
/* Virtqueue ops */
|
||||
int (*set_vq_address)(struct vdpa_device *vdev,
|
||||
u16 idx, u64 desc_area, u64 driver_area,
|
||||
u64 device_area);
|
||||
void (*set_vq_num)(struct vdpa_device *vdev, u16 idx, u32 num);
|
||||
void (*kick_vq)(struct vdpa_device *vdev, u16 idx);
|
||||
void (*set_vq_cb)(struct vdpa_device *vdev, u16 idx,
|
||||
struct vdpa_callback *cb);
|
||||
void (*set_vq_ready)(struct vdpa_device *vdev, u16 idx, bool ready);
|
||||
bool (*get_vq_ready)(struct vdpa_device *vdev, u16 idx);
|
||||
int (*set_vq_state)(struct vdpa_device *vdev, u16 idx, u64 state);
|
||||
u64 (*get_vq_state)(struct vdpa_device *vdev, u16 idx);
|
||||
|
||||
/* Device ops */
|
||||
u16 (*get_vq_align)(struct vdpa_device *vdev);
|
||||
u64 (*get_features)(struct vdpa_device *vdev);
|
||||
int (*set_features)(struct vdpa_device *vdev, u64 features);
|
||||
void (*set_config_cb)(struct vdpa_device *vdev,
|
||||
struct vdpa_callback *cb);
|
||||
u16 (*get_vq_num_max)(struct vdpa_device *vdev);
|
||||
u32 (*get_device_id)(struct vdpa_device *vdev);
|
||||
u32 (*get_vendor_id)(struct vdpa_device *vdev);
|
||||
u8 (*get_status)(struct vdpa_device *vdev);
|
||||
void (*set_status)(struct vdpa_device *vdev, u8 status);
|
||||
void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
|
||||
void *buf, unsigned int len);
|
||||
void (*set_config)(struct vdpa_device *vdev, unsigned int offset,
|
||||
const void *buf, unsigned int len);
|
||||
u32 (*get_generation)(struct vdpa_device *vdev);
|
||||
|
||||
/* DMA ops */
|
||||
int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb);
|
||||
int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size,
|
||||
u64 pa, u32 perm);
|
||||
int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size);
|
||||
|
||||
/* Free device resources */
|
||||
void (*free)(struct vdpa_device *vdev);
|
||||
};
|
||||
|
||||
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
|
||||
const struct vdpa_config_ops *config,
|
||||
size_t size);
|
||||
|
||||
#define vdpa_alloc_device(dev_struct, member, parent, config) \
|
||||
container_of(__vdpa_alloc_device( \
|
||||
parent, config, \
|
||||
sizeof(dev_struct) + \
|
||||
BUILD_BUG_ON_ZERO(offsetof( \
|
||||
dev_struct, member))), \
|
||||
dev_struct, member)
|
||||
|
||||
int vdpa_register_device(struct vdpa_device *vdev);
|
||||
void vdpa_unregister_device(struct vdpa_device *vdev);
|
||||
|
||||
/**
|
||||
* vdpa_driver - operations for a vDPA driver
|
||||
* @driver: underlying device driver
|
||||
* @probe: the function to call when a device is found. Returns 0 or -errno.
|
||||
* @remove: the function to call when a device is removed.
|
||||
*/
|
||||
struct vdpa_driver {
|
||||
struct device_driver driver;
|
||||
int (*probe)(struct vdpa_device *vdev);
|
||||
void (*remove)(struct vdpa_device *vdev);
|
||||
};
|
||||
|
||||
#define vdpa_register_driver(drv) \
|
||||
__vdpa_register_driver(drv, THIS_MODULE)
|
||||
int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner);
|
||||
void vdpa_unregister_driver(struct vdpa_driver *drv);
|
||||
|
||||
#define module_vdpa_driver(__vdpa_driver) \
|
||||
module_driver(__vdpa_driver, vdpa_register_driver, \
|
||||
vdpa_unregister_driver)
|
||||
|
||||
static inline struct vdpa_driver *drv_to_vdpa(struct device_driver *driver)
|
||||
{
|
||||
return container_of(driver, struct vdpa_driver, driver);
|
||||
}
|
||||
|
||||
static inline struct vdpa_device *dev_to_vdpa(struct device *_dev)
|
||||
{
|
||||
return container_of(_dev, struct vdpa_device, dev);
|
||||
}
|
||||
|
||||
static inline void *vdpa_get_drvdata(const struct vdpa_device *vdev)
|
||||
{
|
||||
return dev_get_drvdata(&vdev->dev);
|
||||
}
|
||||
|
||||
static inline void vdpa_set_drvdata(struct vdpa_device *vdev, void *data)
|
||||
{
|
||||
dev_set_drvdata(&vdev->dev, data);
|
||||
}
|
||||
|
||||
static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)
|
||||
{
|
||||
return vdev->dma_dev;
|
||||
}
|
||||
#endif /* _LINUX_VDPA_H */
|
47
include/linux/vhost_iotlb.h
Normal file
47
include/linux/vhost_iotlb.h
Normal file
@ -0,0 +1,47 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_VHOST_IOTLB_H
|
||||
#define _LINUX_VHOST_IOTLB_H
|
||||
|
||||
#include <linux/interval_tree_generic.h>
|
||||
|
||||
struct vhost_iotlb_map {
|
||||
struct rb_node rb;
|
||||
struct list_head link;
|
||||
u64 start;
|
||||
u64 last;
|
||||
u64 size;
|
||||
u64 addr;
|
||||
#define VHOST_MAP_RO 0x1
|
||||
#define VHOST_MAP_WO 0x2
|
||||
#define VHOST_MAP_RW 0x3
|
||||
u32 perm;
|
||||
u32 flags_padding;
|
||||
u64 __subtree_last;
|
||||
};
|
||||
|
||||
#define VHOST_IOTLB_FLAG_RETIRE 0x1
|
||||
|
||||
struct vhost_iotlb {
|
||||
struct rb_root_cached root;
|
||||
struct list_head list;
|
||||
unsigned int limit;
|
||||
unsigned int nmaps;
|
||||
unsigned int flags;
|
||||
};
|
||||
|
||||
int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, u64 start, u64 last,
|
||||
u64 addr, unsigned int perm);
|
||||
void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last);
|
||||
|
||||
struct vhost_iotlb *vhost_iotlb_alloc(unsigned int limit, unsigned int flags);
|
||||
void vhost_iotlb_free(struct vhost_iotlb *iotlb);
|
||||
void vhost_iotlb_reset(struct vhost_iotlb *iotlb);
|
||||
|
||||
struct vhost_iotlb_map *
|
||||
vhost_iotlb_itree_first(struct vhost_iotlb *iotlb, u64 start, u64 last);
|
||||
struct vhost_iotlb_map *
|
||||
vhost_iotlb_itree_next(struct vhost_iotlb_map *map, u64 start, u64 last);
|
||||
|
||||
void vhost_iotlb_map_free(struct vhost_iotlb *iotlb,
|
||||
struct vhost_iotlb_map *map);
|
||||
#endif
|
@ -14,6 +14,8 @@
|
||||
#include <linux/virtio_byteorder.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/dma-direction.h>
|
||||
#include <linux/vhost_iotlb.h>
|
||||
#include <asm/barrier.h>
|
||||
|
||||
/* virtio_ring with information needed for host access. */
|
||||
@ -39,6 +41,9 @@ struct vringh {
|
||||
/* The vring (note: it may contain user pointers!) */
|
||||
struct vring vring;
|
||||
|
||||
/* IOTLB for this vring */
|
||||
struct vhost_iotlb *iotlb;
|
||||
|
||||
/* The function to call to notify the guest about added buffers */
|
||||
void (*notify)(struct vringh *);
|
||||
};
|
||||
@ -248,4 +253,35 @@ static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val)
|
||||
{
|
||||
return __cpu_to_virtio64(vringh_is_little_endian(vrh), val);
|
||||
}
|
||||
|
||||
void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb);
|
||||
|
||||
int vringh_init_iotlb(struct vringh *vrh, u64 features,
|
||||
unsigned int num, bool weak_barriers,
|
||||
struct vring_desc *desc,
|
||||
struct vring_avail *avail,
|
||||
struct vring_used *used);
|
||||
|
||||
int vringh_getdesc_iotlb(struct vringh *vrh,
|
||||
struct vringh_kiov *riov,
|
||||
struct vringh_kiov *wiov,
|
||||
u16 *head,
|
||||
gfp_t gfp);
|
||||
|
||||
ssize_t vringh_iov_pull_iotlb(struct vringh *vrh,
|
||||
struct vringh_kiov *riov,
|
||||
void *dst, size_t len);
|
||||
ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
|
||||
struct vringh_kiov *wiov,
|
||||
const void *src, size_t len);
|
||||
|
||||
void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num);
|
||||
|
||||
int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len);
|
||||
|
||||
bool vringh_notify_enable_iotlb(struct vringh *vrh);
|
||||
void vringh_notify_disable_iotlb(struct vringh *vrh);
|
||||
|
||||
int vringh_need_notify_iotlb(struct vringh *vrh);
|
||||
|
||||
#endif /* _LINUX_VRINGH_H */
|
||||
|
@ -116,4 +116,28 @@
|
||||
#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64)
|
||||
#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int)
|
||||
|
||||
/* VHOST_VDPA specific defines */
|
||||
|
||||
/* Get the device id. The device ids follow the same definition of
|
||||
* the device id defined in virtio-spec.
|
||||
*/
|
||||
#define VHOST_VDPA_GET_DEVICE_ID _IOR(VHOST_VIRTIO, 0x70, __u32)
|
||||
/* Get and set the status. The status bits follow the same definition
|
||||
* of the device status defined in virtio-spec.
|
||||
*/
|
||||
#define VHOST_VDPA_GET_STATUS _IOR(VHOST_VIRTIO, 0x71, __u8)
|
||||
#define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8)
|
||||
/* Get and set the device config. The device config is the one
|
||||
* defined for the device in the virtio specification.
|
||||
*/
|
||||
#define VHOST_VDPA_GET_CONFIG _IOR(VHOST_VIRTIO, 0x73, \
|
||||
struct vhost_vdpa_config)
|
||||
#define VHOST_VDPA_SET_CONFIG _IOW(VHOST_VIRTIO, 0x74, \
|
||||
struct vhost_vdpa_config)
|
||||
/* Enable/disable the ring. */
|
||||
#define VHOST_VDPA_SET_VRING_ENABLE _IOW(VHOST_VIRTIO, 0x75, \
|
||||
struct vhost_vring_state)
|
||||
/* Get the max ring size. */
|
||||
#define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16)
|
||||
|
||||
#endif
|
||||
|
@ -119,6 +119,14 @@ struct vhost_scsi_target {
|
||||
unsigned short reserved;
|
||||
};
|
||||
|
||||
/* VHOST_VDPA specific definitions */
|
||||
|
||||
/* Argument structure named by the VHOST_VDPA_GET_CONFIG and VHOST_VDPA_SET_CONFIG ioctl definitions. */
struct vhost_vdpa_config {
|
||||
__u32 off; /* presumably the byte offset into the device config -- TODO confirm */
|
||||
__u32 len; /* presumably the number of bytes carried in buf -- TODO confirm */
|
||||
__u8 buf[0]; /* zero-length trailing array: any data follows the fixed header */
|
||||
};
|
||||
|
||||
/* Feature bits */
|
||||
/* Log all write descriptors. Can be changed while device is active. */
|
||||
#define VHOST_F_LOG_ALL 26
|
||||
|
@ -57,6 +57,9 @@
|
||||
* Steering */
|
||||
#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */
|
||||
|
||||
#define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */
|
||||
#define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */
|
||||
#define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */
|
||||
#define VIRTIO_NET_F_STANDBY 62 /* Act as standby for another device
|
||||
* with the same MAC.
|
||||
*/
|
||||
@ -69,6 +72,17 @@
|
||||
#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */
|
||||
#define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */
|
||||
|
||||
/* supported/enabled hash types */
|
||||
#define VIRTIO_NET_RSS_HASH_TYPE_IPv4 (1 << 0)
|
||||
#define VIRTIO_NET_RSS_HASH_TYPE_TCPv4 (1 << 1)
|
||||
#define VIRTIO_NET_RSS_HASH_TYPE_UDPv4 (1 << 2)
|
||||
#define VIRTIO_NET_RSS_HASH_TYPE_IPv6 (1 << 3)
|
||||
#define VIRTIO_NET_RSS_HASH_TYPE_TCPv6 (1 << 4)
|
||||
#define VIRTIO_NET_RSS_HASH_TYPE_UDPv6 (1 << 5)
|
||||
#define VIRTIO_NET_RSS_HASH_TYPE_IP_EX (1 << 6)
|
||||
#define VIRTIO_NET_RSS_HASH_TYPE_TCP_EX (1 << 7)
|
||||
#define VIRTIO_NET_RSS_HASH_TYPE_UDP_EX (1 << 8)
|
||||
|
||||
struct virtio_net_config {
|
||||
/* The config defining mac address (if VIRTIO_NET_F_MAC) */
|
||||
__u8 mac[ETH_ALEN];
|
||||
@ -92,6 +106,12 @@ struct virtio_net_config {
|
||||
* Any other value stands for unknown.
|
||||
*/
|
||||
__u8 duplex;
|
||||
/* maximum size of RSS key */
|
||||
__u8 rss_max_key_size;
|
||||
/* maximum number of indirection table entries */
|
||||
__le16 rss_max_indirection_table_length;
|
||||
/* bitmask of supported VIRTIO_NET_RSS_HASH_ types */
|
||||
__le32 supported_hash_types;
|
||||
} __attribute__((packed));
|
||||
|
||||
/*
|
||||
@ -104,6 +124,7 @@ struct virtio_net_config {
|
||||
struct virtio_net_hdr_v1 {
|
||||
#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
|
||||
#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
|
||||
#define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc info in csum_ fields */
|
||||
__u8 flags;
|
||||
#define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
|
||||
#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
|
||||
@ -113,11 +134,46 @@ struct virtio_net_hdr_v1 {
|
||||
__u8 gso_type;
|
||||
__virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */
|
||||
__virtio16 gso_size; /* Bytes to append to hdr_len per frame */
|
||||
__virtio16 csum_start; /* Position to start checksumming from */
|
||||
__virtio16 csum_offset; /* Offset after that to place checksum */
|
||||
union {
|
||||
struct {
|
||||
__virtio16 csum_start;
|
||||
__virtio16 csum_offset;
|
||||
};
|
||||
/* Checksum calculation */
|
||||
struct {
|
||||
/* Position to start checksumming from */
|
||||
__virtio16 start;
|
||||
/* Offset after that to place checksum */
|
||||
__virtio16 offset;
|
||||
} csum;
|
||||
/* Receive Segment Coalescing */
|
||||
struct {
|
||||
/* Number of coalesced segments */
|
||||
__le16 segments;
|
||||
/* Number of duplicated acks */
|
||||
__le16 dup_acks;
|
||||
} rsc;
|
||||
};
|
||||
__virtio16 num_buffers; /* Number of merged rx buffers */
|
||||
};
|
||||
|
||||
/*
 * struct virtio_net_hdr_v1 extended with a hash value and a hash report type.
 * NOTE(review): presumably the header layout in use once
 * VIRTIO_NET_F_HASH_REPORT has been negotiated -- confirm against the spec.
 */
struct virtio_net_hdr_v1_hash {
|
||||
struct virtio_net_hdr_v1 hdr; /* the v1 header comes first, unchanged */
|
||||
__le32 hash_value; /* little-endian hash value */
|
||||
#define VIRTIO_NET_HASH_REPORT_NONE 0
|
||||
#define VIRTIO_NET_HASH_REPORT_IPv4 1
|
||||
#define VIRTIO_NET_HASH_REPORT_TCPv4 2
|
||||
#define VIRTIO_NET_HASH_REPORT_UDPv4 3
|
||||
#define VIRTIO_NET_HASH_REPORT_IPv6 4
|
||||
#define VIRTIO_NET_HASH_REPORT_TCPv6 5
|
||||
#define VIRTIO_NET_HASH_REPORT_UDPv6 6
|
||||
#define VIRTIO_NET_HASH_REPORT_IPv6_EX 7
|
||||
#define VIRTIO_NET_HASH_REPORT_TCPv6_EX 8
|
||||
#define VIRTIO_NET_HASH_REPORT_UDPv6_EX 9
|
||||
__le16 hash_report; /* presumably one of the VIRTIO_NET_HASH_REPORT_* values above -- confirm */
|
||||
__le16 padding; /* together with hash_report fills a 32-bit slot */
|
||||
};
|
||||
|
||||
#ifndef VIRTIO_NET_NO_LEGACY
|
||||
/* This header comes first in the scatter-gather list.
|
||||
* For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must
|
||||
@ -228,7 +284,9 @@ struct virtio_net_ctrl_mac {
|
||||
|
||||
/*
|
||||
* Control Receive Flow Steering
|
||||
*
|
||||
*/
|
||||
#define VIRTIO_NET_CTRL_MQ 4
|
||||
/*
|
||||
* The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
|
||||
* enables Receive Flow Steering, specifying the number of the transmit and
|
||||
* receive queues that will be used. After the command is consumed and acked by
|
||||
@ -241,11 +299,47 @@ struct virtio_net_ctrl_mq {
|
||||
__virtio16 virtqueue_pairs;
|
||||
};
|
||||
|
||||
#define VIRTIO_NET_CTRL_MQ 4
|
||||
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0
|
||||
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1
|
||||
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000
|
||||
|
||||
/*
|
||||
* The command VIRTIO_NET_CTRL_MQ_RSS_CONFIG has the same effect as
|
||||
* VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET and additionally configures
|
||||
* receive steering to use a hash calculated for each incoming packet
|
||||
* to decide which receive virtqueue the packet is placed on. The command
|
||||
* also provides the parameters for calculating the hash and choosing the receive virtqueue.
|
||||
*/
|
||||
/*
 * Payload of the VIRTIO_NET_CTRL_MQ_RSS_CONFIG command.
 * The indirection table is variable-length (see the inline size comments),
 * so the members declared after it sit at their C offsets only when the
 * table has exactly one entry.
 */
struct virtio_net_rss_config {
|
||||
__le32 hash_types; /* presumably a bitmask of VIRTIO_NET_RSS_HASH_TYPE_* -- confirm */
|
||||
__le16 indirection_table_mask; /* the table holds 1 + indirection_table_mask entries */
|
||||
__le16 unclassified_queue;
|
||||
__le16 indirection_table[1/* + indirection_table_mask */];
|
||||
__le16 max_tx_vq;
|
||||
__u8 hash_key_length; /* number of bytes in hash_key_data */
|
||||
__u8 hash_key_data[/* hash_key_length */];
|
||||
};
|
||||
|
||||
#define VIRTIO_NET_CTRL_MQ_RSS_CONFIG 1
|
||||
|
||||
/*
|
||||
* The command VIRTIO_NET_CTRL_MQ_HASH_CONFIG requests the device
|
||||
* to include in the virtio header of the packet the value of the
|
||||
* calculated hash and the report type of hash. It also provides
|
||||
* parameters for hash calculation. The command requires feature
|
||||
* VIRTIO_NET_F_HASH_REPORT to be negotiated to extend the
|
||||
* layout of virtio header as defined in virtio_net_hdr_v1_hash.
|
||||
*/
|
||||
/* Payload of the VIRTIO_NET_CTRL_MQ_HASH_CONFIG command. */
struct virtio_net_hash_config {
|
||||
__le32 hash_types; /* presumably a bitmask of VIRTIO_NET_RSS_HASH_TYPE_* -- confirm */
|
||||
/*
 * For compatibility with virtio_net_rss_config: four __le16 slots take
 * the place of indirection_table_mask, unclassified_queue, a one-entry
 * indirection_table and max_tx_vq, so hash_key_length is declared at
 * the same offset in both structures.
 */
|
||||
__le16 reserved[4];
|
||||
__u8 hash_key_length; /* number of bytes in hash_key_data */
|
||||
__u8 hash_key_data[/* hash_key_length */];
|
||||
};
|
||||
|
||||
#define VIRTIO_NET_CTRL_MQ_HASH_CONFIG 2
|
||||
|
||||
/*
|
||||
* Control network offloads
|
||||
*
|
||||
|
@ -8,7 +8,32 @@ CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-poin
|
||||
vpath %.c ../../drivers/virtio ../../drivers/vhost
|
||||
mod:
|
||||
${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V}
|
||||
.PHONY: all test mod clean
|
||||
|
||||
#oot: build vhost as an out-of-tree module for a distro kernel.
|
||||
#No effort is made to ensure that it builds or works, but it tends to mostly work
|
||||
#if the distro kernel is very close to upstream.
|
||||
#Unsupported! This is a development tool only; don't use the
|
||||
#resulting modules in production!
|
||||
#Kernel build tree of the running kernel; $$ leaves the uname call to the shell.
OOT_KSRC=/lib/modules/$$(uname -r)/build
|
||||
OOT_VHOST=`pwd`/../../drivers/vhost
|
||||
#Everyone depends on vhost
|
||||
#Tweak the below to enable more modules
|
||||
OOT_CONFIGS=\
|
||||
CONFIG_VHOST=m \
|
||||
CONFIG_VHOST_NET=n \
|
||||
CONFIG_VHOST_SCSI=n \
|
||||
CONFIG_VHOST_VSOCK=n
|
||||
#Command prefix shared by the oot recipes: a kernel make run in OOT_KSRC with
#the vhost source directory added to the include path through KCFLAGS.
OOT_BUILD=KCFLAGS="-I "${OOT_VHOST} ${MAKE} -C ${OOT_KSRC} V=${V}
|
||||
#Prints the warning, then builds vhost_test, then the vhost modules selected by OOT_CONFIGS.
oot-build:
|
||||
echo "UNSUPPORTED! Don't use the resulting modules in production!"
|
||||
${OOT_BUILD} M=`pwd`/vhost_test
|
||||
${OOT_BUILD} M=${OOT_VHOST} ${OOT_CONFIGS}
|
||||
|
||||
#oot and oot-clean both go through the oot-build recipe; for oot-clean the
#target-specific assignment below appends "clean" to OOT_BUILD.
oot-clean: oot-build
|
||||
oot: oot-build
|
||||
oot-clean: OOT_BUILD+=clean
|
||||
|
||||
.PHONY: all test mod clean vhost oot oot-clean oot-build
|
||||
clean:
|
||||
${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \
|
||||
vhost_test/Module.symvers vhost_test/modules.order *.d
|
||||
|
Loading…
Reference in New Issue
Block a user