virtio,vhost,vdpa: features, fixes
Doorbell remapping for ifcvf, mlx5. virtio_vdpa support for mlx5.
Validate device input in several drivers (for SEV and friends).
ZONE_MOVABLE aware handling in virtio-mem. Misc fixes, cleanups.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

-----BEGIN PGP SIGNATURE-----

iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmDm5jQPHG1zdEByZWRo
YXQuY29tAAoJECgfDbjSjVRp6mYIAMTk5ggM5xdt6NCAASAigssEAoCTMorfoxkx
i7O562TEejgLvYKx/EZnYF+YpmYGyWEY9AgxMPxP/nPRLszuf0nZSmMp5ivu/vMz
zwpAto+7RpUmIQP+N6QjWabiWrpQI9EnXA47kOnyU703Y+RnITPNCvD1PpnDG3zs
W2GdH7DKqwsCY22hB+zboH2D6HNf3gTuUtgUBYbdBnYVxdOsSd1dx9Te0EKUTV3y
uvENmFEcushDRYpUhAsZm4bKcLOn+6rgNGXuXNa4R/hUlJTwrQjGmzu+ua6vfMwF
dcGxdaeMJUo8o0C1Pz7wJBXF5UZXQlxoyBP+0b0ZTm69AwmIHMY=
=o6A1
-----END PGP SIGNATURE-----

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio, vhost, vdpa updates from Michael Tsirkin:

 - Doorbell remapping for ifcvf, mlx5

 - virtio_vdpa support for mlx5

 - Validate device input in several drivers (for SEV and friends)

 - ZONE_MOVABLE aware handling in virtio-mem

 - Misc fixes, cleanups

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (48 commits)
  virtio-mem: prioritize unplug from ZONE_MOVABLE in Big Block Mode
  virtio-mem: simplify high-level unplug handling in Big Block Mode
  virtio-mem: prioritize unplug from ZONE_MOVABLE in Sub Block Mode
  virtio-mem: simplify high-level unplug handling in Sub Block Mode
  virtio-mem: simplify high-level plug handling in Sub Block Mode
  virtio-mem: use page_zonenum() in virtio_mem_fake_offline()
  virtio-mem: don't read big block size in Sub Block Mode
  virtio/vdpa: clear the virtqueue state during probe
  vp_vdpa: allow set vq state to initial state after reset
  virtio-pci library: introduce vp_modern_get_driver_features()
  vdpa: support packed virtqueue for set/get_vq_state()
  virtio-ring: store DMA metadata in desc_extra for split virtqueue
  virtio: use err label in __vring_new_virtqueue()
  virtio_ring: introduce virtqueue_desc_add_split()
  virtio_ring: secure handling of mapping errors
  virtio-ring: factor out desc_extra allocation
  virtio_ring: rename vring_desc_extra_packed
  virtio-ring: maintain next in extra state for packed virtqueue
  vdpa/mlx5: Clear vq ready indication upon device reset
  vdpa/mlx5: Add support for doorbell bypassing
  ...
commit 1eb8df1867
@@ -21,6 +21,9 @@
#define VQ_NAME_LEN 16
#define MAX_DISCARD_SEGMENTS 256u

/* The maximum number of sg elements that fit into a virtqueue */
#define VIRTIO_BLK_MAX_SG_ELEMS 32768

static int major;
static DEFINE_IDA(vd_index_ida);

@@ -447,13 +450,6 @@ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
/* Host must always specify the capacity. */
virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);

/* If capacity is too big, truncate with warning. */
if ((sector_t)capacity != capacity) {
dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
(unsigned long long)capacity);
capacity = (sector_t)-1;
}

nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9);

string_get_size(nblocks, queue_logical_block_size(q),
@@ -728,7 +724,10 @@ static int virtblk_probe(struct virtio_device *vdev)
if (err || !sg_elems)
sg_elems = 1;

/* We need an extra sg elements at head and tail. */
/* Prevent integer overflows and honor max vq size */
sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2);

/* We need extra sg elements at head and tail. */
sg_elems += 2;
vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
if (!vblk) {
@@ -936,6 +935,8 @@ static int virtblk_freeze(struct virtio_device *vdev)
blk_mq_quiesce_queue(vblk->disk->queue);

vdev->config->del_vqs(vdev);
kfree(vblk->vqs);

return 0;
}

@@ -475,7 +475,7 @@ static struct port_buffer *get_inbuf(struct port *port)

buf = virtqueue_get_buf(port->in_vq, &len);
if (buf) {
buf->len = len;
buf->len = min_t(size_t, len, buf->size);
buf->offset = 0;
port->stats.bytes_received += len;
}
@@ -1709,7 +1709,7 @@ static void control_work_handler(struct work_struct *work)
while ((buf = virtqueue_get_buf(vq, &len))) {
spin_unlock(&portdev->c_ivq_lock);

buf->len = len;
buf->len = min_t(size_t, len, buf->size);
buf->offset = 0;

handle_control_message(vq->vdev, portdev, buf);

@@ -1516,12 +1516,16 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
return;

if (__netif_tx_trylock(txq)) {
free_old_xmit_skbs(sq, true);
do {
virtqueue_disable_cb(sq->vq);
free_old_xmit_skbs(sq, true);
} while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));

if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
netif_tx_wake_queue(txq);

__netif_tx_unlock(txq);
}

if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
netif_tx_wake_queue(txq);
}

static int virtnet_poll(struct napi_struct *napi, int budget)
@@ -1592,6 +1596,8 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
struct virtnet_info *vi = sq->vq->vdev->priv;
unsigned int index = vq2txq(sq->vq);
struct netdev_queue *txq;
int opaque;
bool done;

if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
/* We don't need to enable cb for XDP */
@@ -1601,14 +1607,32 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)

txq = netdev_get_tx_queue(vi->dev, index);
__netif_tx_lock(txq, raw_smp_processor_id());
virtqueue_disable_cb(sq->vq);
free_old_xmit_skbs(sq, true);
__netif_tx_unlock(txq);

virtqueue_napi_complete(napi, sq->vq, 0);

if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
netif_tx_wake_queue(txq);

opaque = virtqueue_enable_cb_prepare(sq->vq);

done = napi_complete_done(napi, 0);

if (!done)
virtqueue_disable_cb(sq->vq);

__netif_tx_unlock(txq);

if (done) {
if (unlikely(virtqueue_poll(sq->vq, opaque))) {
if (napi_schedule_prep(napi)) {
__netif_tx_lock(txq, raw_smp_processor_id());
virtqueue_disable_cb(sq->vq);
__netif_tx_unlock(txq);
__napi_schedule(napi);
}
}
}

return 0;
}

@@ -1670,10 +1694,14 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
bool use_napi = sq->napi.weight;

/* Free up any pending old buffers before queueing new ones. */
free_old_xmit_skbs(sq, false);
do {
if (use_napi)
virtqueue_disable_cb(sq->vq);

if (use_napi && kick)
virtqueue_enable_cb_delayed(sq->vq);
free_old_xmit_skbs(sq, false);

} while (use_napi && kick &&
unlikely(!virtqueue_enable_cb_delayed(sq->vq)));

/* timestamp packet in software */
skb_tx_timestamp(skb);
@@ -3310,8 +3338,11 @@ static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
virtnet_set_queues(vi, vi->curr_queue_pairs);

err = virtnet_cpu_notif_add(vi);
if (err)
if (err) {
virtnet_freeze_down(vdev);
remove_vq_common(vi);
return err;
}

return 0;
}

@@ -133,6 +133,8 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
&hw->notify_off_multiplier);
hw->notify_bar = cap.bar;
hw->notify_base = get_cap_addr(hw, &cap);
hw->notify_base_pa = pci_resource_start(pdev, cap.bar) +
le32_to_cpu(cap.offset);
IFCVF_DBG(pdev, "hw->notify_base = %p\n",
hw->notify_base);
break;
@@ -161,6 +163,8 @@ next:
notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off);
hw->vring[i].notify_addr = hw->notify_base +
notify_off * hw->notify_off_multiplier;
hw->vring[i].notify_pa = hw->notify_base_pa +
notify_off * hw->notify_off_multiplier;
}

hw->lm_cfg = hw->base[IFCVF_LM_BAR];

@@ -19,21 +19,9 @@
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_pci.h>

#define N3000_VENDOR_ID 0x1AF4
#define N3000_DEVICE_ID 0x1041
#define N3000_SUBSYS_VENDOR_ID 0x8086
#define N3000_SUBSYS_DEVICE_ID 0x001A

#define C5000X_PL_VENDOR_ID 0x1AF4
#define C5000X_PL_DEVICE_ID 0x1000
#define C5000X_PL_SUBSYS_VENDOR_ID 0x8086
#define C5000X_PL_SUBSYS_DEVICE_ID 0x0001

#define C5000X_PL_BLK_VENDOR_ID 0x1AF4
#define C5000X_PL_BLK_DEVICE_ID 0x1001
#define C5000X_PL_BLK_SUBSYS_VENDOR_ID 0x8086
#define C5000X_PL_BLK_SUBSYS_DEVICE_ID 0x0002

#define IFCVF_NET_SUPPORTED_FEATURES \
((1ULL << VIRTIO_NET_F_MAC) | \
(1ULL << VIRTIO_F_ANY_LAYOUT) | \
@@ -73,6 +61,7 @@ struct vring_info {
u16 last_avail_idx;
bool ready;
void __iomem *notify_addr;
phys_addr_t notify_pa;
u32 irq;
struct vdpa_callback cb;
char msix_name[256];
@@ -87,6 +76,7 @@ struct ifcvf_hw {
u8 notify_bar;
/* Notificaiton bar address */
void __iomem *notify_base;
phys_addr_t notify_base_pa;
u32 notify_off_multiplier;
u64 req_features;
u64 hw_features;

@ -264,7 +264,7 @@ static int ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid,
|
||||
{
|
||||
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
|
||||
|
||||
state->avail_index = ifcvf_get_vq_state(vf, qid);
|
||||
state->split.avail_index = ifcvf_get_vq_state(vf, qid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -273,7 +273,7 @@ static int ifcvf_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid,
|
||||
{
|
||||
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
|
||||
|
||||
return ifcvf_set_vq_state(vf, qid, state->avail_index);
|
||||
return ifcvf_set_vq_state(vf, qid, state->split.avail_index);
|
||||
}
|
||||
|
||||
static void ifcvf_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid,
|
||||
@ -413,6 +413,21 @@ static int ifcvf_vdpa_get_vq_irq(struct vdpa_device *vdpa_dev,
|
||||
return vf->vring[qid].irq;
|
||||
}
|
||||
|
||||
static struct vdpa_notification_area ifcvf_get_vq_notification(struct vdpa_device *vdpa_dev,
|
||||
u16 idx)
|
||||
{
|
||||
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
|
||||
struct vdpa_notification_area area;
|
||||
|
||||
area.addr = vf->vring[idx].notify_pa;
|
||||
if (!vf->notify_off_multiplier)
|
||||
area.size = PAGE_SIZE;
|
||||
else
|
||||
area.size = vf->notify_off_multiplier;
|
||||
|
||||
return area;
|
||||
}
|
||||
|
||||
/*
|
||||
* IFCVF currently does't have on-chip IOMMU, so not
|
||||
* implemented set_map()/dma_map()/dma_unmap()
|
||||
@ -440,6 +455,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = {
|
||||
.get_config = ifcvf_vdpa_get_config,
|
||||
.set_config = ifcvf_vdpa_set_config,
|
||||
.set_config_cb = ifcvf_vdpa_set_config_cb,
|
||||
.get_vq_notification = ifcvf_get_vq_notification,
|
||||
};
|
||||
|
||||
static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
@ -536,18 +552,21 @@ static void ifcvf_remove(struct pci_dev *pdev)
|
||||
}
|
||||
|
||||
static struct pci_device_id ifcvf_pci_ids[] = {
|
||||
{ PCI_DEVICE_SUB(N3000_VENDOR_ID,
|
||||
/* N3000 network device */
|
||||
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET,
|
||||
N3000_DEVICE_ID,
|
||||
N3000_SUBSYS_VENDOR_ID,
|
||||
PCI_VENDOR_ID_INTEL,
|
||||
N3000_SUBSYS_DEVICE_ID) },
|
||||
{ PCI_DEVICE_SUB(C5000X_PL_VENDOR_ID,
|
||||
C5000X_PL_DEVICE_ID,
|
||||
C5000X_PL_SUBSYS_VENDOR_ID,
|
||||
C5000X_PL_SUBSYS_DEVICE_ID) },
|
||||
{ PCI_DEVICE_SUB(C5000X_PL_BLK_VENDOR_ID,
|
||||
C5000X_PL_BLK_DEVICE_ID,
|
||||
C5000X_PL_BLK_SUBSYS_VENDOR_ID,
|
||||
C5000X_PL_BLK_SUBSYS_DEVICE_ID) },
|
||||
/* C5000X-PL network device */
|
||||
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET,
|
||||
VIRTIO_TRANS_ID_NET,
|
||||
PCI_VENDOR_ID_INTEL,
|
||||
VIRTIO_ID_NET) },
|
||||
/* C5000X-PL block device */
|
||||
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET,
|
||||
VIRTIO_TRANS_ID_BLOCK,
|
||||
PCI_VENDOR_ID_INTEL,
|
||||
VIRTIO_ID_BLOCK) },
|
||||
|
||||
{ 0 },
|
||||
};
|
||||
|
@@ -35,12 +35,14 @@ struct mlx5_vdpa_mr {

/* serialize mkey creation and destruction */
struct mutex mkey_mtx;
bool user_mr;
};

struct mlx5_vdpa_resources {
u32 pdn;
struct mlx5_uars_page *uar;
void __iomem *kick_addr;
u64 phys_kick_addr;
u16 uid;
u32 null_mkey;
bool valid;

@ -219,11 +219,6 @@ static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_m
|
||||
mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey);
|
||||
}
|
||||
|
||||
static struct device *get_dma_device(struct mlx5_vdpa_dev *mvdev)
|
||||
{
|
||||
return &mvdev->mdev->pdev->dev;
|
||||
}
|
||||
|
||||
static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
|
||||
struct vhost_iotlb *iotlb)
|
||||
{
|
||||
@ -239,7 +234,7 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
|
||||
u64 pa;
|
||||
u64 paend;
|
||||
struct scatterlist *sg;
|
||||
struct device *dma = get_dma_device(mvdev);
|
||||
struct device *dma = mvdev->vdev.dma_dev;
|
||||
|
||||
for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
|
||||
map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
|
||||
@ -298,7 +293,7 @@ err_map:
|
||||
|
||||
static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
|
||||
{
|
||||
struct device *dma = get_dma_device(mvdev);
|
||||
struct device *dma = mvdev->vdev.dma_dev;
|
||||
|
||||
destroy_direct_mr(mvdev, mr);
|
||||
dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
|
||||
@ -360,7 +355,7 @@ err_alloc:
|
||||
* indirect memory key that provides access to the enitre address space given
|
||||
* by iotlb.
|
||||
*/
|
||||
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
|
||||
static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
|
||||
{
|
||||
struct mlx5_vdpa_mr *mr = &mvdev->mr;
|
||||
struct mlx5_vdpa_direct_mr *dmr;
|
||||
@ -374,9 +369,6 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
|
||||
int err = 0;
|
||||
int nnuls;
|
||||
|
||||
if (mr->initialized)
|
||||
return 0;
|
||||
|
||||
INIT_LIST_HEAD(&mr->head);
|
||||
for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
|
||||
map = vhost_iotlb_itree_next(map, start, last)) {
|
||||
@ -414,7 +406,7 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
|
||||
if (err)
|
||||
goto err_chain;
|
||||
|
||||
mr->initialized = true;
|
||||
mr->user_mr = true;
|
||||
return 0;
|
||||
|
||||
err_chain:
|
||||
@ -426,33 +418,94 @@ err_chain:
|
||||
return err;
|
||||
}
|
||||
|
||||
int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
|
||||
static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
|
||||
{
|
||||
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
|
||||
void *mkc;
|
||||
u32 *in;
|
||||
int err;
|
||||
|
||||
in = kzalloc(inlen, GFP_KERNEL);
|
||||
if (!in)
|
||||
return -ENOMEM;
|
||||
|
||||
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
|
||||
|
||||
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
|
||||
MLX5_SET(mkc, mkc, length64, 1);
|
||||
MLX5_SET(mkc, mkc, lw, 1);
|
||||
MLX5_SET(mkc, mkc, lr, 1);
|
||||
MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
|
||||
MLX5_SET(mkc, mkc, qpn, 0xffffff);
|
||||
|
||||
err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
|
||||
if (!err)
|
||||
mr->user_mr = false;
|
||||
|
||||
kfree(in);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
|
||||
{
|
||||
mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
|
||||
}
|
||||
|
||||
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
|
||||
{
|
||||
struct mlx5_vdpa_mr *mr = &mvdev->mr;
|
||||
int err;
|
||||
|
||||
mutex_lock(&mr->mkey_mtx);
|
||||
err = _mlx5_vdpa_create_mr(mvdev, iotlb);
|
||||
mutex_unlock(&mr->mkey_mtx);
|
||||
if (mr->initialized)
|
||||
return 0;
|
||||
|
||||
if (iotlb)
|
||||
err = create_user_mr(mvdev, iotlb);
|
||||
else
|
||||
err = create_dma_mr(mvdev, mr);
|
||||
|
||||
if (!err)
|
||||
mr->initialized = true;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
|
||||
int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
|
||||
{
|
||||
int err;
|
||||
|
||||
mutex_lock(&mvdev->mr.mkey_mtx);
|
||||
err = _mlx5_vdpa_create_mr(mvdev, iotlb);
|
||||
mutex_unlock(&mvdev->mr.mkey_mtx);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
|
||||
{
|
||||
struct mlx5_vdpa_mr *mr = &mvdev->mr;
|
||||
struct mlx5_vdpa_direct_mr *dmr;
|
||||
struct mlx5_vdpa_direct_mr *n;
|
||||
|
||||
mutex_lock(&mr->mkey_mtx);
|
||||
if (!mr->initialized)
|
||||
goto out;
|
||||
|
||||
destroy_indirect_key(mvdev, mr);
|
||||
list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
|
||||
list_del_init(&dmr->list);
|
||||
unmap_direct_mr(mvdev, dmr);
|
||||
kfree(dmr);
|
||||
}
|
||||
}
|
||||
|
||||
void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
|
||||
{
|
||||
struct mlx5_vdpa_mr *mr = &mvdev->mr;
|
||||
|
||||
mutex_lock(&mr->mkey_mtx);
|
||||
if (!mr->initialized)
|
||||
goto out;
|
||||
|
||||
if (mr->user_mr)
|
||||
destroy_user_mr(mvdev, mr);
|
||||
else
|
||||
destroy_dma_mr(mvdev, mr);
|
||||
|
||||
memset(mr, 0, sizeof(*mr));
|
||||
mr->initialized = false;
|
||||
out:
|
||||
|
@ -54,6 +54,9 @@ static int create_uctx(struct mlx5_vdpa_dev *mvdev, u16 *uid)
|
||||
void *in;
|
||||
int err;
|
||||
|
||||
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0))
|
||||
return 0;
|
||||
|
||||
/* 0 means not supported */
|
||||
if (!MLX5_CAP_GEN(mvdev->mdev, log_max_uctx))
|
||||
return -EOPNOTSUPP;
|
||||
@ -79,6 +82,9 @@ static void destroy_uctx(struct mlx5_vdpa_dev *mvdev, u32 uid)
|
||||
u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {};
|
||||
u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {};
|
||||
|
||||
if (!uid)
|
||||
return;
|
||||
|
||||
MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
|
||||
MLX5_SET(destroy_uctx_in, in, uid, uid);
|
||||
|
||||
@ -247,6 +253,7 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
|
||||
goto err_key;
|
||||
|
||||
kick_addr = mdev->bar_addr + offset;
|
||||
res->phys_kick_addr = kick_addr;
|
||||
|
||||
res->kick_addr = ioremap(kick_addr, PAGE_SIZE);
|
||||
if (!res->kick_addr) {
|
||||
|
@ -611,8 +611,8 @@ static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
|
||||
mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
|
||||
}
|
||||
|
||||
static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
|
||||
struct mlx5_vdpa_umem **umemp)
|
||||
static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
|
||||
struct mlx5_vdpa_umem **umemp)
|
||||
{
|
||||
struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
|
||||
int p_a;
|
||||
@ -635,7 +635,7 @@ static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq
|
||||
*umemp = &mvq->umem3;
|
||||
break;
|
||||
}
|
||||
return p_a * mvq->num_ent + p_b;
|
||||
(*umemp)->size = p_a * mvq->num_ent + p_b;
|
||||
}
|
||||
|
||||
static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
|
||||
@ -651,15 +651,10 @@ static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m
|
||||
void *in;
|
||||
int err;
|
||||
__be64 *pas;
|
||||
int size;
|
||||
struct mlx5_vdpa_umem *umem;
|
||||
|
||||
size = umem_size(ndev, mvq, num, &umem);
|
||||
if (size < 0)
|
||||
return size;
|
||||
|
||||
umem->size = size;
|
||||
err = umem_frag_buf_alloc(ndev, umem, size);
|
||||
set_umem_size(ndev, mvq, num, &umem);
|
||||
err = umem_frag_buf_alloc(ndev, umem, umem->size);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@ -829,9 +824,9 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
|
||||
MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
|
||||
MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
|
||||
MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
|
||||
MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem1.size);
|
||||
MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
|
||||
MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
|
||||
MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem1.size);
|
||||
MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
|
||||
MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
|
||||
if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
|
||||
MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
|
||||
@ -1428,8 +1423,8 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mvq->used_idx = state->avail_index;
|
||||
mvq->avail_idx = state->avail_index;
|
||||
mvq->used_idx = state->split.avail_index;
|
||||
mvq->avail_idx = state->split.avail_index;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1450,7 +1445,7 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
|
||||
* Since both values should be identical, we take the value of
|
||||
* used_idx which is reported correctly.
|
||||
*/
|
||||
state->avail_index = mvq->used_idx;
|
||||
state->split.avail_index = mvq->used_idx;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1459,7 +1454,7 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
|
||||
mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
|
||||
return err;
|
||||
}
|
||||
state->avail_index = attr.used_index;
|
||||
state->split.avail_index = attr.used_index;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1772,6 +1767,14 @@ out:
|
||||
mutex_unlock(&ndev->reslock);
|
||||
}
|
||||
|
||||
static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ndev->mvdev.max_vqs; i++)
|
||||
ndev->vqs[i].ready = false;
|
||||
}
|
||||
|
||||
static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
|
||||
{
|
||||
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
|
||||
@ -1782,10 +1785,15 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
|
||||
if (!status) {
|
||||
mlx5_vdpa_info(mvdev, "performing device reset\n");
|
||||
teardown_driver(ndev);
|
||||
clear_vqs_ready(ndev);
|
||||
mlx5_vdpa_destroy_mr(&ndev->mvdev);
|
||||
ndev->mvdev.status = 0;
|
||||
ndev->mvdev.mlx_features = 0;
|
||||
++mvdev->generation;
|
||||
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
|
||||
if (mlx5_vdpa_create_mr(mvdev, NULL))
|
||||
mlx5_vdpa_warn(mvdev, "create MR failed\n");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1866,6 +1874,7 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev)
|
||||
ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
|
||||
free_resources(ndev);
|
||||
mlx5_vdpa_destroy_mr(mvdev);
|
||||
if (!is_zero_ether_addr(ndev->config.mac)) {
|
||||
pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
|
||||
mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
|
||||
@ -1876,8 +1885,22 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev)
|
||||
|
||||
static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
|
||||
{
|
||||
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
|
||||
struct vdpa_notification_area ret = {};
|
||||
struct mlx5_vdpa_net *ndev;
|
||||
phys_addr_t addr;
|
||||
|
||||
/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
|
||||
* notification to avoid the risk of mapping pages that contain BAR of more
|
||||
* than one SF
|
||||
*/
|
||||
if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
|
||||
return ret;
|
||||
|
||||
ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
|
||||
ret.addr = addr;
|
||||
ret.size = PAGE_SIZE;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2037,14 +2060,20 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
|
||||
goto err_mtu;
|
||||
}
|
||||
|
||||
mvdev->vdev.dma_dev = mdev->device;
|
||||
mvdev->vdev.dma_dev = &mdev->pdev->dev;
|
||||
err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
|
||||
if (err)
|
||||
goto err_mpfs;
|
||||
|
||||
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
|
||||
err = mlx5_vdpa_create_mr(mvdev, NULL);
|
||||
if (err)
|
||||
goto err_res;
|
||||
}
|
||||
|
||||
err = alloc_resources(ndev);
|
||||
if (err)
|
||||
goto err_res;
|
||||
goto err_mr;
|
||||
|
||||
mvdev->vdev.mdev = &mgtdev->mgtdev;
|
||||
err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
|
||||
@ -2056,6 +2085,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
|
||||
|
||||
err_reg:
|
||||
free_resources(ndev);
|
||||
err_mr:
|
||||
mlx5_vdpa_destroy_mr(mvdev);
|
||||
err_res:
|
||||
mlx5_vdpa_free_resources(&ndev->mvdev);
|
||||
err_mpfs:
|
||||
|
@ -374,7 +374,7 @@ static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx,
|
||||
struct vringh *vrh = &vq->vring;
|
||||
|
||||
spin_lock(&vdpasim->lock);
|
||||
vrh->last_avail_idx = state->avail_index;
|
||||
vrh->last_avail_idx = state->split.avail_index;
|
||||
spin_unlock(&vdpasim->lock);
|
||||
|
||||
return 0;
|
||||
@ -387,7 +387,7 @@ static int vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx,
|
||||
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
|
||||
struct vringh *vrh = &vq->vring;
|
||||
|
||||
state->avail_index = vrh->last_avail_idx;
|
||||
state->split.avail_index = vrh->last_avail_idx;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -15,7 +15,6 @@
#include <linux/blkdev.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/blkdev.h>
#include <uapi/linux/virtio_blk.h>

#include "vdpa_sim.h"

@@ -210,13 +210,49 @@ static int vp_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 qid,
return -EOPNOTSUPP;
}

static int vp_vdpa_set_vq_state_split(struct vdpa_device *vdpa,
const struct vdpa_vq_state *state)
{
const struct vdpa_vq_state_split *split = &state->split;

if (split->avail_index == 0)
return 0;

return -EOPNOTSUPP;
}

static int vp_vdpa_set_vq_state_packed(struct vdpa_device *vdpa,
const struct vdpa_vq_state *state)
{
const struct vdpa_vq_state_packed *packed = &state->packed;

if (packed->last_avail_counter == 1 &&
packed->last_avail_idx == 0 &&
packed->last_used_counter == 1 &&
packed->last_used_idx == 0)
return 0;

return -EOPNOTSUPP;
}

static int vp_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 qid,
const struct vdpa_vq_state *state)
{
/* Note that this is not supported by virtio specification, so
* we return -ENOPOTSUPP here. This means we can't support live
* migration, vhost device start/stop.
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);

/* Note that this is not supported by virtio specification.
* But if the state is by chance equal to the device initial
* state, we can let it go.
*/
if ((vp_modern_get_status(mdev) & VIRTIO_CONFIG_S_FEATURES_OK) &&
!vp_modern_get_queue_enable(mdev, qid)) {
if (vp_modern_get_driver_features(mdev) &
BIT_ULL(VIRTIO_F_RING_PACKED))
return vp_vdpa_set_vq_state_packed(vdpa, state);
else
return vp_vdpa_set_vq_state_split(vdpa, state);
}

return -EOPNOTSUPP;
}

@@ -442,6 +478,7 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
vp_modern_map_vq_notify(mdev, i,
&vp_vdpa->vring[i].notify_pa);
if (!vp_vdpa->vring[i].notify) {
ret = -EINVAL;
dev_warn(&pdev->dev, "Fail to map vq notify %d\n", i);
goto err;
}

@@ -83,7 +83,7 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
EXPORT_SYMBOL_GPL(vhost_iotlb_add_range);

/**
* vring_iotlb_del_range - delete overlapped ranges from vhost IOTLB
* vhost_iotlb_del_range - delete overlapped ranges from vhost IOTLB
* @iotlb: the IOTLB
* @start: start of the IOVA range
* @last: last of IOVA range

@@ -1430,11 +1430,6 @@ static void vhost_scsi_handle_kick(struct vhost_work *work)
vhost_scsi_handle_vq(vs, vq);
}

static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
{
vhost_poll_flush(&vs->vqs[index].vq.poll);
}

/* Callers must hold dev mutex */
static void vhost_scsi_flush(struct vhost_scsi *vs)
{
@@ -1453,10 +1448,7 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
kref_put(&old_inflight[i]->kref, vhost_scsi_done_inflight);

/* Flush both the vhost poll and vhost work */
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
vhost_scsi_flush_vq(vs, i);
vhost_work_flush(&vs->dev, &vs->vs_completion_work);
vhost_work_flush(&vs->dev, &vs->vs_event_work);
vhost_work_dev_flush(&vs->dev);

/* Wait for all reqs issued before the flush to be finished */
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
@@ -1740,11 +1732,12 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
mutex_lock(&vq->mutex);
vhost_vq_set_backend(vq, NULL);
mutex_unlock(&vq->mutex);
/*
* Make sure cmds are not running before tearing them
* down.
*/
vhost_scsi_flush(vs);
}
/* Make sure cmds are not running before tearing them down. */
vhost_scsi_flush(vs);

for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
vq = &vs->vqs[i].vq;
vhost_scsi_destroy_vq_cmds(vq);
}
}

@@ -383,7 +383,7 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
if (r)
return r;

vq->last_avail_idx = vq_state.avail_index;
vq->last_avail_idx = vq_state.split.avail_index;
break;
}

@@ -401,7 +401,7 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
break;

case VHOST_SET_VRING_BASE:
vq_state.avail_index = vq->last_avail_idx;
vq_state.split.avail_index = vq->last_avail_idx;
if (ops->set_vq_state(vdpa, idx, &vq_state))
r = -EINVAL;
break;

@@ -231,7 +231,7 @@ void vhost_poll_stop(struct vhost_poll *poll)
}
EXPORT_SYMBOL_GPL(vhost_poll_stop);

void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
void vhost_work_dev_flush(struct vhost_dev *dev)
{
struct vhost_flush_struct flush;

@@ -243,13 +243,13 @@ void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
wait_for_completion(&flush.wait_event);
}
}
EXPORT_SYMBOL_GPL(vhost_work_flush);
EXPORT_SYMBOL_GPL(vhost_work_dev_flush);

/* Flush any work that has been scheduled. When calling this, don't hold any
* locks that are also used by the callback. */
void vhost_poll_flush(struct vhost_poll *poll)
{
vhost_work_flush(poll->dev, &poll->work);
vhost_work_dev_flush(poll->dev);
}
EXPORT_SYMBOL_GPL(vhost_poll_flush);

@@ -538,7 +538,7 @@ static int vhost_attach_cgroups(struct vhost_dev *dev)
attach.owner = current;
vhost_work_init(&attach.work, vhost_attach_cgroups_work);
vhost_work_queue(dev, &attach.work);
vhost_work_flush(dev, &attach.work);
vhost_work_dev_flush(dev);
return attach.ret;
}

@@ -20,20 +20,20 @@ typedef void (*vhost_work_fn_t)(struct vhost_work *work);

#define VHOST_WORK_QUEUED 1
struct vhost_work {
struct llist_node node;
vhost_work_fn_t fn;
unsigned long flags;
struct llist_node node;
vhost_work_fn_t fn;
unsigned long flags;
};

/* Poll a file (eventfd or socket) */
/* Note: there's nothing vhost specific about this structure. */
struct vhost_poll {
poll_table table;
wait_queue_head_t *wqh;
wait_queue_entry_t wait;
struct vhost_work work;
__poll_t mask;
struct vhost_dev *dev;
poll_table table;
wait_queue_head_t *wqh;
wait_queue_entry_t wait;
struct vhost_work work;
__poll_t mask;
struct vhost_dev *dev;
};

void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
@@ -46,8 +46,7 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file);
void vhost_poll_stop(struct vhost_poll *poll);
void vhost_poll_flush(struct vhost_poll *poll);
void vhost_poll_queue(struct vhost_poll *poll);
void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work);
long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp);
void vhost_work_dev_flush(struct vhost_dev *dev);

struct vhost_log {
u64 addr;

@@ -708,7 +708,7 @@ static void vhost_vsock_flush(struct vhost_vsock *vsock)
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
if (vsock->vqs[i].handle_kick)
vhost_poll_flush(&vsock->vqs[i].poll);
vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
vhost_work_dev_flush(&vsock->dev);
}

static void vhost_vsock_reset_orphans(struct sock *sk)

@ -75,10 +75,14 @@ enum virtio_mem_sbm_mb_state {
|
||||
VIRTIO_MEM_SBM_MB_OFFLINE,
|
||||
/* Partially plugged, fully added to Linux, offline. */
|
||||
VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
|
||||
/* Fully plugged, fully added to Linux, online. */
|
||||
VIRTIO_MEM_SBM_MB_ONLINE,
|
||||
/* Partially plugged, fully added to Linux, online. */
|
||||
VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL,
|
||||
/* Fully plugged, fully added to Linux, onlined to a kernel zone. */
|
||||
VIRTIO_MEM_SBM_MB_KERNEL,
|
||||
/* Partially plugged, fully added to Linux, online to a kernel zone */
|
||||
VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
|
||||
/* Fully plugged, fully added to Linux, onlined to ZONE_MOVABLE. */
|
||||
VIRTIO_MEM_SBM_MB_MOVABLE,
|
||||
/* Partially plugged, fully added to Linux, onlined to ZONE_MOVABLE. */
|
||||
VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
|
||||
VIRTIO_MEM_SBM_MB_COUNT
|
||||
};
|
||||
|
||||
@ -698,18 +702,6 @@ static int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id)
|
||||
return virtio_mem_remove_memory(vm, addr, size);
|
||||
}
|
||||
|
||||
/*
|
||||
* See virtio_mem_remove_memory(): Try to remove all Linux memory blocks covered
|
||||
* by the big block.
|
||||
*/
|
||||
static int virtio_mem_bbm_remove_bb(struct virtio_mem *vm, unsigned long bb_id)
|
||||
{
|
||||
const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
|
||||
const uint64_t size = vm->bbm.bb_size;
|
||||
|
||||
return virtio_mem_remove_memory(vm, addr, size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Try offlining and removing memory from Linux.
|
||||
*
|
||||
@ -832,11 +824,13 @@ static void virtio_mem_sbm_notify_offline(struct virtio_mem *vm,
|
||||
unsigned long mb_id)
|
||||
{
|
||||
switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
|
||||
case VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL:
|
||||
case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL:
|
||||
case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL:
|
||||
virtio_mem_sbm_set_mb_state(vm, mb_id,
|
||||
VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
|
||||
break;
|
||||
case VIRTIO_MEM_SBM_MB_ONLINE:
|
||||
case VIRTIO_MEM_SBM_MB_KERNEL:
|
||||
case VIRTIO_MEM_SBM_MB_MOVABLE:
|
||||
virtio_mem_sbm_set_mb_state(vm, mb_id,
|
||||
VIRTIO_MEM_SBM_MB_OFFLINE);
|
||||
break;
|
||||
@ -847,21 +841,29 @@ static void virtio_mem_sbm_notify_offline(struct virtio_mem *vm,
|
||||
}
|
||||
|
||||
static void virtio_mem_sbm_notify_online(struct virtio_mem *vm,
|
||||
unsigned long mb_id)
|
||||
unsigned long mb_id,
|
||||
unsigned long start_pfn)
|
||||
{
|
||||
const bool is_movable = page_zonenum(pfn_to_page(start_pfn)) ==
|
||||
ZONE_MOVABLE;
|
||||
int new_state;
|
||||
|
||||
switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
|
||||
case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
|
||||
virtio_mem_sbm_set_mb_state(vm, mb_id,
|
||||
VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL);
|
||||
new_state = VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL;
|
||||
if (is_movable)
|
||||
new_state = VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL;
|
||||
break;
|
||||
case VIRTIO_MEM_SBM_MB_OFFLINE:
|
||||
virtio_mem_sbm_set_mb_state(vm, mb_id,
|
||||
VIRTIO_MEM_SBM_MB_ONLINE);
|
||||
new_state = VIRTIO_MEM_SBM_MB_KERNEL;
|
||||
if (is_movable)
|
||||
new_state = VIRTIO_MEM_SBM_MB_MOVABLE;
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
break;
|
||||
}
|
||||
virtio_mem_sbm_set_mb_state(vm, mb_id, new_state);
|
||||
}
|
||||
|
||||
static void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm,
|
||||
@ -1015,7 +1017,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
|
||||
break;
|
||||
case MEM_ONLINE:
|
||||
if (vm->in_sbm)
|
||||
virtio_mem_sbm_notify_online(vm, id);
|
||||
virtio_mem_sbm_notify_online(vm, id, mhp->start_pfn);
|
||||
|
||||
atomic64_sub(size, &vm->offline_size);
|
||||
/*
|
||||
@ -1137,7 +1139,7 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages)
|
||||
*/
|
||||
static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages)
|
||||
{
|
||||
const bool is_movable = zone_idx(page_zone(pfn_to_page(pfn))) ==
|
||||
const bool is_movable = page_zonenum(pfn_to_page(pfn)) ==
|
||||
ZONE_MOVABLE;
|
||||
int rc, retry_count;
|
||||
|
||||
@ -1455,8 +1457,8 @@ static int virtio_mem_bbm_plug_bb(struct virtio_mem *vm, unsigned long bb_id)
|
||||
*
|
||||
* Note: can fail after some subblocks were unplugged.
|
||||
*/
|
||||
static int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm,
|
||||
unsigned long mb_id, uint64_t *nb_sb)
|
||||
static int virtio_mem_sbm_unplug_any_sb_raw(struct virtio_mem *vm,
|
||||
unsigned long mb_id, uint64_t *nb_sb)
|
||||
{
|
||||
int sb_id, count;
|
||||
int rc;
|
||||
@ -1498,7 +1500,7 @@ static int virtio_mem_sbm_unplug_mb(struct virtio_mem *vm, unsigned long mb_id)
|
||||
{
|
||||
uint64_t nb_sb = vm->sbm.sbs_per_mb;
|
||||
|
||||
return virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb);
|
||||
return virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, &nb_sb);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1585,9 +1587,9 @@ static int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm,
|
||||
* Note: Can fail after some subblocks were successfully plugged.
|
||||
*/
|
||||
static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
|
||||
unsigned long mb_id, uint64_t *nb_sb,
|
||||
bool online)
|
||||
unsigned long mb_id, uint64_t *nb_sb)
|
||||
{
|
||||
const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
|
||||
unsigned long pfn, nr_pages;
|
||||
int sb_id, count;
|
||||
int rc;
|
||||
@ -1609,7 +1611,7 @@ static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
|
||||
if (rc)
|
||||
return rc;
|
||||
*nb_sb -= count;
|
||||
if (!online)
|
||||
if (old_state == VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL)
|
||||
continue;
|
||||
|
||||
/* fake-online the pages if the memory block is online */
|
||||
@ -1619,23 +1621,22 @@ static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
|
||||
virtio_mem_fake_online(pfn, nr_pages);
|
||||
}
|
||||
|
||||
if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
|
||||
if (online)
|
||||
virtio_mem_sbm_set_mb_state(vm, mb_id,
|
||||
VIRTIO_MEM_SBM_MB_ONLINE);
|
||||
else
|
||||
virtio_mem_sbm_set_mb_state(vm, mb_id,
|
||||
VIRTIO_MEM_SBM_MB_OFFLINE);
|
||||
}
|
||||
if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
|
||||
virtio_mem_sbm_set_mb_state(vm, mb_id, old_state - 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
|
||||
{
|
||||
const int mb_states[] = {
|
||||
VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
|
||||
VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
|
||||
VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
|
||||
};
|
||||
uint64_t nb_sb = diff / vm->sbm.sb_size;
|
||||
unsigned long mb_id;
|
||||
int rc;
|
||||
int rc, i;
|
||||
|
||||
if (!nb_sb)
|
||||
return 0;
|
||||
@ -1643,22 +1644,13 @@ static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
|
||||
/* Don't race with onlining/offlining */
|
||||
mutex_lock(&vm->hotplug_mutex);
|
||||
|
||||
/* Try to plug subblocks of partially plugged online blocks. */
|
||||
virtio_mem_sbm_for_each_mb(vm, mb_id,
|
||||
VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) {
|
||||
rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, true);
|
||||
if (rc || !nb_sb)
|
||||
goto out_unlock;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
/* Try to plug subblocks of partially plugged offline blocks. */
|
||||
virtio_mem_sbm_for_each_mb(vm, mb_id,
|
||||
VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) {
|
||||
rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, false);
|
||||
if (rc || !nb_sb)
|
||||
goto out_unlock;
|
||||
cond_resched();
|
||||
for (i = 0; i < ARRAY_SIZE(mb_states); i++) {
|
||||
virtio_mem_sbm_for_each_mb(vm, mb_id, mb_states[i]) {
|
||||
rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb);
|
||||
if (rc || !nb_sb)
|
||||
goto out_unlock;
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1819,7 +1811,7 @@ static int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm,
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, nb_sb);
|
||||
rc = virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, nb_sb);
|
||||
|
||||
/* some subblocks might have been unplugged even on failure */
|
||||
if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
|
||||
@ -1856,6 +1848,7 @@ static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm,
|
||||
int count)
|
||||
{
|
||||
const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count;
|
||||
const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
|
||||
unsigned long start_pfn;
|
||||
int rc;
|
||||
|
||||
@ -1874,8 +1867,17 @@ static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm,
|
||||
return rc;
|
||||
}
|
||||
|
||||
virtio_mem_sbm_set_mb_state(vm, mb_id,
|
||||
VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL);
|
||||
switch (old_state) {
|
||||
case VIRTIO_MEM_SBM_MB_KERNEL:
|
||||
virtio_mem_sbm_set_mb_state(vm, mb_id,
|
||||
VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL);
|
||||
break;
|
||||
case VIRTIO_MEM_SBM_MB_MOVABLE:
|
||||
virtio_mem_sbm_set_mb_state(vm, mb_id,
|
||||
VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1942,11 +1944,50 @@ unplugged:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unplug the desired number of plugged subblocks of a memory block that is
|
||||
* already added to Linux. Will skip subblock of online memory blocks that are
|
||||
* busy (by the OS). Will fail if any subblock that's not busy cannot get
|
||||
* unplugged.
|
||||
*
|
||||
* Will modify the state of the memory block. Might temporarily drop the
|
||||
* hotplug_mutex.
|
||||
*
|
||||
* Note: Can fail after some subblocks were successfully unplugged. Can
|
||||
* return 0 even if subblocks were busy and could not get unplugged.
|
||||
*/
|
||||
static int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm,
|
||||
unsigned long mb_id,
|
||||
uint64_t *nb_sb)
|
||||
{
|
||||
const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
|
||||
|
||||
switch (old_state) {
|
||||
case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL:
|
||||
case VIRTIO_MEM_SBM_MB_KERNEL:
|
||||
case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL:
|
||||
case VIRTIO_MEM_SBM_MB_MOVABLE:
|
||||
return virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, nb_sb);
|
||||
case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
|
||||
case VIRTIO_MEM_SBM_MB_OFFLINE:
|
||||
return virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, nb_sb);
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
|
||||
{
|
||||
const int mb_states[] = {
|
||||
VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
|
||||
VIRTIO_MEM_SBM_MB_OFFLINE,
|
||||
VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
|
||||
VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
|
||||
VIRTIO_MEM_SBM_MB_MOVABLE,
|
||||
VIRTIO_MEM_SBM_MB_KERNEL,
|
||||
};
|
||||
uint64_t nb_sb = diff / vm->sbm.sb_size;
|
||||
unsigned long mb_id;
|
||||
int rc;
|
||||
int rc, i;
|
||||
|
||||
if (!nb_sb)
|
||||
return 0;
|
||||
@ -1958,47 +1999,26 @@ static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
|
||||
*/
|
||||
mutex_lock(&vm->hotplug_mutex);
|
||||
|
||||
/* Try to unplug subblocks of partially plugged offline blocks. */
|
||||
virtio_mem_sbm_for_each_mb_rev(vm, mb_id,
|
||||
VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) {
|
||||
rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb);
|
||||
if (rc || !nb_sb)
|
||||
goto out_unlock;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
/* Try to unplug subblocks of plugged offline blocks. */
|
||||
virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE) {
|
||||
rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb);
|
||||
if (rc || !nb_sb)
|
||||
goto out_unlock;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
if (!unplug_online) {
|
||||
mutex_unlock(&vm->hotplug_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Try to unplug subblocks of partially plugged online blocks. */
|
||||
virtio_mem_sbm_for_each_mb_rev(vm, mb_id,
|
||||
VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) {
|
||||
rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb);
|
||||
if (rc || !nb_sb)
|
||||
goto out_unlock;
|
||||
mutex_unlock(&vm->hotplug_mutex);
|
||||
cond_resched();
|
||||
mutex_lock(&vm->hotplug_mutex);
|
||||
}
|
||||
|
||||
/* Try to unplug subblocks of plugged online blocks. */
|
||||
virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE) {
|
||||
rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb);
|
||||
if (rc || !nb_sb)
|
||||
goto out_unlock;
|
||||
mutex_unlock(&vm->hotplug_mutex);
|
||||
cond_resched();
|
||||
mutex_lock(&vm->hotplug_mutex);
|
||||
/*
|
||||
* We try unplug from partially plugged blocks first, to try removing
|
||||
* whole memory blocks along with metadata. We prioritize ZONE_MOVABLE
|
||||
* as it's more reliable to unplug memory and remove whole memory
|
||||
* blocks, and we don't want to trigger a zone imbalances by
|
||||
* accidentially removing too much kernel memory.
|
||||
*/
|
||||
for (i = 0; i < ARRAY_SIZE(mb_states); i++) {
|
||||
virtio_mem_sbm_for_each_mb_rev(vm, mb_id, mb_states[i]) {
|
||||
rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb);
|
||||
if (rc || !nb_sb)
|
||||
goto out_unlock;
|
||||
mutex_unlock(&vm->hotplug_mutex);
|
||||
cond_resched();
|
||||
mutex_lock(&vm->hotplug_mutex);
|
||||
}
|
||||
if (!unplug_online && i == 1) {
|
||||
mutex_unlock(&vm->hotplug_mutex);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(&vm->hotplug_mutex);
|
||||
@ -2084,35 +2104,6 @@ rollback_safe_unplug:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to remove a big block from Linux and unplug it. Will fail with
|
||||
* -EBUSY if some memory is online.
|
||||
*
|
||||
* Will modify the state of the memory block.
|
||||
*/
|
||||
static int virtio_mem_bbm_remove_and_unplug_bb(struct virtio_mem *vm,
|
||||
unsigned long bb_id)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
|
||||
VIRTIO_MEM_BBM_BB_ADDED))
|
||||
return -EINVAL;
|
||||
|
||||
rc = virtio_mem_bbm_remove_bb(vm, bb_id);
|
||||
if (rc)
|
||||
return -EBUSY;
|
||||
|
||||
rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
|
||||
if (rc)
|
||||
virtio_mem_bbm_set_bb_state(vm, bb_id,
|
||||
VIRTIO_MEM_BBM_BB_PLUGGED);
|
||||
else
|
||||
virtio_mem_bbm_set_bb_state(vm, bb_id,
|
||||
VIRTIO_MEM_BBM_BB_UNUSED);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Test if a big block is completely offline.
|
||||
*/
|
||||
@ -2132,46 +2123,64 @@ static bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm,
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Test if a big block is completely onlined to ZONE_MOVABLE (or offline).
|
||||
*/
|
||||
static bool virtio_mem_bbm_bb_is_movable(struct virtio_mem *vm,
|
||||
unsigned long bb_id)
|
||||
{
|
||||
const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
|
||||
const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
|
||||
struct page *page;
|
||||
unsigned long pfn;
|
||||
|
||||
for (pfn = start_pfn; pfn < start_pfn + nr_pages;
|
||||
pfn += PAGES_PER_SECTION) {
|
||||
page = pfn_to_online_page(pfn);
|
||||
if (!page)
|
||||
continue;
|
||||
if (page_zonenum(page) != ZONE_MOVABLE)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int virtio_mem_bbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
|
||||
{
|
||||
uint64_t nb_bb = diff / vm->bbm.bb_size;
|
||||
uint64_t bb_id;
|
||||
int rc;
|
||||
int rc, i;
|
||||
|
||||
if (!nb_bb)
|
||||
return 0;
|
||||
|
||||
/* Try to unplug completely offline big blocks first. */
|
||||
virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
|
||||
cond_resched();
|
||||
/*
|
||||
* As we're holding no locks, this check is racy as memory
|
||||
* can get onlined in the meantime - but we'll fail gracefully.
|
||||
*/
|
||||
if (!virtio_mem_bbm_bb_is_offline(vm, bb_id))
|
||||
continue;
|
||||
rc = virtio_mem_bbm_remove_and_unplug_bb(vm, bb_id);
|
||||
if (rc == -EBUSY)
|
||||
continue;
|
||||
if (!rc)
|
||||
nb_bb--;
|
||||
if (rc || !nb_bb)
|
||||
return rc;
|
||||
}
|
||||
/*
|
||||
* Try to unplug big blocks. Similar to SBM, start with offline
|
||||
* big blocks.
|
||||
*/
|
||||
for (i = 0; i < 3; i++) {
|
||||
virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
|
||||
cond_resched();
|
||||
|
||||
if (!unplug_online)
|
||||
return 0;
|
||||
|
||||
/* Try to unplug any big blocks. */
|
||||
virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
|
||||
cond_resched();
|
||||
rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id);
|
||||
if (rc == -EBUSY)
|
||||
continue;
|
||||
if (!rc)
|
||||
nb_bb--;
|
||||
if (rc || !nb_bb)
|
||||
return rc;
|
||||
/*
|
||||
* As we're holding no locks, these checks are racy,
|
||||
* but we don't care.
|
||||
*/
|
||||
if (i == 0 && !virtio_mem_bbm_bb_is_offline(vm, bb_id))
|
||||
continue;
|
||||
if (i == 1 && !virtio_mem_bbm_bb_is_movable(vm, bb_id))
|
||||
continue;
|
||||
rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id);
|
||||
if (rc == -EBUSY)
|
||||
continue;
|
||||
if (!rc)
|
||||
nb_bb--;
|
||||
if (rc || !nb_bb)
|
||||
return rc;
|
||||
}
|
||||
if (i == 0 && !unplug_online)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return nb_bb ? -EBUSY : 0;
|
||||
@ -2422,6 +2431,10 @@ static int virtio_mem_init(struct virtio_mem *vm)
|
||||
dev_warn(&vm->vdev->dev,
|
||||
"Some device memory is not addressable/pluggable. This can make some memory unusable.\n");
|
||||
|
||||
/* Prepare the offline threshold - make sure we can add two blocks. */
|
||||
vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(),
|
||||
VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD);
|
||||
|
||||
/*
|
||||
* We want subblocks to span at least MAX_ORDER_NR_PAGES and
|
||||
* pageblock_nr_pages pages. This:
|
||||
@ -2468,14 +2481,11 @@ static int virtio_mem_init(struct virtio_mem *vm)
|
||||
vm->bbm.bb_size - 1;
|
||||
vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr);
|
||||
vm->bbm.next_bb_id = vm->bbm.first_bb_id;
|
||||
}
|
||||
|
||||
/* Prepare the offline threshold - make sure we can add two blocks. */
|
||||
vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(),
|
||||
VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD);
|
||||
/* In BBM, we also want at least two big blocks. */
|
||||
vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size,
|
||||
vm->offline_threshold);
|
||||
/* Make sure we can add two big blocks. */
|
||||
vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size,
|
||||
vm->offline_threshold);
|
||||
}
|
||||
|
||||
dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr);
|
||||
dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size);
|
||||
|
@@ -383,6 +383,27 @@ u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev)
}
EXPORT_SYMBOL_GPL(vp_modern_get_features);

/*
 * vp_modern_get_driver_features - get driver features from device
 * @mdev: the modern virtio-pci device
 *
 * Returns the driver features read from the device
 */
u64 vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev)
{
struct virtio_pci_common_cfg __iomem *cfg = mdev->common;

u64 features;

vp_iowrite32(0, &cfg->guest_feature_select);
features = vp_ioread32(&cfg->guest_feature);
vp_iowrite32(1, &cfg->guest_feature_select);
features |= ((u64)vp_ioread32(&cfg->guest_feature) << 32);

return features;
}
EXPORT_SYMBOL_GPL(vp_modern_get_driver_features);

/*
 * vp_modern_set_features - set features to device
 * @mdev: the modern virtio-pci device

@ -74,14 +74,14 @@ struct vring_desc_state_packed {
|
||||
void *data; /* Data for callback. */
|
||||
struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
|
||||
u16 num; /* Descriptor list length. */
|
||||
u16 next; /* The next desc state in a list. */
|
||||
u16 last; /* The last desc state in a list. */
|
||||
};
|
||||
|
||||
struct vring_desc_extra_packed {
|
||||
struct vring_desc_extra {
|
||||
dma_addr_t addr; /* Buffer DMA addr. */
|
||||
u32 len; /* Buffer length. */
|
||||
u16 flags; /* Descriptor flags. */
|
||||
u16 next; /* The next desc state in a list. */
|
||||
};
|
||||
|
||||
struct vring_virtqueue {
|
||||
@ -113,6 +113,9 @@ struct vring_virtqueue {
|
||||
/* Last used index we've seen. */
|
||||
u16 last_used_idx;
|
||||
|
||||
/* Hint for event idx: already triggered no need to disable. */
|
||||
bool event_triggered;
|
||||
|
||||
union {
|
||||
/* Available for split ring */
|
||||
struct {
|
||||
@ -130,6 +133,7 @@ struct vring_virtqueue {
|
||||
|
||||
/* Per-descriptor state. */
|
||||
struct vring_desc_state_split *desc_state;
|
||||
struct vring_desc_extra *desc_extra;
|
||||
|
||||
/* DMA address and size information */
|
||||
dma_addr_t queue_dma_addr;
|
||||
@ -166,7 +170,7 @@ struct vring_virtqueue {
|
||||
|
||||
/* Per-descriptor state. */
|
||||
struct vring_desc_state_packed *desc_state;
|
||||
struct vring_desc_extra_packed *desc_extra;
|
||||
struct vring_desc_extra *desc_extra;
|
||||
|
||||
/* DMA address and size information */
|
||||
dma_addr_t ring_dma_addr;
|
||||
@ -364,8 +368,8 @@ static int vring_mapping_error(const struct vring_virtqueue *vq,
 * Split ring specific functions - *_split().
 */

static void vring_unmap_one_split(const struct vring_virtqueue *vq,
				  struct vring_desc *desc)
static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
					   struct vring_desc *desc)
{
	u16 flags;

@ -389,6 +393,35 @@ static void vring_unmap_one_split(const struct vring_virtqueue *vq,
	}
}

static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
					  unsigned int i)
{
	struct vring_desc_extra *extra = vq->split.desc_extra;
	u16 flags;

	if (!vq->use_dma_api)
		goto out;

	flags = extra[i].flags;

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 extra[i].addr,
				 extra[i].len,
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       extra[i].addr,
			       extra[i].len,
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}

out:
	return extra[i].next;
}

static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
					       unsigned int total_sg,
					       gfp_t gfp)
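With the index-based variant above returning the shadowed next pointer, a descriptor chain can be unmapped and walked in a single pass over driver-private state, without rereading vring.desc. A minimal sketch of such a walk; the helper is hypothetical, the in-tree users are virtqueue_add_split() and detach_buf_split() below:

/* Hypothetical helper, for illustration only. */
static void demo_unmap_chain(struct vring_virtqueue *vq,
			     unsigned int head, unsigned int count)
{
	unsigned int i = head;

	while (count--)
		i = vring_unmap_one_split(vq, i);	/* returns extra[i].next */
}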
@ -412,6 +445,35 @@ static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
	return desc;
}

static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
						    struct vring_desc *desc,
						    unsigned int i,
						    dma_addr_t addr,
						    unsigned int len,
						    u16 flags,
						    bool indirect)
{
	struct vring_virtqueue *vring = to_vvq(vq);
	struct vring_desc_extra *extra = vring->split.desc_extra;
	u16 next;

	desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
	desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
	desc[i].len = cpu_to_virtio32(vq->vdev, len);

	if (!indirect) {
		next = extra[i].next;
		desc[i].next = cpu_to_virtio16(vq->vdev, next);

		extra[i].addr = addr;
		extra[i].len = len;
		extra[i].flags = flags;
	} else
		next = virtio16_to_cpu(vq->vdev, desc[i].next);

	return next;
}

static inline int virtqueue_add_split(struct virtqueue *_vq,
				      struct scatterlist *sgs[],
				      unsigned int total_sg,
@ -484,11 +546,13 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
			desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
			prev = i;
			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
			/* Note that we trust indirect descriptor
			 * table since it use stream DMA mapping.
			 */
			i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
						     VRING_DESC_F_NEXT,
						     indirect);
		}
	}
	for (; n < (out_sgs + in_sgs); n++) {
@ -497,15 +561,22 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
			desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
			prev = i;
			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
			/* Note that we trust indirect descriptor
			 * table since it use stream DMA mapping.
			 */
			i = virtqueue_add_desc_split(_vq, desc, i, addr,
						     sg->length,
						     VRING_DESC_F_NEXT |
						     VRING_DESC_F_WRITE,
						     indirect);
		}
	}
	/* Last one doesn't continue. */
	desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
	if (!indirect && vq->use_dma_api)
		vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags =
			~VRING_DESC_F_NEXT;

	if (indirect) {
		/* Now that the indirect table is filled in, map it. */
@ -515,13 +586,11 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
		if (vring_mapping_error(vq, addr))
			goto unmap_release;

		vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev,
				VRING_DESC_F_INDIRECT);
		vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev,
				addr);

		vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev,
				total_sg * sizeof(struct vring_desc));
		virtqueue_add_desc_split(_vq, vq->split.vring.desc,
					 head, addr,
					 total_sg * sizeof(struct vring_desc),
					 VRING_DESC_F_INDIRECT,
					 false);
	}

	/* We're using some buffers from the free list. */
@ -529,8 +598,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,

	/* Update free pointer */
	if (indirect)
		vq->free_head = virtio16_to_cpu(_vq->vdev,
				vq->split.vring.desc[head].next);
		vq->free_head = vq->split.desc_extra[head].next;
	else
		vq->free_head = i;

@ -575,8 +643,11 @@ unmap_release:
	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		vring_unmap_one_split(vq, &desc[i]);
		i = virtio16_to_cpu(_vq->vdev, desc[i].next);
		if (indirect) {
			vring_unmap_one_split_indirect(vq, &desc[i]);
			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
		} else
			i = vring_unmap_one_split(vq, i);
	}

	if (indirect)
@ -630,14 +701,13 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
	i = head;

	while (vq->split.vring.desc[i].flags & nextflag) {
		vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
		i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next);
		vring_unmap_one_split(vq, i);
		i = vq->split.desc_extra[i].next;
		vq->vq.num_free++;
	}

	vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
	vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev,
						       vq->free_head);
	vring_unmap_one_split(vq, i);
	vq->split.desc_extra[i].next = vq->free_head;
	vq->free_head = head;

	/* Plus final descriptor */
@ -652,15 +722,14 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
		if (!indir_desc)
			return;

		len = virtio32_to_cpu(vq->vq.vdev,
				vq->split.vring.desc[head].len);
		len = vq->split.desc_extra[head].len;

		BUG_ON(!(vq->split.vring.desc[head].flags &
			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
		BUG_ON(!(vq->split.desc_extra[head].flags &
				VRING_DESC_F_INDIRECT));
		BUG_ON(len == 0 || len % sizeof(struct vring_desc));

		for (j = 0; j < len / sizeof(struct vring_desc); j++)
			vring_unmap_one_split(vq, &indir_desc[j]);
			vring_unmap_one_split_indirect(vq, &indir_desc[j]);

		kfree(indir_desc);
		vq->split.desc_state[head].indir_desc = NULL;
@ -739,7 +808,10 @@ static void virtqueue_disable_cb_split(struct virtqueue *_vq)

	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
		if (vq->event)
			/* TODO: this is a hack. Figure out a cleaner value to write. */
			vring_used_event(&vq->split.vring) = 0x0;
		else
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
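Background for the hunk above: when VIRTIO_RING_F_EVENT_IDX is negotiated, the device ignores VRING_AVAIL_F_NO_INTERRUPT and instead notifies only when the used index crosses used_event, using the spec's vring_need_event() test. Writing used_event = 0 therefore suppresses nearly all further interrupts, which is why the in-code comment calls it a hack. For reference, a sketch of the standard test, the same expression as vring_need_event() in include/uapi/linux/virtio_ring.h:

/* Standard event-index test, reproduced here for illustration only. */
static inline bool demo_need_event(u16 event_idx, u16 new_idx, u16 old_idx)
{
	/* True if new_idx has moved past event_idx since old_idx. */
	return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old_idx);
}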
@ -912,7 +984,7 @@ static struct virtqueue *vring_create_virtqueue_split(
 */

static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
				     struct vring_desc_extra_packed *state)
				     struct vring_desc_extra *state)
{
	u16 flags;

@ -1061,7 +1133,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
			1 << VRING_PACKED_DESC_F_USED;
	}
	vq->packed.next_avail_idx = n;
	vq->free_head = vq->packed.desc_state[id].next;
	vq->free_head = vq->packed.desc_extra[id].next;

	/* Store token and indirect buffer state. */
	vq->packed.desc_state[id].num = 1;
@ -1169,7 +1241,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
				le16_to_cpu(flags);
		}
		prev = curr;
		curr = vq->packed.desc_state[curr].next;
		curr = vq->packed.desc_extra[curr].next;

		if ((unlikely(++i >= vq->packed.vring.num))) {
			i = 0;
@ -1213,13 +1285,16 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
unmap_release:
	err_idx = i;
	i = head;
	curr = vq->free_head;

	vq->packed.avail_used_flags = avail_used_flags;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		vring_unmap_desc_packed(vq, &desc[i]);
		vring_unmap_state_packed(vq,
					 &vq->packed.desc_extra[curr]);
		curr = vq->packed.desc_extra[curr].next;
		i++;
		if (i >= vq->packed.vring.num)
			i = 0;
@ -1290,7 +1365,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
	/* Clear data ptr. */
	state->data = NULL;

	vq->packed.desc_state[state->last].next = vq->free_head;
	vq->packed.desc_extra[state->last].next = vq->free_head;
	vq->free_head = id;
	vq->vq.num_free += state->num;

@ -1299,7 +1374,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
		for (i = 0; i < state->num; i++) {
			vring_unmap_state_packed(vq,
						 &vq->packed.desc_extra[curr]);
			curr = vq->packed.desc_state[curr].next;
			curr = vq->packed.desc_extra[curr].next;
		}
	}

@ -1550,6 +1625,25 @@ static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
	return NULL;
}

static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq,
						       unsigned int num)
{
	struct vring_desc_extra *desc_extra;
	unsigned int i;

	desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
				   GFP_KERNEL);
	if (!desc_extra)
		return NULL;

	memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));

	for (i = 0; i < num - 1; i++)
		desc_extra[i].next = i + 1;

	return desc_extra;
}

static struct virtqueue *vring_create_virtqueue_packed(
	unsigned int index,
	unsigned int num,
@ -1567,7 +1661,6 @@ static struct virtqueue *vring_create_virtqueue_packed(
	struct vring_packed_desc_event *driver, *device;
	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
	size_t ring_size_in_bytes, event_size_in_bytes;
	unsigned int i;

	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);

@ -1605,6 +1698,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
	vq->weak_barriers = weak_barriers;
	vq->broken = false;
	vq->last_used_idx = 0;
	vq->event_triggered = false;
	vq->num_added = 0;
	vq->packed_ring = true;
	vq->use_dma_api = vring_use_dma_api(vdev);
@ -1649,18 +1743,11 @@ static struct virtqueue *vring_create_virtqueue_packed(

	/* Put everything in free lists. */
	vq->free_head = 0;
	for (i = 0; i < num-1; i++)
		vq->packed.desc_state[i].next = i + 1;

	vq->packed.desc_extra = kmalloc_array(num,
			sizeof(struct vring_desc_extra_packed),
			GFP_KERNEL);
	vq->packed.desc_extra = vring_alloc_desc_extra(vq, num);
	if (!vq->packed.desc_extra)
		goto err_desc_extra;

	memset(vq->packed.desc_extra, 0,
			num * sizeof(struct vring_desc_extra_packed));

	/* No callback? Tell other side not to bother us. */
	if (!callback) {
		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
@ -1875,7 +1962,7 @@ bool virtqueue_kick(struct virtqueue *vq)
EXPORT_SYMBOL_GPL(virtqueue_kick);

/**
 * virtqueue_get_buf - get the next used buffer
 * virtqueue_get_buf_ctx - get the next used buffer
 * @_vq: the struct virtqueue we're talking about.
 * @len: the length written into the buffer
 * @ctx: extra context for the token
@ -1919,6 +2006,12 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	/* If device triggered an event already it won't trigger one again:
	 * no need to disable.
	 */
	if (vq->event_triggered)
		return;

	if (vq->packed_ring)
		virtqueue_disable_cb_packed(_vq);
	else
@ -1942,6 +2035,9 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->event_triggered)
		vq->event_triggered = false;

	return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
				 virtqueue_enable_cb_prepare_split(_vq);
}
@ -2005,6 +2101,9 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->event_triggered)
		vq->event_triggered = false;

	return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
				 virtqueue_enable_cb_delayed_split(_vq);
}
@ -2044,6 +2143,10 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
	if (unlikely(vq->broken))
		return IRQ_HANDLED;

	/* Just a hint for performance: so it's ok that this can be racy! */
	if (vq->event)
		vq->event_triggered = true;

	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
	if (vq->vq.callback)
		vq->vq.callback(&vq->vq);
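The event_triggered flag added above is deliberately a racy hint: the interrupt handler sets it, virtqueue_disable_cb() uses it to skip the suppression write because no further event can arrive until callbacks are re-armed, and the enable_cb paths clear it. A stripped-down model of the pattern, illustration only, with invented demo_* names:

/* Minimal model of the event_triggered hint, not part of the diff. */
struct demo_vq {
	bool event_triggered;
};

static void demo_interrupt(struct demo_vq *vq)
{
	vq->event_triggered = true;		/* hint: event already delivered */
}

static void demo_disable_cb(struct demo_vq *vq)
{
	if (vq->event_triggered)
		return;				/* nothing more will fire anyway */
	/* ...otherwise write the ring's suppression flags / used_event... */
}

static void demo_enable_cb(struct demo_vq *vq)
{
	vq->event_triggered = false;		/* re-armed, hint no longer valid */
}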
@ -2062,7 +2165,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
					void (*callback)(struct virtqueue *),
					const char *name)
{
	unsigned int i;
	struct vring_virtqueue *vq;

	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
@ -2083,6 +2185,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
	vq->weak_barriers = weak_barriers;
	vq->broken = false;
	vq->last_used_idx = 0;
	vq->event_triggered = false;
	vq->num_added = 0;
	vq->use_dma_api = vring_use_dma_api(vdev);
#ifdef DEBUG
@ -2114,20 +2217,26 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,

	vq->split.desc_state = kmalloc_array(vring.num,
			sizeof(struct vring_desc_state_split), GFP_KERNEL);
	if (!vq->split.desc_state) {
		kfree(vq);
		return NULL;
	}
	if (!vq->split.desc_state)
		goto err_state;

	vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
	if (!vq->split.desc_extra)
		goto err_extra;

	/* Put everything in free lists. */
	vq->free_head = 0;
	for (i = 0; i < vring.num-1; i++)
		vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
	memset(vq->split.desc_state, 0, vring.num *
			sizeof(struct vring_desc_state_split));

	list_add_tail(&vq->vq.list, &vdev->vqs);
	return &vq->vq;

err_extra:
	kfree(vq->split.desc_state);
err_state:
	kfree(vq);
	return NULL;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);

@ -2208,8 +2317,10 @@ void vring_del_virtqueue(struct virtqueue *_vq)
					 vq->split.queue_dma_addr);
		}
	}
	if (!vq->packed_ring)
	if (!vq->packed_ring) {
		kfree(vq->split.desc_state);
		kfree(vq->split.desc_extra);
	}
	list_del(&_vq->list);
	kfree(vq);
}
@ -142,6 +142,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
	struct vdpa_callback cb;
	struct virtqueue *vq;
	u64 desc_addr, driver_addr, device_addr;
	/* Assume split virtqueue, switch to packed if necessary */
	struct vdpa_vq_state state = {0};
	unsigned long flags;
	u32 align, num;
	int err;
@ -191,6 +193,19 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
		goto err_vq;
	}

	/* reset virtqueue state index */
	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
		struct vdpa_vq_state_packed *s = &state.packed;

		s->last_avail_counter = 1;
		s->last_avail_idx = 0;
		s->last_used_counter = 1;
		s->last_used_idx = 0;
	}
	err = ops->set_vq_state(vdpa, index, &state);
	if (err)
		goto err_vq;

	ops->set_vq_ready(vdpa, index, 1);

	vq->priv = info;
@ -1512,7 +1512,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
	u8         uar_4k[0x1];
	u8         reserved_at_241[0x9];
	u8         uar_sz[0x6];
	u8         reserved_at_250[0x8];
	u8         reserved_at_248[0x2];
	u8         umem_uid_0[0x1];
	u8         reserved_at_250[0x5];
	u8         log_pg_sz[0x8];

	u8         bf[0x1];
@ -28,13 +28,34 @@ struct vdpa_notification_area {
};

/**
 * struct vdpa_vq_state - vDPA vq_state definition
 * struct vdpa_vq_state_split - vDPA split virtqueue state
 * @avail_index: available index
 */
struct vdpa_vq_state {
struct vdpa_vq_state_split {
	u16	avail_index;
};

/**
 * struct vdpa_vq_state_packed - vDPA packed virtqueue state
 * @last_avail_counter: last driver ring wrap counter observed by device
 * @last_avail_idx: device available index
 * @last_used_counter: device ring wrap counter
 * @last_used_idx: used index
 */
struct vdpa_vq_state_packed {
	u16	last_avail_counter:1;
	u16	last_avail_idx:15;
	u16	last_used_counter:1;
	u16	last_used_idx:15;
};

struct vdpa_vq_state {
	union {
		struct vdpa_vq_state_split split;
		struct vdpa_vq_state_packed packed;
	};
};

struct vdpa_mgmt_dev;

/**
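Since vdpa_vq_state is now a union, a parent driver's set_vq_state()/get_vq_state() has to pick the member that matches the negotiated ring format. A hedged sketch of the dispatch follows; the demo_* helpers and the feature check are hypothetical, and real handlers live in the individual vDPA parent drivers such as ifcvf, mlx5_vdpa and vdpa_sim:

/* Hypothetical vDPA parent handler, for illustration only. */
static int demo_set_vq_state(struct vdpa_device *vdpa, u16 idx,
			     const struct vdpa_vq_state *state)
{
	if (demo_packed_negotiated(vdpa))	/* hypothetical feature check */
		return demo_restore_packed(vdpa, idx,
					   state->packed.last_avail_idx,
					   state->packed.last_avail_counter);

	return demo_restore_split(vdpa, idx, state->split.avail_index);
}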
@ -79,6 +79,7 @@ static inline void vp_iowrite64_twopart(u64 val,
}

u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev);
u64 vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev);
void vp_modern_set_features(struct virtio_pci_modern_device *mdev,
			    u64 features);
u32 vp_modern_generation(struct virtio_pci_modern_device *mdev);
@ -57,4 +57,16 @@
#define VIRTIO_ID_MAC80211_HWSIM	29 /* virtio mac80211-hwsim */
#define VIRTIO_ID_BT			40 /* virtio bluetooth */

/*
 * Virtio Transitional IDs
 */

#define VIRTIO_TRANS_ID_NET		1000 /* transitional virtio net */
#define VIRTIO_TRANS_ID_BLOCK		1001 /* transitional virtio block */
#define VIRTIO_TRANS_ID_BALLOON		1002 /* transitional virtio balloon */
#define VIRTIO_TRANS_ID_CONSOLE		1003 /* transitional virtio console */
#define VIRTIO_TRANS_ID_SCSI		1004 /* transitional virtio SCSI */
#define VIRTIO_TRANS_ID_RNG		1005 /* transitional virtio rng */
#define VIRTIO_TRANS_ID_9P		1009 /* transitional virtio 9p console */

#endif /* _LINUX_VIRTIO_IDS_H */
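Each transitional ID pairs by name with an existing modern device ID (net, block, balloon, console, SCSI, rng, 9p). A hedged sketch of how a transport might translate one to the other; the helper itself is hypothetical, and only the VIRTIO_ID_* and VIRTIO_TRANS_ID_* constants from virtio_ids.h are assumed:

/* Hypothetical translation helper, for illustration only. */
static inline u32 demo_trans_to_modern_id(u32 id)
{
	switch (id) {
	case VIRTIO_TRANS_ID_NET:	return VIRTIO_ID_NET;
	case VIRTIO_TRANS_ID_BLOCK:	return VIRTIO_ID_BLOCK;
	case VIRTIO_TRANS_ID_BALLOON:	return VIRTIO_ID_BALLOON;
	case VIRTIO_TRANS_ID_CONSOLE:	return VIRTIO_ID_CONSOLE;
	case VIRTIO_TRANS_ID_SCSI:	return VIRTIO_ID_SCSI;
	case VIRTIO_TRANS_ID_RNG:	return VIRTIO_ID_RNG;
	case VIRTIO_TRANS_ID_9P:	return VIRTIO_ID_9P;
	default:			return id;	/* already a modern ID */
	}
}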