[v4,06/15] vdpa/mlx5: pre-create virtq at probe time

Message ID 20220618090258.91157-7-lizh@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Maxime Coquelin
Headers
Series mlx5/vdpa: optimize live migration time |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Li Zhang June 18, 2022, 9:02 a.m. UTC
The dev_config operation is called while live migration (LM) is in progress.
LM time is critical because all VM packets are dropped
for its entire duration.

Move virtq creation to probe time and
only modify the virtq configuration later,
in the dev_config stage, using the new
virtq modify capability.

This optimization accelerates the LM process and
reduces its time by 70%.

Signed-off-by: Li Zhang <lizh@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 doc/guides/rel_notes/release_22_07.rst |   4 +
 drivers/vdpa/mlx5/mlx5_vdpa.h          |   4 +
 drivers/vdpa/mlx5/mlx5_vdpa_lm.c       |  19 +-
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c    | 257 +++++++++++++++----------
 4 files changed, 176 insertions(+), 108 deletions(-)
  

Patch

diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst
index f2cf41def9..2056cd9ee7 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -175,6 +175,10 @@  New Features
   This is a fall-back implementation for platforms that
   don't support vector operations.
 
+* **Updated Nvidia mlx5 vDPA driver.**
+
+  * Added new devargs ``queue_size`` and ``queues`` to allow prior creation of virtq resources.
+
 
 Removed Items
 -------------
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index bf82026e37..e5553079fe 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -80,6 +80,7 @@  struct mlx5_vdpa_virtq {
 	uint16_t vq_size;
 	uint8_t notifier_state;
 	bool stopped;
+	uint32_t configured:1;
 	uint32_t version;
 	struct mlx5_vdpa_priv *priv;
 	struct mlx5_devx_obj *virtq;
@@ -489,4 +490,7 @@  mlx5_vdpa_virtq_stats_reset(struct mlx5_vdpa_priv *priv, int qid);
  */
 void
 mlx5_vdpa_drain_cq(struct mlx5_vdpa_priv *priv);
+
+bool
+mlx5_vdpa_is_modify_virtq_supported(struct mlx5_vdpa_priv *priv);
 #endif /* RTE_PMD_MLX5_VDPA_H_ */
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_lm.c b/drivers/vdpa/mlx5/mlx5_vdpa_lm.c
index 43a2b98255..284758ad56 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_lm.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_lm.c
@@ -12,20 +12,21 @@  int
 mlx5_vdpa_logging_enable(struct mlx5_vdpa_priv *priv, int enable)
 {
 	struct mlx5_devx_virtq_attr attr = {
-		.type = MLX5_VIRTQ_MODIFY_TYPE_DIRTY_BITMAP_DUMP_ENABLE,
+		.mod_fields_bitmap =
+			MLX5_VIRTQ_MODIFY_TYPE_DIRTY_BITMAP_DUMP_ENABLE,
 		.dirty_bitmap_dump_enable = enable,
 	};
+	struct mlx5_vdpa_virtq *virtq;
 	int i;
 
 	for (i = 0; i < priv->nr_virtqs; ++i) {
 		attr.queue_index = i;
-		if (!priv->virtqs[i].virtq) {
-			DRV_LOG(DEBUG, "virtq %d is invalid for dirty bitmap "
-				"enabling.", i);
+		virtq = &priv->virtqs[i];
+		if (!virtq->configured) {
+			DRV_LOG(DEBUG, "virtq %d is invalid for dirty bitmap enabling.", i);
 		} else if (mlx5_devx_cmd_modify_virtq(priv->virtqs[i].virtq,
 			   &attr)) {
-			DRV_LOG(ERR, "Failed to modify virtq %d for dirty "
-				"bitmap enabling.", i);
+			DRV_LOG(ERR, "Failed to modify virtq %d for dirty bitmap enabling.", i);
 			return -1;
 		}
 	}
@@ -37,10 +38,11 @@  mlx5_vdpa_dirty_bitmap_set(struct mlx5_vdpa_priv *priv, uint64_t log_base,
 			   uint64_t log_size)
 {
 	struct mlx5_devx_virtq_attr attr = {
-		.type = MLX5_VIRTQ_MODIFY_TYPE_DIRTY_BITMAP_PARAMS,
+		.mod_fields_bitmap = MLX5_VIRTQ_MODIFY_TYPE_DIRTY_BITMAP_PARAMS,
 		.dirty_bitmap_addr = log_base,
 		.dirty_bitmap_size = log_size,
 	};
+	struct mlx5_vdpa_virtq *virtq;
 	int i;
 	int ret = mlx5_os_wrapped_mkey_create(priv->cdev->ctx, priv->cdev->pd,
 					      priv->cdev->pdn,
@@ -54,7 +56,8 @@  mlx5_vdpa_dirty_bitmap_set(struct mlx5_vdpa_priv *priv, uint64_t log_base,
 	attr.dirty_bitmap_mkey = priv->lm_mr.lkey;
 	for (i = 0; i < priv->nr_virtqs; ++i) {
 		attr.queue_index = i;
-		if (!priv->virtqs[i].virtq) {
+		virtq = &priv->virtqs[i];
+		if (!virtq->configured) {
 			DRV_LOG(DEBUG, "virtq %d is invalid for LM.", i);
 		} else if (mlx5_devx_cmd_modify_virtq(priv->virtqs[i].virtq,
 						      &attr)) {
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
index 6637ba1503..6e08d619e4 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
@@ -75,6 +75,7 @@  mlx5_vdpa_virtqs_cleanup(struct mlx5_vdpa_priv *priv)
 	for (i = 0; i < priv->caps.max_num_virtio_queues; i++) {
 		struct mlx5_vdpa_virtq *virtq = &priv->virtqs[i];
 
+		virtq->configured = 0;
 		for (j = 0; j < RTE_DIM(virtq->umems); ++j) {
 			if (virtq->umems[j].obj) {
 				claim_zero(mlx5_glue->devx_umem_dereg
@@ -111,11 +112,12 @@  mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq)
 		rte_intr_fd_set(virtq->intr_handle, -1);
 	}
 	rte_intr_instance_free(virtq->intr_handle);
-	if (virtq->virtq) {
+	if (virtq->configured) {
 		ret = mlx5_vdpa_virtq_stop(virtq->priv, virtq->index);
 		if (ret)
 			DRV_LOG(WARNING, "Failed to stop virtq %d.",
 				virtq->index);
+		virtq->configured = 0;
 		claim_zero(mlx5_devx_cmd_destroy(virtq->virtq));
 	}
 	virtq->virtq = NULL;
@@ -138,7 +140,7 @@  int
 mlx5_vdpa_virtq_modify(struct mlx5_vdpa_virtq *virtq, int state)
 {
 	struct mlx5_devx_virtq_attr attr = {
-			.type = MLX5_VIRTQ_MODIFY_TYPE_STATE,
+			.mod_fields_bitmap = MLX5_VIRTQ_MODIFY_TYPE_STATE,
 			.state = state ? MLX5_VIRTQ_STATE_RDY :
 					 MLX5_VIRTQ_STATE_SUSPEND,
 			.queue_index = virtq->index,
@@ -153,7 +155,7 @@  mlx5_vdpa_virtq_stop(struct mlx5_vdpa_priv *priv, int index)
 	struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
 	int ret;
 
-	if (virtq->stopped)
+	if (virtq->stopped || !virtq->configured)
 		return 0;
 	ret = mlx5_vdpa_virtq_modify(virtq, 0);
 	if (ret)
@@ -209,51 +211,54 @@  mlx5_vdpa_hva_to_gpa(struct rte_vhost_memory *mem, uint64_t hva)
 }
 
 static int
-mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index)
+mlx5_vdpa_virtq_sub_objs_prepare(struct mlx5_vdpa_priv *priv,
+		struct mlx5_devx_virtq_attr *attr,
+		struct rte_vhost_vring *vq, int index)
 {
 	struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
-	struct rte_vhost_vring vq;
-	struct mlx5_devx_virtq_attr attr = {0};
 	uint64_t gpa;
 	int ret;
 	unsigned int i;
-	uint16_t last_avail_idx;
-	uint16_t last_used_idx;
-	uint16_t event_num = MLX5_EVENT_TYPE_OBJECT_CHANGE;
-	uint64_t cookie;
-
-	ret = rte_vhost_get_vhost_vring(priv->vid, index, &vq);
-	if (ret)
-		return -1;
-	if (vq.size == 0)
-		return 0;
-	virtq->index = index;
-	virtq->vq_size = vq.size;
-	attr.tso_ipv4 = !!(priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO4));
-	attr.tso_ipv6 = !!(priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO6));
-	attr.tx_csum = !!(priv->features & (1ULL << VIRTIO_NET_F_CSUM));
-	attr.rx_csum = !!(priv->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM));
-	attr.virtio_version_1_0 = !!(priv->features & (1ULL <<
-							VIRTIO_F_VERSION_1));
-	attr.type = (priv->features & (1ULL << VIRTIO_F_RING_PACKED)) ?
+	uint16_t last_avail_idx = 0;
+	uint16_t last_used_idx = 0;
+
+	if (virtq->virtq)
+		attr->mod_fields_bitmap = MLX5_VIRTQ_MODIFY_TYPE_STATE |
+			MLX5_VIRTQ_MODIFY_TYPE_ADDR |
+			MLX5_VIRTQ_MODIFY_TYPE_HW_AVAILABLE_INDEX |
+			MLX5_VIRTQ_MODIFY_TYPE_HW_USED_INDEX |
+			MLX5_VIRTQ_MODIFY_TYPE_VERSION_1_0 |
+			MLX5_VIRTQ_MODIFY_TYPE_Q_TYPE |
+			MLX5_VIRTQ_MODIFY_TYPE_Q_MKEY |
+			MLX5_VIRTQ_MODIFY_TYPE_QUEUE_FEATURE_BIT_MASK |
+			MLX5_VIRTQ_MODIFY_TYPE_EVENT_MODE;
+	attr->tso_ipv4 = !!(priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO4));
+	attr->tso_ipv6 = !!(priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO6));
+	attr->tx_csum = !!(priv->features & (1ULL << VIRTIO_NET_F_CSUM));
+	attr->rx_csum = !!(priv->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM));
+	attr->virtio_version_1_0 =
+		!!(priv->features & (1ULL << VIRTIO_F_VERSION_1));
+	attr->q_type =
+		(priv->features & (1ULL << VIRTIO_F_RING_PACKED)) ?
 			MLX5_VIRTQ_TYPE_PACKED : MLX5_VIRTQ_TYPE_SPLIT;
 	/*
 	 * No need event QPs creation when the guest in poll mode or when the
 	 * capability allows it.
 	 */
-	attr.event_mode = vq.callfd != -1 || !(priv->caps.event_mode & (1 <<
-					       MLX5_VIRTQ_EVENT_MODE_NO_MSIX)) ?
-						      MLX5_VIRTQ_EVENT_MODE_QP :
-						  MLX5_VIRTQ_EVENT_MODE_NO_MSIX;
-	if (attr.event_mode == MLX5_VIRTQ_EVENT_MODE_QP) {
-		ret = mlx5_vdpa_event_qp_prepare(priv, vq.size, vq.callfd,
-						&virtq->eqp);
+	attr->event_mode = vq->callfd != -1 ||
+	!(priv->caps.event_mode & (1 << MLX5_VIRTQ_EVENT_MODE_NO_MSIX)) ?
+	MLX5_VIRTQ_EVENT_MODE_QP : MLX5_VIRTQ_EVENT_MODE_NO_MSIX;
+	if (attr->event_mode == MLX5_VIRTQ_EVENT_MODE_QP) {
+		ret = mlx5_vdpa_event_qp_prepare(priv,
+				vq->size, vq->callfd, &virtq->eqp);
 		if (ret) {
-			DRV_LOG(ERR, "Failed to create event QPs for virtq %d.",
+			DRV_LOG(ERR,
+				"Failed to create event QPs for virtq %d.",
 				index);
 			return -1;
 		}
-		attr.qp_id = virtq->eqp.fw_qp->id;
+		attr->mod_fields_bitmap |= MLX5_VIRTQ_MODIFY_TYPE_EVENT_MODE;
+		attr->qp_id = virtq->eqp.fw_qp->id;
 	} else {
 		DRV_LOG(INFO, "Virtq %d is, for sure, working by poll mode, no"
 			" need event QPs and event mechanism.", index);
@@ -265,77 +270,82 @@  mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index)
 		if (!virtq->counters) {
 			DRV_LOG(ERR, "Failed to create virtq couners for virtq"
 				" %d.", index);
-			goto error;
+			return -1;
 		}
-		attr.counters_obj_id = virtq->counters->id;
+		attr->counters_obj_id = virtq->counters->id;
 	}
 	/* Setup 3 UMEMs for each virtq. */
-	for (i = 0; i < RTE_DIM(virtq->umems); ++i) {
-		uint32_t size;
-		void *buf;
-		struct mlx5dv_devx_umem *obj;
-
-		size = priv->caps.umems[i].a * vq.size + priv->caps.umems[i].b;
-		if (virtq->umems[i].size == size &&
-		    virtq->umems[i].obj != NULL) {
-			/* Reuse registered memory. */
-			memset(virtq->umems[i].buf, 0, size);
-			goto reuse;
-		}
-		if (virtq->umems[i].obj)
-			claim_zero(mlx5_glue->devx_umem_dereg
+	if (virtq->virtq) {
+		for (i = 0; i < RTE_DIM(virtq->umems); ++i) {
+			uint32_t size;
+			void *buf;
+			struct mlx5dv_devx_umem *obj;
+
+			size =
+		priv->caps.umems[i].a * vq->size + priv->caps.umems[i].b;
+			if (virtq->umems[i].size == size &&
+				virtq->umems[i].obj != NULL) {
+				/* Reuse registered memory. */
+				memset(virtq->umems[i].buf, 0, size);
+				goto reuse;
+			}
+			if (virtq->umems[i].obj)
+				claim_zero(mlx5_glue->devx_umem_dereg
 				   (virtq->umems[i].obj));
-		if (virtq->umems[i].buf)
-			rte_free(virtq->umems[i].buf);
-		virtq->umems[i].size = 0;
-		virtq->umems[i].obj = NULL;
-		virtq->umems[i].buf = NULL;
-		buf = rte_zmalloc(__func__, size, 4096);
-		if (buf == NULL) {
-			DRV_LOG(ERR, "Cannot allocate umem %d memory for virtq"
+			if (virtq->umems[i].buf)
+				rte_free(virtq->umems[i].buf);
+			virtq->umems[i].size = 0;
+			virtq->umems[i].obj = NULL;
+			virtq->umems[i].buf = NULL;
+			buf = rte_zmalloc(__func__,
+				size, 4096);
+			if (buf == NULL) {
+				DRV_LOG(ERR, "Cannot allocate umem %d memory for virtq"
 				" %u.", i, index);
-			goto error;
-		}
-		obj = mlx5_glue->devx_umem_reg(priv->cdev->ctx, buf, size,
-					       IBV_ACCESS_LOCAL_WRITE);
-		if (obj == NULL) {
-			DRV_LOG(ERR, "Failed to register umem %d for virtq %u.",
+				return -1;
+			}
+			obj = mlx5_glue->devx_umem_reg(priv->cdev->ctx,
+				buf, size, IBV_ACCESS_LOCAL_WRITE);
+			if (obj == NULL) {
+				DRV_LOG(ERR, "Failed to register umem %d for virtq %u.",
 				i, index);
-			goto error;
-		}
-		virtq->umems[i].size = size;
-		virtq->umems[i].buf = buf;
-		virtq->umems[i].obj = obj;
+				rte_free(buf);
+				return -1;
+			}
+			virtq->umems[i].size = size;
+			virtq->umems[i].buf = buf;
+			virtq->umems[i].obj = obj;
 reuse:
-		attr.umems[i].id = virtq->umems[i].obj->umem_id;
-		attr.umems[i].offset = 0;
-		attr.umems[i].size = virtq->umems[i].size;
+			attr->umems[i].id = virtq->umems[i].obj->umem_id;
+			attr->umems[i].offset = 0;
+			attr->umems[i].size = virtq->umems[i].size;
+		}
 	}
-	if (attr.type == MLX5_VIRTQ_TYPE_SPLIT) {
+	if (attr->q_type == MLX5_VIRTQ_TYPE_SPLIT) {
 		gpa = mlx5_vdpa_hva_to_gpa(priv->vmem,
-					   (uint64_t)(uintptr_t)vq.desc);
+					   (uint64_t)(uintptr_t)vq->desc);
 		if (!gpa) {
 			DRV_LOG(ERR, "Failed to get descriptor ring GPA.");
-			goto error;
+			return -1;
 		}
-		attr.desc_addr = gpa;
+		attr->desc_addr = gpa;
 		gpa = mlx5_vdpa_hva_to_gpa(priv->vmem,
-					   (uint64_t)(uintptr_t)vq.used);
+					   (uint64_t)(uintptr_t)vq->used);
 		if (!gpa) {
 			DRV_LOG(ERR, "Failed to get GPA for used ring.");
-			goto error;
+			return -1;
 		}
-		attr.used_addr = gpa;
+		attr->used_addr = gpa;
 		gpa = mlx5_vdpa_hva_to_gpa(priv->vmem,
-					   (uint64_t)(uintptr_t)vq.avail);
+					   (uint64_t)(uintptr_t)vq->avail);
 		if (!gpa) {
 			DRV_LOG(ERR, "Failed to get GPA for available ring.");
-			goto error;
+			return -1;
 		}
-		attr.available_addr = gpa;
+		attr->available_addr = gpa;
 	}
-	ret = rte_vhost_get_vring_base(priv->vid, index, &last_avail_idx,
-				 &last_used_idx);
+	ret = rte_vhost_get_vring_base(priv->vid,
+			index, &last_avail_idx, &last_used_idx);
 	if (ret) {
 		last_avail_idx = 0;
 		last_used_idx = 0;
@@ -345,24 +355,71 @@  mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index)
 				"virtq %d.", priv->vid, last_avail_idx,
 				last_used_idx, index);
 	}
-	attr.hw_available_index = last_avail_idx;
-	attr.hw_used_index = last_used_idx;
-	attr.q_size = vq.size;
-	attr.mkey = priv->gpa_mkey_index;
-	attr.tis_id = priv->tiss[(index / 2) % priv->num_lag_ports]->id;
-	attr.queue_index = index;
-	attr.pd = priv->cdev->pdn;
-	attr.hw_latency_mode = priv->hw_latency_mode;
-	attr.hw_max_latency_us = priv->hw_max_latency_us;
-	attr.hw_max_pending_comp = priv->hw_max_pending_comp;
-	virtq->virtq = mlx5_devx_cmd_create_virtq(priv->cdev->ctx, &attr);
+	attr->hw_available_index = last_avail_idx;
+	attr->hw_used_index = last_used_idx;
+	attr->q_size = vq->size;
+	attr->mkey = priv->gpa_mkey_index;
+	attr->tis_id = priv->tiss[(index / 2) % priv->num_lag_ports]->id;
+	attr->queue_index = index;
+	attr->pd = priv->cdev->pdn;
+	attr->hw_latency_mode = priv->hw_latency_mode;
+	attr->hw_max_latency_us = priv->hw_max_latency_us;
+	attr->hw_max_pending_comp = priv->hw_max_pending_comp;
+	if (attr->hw_latency_mode || attr->hw_max_latency_us ||
+		attr->hw_max_pending_comp)
+		attr->mod_fields_bitmap |= MLX5_VIRTQ_MODIFY_TYPE_QUEUE_PERIOD;
+	return 0;
+}
+
+bool
+mlx5_vdpa_is_modify_virtq_supported(struct mlx5_vdpa_priv *priv)
+{
+	return (priv->caps.vnet_modify_ext &&
+			priv->caps.virtio_net_q_addr_modify &&
+			priv->caps.virtio_q_index_modify) ? true : false;
+}
+
+static int
+mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index)
+{
+	struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
+	struct rte_vhost_vring vq;
+	struct mlx5_devx_virtq_attr attr = {0};
+	int ret;
+	uint16_t event_num = MLX5_EVENT_TYPE_OBJECT_CHANGE;
+	uint64_t cookie;
+
+	ret = rte_vhost_get_vhost_vring(priv->vid, index, &vq);
+	if (ret)
+		return -1;
+	if (vq.size == 0)
+		return 0;
 	virtq->priv = priv;
-	if (!virtq->virtq)
+	virtq->stopped = 0;
+	ret = mlx5_vdpa_virtq_sub_objs_prepare(priv, &attr,
+				&vq, index);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to setup update virtq attr %d.",
+			index);
 		goto error;
-	claim_zero(rte_vhost_enable_guest_notification(priv->vid, index, 1));
-	if (mlx5_vdpa_virtq_modify(virtq, 1))
+	}
+	if (!virtq->virtq) {
+		virtq->index = index;
+		virtq->vq_size = vq.size;
+		virtq->virtq = mlx5_devx_cmd_create_virtq(priv->cdev->ctx,
+			&attr);
+		if (!virtq->virtq)
+			goto error;
+		attr.mod_fields_bitmap = MLX5_VIRTQ_MODIFY_TYPE_STATE;
+	}
+	attr.state = MLX5_VIRTQ_STATE_RDY;
+	ret = mlx5_devx_cmd_modify_virtq(virtq->virtq, &attr);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to modify virtq %d.", index);
 		goto error;
-	virtq->priv = priv;
+	}
+	claim_zero(rte_vhost_enable_guest_notification(priv->vid, index, 1));
+	virtq->configured = 1;
 	rte_write32(virtq->index, priv->virtq_db_addr);
 	/* Setup doorbell mapping. */
 	virtq->intr_handle =
@@ -553,7 +610,7 @@  mlx5_vdpa_virtq_enable(struct mlx5_vdpa_priv *priv, int index, int enable)
 			return 0;
 		DRV_LOG(INFO, "Virtq %d was modified, recreate it.", index);
 	}
-	if (virtq->virtq) {
+	if (virtq->configured) {
 		virtq->enable = 0;
 		if (is_virtq_recvq(virtq->index, priv->nr_virtqs)) {
 			ret = mlx5_vdpa_steer_update(priv);