@@ -5,6 +5,7 @@
#include <net/if.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
+#include <sys/mman.h>
#include <fcntl.h>
#include <netinet/in.h>
@@ -49,6 +50,8 @@ TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
TAILQ_HEAD_INITIALIZER(priv_list);
static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;
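+/* Forward declaration, used by both the probe error path and device remove. */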
+static void mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv);
+
static struct mlx5_vdpa_priv *
mlx5_vdpa_find_priv_resource_by_vdev(struct rte_vdpa_device *vdev)
{
@@ -250,7 +253,6 @@ mlx5_vdpa_dev_close(int vid)
DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
return -1;
}
- mlx5_vdpa_err_event_unset(priv);
mlx5_vdpa_cqe_event_unset(priv);
if (priv->state == MLX5_VDPA_STATE_CONFIGURED) {
ret |= mlx5_vdpa_lm_log(priv);
@@ -258,7 +260,6 @@ mlx5_vdpa_dev_close(int vid)
}
mlx5_vdpa_steer_unset(priv);
mlx5_vdpa_virtqs_release(priv);
- mlx5_vdpa_event_qp_global_release(priv);
mlx5_vdpa_mem_dereg(priv);
priv->state = MLX5_VDPA_STATE_PROBED;
priv->vid = 0;
@@ -288,7 +289,7 @@ mlx5_vdpa_dev_config(int vid)
if (mlx5_vdpa_mtu_set(priv))
DRV_LOG(WARNING, "MTU cannot be set on device %s.",
vdev->device->name);
- if (mlx5_vdpa_mem_register(priv) || mlx5_vdpa_err_event_setup(priv) ||
+ if (mlx5_vdpa_mem_register(priv) ||
mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
mlx5_vdpa_cqe_event_setup(priv)) {
mlx5_vdpa_dev_close(vid);
@@ -504,12 +505,89 @@ mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv)
DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
}
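+/*
+ * Create the device resources that can be reused across vDPA restarts:
+ * VAR, doorbell mapping, TD, per-LAG-port TIS, null MR, steering domain
+ * and table, error event channel and the global event resources.
+ */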
+static int
+mlx5_vdpa_create_dev_resources(struct mlx5_vdpa_priv *priv)
+{
+ struct mlx5_devx_tis_attr tis_attr = {0};
+ struct ibv_context *ctx = priv->cdev->ctx;
+ uint32_t i;
+ int retry;
+
+ for (retry = 0; retry < 7; retry++) {
+ priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
+ if (priv->var != NULL)
+ break;
+ DRV_LOG(WARNING, "Failed to allocate VAR, retry %d.", retry);
+ /* Wait for QEMU to release the VAR on vdpa restart; 0.1 sec base backoff. */
+ usleep(100000U << retry);
+ }
+ if (!priv->var) {
+ DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+ /* Always map the entire page. */
+ priv->virtq_db_addr = mmap(NULL, priv->var->length, PROT_READ |
+ PROT_WRITE, MAP_SHARED, ctx->cmd_fd,
+ priv->var->mmap_off);
+ if (priv->virtq_db_addr == MAP_FAILED) {
+ DRV_LOG(ERR, "Failed to map doorbell page %u.", errno);
+ priv->virtq_db_addr = NULL;
+ rte_errno = errno;
+ return -rte_errno;
+ }
+ DRV_LOG(DEBUG, "VAR address of doorbell mapping is %p.",
+ priv->virtq_db_addr);
+ priv->td = mlx5_devx_cmd_create_td(ctx);
+ if (!priv->td) {
+ DRV_LOG(ERR, "Failed to create transport domain.");
+ rte_errno = errno;
+ return -rte_errno;
+ }
+ tis_attr.transport_domain = priv->td->id;
+ for (i = 0; i < priv->num_lag_ports; i++) {
+ /* 0 means auto affinity; a non-zero value proposes a specific port. */
+ tis_attr.lag_tx_port_affinity = i + 1;
+ priv->tiss[i] = mlx5_devx_cmd_create_tis(ctx, &tis_attr);
+ if (!priv->tiss[i]) {
+ DRV_LOG(ERR, "Failed to create TIS %u.", i);
+ return -rte_errno;
+ }
+ }
+ priv->null_mr = mlx5_glue->alloc_null_mr(priv->cdev->pd);
+ if (!priv->null_mr) {
+ DRV_LOG(ERR, "Failed to allocate null MR.");
+ rte_errno = errno;
+ return -rte_errno;
+ }
+ DRV_LOG(DEBUG, "Dump fill Mkey = %u.", priv->null_mr->lkey);
+#ifdef HAVE_MLX5DV_DR
+ priv->steer.domain = mlx5_glue->dr_create_domain(ctx,
+ MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
+ if (!priv->steer.domain) {
+ DRV_LOG(ERR, "Failed to create Rx domain.");
+ rte_errno = errno;
+ return -rte_errno;
+ }
+#endif
+ priv->steer.tbl = mlx5_glue->dr_create_flow_tbl(priv->steer.domain, 0);
+ if (!priv->steer.tbl) {
+ DRV_LOG(ERR, "Failed to create table 0 with Rx domain.");
+ rte_errno = errno;
+ return -rte_errno;
+ }
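+ /* Error handling event channel and interrupt handler. */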
+ if (mlx5_vdpa_err_event_setup(priv) != 0)
+ return -rte_errno;
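+ /* Global event resources: Rx event channel and UAR. */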
+ if (mlx5_vdpa_event_qp_global_prepare(priv))
+ return -rte_errno;
+ return 0;
+}
+
static int
mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev)
{
struct mlx5_vdpa_priv *priv = NULL;
struct mlx5_hca_attr *attr = &cdev->config.hca_attr;
- int retry;
if (!attr->vdpa.valid || !attr->vdpa.max_num_virtio_queues) {
DRV_LOG(ERR, "Not enough capabilities to support vdpa, maybe "
@@ -533,25 +611,10 @@ mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev)
priv->num_lag_ports = attr->num_lag_ports;
if (attr->num_lag_ports == 0)
priv->num_lag_ports = 1;
+ pthread_mutex_init(&priv->vq_config_lock, NULL);
priv->cdev = cdev;
- for (retry = 0; retry < 7; retry++) {
- priv->var = mlx5_glue->dv_alloc_var(priv->cdev->ctx, 0);
- if (priv->var != NULL)
- break;
- DRV_LOG(WARNING, "Failed to allocate VAR, retry %d.\n", retry);
- /* Wait Qemu release VAR during vdpa restart, 0.1 sec based. */
- usleep(100000U << retry);
- }
- if (!priv->var) {
- DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
+ if (mlx5_vdpa_create_dev_resources(priv))
goto error;
- }
- priv->err_intr_handle =
- rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
- if (priv->err_intr_handle == NULL) {
- DRV_LOG(ERR, "Fail to allocate intr_handle");
- goto error;
- }
priv->vdev = rte_vdpa_register_device(cdev->dev, &mlx5_vdpa_ops);
if (priv->vdev == NULL) {
DRV_LOG(ERR, "Failed to register vDPA device.");
@@ -560,19 +623,13 @@ mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev)
}
mlx5_vdpa_config_get(cdev->dev->devargs, priv);
SLIST_INIT(&priv->mr_list);
- pthread_mutex_init(&priv->vq_config_lock, NULL);
pthread_mutex_lock(&priv_list_lock);
TAILQ_INSERT_TAIL(&priv_list, priv, next);
pthread_mutex_unlock(&priv_list_lock);
return 0;
-
error:
- if (priv) {
- if (priv->var)
- mlx5_glue->dv_free_var(priv->var);
- rte_intr_instance_free(priv->err_intr_handle);
- rte_free(priv);
- }
+ if (priv)
+ mlx5_vdpa_dev_release(priv);
return -rte_errno;
}
@@ -592,22 +649,48 @@ mlx5_vdpa_dev_remove(struct mlx5_common_device *cdev)
if (found)
TAILQ_REMOVE(&priv_list, priv, next);
pthread_mutex_unlock(&priv_list_lock);
- if (found) {
- if (priv->state == MLX5_VDPA_STATE_CONFIGURED)
- mlx5_vdpa_dev_close(priv->vid);
- if (priv->var) {
- mlx5_glue->dv_free_var(priv->var);
- priv->var = NULL;
- }
- if (priv->vdev)
- rte_vdpa_unregister_device(priv->vdev);
- pthread_mutex_destroy(&priv->vq_config_lock);
- rte_intr_instance_free(priv->err_intr_handle);
- rte_free(priv);
- }
+ if (found)
+ mlx5_vdpa_dev_release(priv);
return 0;
}
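+/*
+ * Release everything created by mlx5_vdpa_create_dev_resources(),
+ * in reverse order of creation.
+ */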
+static void
+mlx5_vdpa_release_dev_resources(struct mlx5_vdpa_priv *priv)
+{
+ uint32_t i;
+
+ mlx5_vdpa_event_qp_global_release(priv);
+ mlx5_vdpa_err_event_unset(priv);
+ if (priv->steer.tbl)
+ claim_zero(mlx5_glue->dr_destroy_flow_tbl(priv->steer.tbl));
+ if (priv->steer.domain)
+ claim_zero(mlx5_glue->dr_destroy_domain(priv->steer.domain));
+ if (priv->null_mr)
+ claim_zero(mlx5_glue->dereg_mr(priv->null_mr));
+ for (i = 0; i < priv->num_lag_ports; i++) {
+ if (priv->tiss[i])
+ claim_zero(mlx5_devx_cmd_destroy(priv->tiss[i]));
+ }
+ if (priv->td)
+ claim_zero(mlx5_devx_cmd_destroy(priv->td));
+ if (priv->virtq_db_addr)
+ claim_zero(munmap(priv->virtq_db_addr, priv->var->length));
+ if (priv->var)
+ mlx5_glue->dv_free_var(priv->var);
+}
+
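+/*
+ * Close the device if it is still configured, release its probe-time
+ * resources and unregister it.
+ */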
+static void
+mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv)
+{
+ if (priv->state == MLX5_VDPA_STATE_CONFIGURED)
+ mlx5_vdpa_dev_close(priv->vid);
+ mlx5_vdpa_release_dev_resources(priv);
+ if (priv->vdev)
+ rte_vdpa_unregister_device(priv->vdev);
+ pthread_mutex_destroy(&priv->vq_config_lock);
+ rte_free(priv);
+}
+
static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
{
RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
@@ -233,6 +233,15 @@ int mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
*/
void mlx5_vdpa_event_qp_destroy(struct mlx5_vdpa_event_qp *eqp);
+/**
+ * Create all the event global resources.
+ *
+ * @param[in] priv
+ * The vdpa driver private structure.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int
+mlx5_vdpa_event_qp_global_prepare(struct mlx5_vdpa_priv *priv);
+
/**
* Release all the event global resources.
*
@@ -40,11 +40,9 @@ mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
}
/* Prepare all the global resources for all the event objects.*/
-static int
+int
mlx5_vdpa_event_qp_global_prepare(struct mlx5_vdpa_priv *priv)
{
- if (priv->eventc)
- return 0;
priv->eventc = mlx5_os_devx_create_event_channel(priv->cdev->ctx,
MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
if (!priv->eventc) {
@@ -389,22 +387,30 @@ mlx5_vdpa_err_event_setup(struct mlx5_vdpa_priv *priv)
flags = fcntl(priv->err_chnl->fd, F_GETFL);
ret = fcntl(priv->err_chnl->fd, F_SETFL, flags | O_NONBLOCK);
if (ret) {
+ rte_errno = errno;
DRV_LOG(ERR, "Failed to change device event channel FD.");
goto error;
}
-
+ priv->err_intr_handle =
+ rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
+ if (priv->err_intr_handle == NULL) {
+ DRV_LOG(ERR, "Fail to allocate intr_handle");
+ goto error;
+ }
if (rte_intr_fd_set(priv->err_intr_handle, priv->err_chnl->fd))
goto error;
if (rte_intr_type_set(priv->err_intr_handle, RTE_INTR_HANDLE_EXT))
goto error;
- if (rte_intr_callback_register(priv->err_intr_handle,
- mlx5_vdpa_err_interrupt_handler,
- priv)) {
+ ret = rte_intr_callback_register(priv->err_intr_handle,
+ mlx5_vdpa_err_interrupt_handler,
+ priv);
+ if (ret != 0) {
rte_intr_fd_set(priv->err_intr_handle, 0);
DRV_LOG(ERR, "Failed to register error interrupt for device %d.",
priv->vid);
+ rte_errno = -ret;
goto error;
} else {
DRV_LOG(DEBUG, "Registered error interrupt for device%d.",
@@ -453,6 +459,7 @@ mlx5_vdpa_err_event_unset(struct mlx5_vdpa_priv *priv)
mlx5_glue->devx_destroy_event_channel(priv->err_chnl);
priv->err_chnl = NULL;
}
+ rte_intr_instance_free(priv->err_intr_handle);
}
int
@@ -575,8 +582,6 @@ mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
uint16_t log_desc_n = rte_log2_u32(desc_n);
uint32_t ret;
- if (mlx5_vdpa_event_qp_global_prepare(priv))
- return -1;
if (mlx5_vdpa_cq_create(priv, log_desc_n, callfd, &eqp->cq))
return -1;
attr.pd = priv->cdev->pdn;
@@ -34,10 +34,6 @@ mlx5_vdpa_mem_dereg(struct mlx5_vdpa_priv *priv)
SLIST_INIT(&priv->mr_list);
if (priv->lm_mr.addr)
mlx5_os_wrapped_mkey_destroy(&priv->lm_mr);
- if (priv->null_mr) {
- claim_zero(mlx5_glue->dereg_mr(priv->null_mr));
- priv->null_mr = NULL;
- }
if (priv->vmem) {
free(priv->vmem);
priv->vmem = NULL;
@@ -196,13 +192,6 @@ mlx5_vdpa_mem_register(struct mlx5_vdpa_priv *priv)
if (!mem)
return -rte_errno;
priv->vmem = mem;
- priv->null_mr = mlx5_glue->alloc_null_mr(priv->cdev->pd);
- if (!priv->null_mr) {
- DRV_LOG(ERR, "Failed to allocate null MR.");
- ret = -errno;
- goto error;
- }
- DRV_LOG(DEBUG, "Dump fill Mkey = %u.", priv->null_mr->lkey);
for (i = 0; i < mem->nregions; i++) {
reg = &mem->regions[i];
entry = rte_zmalloc(__func__, sizeof(*entry), 0);
@@ -45,14 +45,6 @@ void
mlx5_vdpa_steer_unset(struct mlx5_vdpa_priv *priv)
{
mlx5_vdpa_rss_flows_destroy(priv);
- if (priv->steer.tbl) {
- claim_zero(mlx5_glue->dr_destroy_flow_tbl(priv->steer.tbl));
- priv->steer.tbl = NULL;
- }
- if (priv->steer.domain) {
- claim_zero(mlx5_glue->dr_destroy_domain(priv->steer.domain));
- priv->steer.domain = NULL;
- }
if (priv->steer.rqt) {
claim_zero(mlx5_devx_cmd_destroy(priv->steer.rqt));
priv->steer.rqt = NULL;
@@ -248,11 +240,7 @@ mlx5_vdpa_steer_update(struct mlx5_vdpa_priv *priv)
int ret = mlx5_vdpa_rqt_prepare(priv);
if (ret == 0) {
- mlx5_vdpa_rss_flows_destroy(priv);
- if (priv->steer.rqt) {
- claim_zero(mlx5_devx_cmd_destroy(priv->steer.rqt));
- priv->steer.rqt = NULL;
- }
+ mlx5_vdpa_steer_unset(priv);
} else if (ret < 0) {
return ret;
} else if (!priv->steer.rss[0].flow) {
@@ -268,26 +256,10 @@ mlx5_vdpa_steer_update(struct mlx5_vdpa_priv *priv)
int
mlx5_vdpa_steer_setup(struct mlx5_vdpa_priv *priv)
{
-#ifdef HAVE_MLX5DV_DR
- priv->steer.domain = mlx5_glue->dr_create_domain(priv->cdev->ctx,
- MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
- if (!priv->steer.domain) {
- DRV_LOG(ERR, "Failed to create Rx domain.");
- goto error;
- }
- priv->steer.tbl = mlx5_glue->dr_create_flow_tbl(priv->steer.domain, 0);
- if (!priv->steer.tbl) {
- DRV_LOG(ERR, "Failed to create table 0 with Rx domain.");
- goto error;
- }
if (mlx5_vdpa_steer_update(priv))
goto error;
return 0;
error:
mlx5_vdpa_steer_unset(priv);
return -1;
-#else
- (void)priv;
- return -ENOTSUP;
-#endif /* HAVE_MLX5DV_DR */
}
@@ -3,7 +3,6 @@
*/
#include <string.h>
#include <unistd.h>
-#include <sys/mman.h>
#include <sys/eventfd.h>
#include <rte_malloc.h>
@@ -120,20 +119,6 @@ mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv)
if (virtq->counters)
claim_zero(mlx5_devx_cmd_destroy(virtq->counters));
}
- for (i = 0; i < priv->num_lag_ports; i++) {
- if (priv->tiss[i]) {
- claim_zero(mlx5_devx_cmd_destroy(priv->tiss[i]));
- priv->tiss[i] = NULL;
- }
- }
- if (priv->td) {
- claim_zero(mlx5_devx_cmd_destroy(priv->td));
- priv->td = NULL;
- }
- if (priv->virtq_db_addr) {
- claim_zero(munmap(priv->virtq_db_addr, priv->var->length));
- priv->virtq_db_addr = NULL;
- }
priv->features = 0;
memset(priv->virtqs, 0, sizeof(*virtq) * priv->nr_virtqs);
priv->nr_virtqs = 0;
@@ -462,8 +447,6 @@ mlx5_vdpa_features_validate(struct mlx5_vdpa_priv *priv)
int
mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
{
- struct mlx5_devx_tis_attr tis_attr = {0};
- struct ibv_context *ctx = priv->cdev->ctx;
uint32_t i;
uint16_t nr_vring = rte_vhost_get_vring_num(priv->vid);
int ret = rte_vhost_get_negotiated_features(priv->vid, &priv->features);
@@ -485,33 +468,6 @@ mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
(int)nr_vring);
return -1;
}
- /* Always map the entire page. */
- priv->virtq_db_addr = mmap(NULL, priv->var->length, PROT_READ |
- PROT_WRITE, MAP_SHARED, ctx->cmd_fd,
- priv->var->mmap_off);
- if (priv->virtq_db_addr == MAP_FAILED) {
- DRV_LOG(ERR, "Failed to map doorbell page %u.", errno);
- priv->virtq_db_addr = NULL;
- goto error;
- } else {
- DRV_LOG(DEBUG, "VAR address of doorbell mapping is %p.",
- priv->virtq_db_addr);
- }
- priv->td = mlx5_devx_cmd_create_td(ctx);
- if (!priv->td) {
- DRV_LOG(ERR, "Failed to create transport domain.");
- return -rte_errno;
- }
- tis_attr.transport_domain = priv->td->id;
- for (i = 0; i < priv->num_lag_ports; i++) {
- /* 0 is auto affinity, non-zero value to propose port. */
- tis_attr.lag_tx_port_affinity = i + 1;
- priv->tiss[i] = mlx5_devx_cmd_create_tis(ctx, &tis_attr);
- if (!priv->tiss[i]) {
- DRV_LOG(ERR, "Failed to create TIS %u.", i);
- goto error;
- }
- }
priv->nr_virtqs = nr_vring;
for (i = 0; i < nr_vring; i++)
if (priv->virtqs[i].enable && mlx5_vdpa_virtq_setup(priv, i))
To speed up device resume, create the reusable resources during device
probe and release them only when the device is removed. The reused
resources include TIS, TD, the VAR doorbell mmap, the error handling
event channel and interrupt handler, UAR, the Rx event channel, the
NULL MR, and the steering domain and table.

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa.c       | 167 +++++++++++++++++++++-------
 drivers/vdpa/mlx5/mlx5_vdpa.h       |   9 ++
 drivers/vdpa/mlx5/mlx5_vdpa_event.c |  23 ++--
 drivers/vdpa/mlx5/mlx5_vdpa_mem.c   |  11 --
 drivers/vdpa/mlx5/mlx5_vdpa_steer.c |  30 +----
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c |  44 --------
 6 files changed, 149 insertions(+), 135 deletions(-)