[1/2] common/mlx5: add device duplication function

Message ID 20250313083351.25559-1-getelson@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers
Series [1/2] common/mlx5: add device duplication function |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Gregory Etelson March 13, 2025, 8:33 a.m. UTC
From: Michael Baum <michaelba@nvidia.com>

Add a function for creating an additional CTX for the same device, based on
an existing CTX.

Signed-off-by: Michael Baum <michaelba@nvidia.com>
---
 drivers/common/mlx5/linux/mlx5_common_os.c   | 52 ++++++++++++++++++++
 drivers/common/mlx5/mlx5_common.h            |  4 ++
 drivers/common/mlx5/version.map              |  1 +
 drivers/common/mlx5/windows/mlx5_common_os.c | 31 ++++++++++++
 4 files changed, 88 insertions(+)
  

Comments

Matan Azrad March 17, 2025, 10:47 a.m. UTC | #1
> From: Michael Baum <michaelba@nvidia.com>
> 
> Add function for creating additional CTX for same device base on existing CTX.
> 
> Signed-off-by: Michael Baum <michaelba@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>


> ---
>  drivers/common/mlx5/linux/mlx5_common_os.c   | 52
> ++++++++++++++++++++
>  drivers/common/mlx5/mlx5_common.h            |  4 ++
>  drivers/common/mlx5/version.map              |  1 +
>  drivers/common/mlx5/windows/mlx5_common_os.c | 31 ++++++++++++
>  4 files changed, 88 insertions(+)
> 
> diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c
> b/drivers/common/mlx5/linux/mlx5_common_os.c
> index 41345e1597..2f5032f0da 100644
> --- a/drivers/common/mlx5/linux/mlx5_common_os.c
> +++ b/drivers/common/mlx5/linux/mlx5_common_os.c
> @@ -873,6 +873,58 @@ mlx5_os_open_device(struct mlx5_common_device
> *cdev, uint32_t classes)
>  	return 0;
>  }
> 
> +/**
> + * API function to obtain a new InfiniBand (IB) context for a given common
> device.
> + *
> + * This function provides a port-agnostic IB context for a physical
> +device, enabling the
> + * device to create and manage resources that can be initialized when a
> +port starts and
> + * released when another port stops.
> + *
> + * For Linux, it imports new context from the existing context.
> + *
> + * @param cdev
> + *   Pointer to the mlx5 device structure.
> + *
> + * @return
> + *   Pointer to an `ibv_context` on success, or NULL on failure, with `rte_errno`
> set.
> + */
> +void *
> +mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev) {
> +	struct ibv_context *ctx = NULL;
> +	int cmd_fd = ((struct ibv_context *)cdev->ctx)->cmd_fd;
> +	int new_cmd_fd;
> +
> +	/*
> +	 * Duplicate the command FD to pass it as input to the import device
> function.
> +	 * If the import function succeeds, the new device context takes
> ownership of
> +	 * this FD, which will be freed when the new device is closed.
> +	 * If the import function fails, we are responsible for closing this FD.
> +	 */
> +	new_cmd_fd = dup(cmd_fd);
> +	if (new_cmd_fd < 0) {
> +		DRV_LOG(ERR,
> +			"Failed to duplicate FD %d for IB device \"%s\": %s",
> +			cmd_fd, mlx5_os_get_ctx_device_name(cdev->ctx),
> +			rte_strerror(errno));
> +		rte_errno = errno;
> +		return NULL;
> +	}
> +	/* Attempt to import the duplicated FD to create a new device context.
> */
> +	ctx = mlx5_glue->import_device(new_cmd_fd);
> +	if (!ctx) {
> +		DRV_LOG(ERR, "Failed to import IB device \"%s\": %s",
> +			mlx5_os_get_ctx_device_name(cdev->ctx),
> +			rte_strerror(errno));
> +		close(new_cmd_fd);
> +		rte_errno = errno;
> +		return NULL;
> +	}
> +	DRV_LOG(INFO, "IB device \"%s\" successfully imported, old_fd=%d,
> new_fd=%d",
> +		mlx5_os_get_ctx_device_name(cdev->ctx), cmd_fd,
> new_cmd_fd);
> +	return (void *)ctx;
> +}
> +
>  int
>  mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t
> len)  { diff --git a/drivers/common/mlx5/mlx5_common.h
> b/drivers/common/mlx5/mlx5_common.h
> index e7bd4c6ec4..bea1382911 100644
> --- a/drivers/common/mlx5/mlx5_common.h
> +++ b/drivers/common/mlx5/mlx5_common.h
> @@ -651,6 +651,10 @@ mlx5_devx_uar_release(struct mlx5_uar *uar);
> 
>  /* mlx5_common_os.c */
> 
> +__rte_internal
> +void *
> +mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev);
> +
>  int mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t
> classes);  int mlx5_os_pd_prepare(struct mlx5_common_device *cdev);  int
> mlx5_os_pd_release(struct mlx5_common_device *cdev); diff --git
> a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
> index 8301485839..819e6b96cb 100644
> --- a/drivers/common/mlx5/version.map
> +++ b/drivers/common/mlx5/version.map
> @@ -150,6 +150,7 @@ INTERNAL {
>  	mlx5_nl_rdma_monitor_info_get; # WINDOWS_NO_EXPORT
>  	mlx5_nl_rdma_monitor_cap_get; # WINDOWS_NO_EXPORT
> 
> +	mlx5_os_get_physical_device_ctx;
>  	mlx5_os_umem_dereg;
>  	mlx5_os_umem_reg;
> 
> diff --git a/drivers/common/mlx5/windows/mlx5_common_os.c
> b/drivers/common/mlx5/windows/mlx5_common_os.c
> index 2b6058eb3e..68f1981193 100644
> --- a/drivers/common/mlx5/windows/mlx5_common_os.c
> +++ b/drivers/common/mlx5/windows/mlx5_common_os.c
> @@ -266,6 +266,37 @@ mlx5_os_open_device(struct mlx5_common_device
> *cdev, uint32_t classes)
>  	return -rte_errno;
>  }
> 
> +/**
> + * API function to obtain a new MLX5 context for a given common device.
> + *
> + * This function provides a port-agnostic context for a physical
> +device, enabling the
> + * device to create and manage resources that can be initialized when a
> +port starts and
> + * released when another port stops.
> + *
> + * For Windows, it creates a new context for the device regardless to existing
> context.
> + *
> + * @param cdev
> + *   Pointer to the mlx5 device structure.
> + *
> + * @return
> + *   Pointer to an `ibv_context` on success, or NULL on failure, with `rte_errno`
> set.
> + */
> +void *
> +mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev) {
> +	struct mlx5_common_device temp = {
> +		.dev = cdev->dev,
> +	};
> +
> +	if (mlx5_os_open_device(&temp, MLX5_CLASS_ETH) < 0) {
> +		DRV_LOG(ERR, "Failed to duplicate DevX device \"%s\": %s",
> +			mlx5_os_get_ctx_device_name(cdev->ctx),
> +			rte_strerror(rte_errno));
> +		return NULL;
> +	}
> +	return (void *)temp.ctx;
> +}
> +
>  /**
>   * Register umem.
>   *
> --
> 2.45.2
  
Raslan Darawsheh March 18, 2025, 10:38 a.m. UTC | #2
Hi,

From: Gregory Etelson <getelson@nvidia.com>
Sent: Thursday, March 13, 2025 10:33 AM
To: dev@dpdk.org
Cc: Gregory Etelson; Maayan Kashani; Raslan Darawsheh; Michael Baum; Dariusz Sosnowski; Slava Ovsiienko; Bing Zhao; Ori Kam; Suanming Mou; Matan Azrad
Subject: [PATCH 1/2] common/mlx5: add device duplication function

From: Michael Baum <michaelba@nvidia.com>

Add a function for creating an additional CTX for the same device, based on
an existing CTX.

Signed-off-by: Michael Baum <michaelba@nvidia.com>


Series applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh
  

Patch

diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c
index 41345e1597..2f5032f0da 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.c
+++ b/drivers/common/mlx5/linux/mlx5_common_os.c
@@ -873,6 +873,58 @@  mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
 	return 0;
 }
 
+/**
+ * API function to obtain a new InfiniBand (IB) context for a given common device.
+ *
+ * This function provides a port-agnostic IB context for a physical device, enabling the
+ * device to create and manage resources that can be initialized when a port starts and
+ * released when another port stops.
+ *
+ * For Linux, it imports a new context from a duplicate of the existing context's command FD.
+ *
+ * @param cdev
+ *   Pointer to the mlx5 device structure; its `ctx` must already hold an open `ibv_context`.
+ *
+ * @return
+ *   Pointer to an `ibv_context` on success, or NULL on failure, with `rte_errno` set.
+ */
+void *
+mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev)
+{
+	struct ibv_context *ctx = NULL;
+	int cmd_fd = ((struct ibv_context *)cdev->ctx)->cmd_fd;
+	int new_cmd_fd;
+
+	/*
+	 * Duplicate the command FD to pass it as input to the import device function.
+	 * If the import function succeeds, the new device context takes ownership of
+	 * this FD, which will be freed when the new device is closed.
+	 * If the import function fails, we are responsible for closing this FD.
+	 */
+	new_cmd_fd = dup(cmd_fd);
+	if (new_cmd_fd < 0) {
+		/* Latch errno before logging, which may overwrite it. */
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to duplicate FD %d for IB device \"%s\": %s",
+			cmd_fd, mlx5_os_get_ctx_device_name(cdev->ctx),
+			rte_strerror(rte_errno));
+		return NULL;
+	}
+	/* Attempt to import the duplicated FD to create a new device context. */
+	ctx = mlx5_glue->import_device(new_cmd_fd);
+	if (!ctx) {
+		/* Latch errno before logging and close(), either may overwrite it. */
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to import IB device \"%s\": %s",
+			mlx5_os_get_ctx_device_name(cdev->ctx), rte_strerror(rte_errno));
+		close(new_cmd_fd);
+		return NULL;
+	}
+	DRV_LOG(INFO, "IB device \"%s\" successfully imported, old_fd=%d, new_fd=%d",
+		mlx5_os_get_ctx_device_name(cdev->ctx), cmd_fd, new_cmd_fd);
+	return (void *)ctx;
+}
+
 int
 mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t len)
 {
diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index e7bd4c6ec4..bea1382911 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -651,6 +651,10 @@  mlx5_devx_uar_release(struct mlx5_uar *uar);
 
 /* mlx5_common_os.c */
 
+__rte_internal
+void *
+mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev);
+
 int mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes);
 int mlx5_os_pd_prepare(struct mlx5_common_device *cdev);
 int mlx5_os_pd_release(struct mlx5_common_device *cdev);
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index 8301485839..819e6b96cb 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -150,6 +150,7 @@  INTERNAL {
 	mlx5_nl_rdma_monitor_info_get; # WINDOWS_NO_EXPORT
 	mlx5_nl_rdma_monitor_cap_get; # WINDOWS_NO_EXPORT
 
+	mlx5_os_get_physical_device_ctx;
 	mlx5_os_umem_dereg;
 	mlx5_os_umem_reg;
 
diff --git a/drivers/common/mlx5/windows/mlx5_common_os.c b/drivers/common/mlx5/windows/mlx5_common_os.c
index 2b6058eb3e..68f1981193 100644
--- a/drivers/common/mlx5/windows/mlx5_common_os.c
+++ b/drivers/common/mlx5/windows/mlx5_common_os.c
@@ -266,6 +266,37 @@  mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
 	return -rte_errno;
 }
 
+/**
+ * API function to obtain a new MLX5 context for a given common device.
+ *
+ * The returned context is meant to be port-agnostic, so that the physical device can
+ * create and manage resources that are initialized when one port starts and released
+ * when another port stops.
+ *
+ * On Windows the device is opened again through mlx5_os_open_device(); the context
+ * already stored in cdev is not reused, it only supplies the name for the error log.
+ *
+ * @param cdev
+ *   Pointer to the mlx5 device structure.
+ *
+ * @return
+ *   Context produced by mlx5_os_open_device() on success, or NULL on failure.
+ */
+void *
+mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev)
+{
+	/* Scratch device: only .dev is populated, every other field is zeroed. */
+	struct mlx5_common_device scratch = { .dev = cdev->dev };
+	int ret = mlx5_os_open_device(&scratch, MLX5_CLASS_ETH);
+
+	if (ret >= 0)
+		return (void *)scratch.ctx;
+	/* Open failed: log the existing device name with the rte_errno text. */
+	DRV_LOG(ERR, "Failed to duplicate DevX device \"%s\": %s",
+		mlx5_os_get_ctx_device_name(cdev->ctx),
+		rte_strerror(rte_errno));
+	return NULL;
+}
+
 /**
  * Register umem.
  *