[1/2] common/mlx5: add device duplication function
Checks
Commit Message
From: Michael Baum <michaelba@nvidia.com>
Add a function for creating an additional CTX for the same device, based on
an existing CTX.
Signed-off-by: Michael Baum <michaelba@nvidia.com>
---
drivers/common/mlx5/linux/mlx5_common_os.c | 52 ++++++++++++++++++++
drivers/common/mlx5/mlx5_common.h | 4 ++
drivers/common/mlx5/version.map | 1 +
drivers/common/mlx5/windows/mlx5_common_os.c | 31 ++++++++++++
4 files changed, 88 insertions(+)
Comments
> From: Michael Baum <michaelba@nvidia.com>
>
> Add a function for creating an additional CTX for the same device, based on an existing CTX.
>
> Signed-off-by: Michael Baum <michaelba@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
> ---
> drivers/common/mlx5/linux/mlx5_common_os.c | 52
> ++++++++++++++++++++
> drivers/common/mlx5/mlx5_common.h | 4 ++
> drivers/common/mlx5/version.map | 1 +
> drivers/common/mlx5/windows/mlx5_common_os.c | 31 ++++++++++++
> 4 files changed, 88 insertions(+)
>
> diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c
> b/drivers/common/mlx5/linux/mlx5_common_os.c
> index 41345e1597..2f5032f0da 100644
> --- a/drivers/common/mlx5/linux/mlx5_common_os.c
> +++ b/drivers/common/mlx5/linux/mlx5_common_os.c
> @@ -873,6 +873,58 @@ mlx5_os_open_device(struct mlx5_common_device
> *cdev, uint32_t classes)
> return 0;
> }
>
> +/**
> + * API function to obtain a new InfiniBand (IB) context for a given common
> device.
> + *
> + * This function provides a port-agnostic IB context for a physical
> +device, enabling the
> + * device to create and manage resources that can be initialized when a
> +port starts and
> + * released when another port stops.
> + *
> + * For Linux, it imports new context from the existing context.
> + *
> + * @param cdev
> + * Pointer to the mlx5 device structure.
> + *
> + * @return
> + * Pointer to an `ibv_context` on success, or NULL on failure, with `rte_errno`
> set.
> + */
> +void *
> +mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev) {
> + struct ibv_context *ctx = NULL;
> + int cmd_fd = ((struct ibv_context *)cdev->ctx)->cmd_fd;
> + int new_cmd_fd;
> +
> + /*
> + * Duplicate the command FD to pass it as input to the import device
> function.
> + * If the import function succeeds, the new device context takes
> ownership of
> + * this FD, which will be freed when the new device is closed.
> + * If the import function fails, we are responsible for closing this FD.
> + */
> + new_cmd_fd = dup(cmd_fd);
> + if (new_cmd_fd < 0) {
> + DRV_LOG(ERR,
> + "Failed to duplicate FD %d for IB device \"%s\": %s",
> + cmd_fd, mlx5_os_get_ctx_device_name(cdev->ctx),
> + rte_strerror(errno));
> + rte_errno = errno;
> + return NULL;
> + }
> + /* Attempt to import the duplicated FD to create a new device context.
> */
> + ctx = mlx5_glue->import_device(new_cmd_fd);
> + if (!ctx) {
> + DRV_LOG(ERR, "Failed to import IB device \"%s\": %s",
> + mlx5_os_get_ctx_device_name(cdev->ctx),
> + rte_strerror(errno));
> + close(new_cmd_fd);
> + rte_errno = errno;
> + return NULL;
> + }
> + DRV_LOG(INFO, "IB device \"%s\" successfully imported, old_fd=%d,
> new_fd=%d",
> + mlx5_os_get_ctx_device_name(cdev->ctx), cmd_fd,
> new_cmd_fd);
> + return (void *)ctx;
> +}
> +
> int
> mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t
> len) { diff --git a/drivers/common/mlx5/mlx5_common.h
> b/drivers/common/mlx5/mlx5_common.h
> index e7bd4c6ec4..bea1382911 100644
> --- a/drivers/common/mlx5/mlx5_common.h
> +++ b/drivers/common/mlx5/mlx5_common.h
> @@ -651,6 +651,10 @@ mlx5_devx_uar_release(struct mlx5_uar *uar);
>
> /* mlx5_common_os.c */
>
> +__rte_internal
> +void *
> +mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev);
> +
> int mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t
> classes); int mlx5_os_pd_prepare(struct mlx5_common_device *cdev); int
> mlx5_os_pd_release(struct mlx5_common_device *cdev); diff --git
> a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
> index 8301485839..819e6b96cb 100644
> --- a/drivers/common/mlx5/version.map
> +++ b/drivers/common/mlx5/version.map
> @@ -150,6 +150,7 @@ INTERNAL {
> mlx5_nl_rdma_monitor_info_get; # WINDOWS_NO_EXPORT
> mlx5_nl_rdma_monitor_cap_get; # WINDOWS_NO_EXPORT
>
> + mlx5_os_get_physical_device_ctx;
> mlx5_os_umem_dereg;
> mlx5_os_umem_reg;
>
> diff --git a/drivers/common/mlx5/windows/mlx5_common_os.c
> b/drivers/common/mlx5/windows/mlx5_common_os.c
> index 2b6058eb3e..68f1981193 100644
> --- a/drivers/common/mlx5/windows/mlx5_common_os.c
> +++ b/drivers/common/mlx5/windows/mlx5_common_os.c
> @@ -266,6 +266,37 @@ mlx5_os_open_device(struct mlx5_common_device
> *cdev, uint32_t classes)
> return -rte_errno;
> }
>
> +/**
> + * API function to obtain a new MLX5 context for a given common device.
> + *
> + * This function provides a port-agnostic context for a physical
> +device, enabling the
> + * device to create and manage resources that can be initialized when a
> +port starts and
> + * released when another port stops.
> + *
> + * For Windows, it creates a new context for the device regardless to existing
> context.
> + *
> + * @param cdev
> + * Pointer to the mlx5 device structure.
> + *
> + * @return
> + * Pointer to an `ibv_context` on success, or NULL on failure, with `rte_errno`
> set.
> + */
> +void *
> +mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev) {
> + struct mlx5_common_device temp = {
> + .dev = cdev->dev,
> + };
> +
> + if (mlx5_os_open_device(&temp, MLX5_CLASS_ETH) < 0) {
> + DRV_LOG(ERR, "Failed to duplicate DevX device \"%s\": %s",
> + mlx5_os_get_ctx_device_name(cdev->ctx),
> + rte_strerror(rte_errno));
> + return NULL;
> + }
> + return (void *)temp.ctx;
> +}
> +
> /**
> * Register umem.
> *
> --
> 2.45.2
Hi,
From: Gregory Etelson <getelson@nvidia.com>
Sent: Thursday, March 13, 2025 10:33 AM
To: dev@dpdk.org
Cc: Gregory Etelson; Maayan Kashani; Raslan Darawsheh; Michael Baum; Dariusz Sosnowski; Slava Ovsiienko; Bing Zhao; Ori Kam; Suanming Mou; Matan Azrad
Subject: [PATCH 1/2] common/mlx5: add device duplication function
From: Michael Baum <michaelba@nvidia.com>
Add a function for creating an additional CTX for the same device, based on
an existing CTX.
Signed-off-by: Michael Baum <michaelba@nvidia.com>
Series applied to next-net-mlx,
Kindest regards,
Raslan Darawsheh
@@ -873,6 +873,58 @@ mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
return 0;
}
+/**
+ * Obtain a new InfiniBand (IB) context for a given common device.
+ *
+ * This function provides a port-agnostic IB context for a physical device, enabling the
+ * device to create and manage resources that can be initialized when a port starts and
+ * released when another port stops.
+ *
+ * For Linux, it imports a new context from the existing one: the command FD of
+ * cdev->ctx is duplicated and the duplicate is handed to the import-device glue call.
+ *
+ * @param cdev
+ *   Pointer to the mlx5 device structure. Its ctx field is read as an `ibv_context`.
+ *
+ * @return
+ *   Pointer to an `ibv_context` on success, or NULL on failure, with `rte_errno` set
+ *   to the errno value observed right after the failing call.
+ */
+void *
+mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev)
+{
+	struct ibv_context *ctx;
+	int cmd_fd = ((struct ibv_context *)cdev->ctx)->cmd_fd;
+	int new_cmd_fd;
+
+	/*
+	 * Duplicate the command FD to pass it as input to the import device function.
+	 * If the import function succeeds, the new device context takes ownership of
+	 * this FD, which will be freed when the new device is closed.
+	 * If the import function fails, we are responsible for closing this FD.
+	 */
+	new_cmd_fd = dup(cmd_fd);
+	if (new_cmd_fd < 0) {
+		/* Latch errno first: the logging path may overwrite it. */
+		rte_errno = errno;
+		DRV_LOG(ERR,
+			"Failed to duplicate FD %d for IB device \"%s\": %s",
+			cmd_fd, mlx5_os_get_ctx_device_name(cdev->ctx),
+			rte_strerror(rte_errno));
+		return NULL;
+	}
+	/* Attempt to import the duplicated FD to create a new device context. */
+	ctx = mlx5_glue->import_device(new_cmd_fd);
+	if (!ctx) {
+		/* Latch errno before logging and close(), either may overwrite it. */
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to import IB device \"%s\": %s",
+			mlx5_os_get_ctx_device_name(cdev->ctx),
+			rte_strerror(rte_errno));
+		close(new_cmd_fd);
+		return NULL;
+	}
+	DRV_LOG(INFO, "IB device \"%s\" successfully imported, old_fd=%d, new_fd=%d",
+		mlx5_os_get_ctx_device_name(cdev->ctx), cmd_fd, new_cmd_fd);
+	return (void *)ctx;
+}
+
int
mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t len)
{
@@ -651,6 +651,10 @@ mlx5_devx_uar_release(struct mlx5_uar *uar);
/* mlx5_common_os.c */
+__rte_internal
+void *
+mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev);
+
int mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes);
int mlx5_os_pd_prepare(struct mlx5_common_device *cdev);
int mlx5_os_pd_release(struct mlx5_common_device *cdev);
@@ -150,6 +150,7 @@ INTERNAL {
mlx5_nl_rdma_monitor_info_get; # WINDOWS_NO_EXPORT
mlx5_nl_rdma_monitor_cap_get; # WINDOWS_NO_EXPORT
+ mlx5_os_get_physical_device_ctx;
mlx5_os_umem_dereg;
mlx5_os_umem_reg;
@@ -266,6 +266,37 @@ mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
return -rte_errno;
}
+/**
+ * Obtain a new MLX5 context for a given common device.
+ *
+ * The returned context is port-agnostic: it lets the physical device create and
+ * manage resources that can be initialized when one port starts and released
+ * when another port stops.
+ *
+ * For Windows, a new context is opened for the device regardless of the existing
+ * context; only cdev->dev is carried over into the temporary device structure.
+ *
+ * @param cdev
+ *   Pointer to the mlx5 device structure.
+ *
+ * @return
+ *   Pointer to the newly opened device context on success, or NULL on failure.
+ *   On failure `rte_errno` is expected to have been set by mlx5_os_open_device().
+ */
+void *
+mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev)
+{
+	/* Scratch device: everything except .dev is zero-initialized. */
+	struct mlx5_common_device dup_cdev = { .dev = cdev->dev };
+	int ret = mlx5_os_open_device(&dup_cdev, MLX5_CLASS_ETH);
+
+	if (ret >= 0)
+		return (void *)dup_cdev.ctx;
+	DRV_LOG(ERR, "Failed to duplicate DevX device \"%s\": %s",
+		mlx5_os_get_ctx_device_name(cdev->ctx),
+		rte_strerror(rte_errno));
+	return NULL;
+}
+
/**
* Register umem.
*