[RFC,03/21] common/mlx5: add context device structure

Message ID 20210817134441.1966618-4-michaelba@nvidia.com (mailing list archive)
State RFC, archived
Delegated to: Raslan Darawsheh
Headers
Series mlx5: sharing global MR cache between drivers |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Michael Baum Aug. 17, 2021, 1:44 p.m. UTC
  Add context device structure which contains ctx and pd of device.
In addition, provide prepare and release functions for this structure.

Signed-off-by: Michael Baum <michaelba@nvidia.com>
---
 drivers/common/mlx5/linux/mlx5_common_os.c   | 144 ++++++++++++-
 drivers/common/mlx5/mlx5_common.c            | 166 +++++++++++++++
 drivers/common/mlx5/mlx5_common.h            |  48 +++++
 drivers/common/mlx5/version.map              |   3 +
 drivers/common/mlx5/windows/mlx5_common_os.c | 207 ++++++++++++++++++-
 5 files changed, 562 insertions(+), 6 deletions(-)
  

Patch

diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c
index 9e0c823c97..6f78897390 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.c
+++ b/drivers/common/mlx5/linux/mlx5_common_os.c
@@ -23,6 +23,22 @@ 
 const struct mlx5_glue *mlx5_glue;
 #endif
 
+/* Environment variable to control the doorbell register mapping. */
+#define MLX5_SHUT_UP_BF "MLX5_SHUT_UP_BF"
+#if defined(RTE_ARCH_ARM64)
+#define MLX5_SHUT_UP_BF_DEFAULT "0"
+#else
+#define MLX5_SHUT_UP_BF_DEFAULT "1"
+#endif
+
+/* Default PMD specific parameter value. */
+#define MLX5_TXDB_UNSET (-1)
+
+/* MLX5_TX_DB_NC supported values. */
+#define MLX5_TXDB_CACHED 0
+#define MLX5_TXDB_NCACHED 1
+#define MLX5_TXDB_HEURISTIC 2
+
 int
 mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr)
 {
@@ -401,6 +417,127 @@  mlx5_glue_constructor(void)
 	mlx5_glue = NULL;
 }
 
+static int
+mlx5_config_doorbell_mapping_env(int dbnc)
+{
+	char *env;
+	int value;
+
+	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	/* Get environment variable to store. */
+	env = getenv(MLX5_SHUT_UP_BF);
+	value = env ? !!strcmp(env, "0") : MLX5_TXDB_UNSET;
+	if (dbnc == MLX5_TXDB_UNSET)
+		setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1);
+	else
+		setenv(MLX5_SHUT_UP_BF,
+		       dbnc == MLX5_TXDB_NCACHED ? "1" : "0", 1);
+	return value;
+}
+
+static void
+mlx5_restore_doorbell_mapping_env(int value)
+{
+	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	/* Restore the original environment variable state. */
+	if (value == MLX5_TXDB_UNSET)
+		unsetenv(MLX5_SHUT_UP_BF);
+	else
+		setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1);
+}
+
+/**
+ * Function API to open IB device using DevX.
+ *
+ * This function calls the Linux glue APIs to open a device.
+ *
+ * @param dev_ctx
+ *   Pointer to the context device data structure.
+ * @param dev
+ *   Pointer to the generic device.
+ * @param dbnc
+ *   Device argument help configure the environment variable.
+ * @param classes
+ *   Chosen classes come from device arguments.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_os_devx_open_device(struct mlx5_dev_ctx *dev_ctx, struct rte_device *dev,
+			 int dbnc, uint32_t classes)
+{
+	struct ibv_device *ibv;
+	struct ibv_context *ctx = NULL;
+	int dbmap_env;
+
+	ibv = mlx5_os_get_ibv_dev(dev);
+	if (!ibv)
+		return -rte_errno;
+	DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name);
+	/*
+	 * Configure environment variable "MLX5_SHUT_UP_BF" before the device
+	 * creation. The rdma_core library checks the variable at device
+	 * creation and stores the result internally.
+	 */
+	dbmap_env = mlx5_config_doorbell_mapping_env(dbnc);
+	/* Try to open IB device with DV. */
+	errno = 0;
+	ctx = mlx5_glue->dv_open_device(ibv);
+	/*
+	 * The environment variable is not needed anymore, all device creation
+	 * attempts are completed.
+	 */
+	mlx5_restore_doorbell_mapping_env(dbmap_env);
+	if (ctx == NULL && classes != MLX5_CLASS_ETH) {
+		DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
+		rte_errno = errno ? errno : ENODEV;
+		return -rte_errno;
+	}
+	dev_ctx->ctx = ctx;
+	return 0;
+}
+
+/**
+ * Allocate Protection Domain object and extract its pdn using DV API.
+ *
+ * @param[out] dev_ctx
+ *   Device context; ctx is read, pd and pdn are filled in on success.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_os_pd_create(struct mlx5_dev_ctx *dev_ctx)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	struct mlx5dv_pd pd_info;
+	struct mlx5dv_obj obj = { .pd = { .out = &pd_info } };
+
+	dev_ctx->pd = mlx5_glue->alloc_pd(dev_ctx->ctx);
+	if (dev_ctx->pd == NULL) {
+		rte_errno = errno ? errno : ENOMEM;
+		DRV_LOG(ERR, "Failed to allocate PD.");
+		return -rte_errno;
+	}
+	obj.pd.in = dev_ctx->pd;
+	if (mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD) != 0) {
+		rte_errno = errno ? errno : EINVAL;
+		DRV_LOG(ERR, "Fail to get PD object info.");
+		mlx5_glue->dealloc_pd(dev_ctx->pd);
+		dev_ctx->pd = NULL;
+		return -rte_errno;
+	}
+	dev_ctx->pdn = pd_info.pdn;
+	return 0;
+#else
+	(void)dev_ctx;
+	DRV_LOG(ERR, "Cannot get pdn - no DV support.");
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
+}
+
 struct ibv_device *
 mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
 {
@@ -423,8 +560,13 @@  mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
 		ibv_match = ibv_list[n];
 		break;
 	}
-	if (ibv_match == NULL)
+	if (ibv_match == NULL) {
+		DRV_LOG(WARNING,
+			"No Verbs device matches PCI device " PCI_PRI_FMT ","
+			" are kernel drivers loaded?",
+			addr->domain, addr->bus, addr->devid, addr->function);
 		rte_errno = ENOENT;
+	}
 	mlx5_glue->free_device_list(ibv_list);
 	return ibv_match;
 }
diff --git a/drivers/common/mlx5/mlx5_common.c b/drivers/common/mlx5/mlx5_common.c
index 459cf4bcc4..be3d0f2627 100644
--- a/drivers/common/mlx5/mlx5_common.c
+++ b/drivers/common/mlx5/mlx5_common.c
@@ -41,6 +41,20 @@  static inline void mlx5_cpu_id(unsigned int level,
 }
 #endif
 
+/*
+ * Device parameter to force doorbell register mapping to non-cached region
+ * eliminating the extra write memory barrier.
+ */
+#define MLX5_TX_DB_NC "tx_db_nc"
+
+/* Default PMD specific parameter value. */
+#define MLX5_TXDB_UNSET (-1)
+
+/* MLX5_TX_DB_NC supported values. */
+#define MLX5_TXDB_CACHED 0
+#define MLX5_TXDB_NCACHED 1
+#define MLX5_TXDB_HEURISTIC 2
+
 RTE_LOG_REGISTER_DEFAULT(mlx5_common_logtype, NOTICE)
 
 /* Head of list of drivers. */
@@ -88,6 +102,83 @@  driver_get(uint32_t class)
 	return NULL;
 }
 
+/**
+ * Verify and store value for device argument.
+ *
+ * @param[in] key
+ *   Key argument to verify.
+ * @param[in] val
+ *   Value associated with key, must be a complete integer literal.
+ * @param opaque
+ *   User data, pointer to the int receiving the doorbell mapping mode.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_common_args_check(const char *key, const char *val, void *opaque)
+{
+	int *dbnc = opaque;
+	char *end;
+	signed long tmp;
+
+	errno = 0;
+	tmp = strtol(val, &end, 0);
+	if (errno != 0 || end == val || *end != '\0') {
+		rte_errno = errno ? errno : EINVAL;
+		DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val);
+		return -rte_errno;
+	}
+	if (strcmp(MLX5_TX_DB_NC, key) == 0) {
+		if (tmp != MLX5_TXDB_CACHED && tmp != MLX5_TXDB_NCACHED &&
+		    tmp != MLX5_TXDB_HEURISTIC) {
+			DRV_LOG(ERR, "Invalid Tx doorbell mapping parameter.");
+			rte_errno = EINVAL;
+			return -rte_errno;
+		}
+		*dbnc = tmp;
+	}
+	return 0;
+}
+
+/**
+ * Parse Tx doorbell mapping parameter.
+ *
+ * @param devargs
+ *   Device arguments structure.
+ * @param dbnc
+ *   Pointer to get into doorbell mapping parameter.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_parse_db_map_arg(struct rte_devargs *devargs, int *dbnc)
+{
+	struct rte_kvargs *kvlist;
+	int ret = 0;
+
+	if (devargs == NULL)
+		return 0;
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (kvlist == NULL) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	if (rte_kvargs_count(kvlist, MLX5_TX_DB_NC)) {
+		ret = rte_kvargs_process(kvlist, MLX5_TX_DB_NC,
+					 mlx5_common_args_check, dbnc);
+		if (ret) {
+			rte_errno = EINVAL;
+			rte_kvargs_free(kvlist);
+			return -rte_errno;
+		}
+	}
+	rte_kvargs_free(kvlist);
+	return 0;
+}
+
+
 static int
 devargs_class_handler(__rte_unused const char *key,
 		      const char *class_names, void *opaque)
@@ -219,6 +310,81 @@  mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size)
 #endif
 }
 
+/**
+ * Uninitialize context device and release all its resources.
+ *
+ * Only members that are not NULL are released, and each released member
+ * is reset to NULL. The function does not return a value.
+ *
+ * @param dev_ctx
+ *   Pointer to the context device data structure.
+ */
+void
+mlx5_dev_ctx_release(struct mlx5_dev_ctx *dev_ctx)
+{
+	if (dev_ctx->pd != NULL) {
+		claim_zero(mlx5_os_dealloc_pd(dev_ctx->pd));
+		dev_ctx->pd = NULL;
+	}
+	if (dev_ctx->ctx != NULL) {
+		claim_zero(mlx5_glue->close_device(dev_ctx->ctx));
+		dev_ctx->ctx = NULL;
+	}
+}
+
+/**
+ * Initialize context device and allocate all its resources.
+ *
+ * @param dev_ctx
+ *   Pointer to the context device data structure.
+ * @param dev
+ *   Pointer to mlx5 device structure.
+ * @param classes_loaded
+ *   Chosen classes come from device arguments.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_dev_ctx_prepare(struct mlx5_dev_ctx *dev_ctx, struct rte_device *dev,
+		     uint32_t classes_loaded)
+{
+	int dbnc = MLX5_TXDB_UNSET;
+	int ret;
+
+	dev_ctx->numa_node = dev->numa_node;
+	/*
+	 * Parse Tx doorbell mapping parameter. It helps to configure
+	 * environment variable "MLX5_SHUT_UP_BF" before the device creation.
+	 */
+	ret = mlx5_parse_db_map_arg(dev->devargs, &dbnc);
+	if (ret < 0)
+		return ret;
+	/*
+	 * Open device using DevX.
+	 * If DevX isn't supported, ctx field remains NULL.
+	 */
+	ret = mlx5_os_devx_open_device(dev_ctx, dev, dbnc, classes_loaded);
+	if (ret < 0)
+		return ret;
+	/*
+	 * When DevX is not supported and the classes selected by the user can
+	 * also work with Verbs, the mlx5_os_devx_open_device function returns
+	 * 0 although no device has been created at this time.
+	 * Later they will try to create again in Verbs.
+	 */
+	if (dev_ctx->ctx == NULL)
+		return 0;
+	/* Allocate Protection Domain object and extract its pdn. */
+	ret = mlx5_os_pd_create(dev_ctx);
+	if (ret)
+		goto error;
+	return ret;
+error:
+	mlx5_dev_ctx_release(dev_ctx);
+	return ret;
+}
+
 static void
 dev_release(struct mlx5_common_device *dev)
 {
diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index a772371200..609953b70e 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -324,6 +324,46 @@  void mlx5_common_init(void);
  *   from devargs, locating target RDMA device and probing with it.
  */
 
+/**
+ * Shared device context structure.
+ * Contains HW device objects which belong to same device with multiple drivers.
+ */
+struct mlx5_dev_ctx {
+	void *ctx;	/* Verbs/DV/DevX context. */
+	void *pd;	/* Protection Domain. */
+	uint32_t pdn;	/* Protection Domain Number. */
+	int numa_node;	/* Numa node of device. */
+};
+
+/**
+ * Uninitialize context device and release all its resources.
+ *
+ * Only members that are not NULL are released, and each released member
+ * is reset to NULL. The function does not return a value.
+ *
+ * @param dev_ctx
+ *   Pointer to the context device data structure.
+ */
+__rte_internal
+void mlx5_dev_ctx_release(struct mlx5_dev_ctx *dev_ctx);
+
+/**
+ * Initialize context device and allocate all its resources.
+ *
+ * @param dev_ctx
+ *   Pointer to the context device data structure.
+ * @param dev
+ *   Pointer to mlx5 device structure.
+ * @param classes_loaded
+ *   Chosen classes come from device arguments.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+__rte_internal
+int mlx5_dev_ctx_prepare(struct mlx5_dev_ctx *dev_ctx, struct rte_device *dev,
+			 uint32_t classes_loaded);
+
 /**
  * Initialization function for the driver called during device probing.
  */
@@ -419,4 +459,12 @@  __rte_internal
 bool
 mlx5_dev_is_pci(const struct rte_device *dev);
 
+/* mlx5_common_os.c */
+
+int mlx5_os_devx_open_device(struct mlx5_dev_ctx *dev_ctx,
+			     struct rte_device *dev, int dbnc,
+			     uint32_t classes);
+int mlx5_os_pd_create(struct mlx5_dev_ctx *dev_ctx);
+
+
 #endif /* RTE_PMD_MLX5_COMMON_H_ */
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index e5cb6b7060..6a88105d02 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -9,6 +9,9 @@  INTERNAL {
 
 	mlx5_common_init;
 
+	mlx5_dev_ctx_release;
+	mlx5_dev_ctx_prepare;
+
 	mlx5_common_verbs_reg_mr; # WINDOWS_NO_EXPORT
 	mlx5_common_verbs_dereg_mr; # WINDOWS_NO_EXPORT
 
diff --git a/drivers/common/mlx5/windows/mlx5_common_os.c b/drivers/common/mlx5/windows/mlx5_common_os.c
index 5031bdca26..5d178b0452 100644
--- a/drivers/common/mlx5/windows/mlx5_common_os.c
+++ b/drivers/common/mlx5/windows/mlx5_common_os.c
@@ -7,6 +7,7 @@ 
 #include <stdio.h>
 
 #include <rte_mempool.h>
+#include <rte_bus_pci.h>
 #include <rte_malloc.h>
 #include <rte_errno.h>
 
@@ -17,7 +18,7 @@ 
 #include "mlx5_malloc.h"
 
 /**
- * Initialization routine for run-time dependency on external lib
+ * Initialization routine for run-time dependency on external lib.
  */
 void
 mlx5_glue_constructor(void)
@@ -25,7 +26,7 @@  mlx5_glue_constructor(void)
 }
 
 /**
- * Allocate PD. Given a devx context object
+ * Allocate PD. Given a DevX context object
  * return an mlx5-pd object.
  *
  * @param[in] ctx
@@ -37,8 +38,8 @@  mlx5_glue_constructor(void)
 void *
 mlx5_os_alloc_pd(void *ctx)
 {
-	struct mlx5_pd *ppd =  mlx5_malloc(MLX5_MEM_ZERO,
-		sizeof(struct mlx5_pd), 0, SOCKET_ID_ANY);
+	struct mlx5_pd *ppd = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_pd),
+					  0, SOCKET_ID_ANY);
 	if (!ppd)
 		return NULL;
 
@@ -60,7 +61,7 @@  mlx5_os_alloc_pd(void *ctx)
  *   Pointer to mlx5_pd.
  *
  * @return
- *    Zero if pd is released successfully, negative number otherwise.
+ *   Zero if pd is released successfully, negative number otherwise.
  */
 int
 mlx5_os_dealloc_pd(void *pd)
@@ -72,6 +73,202 @@  mlx5_os_dealloc_pd(void *pd)
 	return 0;
 }
 
+/**
+ * Detect if a devx_device_bdf object has identical DBDF values to the
+ * rte_pci_addr found in bus/pci probing
+ *
+ * @param[in] devx_bdf
+ *   Pointer to the devx_device_bdf structure.
+ * @param[in] addr
+ *   Pointer to the rte_pci_addr structure.
+ *
+ * @return
+ *   1 on Device match, 0 on mismatch.
+ */
+static int
+mlx5_match_devx_bdf_to_addr(struct devx_device_bdf *devx_bdf,
+			    struct rte_pci_addr *addr)
+{
+	if (addr->domain != (devx_bdf->bus_id >> 8) ||
+	    addr->bus != (devx_bdf->bus_id & 0xff) ||
+	    addr->devid != devx_bdf->dev_id ||
+	    addr->function != devx_bdf->fnc_id) {
+		return 0;
+	}
+	return 1;
+}
+
+/**
+ * Detect if a devx_device_bdf object matches the rte_pci_addr
+ * found in bus/pci probing
+ * Compare both the Native/PF BDF and the raw_bdf representing a VF BDF.
+ *
+ * @param[in] devx_bdf
+ *   Pointer to the devx_device_bdf structure.
+ * @param[in] addr
+ *   Pointer to the rte_pci_addr structure.
+ *
+ * @return
+ *   1 on Device match, 0 on mismatch, rte_errno code on failure.
+ */
+static int
+mlx5_match_devx_devices_to_addr(struct devx_device_bdf *devx_bdf,
+				struct rte_pci_addr *addr)
+{
+	int err;
+	struct devx_device mlx5_dev;
+
+	if (mlx5_match_devx_bdf_to_addr(devx_bdf, addr))
+		return 1;
+	/*
+	 * Didn't match on Native/PF BDF, could still match a VF BDF,
+	 * check it next.
+	 */
+	err = mlx5_glue->query_device(devx_bdf, &mlx5_dev);
+	if (err) {
+		DRV_LOG(ERR, "query_device failed");
+		rte_errno = err;
+		return rte_errno;
+	}
+	if (mlx5_match_devx_bdf_to_addr(&mlx5_dev.raw_bdf, addr))
+		return 1;
+	return 0;
+}
+
+/**
+ * Look for DevX device that match to given rte_device.
+ *
+ * The returned entry points into devx_list, so the list must stay allocated
+ * for as long as the caller uses the match.
+ *
+ * @param dev
+ *   Pointer to the generic device.
+ * @param devx_list
+ *   Array of DevX devices returned by the glue get_device_list().
+ * @param n
+ *   Number of entries in devx_list.
+ *
+ * @return
+ *   A device match on success, NULL otherwise and rte_errno is set.
+ */
+static struct devx_device_bdf *
+mlx5_os_get_devx_device(struct rte_device *dev,
+			struct devx_device_bdf *devx_list, int n)
+{
+	struct devx_device_bdf *devx_match = NULL;
+	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev);
+	struct rte_pci_addr *addr = &pci_dev->addr;
+
+	while (n-- > 0) {
+		int ret = mlx5_match_devx_devices_to_addr(devx_list, addr);
+		if (!ret) {
+			devx_list++;
+			continue;
+		}
+		if (ret != 1) {
+			/* Neither match nor mismatch: ret is an error code. */
+			rte_errno = ret;
+			return NULL;
+		}
+		devx_match = devx_list;
+		break;
+	}
+	if (devx_match == NULL) {
+		/* No device matches, just complain and bail out. */
+		DRV_LOG(WARNING,
+			"No DevX device matches PCI device " PCI_PRI_FMT ","
+			" is DevX Configured?",
+			addr->domain, addr->bus, addr->devid, addr->function);
+		rte_errno = ENOENT;
+	}
+	return devx_match;
+}
+
+/**
+ * Function API open device under Windows.
+ *
+ * This function calls the Windows glue APIs to open a device.
+ * The DevX device list is kept until the matched entry has been used for
+ * both open_device() and query_device(), then released on every path.
+ *
+ * @param[out] dev_ctx
+ *   Pointer to the context device data structure.
+ * @param dev
+ *   Pointer to the generic device.
+ * @param dbnc
+ *   Doorbell mapping device argument, unused on Windows.
+ * @param classes
+ *   Chosen classes come from device arguments.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_os_devx_open_device(struct mlx5_dev_ctx *dev_ctx, struct rte_device *dev,
+			 int dbnc, uint32_t classes)
+{
+	struct devx_device_bdf *devx_bdf_dev;
+	struct devx_device_bdf *devx_list;
+	struct mlx5_context *mlx5_ctx = NULL;
+	int n;
+
+	RTE_SET_USED(dbnc);
+	if (classes != MLX5_CLASS_ETH) {
+		DRV_LOG(WARNING,
+			"The chosen classes are not supported on Windows.");
+		rte_errno = ENOTSUP;
+		return -rte_errno;
+	}
+	errno = 0;
+	devx_list = mlx5_glue->get_device_list(&n);
+	if (devx_list == NULL) {
+		rte_errno = errno ? errno : ENOSYS;
+		DRV_LOG(ERR, "Cannot list devices, is DevX enabled?");
+		return -rte_errno;
+	}
+	/* The match points into devx_list: free the list only after use. */
+	devx_bdf_dev = mlx5_os_get_devx_device(dev, devx_list, n);
+	if (devx_bdf_dev == NULL)
+		goto error;
+	/* Try to open DevX device with DV. */
+	mlx5_ctx = mlx5_glue->open_device(devx_bdf_dev);
+	if (mlx5_ctx == NULL) {
+		/* Save the error code before logging can overwrite errno. */
+		rte_errno = errno ? errno : ENODEV;
+		DRV_LOG(ERR, "Failed to open DevX device.");
+		goto error;
+	}
+	if (mlx5_glue->query_device(devx_bdf_dev, &mlx5_ctx->mlx5_dev)) {
+		rte_errno = errno ? errno : ENODEV;
+		DRV_LOG(ERR, "Failed to query device context fields.");
+		goto error;
+	}
+	mlx5_glue->free_device_list(devx_list);
+	dev_ctx->ctx = mlx5_ctx;
+	return 0;
+error:
+	if (mlx5_ctx != NULL)
+		claim_zero(mlx5_glue->close_device(mlx5_ctx));
+	mlx5_glue->free_device_list(devx_list);
+	return -rte_errno;
+}
+
+/**
+ * Allocate Protection Domain object and extract its pdn using DV API.
+ *
+ * @param[out] dev_ctx
+ *   Pointer to the context device data structure.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int
+mlx5_os_pd_create(struct mlx5_dev_ctx *dev_ctx)
+{
+	struct mlx5_pd *pd;
+
+	pd = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pd), 0, SOCKET_ID_ANY);
+	if (!pd)
+		return -1;
+	struct mlx5_devx_obj *obj = mlx5_devx_cmd_alloc_pd(dev_ctx->ctx);
+	if (!obj) {
+		mlx5_free(pd);
+		return -1;
+	}
+	pd->obj = obj;
+	pd->pdn = obj->id;
+	pd->devx_ctx = dev_ctx->ctx;
+	dev_ctx->pd = pd;
+	dev_ctx->pdn = pd->pdn;
+	return 0;
+}
+
 /**
  * Register umem.
  *