[RFC] common/mlx5: support cross vHCA mkey

Message ID 20210905183331.3895109-1-michaelba@nvidia.com (mailing list archive)
State RFC, archived
Delegated to: Raslan Darawsheh
Headers
Series [RFC] common/mlx5: support cross vHCA mkey |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/intel-Testing fail Testing issues

Commit Message

Michael Baum Sept. 5, 2021, 6:33 p.m. UTC
  The mlx5 devices support multiple vHCAs under the same device.
The mlx5 device IOVA addresses are built from an mkey ID and the process
virtual address (VA); given the VA, the kernel mlx5 driver creates an
mkey that defines the mapping between the VA space and the PA space.
This feature allows having an mkey that points to another mkey that
resides in a different vHCA.
This allows one vHCA, using a locally defined mkey, to access the memory
of another vHCA.
One of the motivations is to allow a DPDK application on BF2 to map Host
(x86) memory to be used on the DPU ARM CPUs.
Using a cross vHCA mkey, we can register host memory and expose it to
the DPU ARM CPUs, which will map it as a crossed-mkey
(CROSSING_VHCA_MKEY).
Using this local crossed-mkey, the DPU can issue QP operations that
access the host memory.

Suggest the following mlx5 private APIs:

 - Add a new private dynamic mbuf flag, RTE_PMD_MLX5_PRIVATE_IOVA, to
   indicate that the mkey is provided in the buf_iova mbuf field.
 - If the application wants to create the cross vHCA mkey by itself, the
   device context (CTX) and the protection domain (PD) must be shared
   between the application and the driver, because the HW requires the
   HW objects to be configured with the same CTX and PD. This is allowed
   by two methods:
   1. Add two new devargs to get the CTX and PD from the application.
   2. Add a new driver API to provide the CTX and PD to the application.
 - Add a PMD API to create a cross vHCA MKey by the driver.
 - Add a PMD API to get the mkey ID of a local buffer.

For all the usages above, the user should build the mbuf as
follows:
mbuf->buf_addr = <the virtual address of the peer/local host>
mbuf->buf_iova = struct mlx5_iova{uint32_t resrvd = 0; uint32_t
mkey_id;}
mbuf->ol_flags |= RTE_PMD_MLX5_PRIVATE_IOVA (for a cross vHCA mkey it will
probably be an EXT buf)

It is the application's responsibility to reset the mbuf fields before
moving the mbuf to another device.

Signed-off-by: Michael Baum <michaelba@nvidia.com>
---
 drivers/common/mlx5/mlx5_common.c  |  22 ++++++
 drivers/common/mlx5/rte_pmd_mlx5.h | 105 +++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)
 create mode 100644 drivers/common/mlx5/rte_pmd_mlx5.h
  

Patch

diff --git a/drivers/common/mlx5/mlx5_common.c b/drivers/common/mlx5/mlx5_common.c
index 459cf4bcc4..4ac4794983 100644
--- a/drivers/common/mlx5/mlx5_common.c
+++ b/drivers/common/mlx5/mlx5_common.c
@@ -8,13 +8,16 @@ 
 
 #include <rte_errno.h>
 #include <rte_mempool.h>
+#include <rte_common.h>
 #include <rte_class.h>
 #include <rte_malloc.h>
+#include <rte_mbuf.h>
 
 #include "mlx5_common.h"
 #include "mlx5_common_os.h"
 #include "mlx5_common_log.h"
 #include "mlx5_common_private.h"
+#include "rte_pmd_mlx5.h"
 
 uint8_t haswell_broadwell_cpu;
 
@@ -51,6 +54,25 @@  static TAILQ_HEAD(mlx5_drivers, mlx5_class_driver) drivers_list =
 static TAILQ_HEAD(mlx5_devices, mlx5_common_device) devices_list =
 				TAILQ_HEAD_INITIALIZER(devices_list);
 
+int
+rte_pmd_common_mlx5_get_dyn_flag_names(char *names[], unsigned int n)
+{
+	static const char *const dynf_names[] = { /* supported flag names */
+		RTE_PMD_MLX5_FINE_GRANULARITY_INLINE,
+		RTE_PMD_MLX5_PRIVATE_IOVA,
+	};
+	unsigned int i;
+	/* names[] must have room for every entry of dynf_names. */
+	if (n < RTE_DIM(dynf_names))
+		return -ENOMEM;
+	for (i = 0; i < RTE_DIM(dynf_names); i++) {
+		if (names[i] == NULL) /* earlier entries were already written */
+			return -EINVAL;
+		strcpy(names[i], dynf_names[i]); /* NOTE(review): unbounded copy */
+	}
+	return RTE_DIM(dynf_names); /* number of names copied */
+}
+
 static const struct {
 	const char *name;
 	unsigned int drv_class;
diff --git a/drivers/common/mlx5/rte_pmd_mlx5.h b/drivers/common/mlx5/rte_pmd_mlx5.h
new file mode 100644
index 0000000000..b032996917
--- /dev/null
+++ b/drivers/common/mlx5/rte_pmd_mlx5.h
@@ -0,0 +1,105 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2021 Mellanox Technologies, Ltd
+ */
+
+#ifndef RTE_PMD_PRIVATE_MLX5_H_
+#define RTE_PMD_PRIVATE_MLX5_H_
+
+/**
+ * @file
+ * MLX5 public header.
+ *
+ * This interface provides the ability to support private PMD dynamic flags.
+ */
+
+#define RTE_PMD_MLX5_PRIVATE_IOVA "mlx5_privet_iova"
+
+/**
+ * Returns the names of the supported dynamic flags.
+ *
+ * @param[out] names
+ *   Array of caller-provided string buffers that receive the flag names.
+ * @param[in] n
+ *   The number of elements in the names array.
+ *
+ * @return
+ *   The number of dynamic flag names that were copied, if not negative.
+ *   Otherwise, a negative errno value:
+ *   - -ENOMEM - not enough entries in the array
+ *   - -EINVAL - invalid (NULL) array entry
+ */
+__rte_experimental
+int rte_pmd_common_mlx5_get_dyn_flag_names(char *names[], unsigned int n);
+
+/**
+ * Returns the device context and its Protection Domain for a given device.
+ *
+ * @param[in] dev
+ *   Pointer to the generic device.
+ * @param[out] ctx
+ *   Pointer that is used to return the device context pointer.
+ * @param[out] pd
+ *   Pointer that is used to return the protection domain pointer.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   Possible values for rte_errno:
+ *   - EINVAL - invalid parameters (dev, ctx, or pd).
+ *   - ENODEV - the given device doesn't exist.
+ */
+__rte_experimental
+int rte_pmd_mlx5_get_dev_ctx_and_pd(struct rte_device *dev, void **ctx, void **pd);
+
+/**
+ * A PMD API to create a cross virtual HCA (vHCA) MKey.
+ * In BlueField family devices, there are several vHCAs under the same device.
+ * Each of them represents a different part of the device (host / guest) and
+ * has its own ID.
+ *
+ * This API creates a cross vHCA MKey for the user and returns it as an
+ * rte_iova_t value.
+ *
+ * @param[in] dev
+ *   Pointer to the generic device.
+ * @param[in] remote_vhca_id
+ *   ID of the remote vHCA.
+ * @param[in] remote_mkey
+ *   The MKey created by the remote vHCA.
+ *
+ * @return
+ *   Cross vHCA MKey for buf_iova mbuf field on success, 0 otherwise.
+ *   Possible values for rte_errno:
+ *   - ENOMEM - allocation failure.
+ *   - EINVAL - invalid parameters (dev).
+ *   - ENODEV - the given device doesn't exist.
+ */
+__rte_experimental
+rte_iova_t rte_pmd_mlx5_create_cross_vhca_mkey(struct rte_device *dev,
+					       uint32_t remote_vhca_id,
+					       uint32_t remote_mkey);
+
+/**
+ * A PMD API to get an mlx5 IOVA for a local process VA.
+ * This function creates memory regions for the given virtual addresses on the
+ * control path, and returns the result as an rte_iova_t value.
+ *
+ * @param[in] dev
+ *   Pointer to the generic device.
+ * @param[in] va
+ *   Pointer to memory start virtual address.
+ * @param[in] len
+ *   Length of the memory to register.
+ *
+ * @return
+ *   Compatible MKey for buf_iova mbuf field on success, 0 otherwise.
+ *   Possible values for rte_errno:
+ *   - ENOMEM - allocation failure.
+ *   - EINVAL - invalid parameters (dev or va).
+ *   - ENODEV - the given device doesn't exist.
+ */
+__rte_experimental
+rte_iova_t rte_pmd_mlx5_get_buf_private_iova(struct rte_device *dev, void *va,
+					     uint32_t len);
+
+
+#endif /* RTE_PMD_PRIVATE_MLX5_H_ */