diff mbox series

[RFC,12/21] common/mlx5: add ROCE disable in context device creation

Message ID 20210817134441.1966618-13-michaelba@nvidia.com (mailing list archive)
State RFC, archived
Delegated to: Raslan Darawsheh
Headers show
Series mlx5: sharing global MR cache between drivers | expand

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Michael Baum Aug. 17, 2021, 1:44 p.m. UTC
Add an option to get the IB device after disabling RoCE. This is relevant
when the vDPA class is present in the device arguments list.

Signed-off-by: Michael Baum <michaelba@nvidia.com>
---
 drivers/common/mlx5/linux/mlx5_common_os.c | 126 ++++++++++++++++++++-
 1 file changed, 125 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c
index 6f78897390..4a94865241 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.c
+++ b/drivers/common/mlx5/linux/mlx5_common_os.c
@@ -15,6 +15,7 @@ 
 #include <rte_string_fns.h>
 
 #include "mlx5_common.h"
+#include "mlx5_nl.h"
 #include "mlx5_common_log.h"
 #include "mlx5_common_os.h"
 #include "mlx5_glue.h"
@@ -39,6 +40,9 @@  const struct mlx5_glue *mlx5_glue;
 #define MLX5_TXDB_NCACHED 1
 #define MLX5_TXDB_HEURISTIC 2
 
+/* Number of attempts to get the IB device after RoCE is disabled. */
+#define MLX5_VDPA_MAX_RETRIES 20
+/* Delay between attempts, in microseconds (passed to usleep()). */
+#define MLX5_VDPA_USEC 1000
+
 int
 mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr)
 {
@@ -417,6 +421,123 @@  mlx5_glue_constructor(void)
 	mlx5_glue = NULL;
 }
 
+/*
+ * Try to disable RoCE through the Netlink/Devlink interface.
+ *
+ * @param addr
+ *   Device address string handed to the devlink RoCE get/set helpers.
+ *
+ * @return
+ *   0 when RoCE is already off or was switched off. Otherwise the negative
+ *   value returned by the socket open or the devlink family lookup, or the
+ *   non-zero status of the failing RoCE get/set request.
+ */
+static int
+mlx5_nl_roce_disable(const char *addr)
+{
+	int roce_on;
+	int family;
+	int rc;
+	int sock = mlx5_nl_init(NETLINK_GENERIC);
+
+	if (sock < 0)
+		return sock;
+	family = mlx5_nl_devlink_family_id_get(sock);
+	if (family < 0) {
+		DRV_LOG(DEBUG,
+			"Failed to get devlink id for ROCE operations by Netlink.");
+		rc = family;
+		goto out;
+	}
+	/* Read the current state first so an already-disabled device is left alone. */
+	rc = mlx5_nl_enable_roce_get(sock, family, addr, &roce_on);
+	if (rc != 0) {
+		DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.", rc);
+		goto out;
+	}
+	if (roce_on == 0) {
+		DRV_LOG(INFO, "ROCE has already disabled(Netlink).");
+		goto out;
+	}
+	rc = mlx5_nl_enable_roce_set(sock, family, addr, 0);
+	if (rc == 0)
+		DRV_LOG(INFO, "ROCE is disabled by Netlink successfully.");
+	else
+		DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", rc);
+out:
+	/* The socket is released on every path that managed to open it. */
+	close(sock);
+	return rc;
+}
+
+/*
+ * Try to disable RoCE through the "roce_enable" sysfs attribute of the
+ * PCI device.
+ *
+ * @param addr
+ *   PCI address string used to build the sysfs attribute path.
+ *
+ * @return
+ *   0 when RoCE is already disabled or was disabled successfully,
+ *   a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_sys_roce_disable(const char *addr)
+{
+	FILE *file_o;
+	int enable;
+	int ret;
+
+	MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr);
+	file_o = fopen(file_p, "rb");
+	if (!file_o) {
+		/* The attribute cannot be opened: report it as not supported. */
+		rte_errno = ENOTSUP;
+		return -ENOTSUP;
+	}
+	ret = fscanf(file_o, "%d", &enable);
+	/* The read stream is not needed anymore whatever the parse result. */
+	fclose(file_o);
+	if (ret != 1) {
+		rte_errno = EINVAL;
+		ret = -EINVAL;
+		goto error;
+	}
+	if (!enable) {
+		/* Nothing to write, and no "disabled successfully" message. */
+		DRV_LOG(INFO, "ROCE has already disabled(sysfs).");
+		return 0;
+	}
+	file_o = fopen(file_p, "wb");
+	if (!file_o) {
+		rte_errno = ENOTSUP;
+		ret = -ENOTSUP;
+		goto error;
+	}
+	/*
+	 * The stream is buffered, so a store rejected by the kernel is
+	 * reported only when the data is flushed by fclose(). Both calls
+	 * must succeed before the disable is considered done.
+	 */
+	ret = 0;
+	if (fprintf(file_o, "0\n") < 0)
+		ret = -EIO;
+	if (fclose(file_o) != 0 && ret == 0)
+		ret = -EIO;
+	if (ret != 0) {
+		rte_errno = EIO;
+		goto error;
+	}
+	DRV_LOG(INFO, "ROCE is disabled by sysfs successfully.");
+	return 0;
+error:
+	DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret);
+	return ret;
+}
+
+/*
+ * Disable RoCE for a device, trying Netlink first and sysfs second.
+ *
+ * @param dev
+ *   Device whose PCI address string is resolved and used for the request.
+ *
+ * @return
+ *   0 when either method succeeds, -rte_errno otherwise.
+ */
+static int
+mlx5_roce_disable(struct rte_device *dev)
+{
+	char bdf[PCI_PRI_STR_SIZE] = { 0 };
+
+	if (mlx5_dev_to_pci_str(dev, bdf, sizeof(bdf)) < 0)
+		return -rte_errno;
+	/* Preferred path: Netlink. */
+	if (mlx5_nl_roce_disable(bdf) == 0)
+		return 0;
+	/* Fallback path: sysfs, attempted only when Netlink failed. */
+	if (mlx5_sys_roce_disable(bdf) == 0)
+		return 0;
+	return -rte_errno;
+}
+
+/*
+ * Disable RoCE for the device and then look up its IB device.
+ *
+ * The lookup is repeated up to MLX5_VDPA_MAX_RETRIES times, sleeping
+ * MLX5_VDPA_USEC microseconds after each miss.
+ *
+ * @param dev
+ *   Device to disable RoCE on and to look up.
+ *
+ * @return
+ *   The IB device on success. NULL when RoCE could not be disabled, or
+ *   when every lookup attempt failed (rte_errno is then set to EAGAIN).
+ */
+static struct ibv_device *
+mlx5_vdpa_get_ibv_dev(struct rte_device *dev)
+{
+	int attempts_left = MLX5_VDPA_MAX_RETRIES;
+
+	if (mlx5_roce_disable(dev) != 0) {
+		DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
+			dev->name);
+		return NULL;
+	}
+	/* Poll until the IB device shows up again after the reload. */
+	while (attempts_left-- > 0) {
+		struct ibv_device *found = mlx5_os_get_ibv_dev(dev);
+
+		if (found != NULL)
+			return found;
+		usleep(MLX5_VDPA_USEC);
+	}
+	DRV_LOG(ERR,
+		"Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.",
+		dev->name, MLX5_VDPA_MAX_RETRIES);
+	rte_errno = EAGAIN;
+	return NULL;
+}
+
 static int
 mlx5_config_doorbell_mapping_env(int dbnc)
 {
@@ -471,7 +592,10 @@  mlx5_os_devx_open_device(struct mlx5_dev_ctx *dev_ctx, struct rte_device *dev,
 	struct ibv_context *ctx = NULL;
 	int dbmap_env;
 
-	ibv = mlx5_os_get_ibv_dev(dev);
+	if (classes & MLX5_CLASS_VDPA)
+		ibv = mlx5_vdpa_get_ibv_dev(dev);
+	else
+		ibv = mlx5_os_get_ibv_dev(dev);
 	if (!ibv)
 		return -rte_errno;
 	DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name);