[v3,1/8] common/mlx5: add netlink API to get RDMA port state

Message ID 20211019103501.2216840-2-xuemingl@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Raslan Darawsheh
Headers
Series net/mlx5: support more than 255 representors |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Xueming Li Oct. 19, 2021, 10:34 a.m. UTC
  Introduce netlink API to get RDMA port state.

Port state is retrieved based on RDMA device name and port index.

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/common/mlx5/linux/meson.build |   2 +
 drivers/common/mlx5/linux/mlx5_nl.c   | 136 +++++++++++++++++++-------
 drivers/common/mlx5/linux/mlx5_nl.h   |   2 +
 drivers/common/mlx5/version.map       |   1 +
 4 files changed, 106 insertions(+), 35 deletions(-)
  

Comments

Ferruh Yigit Oct. 21, 2021, 1:34 p.m. UTC | #1
On 10/19/2021 11:34 AM, Xueming Li wrote:
> Introduce netlink API to get rdma port state.
> 
> Port state is retrieved based on RDMA device name and port index.
> 
> Signed-off-by: Xueming Li<xuemingl@nvidia.com>
> Acked-by: Viacheslav Ovsiienko<viacheslavo@nvidia.com>

Is there a kernel driver version dependency for the netlink message used here?
  

Patch

diff --git a/drivers/common/mlx5/linux/meson.build b/drivers/common/mlx5/linux/meson.build
index cbea58f557d..2dcd27b7786 100644
--- a/drivers/common/mlx5/linux/meson.build
+++ b/drivers/common/mlx5/linux/meson.build
@@ -175,6 +175,8 @@  has_sym_args = [
             'RDMA_NLDEV_ATTR_DEV_NAME' ],
         [ 'HAVE_RDMA_NLDEV_ATTR_PORT_INDEX', 'rdma/rdma_netlink.h',
             'RDMA_NLDEV_ATTR_PORT_INDEX' ],
+        [ 'HAVE_RDMA_NLDEV_ATTR_PORT_STATE', 'rdma/rdma_netlink.h',
+            'RDMA_NLDEV_ATTR_PORT_STATE' ],
         [ 'HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX', 'rdma/rdma_netlink.h',
             'RDMA_NLDEV_ATTR_NDEV_INDEX' ],
         [ 'HAVE_MLX5_DR_FLOW_DUMP', 'infiniband/mlx5dv.h',
diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c
index 9120a697fd5..4b762850941 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.c
+++ b/drivers/common/mlx5/linux/mlx5_nl.c
@@ -78,6 +78,9 @@ 
 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX
 #define RDMA_NLDEV_ATTR_PORT_INDEX 3
 #endif
+#ifndef HAVE_RDMA_NLDEV_ATTR_PORT_STATE
+#define RDMA_NLDEV_ATTR_PORT_STATE 12
+#endif
 #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX
 #define RDMA_NLDEV_ATTR_NDEV_INDEX 50
 #endif
@@ -160,14 +163,16 @@  struct mlx5_nl_mac_addr {
 #define MLX5_NL_CMD_GET_IB_INDEX (1 << 1)
 #define MLX5_NL_CMD_GET_NET_INDEX (1 << 2)
 #define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3)
+#define MLX5_NL_CMD_GET_PORT_STATE (1 << 4)
 
 /** Data structure used by mlx5_nl_cmdget_cb(). */
-struct mlx5_nl_ifindex_data {
+struct mlx5_nl_port_info {
 	const char *name; /**< IB device name (in). */
 	uint32_t flags; /**< found attribute flags (out). */
 	uint32_t ibindex; /**< IB device index (out). */
 	uint32_t ifindex; /**< Network interface index (out). */
 	uint32_t portnum; /**< IB device max port number (out). */
+	uint16_t state; /**< IB device port state (out). */
 };
 
 uint32_t atomic_sn;
@@ -966,8 +971,8 @@  mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable)
 static int
 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
 {
-	struct mlx5_nl_ifindex_data *data = arg;
-	struct mlx5_nl_ifindex_data local = {
+	struct mlx5_nl_port_info *data = arg;
+	struct mlx5_nl_port_info local = {
 		.flags = 0,
 	};
 	size_t off = NLMSG_HDRLEN;
@@ -1000,6 +1005,10 @@  mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
 			local.portnum = *(uint32_t *)payload;
 			local.flags |= MLX5_NL_CMD_GET_PORT_INDEX;
 			break;
+		case RDMA_NLDEV_ATTR_PORT_STATE:
+			local.state = *(uint8_t *)payload;
+			local.flags |= MLX5_NL_CMD_GET_PORT_STATE;
+			break;
 		default:
 			break;
 		}
@@ -1016,6 +1025,7 @@  mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
 		data->ibindex = local.ibindex;
 		data->ifindex = local.ifindex;
 		data->portnum = local.portnum;
+		data->state = local.state;
 	}
 	return 0;
 error:
@@ -1024,7 +1034,7 @@  mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
 }
 
 /**
- * Get index of network interface associated with some IB device.
+ * Get port info of network interface associated with some IB device.
  *
  * This is the only somewhat safe method to avoid resorting to heuristics
  * when faced with port representors. Unfortunately it requires at least
@@ -1032,27 +1042,20 @@  mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
  *
  * @param nl
  *   Netlink socket of the RDMA kind (NETLINK_RDMA).
- * @param[in] name
- *   IB device name.
  * @param[in] pindex
  *   IB device port index, starting from 1
+ * @param[out] data
+ *   Pointer to port info.
  * @return
- *   A valid (nonzero) interface index on success, 0 otherwise and rte_errno
- *   is set.
+ *   1 on success, a negative value on error and rte_errno is set.
  */
-unsigned int
-mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
+static int
+mlx5_nl_port_info(int nl, uint32_t pindex, struct mlx5_nl_port_info *data)
 {
-	struct mlx5_nl_ifindex_data data = {
-		.name = name,
-		.flags = 0,
-		.ibindex = 0, /* Determined during first pass. */
-		.ifindex = 0, /* Determined during second pass. */
-	};
 	union {
 		struct nlmsghdr nh;
 		uint8_t buf[NLMSG_HDRLEN +
-			    NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) +
+			    NLA_HDRLEN + NLA_ALIGN(sizeof(data->ibindex)) +
 			    NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))];
 	} req = {
 		.nh = {
@@ -1068,24 +1071,24 @@  mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
 
 	ret = mlx5_nl_send(nl, &req.nh, sn);
 	if (ret < 0)
-		return 0;
-	ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
+		return ret;
+	ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, data);
 	if (ret < 0)
-		return 0;
-	if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
-	    !(data.flags & MLX5_NL_CMD_GET_IB_INDEX))
+		return ret;
+	if (!(data->flags & MLX5_NL_CMD_GET_IB_NAME) ||
+	    !(data->flags & MLX5_NL_CMD_GET_IB_INDEX))
 		goto error;
-	data.flags = 0;
+	data->flags = 0;
 	sn = MLX5_NL_SN_GENERATE;
 	req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
 					     RDMA_NLDEV_CMD_PORT_GET);
 	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
 	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN);
 	na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN);
-	na->nla_len = NLA_HDRLEN + sizeof(data.ibindex);
+	na->nla_len = NLA_HDRLEN + sizeof(data->ibindex);
 	na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX;
 	memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
-	       &data.ibindex, sizeof(data.ibindex));
+	       &data->ibindex, sizeof(data->ibindex));
 	na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len));
 	na->nla_len = NLA_HDRLEN + sizeof(pindex);
 	na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX;
@@ -1093,19 +1096,82 @@  mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
 	       &pindex, sizeof(pindex));
 	ret = mlx5_nl_send(nl, &req.nh, sn);
 	if (ret < 0)
-		return 0;
-	ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
+		return ret;
+	ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, data);
 	if (ret < 0)
-		return 0;
-	if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
-	    !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
-	    !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) ||
-	    !data.ifindex)
+		return ret;
+	if (!(data->flags & MLX5_NL_CMD_GET_IB_NAME) ||
+	    !(data->flags & MLX5_NL_CMD_GET_IB_INDEX) ||
+	    !(data->flags & MLX5_NL_CMD_GET_NET_INDEX) ||
+	    !data->ifindex)
 		goto error;
-	return data.ifindex;
+	return 1;
 error:
 	rte_errno = ENODEV;
-	return 0;
+	return -rte_errno;
+}
+
+/**
+ * Get index of network interface associated with some IB device.
+ *
+ * This is the only somewhat safe method to avoid resorting to heuristics
+ * when faced with port representors. Unfortunately it requires at least
+ * Linux 4.17.
+ *
+ * @param nl
+ *   Netlink socket of the RDMA kind (NETLINK_RDMA).
+ * @param[in] name
+ *   IB device name.
+ * @param[in] pindex
+ *   IB device port index, starting from 1
+ * @return
+ *   A valid (nonzero) interface index on success, 0 otherwise and rte_errno
+ *   is set.
+ */
+unsigned int
+mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
+{
+	struct mlx5_nl_port_info data = {
+			.ifindex = 0,
+			.name = name,
+	};
+
+	if (mlx5_nl_port_info(nl, pindex, &data) < 0)
+		return 0;
+	return data.ifindex;
+}
+
+/**
+ * Get IB device port state.
+ *
+ * This is the only somewhat safe method to get info for port number >= 255.
+ * Unfortunately it requires at least Linux 4.17.
+ *
+ * @param nl
+ *   Netlink socket of the RDMA kind (NETLINK_RDMA).
+ * @param[in] name
+ *   IB device name.
+ * @param[in] pindex
+ *   IB device port index, starting from 1
+ * @return
+ *   Port state (ibv_port_state) on success, negative on error
+ *   and rte_errno is set.
+ */
+int
+mlx5_nl_port_state(int nl, const char *name, uint32_t pindex)
+{
+	struct mlx5_nl_port_info data = {
+			.state = 0,
+			.name = name,
+	};
+
+	if (mlx5_nl_port_info(nl, pindex, &data) < 0)
+		return -rte_errno;
+	if ((data.flags & MLX5_NL_CMD_GET_PORT_STATE) == 0) {
+		rte_errno = ENOTSUP;
+		return -rte_errno;
+	}
+	return (int)data.state;
 }
 
 /**
@@ -1123,7 +1189,7 @@  mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
 unsigned int
 mlx5_nl_portnum(int nl, const char *name)
 {
-	struct mlx5_nl_ifindex_data data = {
+	struct mlx5_nl_port_info data = {
 		.flags = 0,
 		.name = name,
 		.ifindex = 0,
diff --git a/drivers/common/mlx5/linux/mlx5_nl.h b/drivers/common/mlx5/linux/mlx5_nl.h
index 15129ffdc88..396f8f3f20a 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.h
+++ b/drivers/common/mlx5/linux/mlx5_nl.h
@@ -54,6 +54,8 @@  unsigned int mlx5_nl_portnum(int nl, const char *name);
 __rte_internal
 unsigned int mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex);
 __rte_internal
+int mlx5_nl_port_state(int nl, const char *name, uint32_t pindex);
+__rte_internal
 int mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
 			       struct rte_ether_addr *mac, int vf_index);
 __rte_internal
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index d3c5040aac8..2a2c7e51ba5 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -134,6 +134,7 @@  INTERNAL {
 	mlx5_nl_mac_addr_flush; # WINDOWS_NO_EXPORT
 	mlx5_nl_mac_addr_remove; # WINDOWS_NO_EXPORT
 	mlx5_nl_mac_addr_sync; # WINDOWS_NO_EXPORT
+	mlx5_nl_port_state; # WINDOWS_NO_EXPORT
 	mlx5_nl_portnum; # WINDOWS_NO_EXPORT
 	mlx5_nl_promisc; # WINDOWS_NO_EXPORT
 	mlx5_nl_switch_info; # WINDOWS_NO_EXPORT