@@ -1443,6 +1443,12 @@ for an additional list of options shared with other mlx5 drivers.
By default, the PMD will set this value to 0.
+ .. note::
+
+ There is a race condition when probing a port if ``probe_opt_en`` is set to 1.
+ Port probing may fail with a wrong ifindex in the cache while the interrupt
+ thread is updating the cache. Please try again if port probing fails.
+
- ``lacp_by_user`` parameter [int]
A nonzero value enables the control of LACP traffic by the user application.
@@ -101,6 +101,7 @@
#ifndef HAVE_RDMA_NL_GROUP_NOTIFY
#define RDMA_NL_GROUP_NOTIFY 4
#endif
+#define RDMA_NL_GROUP_NOTIFICATION (1 << (RDMA_NL_GROUP_NOTIFY - 1)) /* Group mask built from RDMA_NL_GROUP_NOTIFY, passed to mlx5_nl_init(). */
/* These are normally found in linux/if_link.h. */
#ifndef HAVE_IFLA_NUM_VF
@@ -176,22 +177,6 @@ struct mlx5_nl_mac_addr {
int mac_n; /**< Number of addresses in the array. */
};
-#define MLX5_NL_CMD_GET_IB_NAME (1 << 0)
-#define MLX5_NL_CMD_GET_IB_INDEX (1 << 1)
-#define MLX5_NL_CMD_GET_NET_INDEX (1 << 2)
-#define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3)
-#define MLX5_NL_CMD_GET_PORT_STATE (1 << 4)
-
-/** Data structure used by mlx5_nl_cmdget_cb(). */
-struct mlx5_nl_port_info {
- const char *name; /**< IB device name (in). */
- uint32_t flags; /**< found attribute flags (out). */
- uint32_t ibindex; /**< IB device index (out). */
- uint32_t ifindex; /**< Network interface index (out). */
- uint32_t portnum; /**< IB device max port number (out). */
- uint16_t state; /**< IB device port state (out). */
-};
-
RTE_ATOMIC(uint32_t) atomic_sn;
/* Generate Netlink sequence number. */
@@ -2110,3 +2095,60 @@ mlx5_nl_devlink_esw_multiport_get(int nlsk_fd, int family_id, const char *pci_ad
*enable ? "en" : "dis", pci_addr);
return ret;
}
+
+/**
+ * Open a Netlink socket subscribed to RDMA notification events.
+ *
+ * @return
+ *   The value returned by mlx5_nl_init() for the NETLINK_RDMA protocol and
+ *   the RDMA_NL_GROUP_NOTIFICATION group mask.
+ */
+int
+mlx5_nl_rdma_monitor_init(void)
+{
+	const int groups = RDMA_NL_GROUP_NOTIFICATION;
+
+	return mlx5_nl_init(NETLINK_RDMA, groups);
+}
+
+/**
+ * Extract RDMA monitor event attributes from a Netlink message.
+ *
+ * Only messages of type RDMA_NLDEV_CMD_MONITOR are parsed. Each recognized
+ * attribute is copied into @p data and the matching MLX5_NL_CMD_GET_* bit is
+ * raised in data->flags. On a foreign message type or a malformed attribute,
+ * rte_errno is set to EINVAL and parsing stops; anything stored before the
+ * failure is left in @p data.
+ *
+ * @param hdr
+ *   Pointer to the Netlink message header.
+ * @param data
+ *   Port information structure to update.
+ */
+void
+mlx5_nl_rdma_monitor_info_get(struct nlmsghdr *hdr, struct mlx5_nl_port_info *data)
+{
+	size_t off = NLMSG_HDRLEN;
+	uint8_t event_type = 0;
+
+	if (hdr->nlmsg_type != RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR))
+		goto error;
+
+	while (off < hdr->nlmsg_len) {
+		struct nlattr *na = (void *)((uintptr_t)hdr + off);
+		void *payload = (void *)((uintptr_t)na + NLA_HDRLEN);
+		size_t left = hdr->nlmsg_len - off;
+		size_t payload_len;
+
+		/* The attribute header itself must fit in the remaining bytes. */
+		if (left < (size_t)NLA_HDRLEN)
+			goto error;
+		/*
+		 * nla_len counts the attribute header as well: anything shorter
+		 * is malformed, and a zero length would never advance the offset.
+		 */
+		if (na->nla_len < NLA_HDRLEN || na->nla_len > left)
+			goto error;
+		payload_len = na->nla_len - NLA_HDRLEN;
+		switch (na->nla_type) {
+		case RDMA_NLDEV_ATTR_EVENT_TYPE:
+			if (payload_len < sizeof(uint8_t))
+				goto error;
+			event_type = *(uint8_t *)payload;
+			/* Other event types are ignored: no flag is raised for them. */
+			if (event_type == RDMA_NETDEV_ATTACH_EVENT) {
+				data->flags |= MLX5_NL_CMD_GET_EVENT_TYPE;
+				data->event_type = MLX5_NL_RDMA_NETDEV_ATTACH_EVENT;
+			} else if (event_type == RDMA_NETDEV_DETACH_EVENT) {
+				data->flags |= MLX5_NL_CMD_GET_EVENT_TYPE;
+				data->event_type = MLX5_NL_RDMA_NETDEV_DETACH_EVENT;
+			}
+			break;
+		case RDMA_NLDEV_ATTR_DEV_INDEX:
+			if (payload_len < sizeof(uint32_t))
+				goto error;
+			data->ibindex = *(uint32_t *)payload;
+			data->flags |= MLX5_NL_CMD_GET_IB_INDEX;
+			break;
+		case RDMA_NLDEV_ATTR_PORT_INDEX:
+			if (payload_len < sizeof(uint32_t))
+				goto error;
+			data->portnum = *(uint32_t *)payload;
+			data->flags |= MLX5_NL_CMD_GET_PORT_INDEX;
+			break;
+		case RDMA_NLDEV_ATTR_NDEV_INDEX:
+			if (payload_len < sizeof(uint32_t))
+				goto error;
+			data->ifindex = *(uint32_t *)payload;
+			data->flags |= MLX5_NL_CMD_GET_NET_INDEX;
+			break;
+		default:
+			DRV_LOG(DEBUG, "Unknown attribute[%d] found", na->nla_type);
+			break;
+		}
+		off += NLA_ALIGN(na->nla_len);
+	}
+
+	return;
+
+error:
+	rte_errno = EINVAL;
+}
@@ -32,6 +32,27 @@ struct mlx5_nl_vlan_vmwa_context {
struct mlx5_nl_vlan_dev vlan_dev[4096];
};
+#define MLX5_NL_CMD_GET_IB_NAME (1 << 0)
+#define MLX5_NL_CMD_GET_IB_INDEX (1 << 1)
+#define MLX5_NL_CMD_GET_NET_INDEX (1 << 2)
+#define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3)
+#define MLX5_NL_CMD_GET_PORT_STATE (1 << 4)
+#define MLX5_NL_CMD_GET_EVENT_TYPE (1 << 5)
+
+/**
+ * Data structure used by mlx5_nl_cmdget_cb() and filled by
+ * mlx5_nl_rdma_monitor_info_get(). The MLX5_NL_CMD_GET_* bits set in
+ * flags tell which of the output fields were found in the message.
+ */
+struct mlx5_nl_port_info {
+ const char *name; /**< IB device name (in). */
+ uint32_t flags; /**< found attribute flags, MLX5_NL_CMD_GET_* (out). */
+ uint32_t ibindex; /**< IB device index (out). */
+ uint32_t ifindex; /**< Network interface index (out). */
+ uint32_t portnum; /**< IB device max port number, or the port index of an RDMA monitor event (out). */
+ uint16_t state; /**< IB device port state (out). */
+ uint8_t event_type; /**< RDMA monitor event type, MLX5_NL_RDMA_NETDEV_*_EVENT (out). */
+};
+
+#define MLX5_NL_RDMA_NETDEV_ATTACH_EVENT (1)
+#define MLX5_NL_RDMA_NETDEV_DETACH_EVENT (2)
+
__rte_internal
int mlx5_nl_init(int protocol, int groups);
__rte_internal
@@ -89,4 +110,11 @@ __rte_internal
int mlx5_nl_devlink_esw_multiport_get(int nlsk_fd, int family_id,
const char *pci_addr, int *enable);
+__rte_internal
+int mlx5_nl_rdma_monitor_init(void);
+__rte_internal
+void mlx5_nl_rdma_monitor_info_get(struct nlmsghdr *hdr, struct mlx5_nl_port_info *data);
+__rte_internal
+int mlx5_nl_rdma_monitor_cap_get(int nl, uint8_t *cap); /* NOTE(review): no definition or export-map entry for this symbol is visible in this change - confirm it is provided. */
+
#endif /* RTE_PMD_MLX5_NL_H_ */
@@ -146,6 +146,8 @@ INTERNAL {
mlx5_nl_vf_mac_addr_modify; # WINDOWS_NO_EXPORT
mlx5_nl_vlan_vmwa_create; # WINDOWS_NO_EXPORT
mlx5_nl_vlan_vmwa_delete; # WINDOWS_NO_EXPORT
+ mlx5_nl_rdma_monitor_init; # WINDOWS_NO_EXPORT
+ mlx5_nl_rdma_monitor_info_get; # WINDOWS_NO_EXPORT
mlx5_os_umem_dereg;
mlx5_os_umem_reg;
@@ -894,6 +894,85 @@ mlx5_dev_interrupt_handler_devx(void *cb_arg)
#endif /* HAVE_IBV_DEVX_ASYNC */
}
+/**
+ * Apply one parsed RDMA monitor event to the cached per-port information.
+ *
+ * Events lacking the event type, port index or IB device index, events for
+ * another IB device, and attach events without a network interface index
+ * are ignored. An out-of-range port index, or an attach event that
+ * contradicts the cached ifindex, invalidates the cache of every port.
+ *
+ * @param dev_info
+ *   Device information holding the per-port ifindex cache.
+ * @param data
+ *   Attributes extracted from a single Netlink message.
+ */
+static void
+mlx5_dev_ib_event_apply(struct mlx5_dev_info *dev_info, const struct mlx5_nl_port_info *data)
+{
+	uint32_t i;
+
+	if (!(data->flags & MLX5_NL_CMD_GET_EVENT_TYPE) ||
+	    !(data->flags & MLX5_NL_CMD_GET_PORT_INDEX) ||
+	    !(data->flags & MLX5_NL_CMD_GET_IB_INDEX))
+		return;
+
+	if (data->ibindex != dev_info->ibindex)
+		return;
+
+	if (data->event_type != MLX5_NL_RDMA_NETDEV_ATTACH_EVENT &&
+	    data->event_type != MLX5_NL_RDMA_NETDEV_DETACH_EVENT)
+		return;
+
+	/* An attach event is useless without the interface index to cache. */
+	if (data->event_type == MLX5_NL_RDMA_NETDEV_ATTACH_EVENT &&
+	    !(data->flags & MLX5_NL_CMD_GET_NET_INDEX))
+		return;
+
+	DRV_LOG(DEBUG, "Event info: type %d, ibindex %d, ifindex %d, portnum %d,",
+		data->event_type, data->ibindex, data->ifindex, data->portnum);
+
+	/* Changes found in number of SF/VF ports. All information is likely unreliable. */
+	if (data->portnum > dev_info->port_num) {
+		DRV_LOG(ERR, "Port[%d] exceeds maximum[%d]", data->portnum, dev_info->port_num);
+		goto flush_all;
+	}
+	if (data->event_type == MLX5_NL_RDMA_NETDEV_ATTACH_EVENT) {
+		if (!dev_info->port_info[data->portnum].ifindex) {
+			dev_info->port_info[data->portnum].ifindex = data->ifindex;
+			dev_info->port_info[data->portnum].valid = 1;
+		} else {
+			DRV_LOG(WARNING, "Duplicate RDMA event for port[%d] ifindex[%d]",
+				data->portnum, data->ifindex);
+			if (data->ifindex != dev_info->port_info[data->portnum].ifindex)
+				goto flush_all;
+		}
+	} else if (data->event_type == MLX5_NL_RDMA_NETDEV_DETACH_EVENT) {
+		memset(dev_info->port_info + data->portnum, 0, sizeof(struct mlx5_port_nl_info));
+	}
+	return;
+
+flush_all:
+	for (i = 1; i <= dev_info->port_num; i++) {
+		dev_info->port_info[i].ifindex = 0;
+		dev_info->port_info[i].valid = 0;
+	}
+}
+
+/**
+ * Netlink callback: parse one RDMA monitor message and apply it at once.
+ *
+ * Each message is parsed into its own mlx5_nl_port_info so that attributes
+ * of different events are never merged, and so that no event is dropped
+ * when several messages are handed to the callback during one read
+ * (the callback is expected to be invoked once per message by
+ * mlx5_nl_read_events() - confirm against its implementation).
+ *
+ * @param hdr
+ *   Pointer to the Netlink message header.
+ * @param cb_arg
+ *   Pointer to the shared device context (struct mlx5_dev_ctx_shared).
+ */
+static void
+mlx5_dev_interrupt_ib_cb(struct nlmsghdr *hdr, void *cb_arg)
+{
+	struct mlx5_dev_ctx_shared *sh = cb_arg;
+	struct mlx5_nl_port_info data = {
+		.flags = 0,
+		.name = "",
+		.ifindex = 0,
+		.ibindex = 0,
+		.portnum = 0,
+	};
+
+	mlx5_nl_rdma_monitor_info_get(hdr, &data);
+	mlx5_dev_ib_event_apply(&sh->cdev->dev_info, &data);
+}
+
+/**
+ * Interrupt handler for RDMA monitor Netlink events of an IB device.
+ *
+ * Reads the pending events from the socket attached to sh->intr_handle_ib
+ * and updates the per-port ifindex cache in the device information.
+ * Nothing is done for devices with at most one port or without a port
+ * information array.
+ *
+ * @param arg
+ *   Pointer to the shared device context (struct mlx5_dev_ctx_shared).
+ */
+void
+mlx5_dev_interrupt_handler_ib(void *arg)
+{
+	struct mlx5_dev_ctx_shared *sh = arg;
+	int nlsk_fd = rte_intr_fd_get(sh->intr_handle_ib);
+	struct mlx5_dev_info *dev_info;
+
+	dev_info = &sh->cdev->dev_info;
+	DRV_LOG(DEBUG, "IB device %s received RDMA monitor netlink event", dev_info->ibname);
+	if (dev_info->port_num <= 1 || dev_info->port_info == NULL)
+		return;
+
+	if (nlsk_fd < 0)
+		return;
+
+	if (mlx5_nl_read_events(nlsk_fd, mlx5_dev_interrupt_ib_cb, sh) < 0)
+		DRV_LOG(ERR, "Failed to process Netlink events: %s",
+			rte_strerror(rte_errno));
+}
+
/**
* DPDK callback to bring the link DOWN.
*
@@ -3025,6 +3025,21 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
DRV_LOG(ERR, "Failed to allocate intr_handle.");
return;
}
+ if (sh->cdev->config.probe_opt && sh->cdev->dev_info.port_num > 1) {
+ nlsk_fd = mlx5_nl_rdma_monitor_init();
+ if (nlsk_fd < 0) {
+ DRV_LOG(ERR, "Failed to create a socket for RDMA Netlink events: %s",
+ rte_strerror(rte_errno));
+ return;
+ }
+ sh->intr_handle_ib = mlx5_os_interrupt_handler_create
+ (RTE_INTR_INSTANCE_F_SHARED, true,
+ nlsk_fd, mlx5_dev_interrupt_handler_ib, sh);
+ if (sh->intr_handle_ib == NULL) {
+ DRV_LOG(ERR, "Fail to allocate intr_handle");
+ return;
+ }
+ }
nlsk_fd = mlx5_nl_init(NETLINK_ROUTE, RTMGRP_LINK);
if (nlsk_fd < 0) {
DRV_LOG(ERR, "Failed to create a socket for Netlink events: %s",
@@ -3086,6 +3101,11 @@ mlx5_os_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh)
if (sh->devx_comp)
mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp);
#endif
+ fd = rte_intr_fd_get(sh->intr_handle_ib);
+ mlx5_os_interrupt_handler_destroy(sh->intr_handle_ib,
+ mlx5_dev_interrupt_handler_ib, sh);
+ if (fd >= 0)
+ close(fd);
}
/**
@@ -1574,6 +1574,7 @@ struct mlx5_dev_ctx_shared {
struct rte_intr_handle *intr_handle; /* Interrupt handler for device. */
struct rte_intr_handle *intr_handle_devx; /* DEVX interrupt handler. */
struct rte_intr_handle *intr_handle_nl; /* Netlink interrupt handler. */
+ struct rte_intr_handle *intr_handle_ib; /* Interrupt handler for IB device. */
void *devx_comp; /* DEVX async comp obj. */
struct mlx5_devx_obj *tis[16]; /* TIS object. */
struct mlx5_devx_obj *td; /* Transport domain. */
@@ -2274,6 +2275,7 @@ int mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev,
void mlx5_dev_interrupt_handler(void *arg);
void mlx5_dev_interrupt_handler_devx(void *arg);
void mlx5_dev_interrupt_handler_nl(void *arg);
+void mlx5_dev_interrupt_handler_ib(void *arg);
int mlx5_set_link_down(struct rte_eth_dev *dev);
int mlx5_set_link_up(struct rte_eth_dev *dev);
int mlx5_is_removed(struct rte_eth_dev *dev);