@@ -287,8 +287,7 @@
close(priv->nl_socket_route);
if (priv->nl_socket_rdma >= 0)
close(priv->nl_socket_rdma);
- if (priv->mnl_socket)
- mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+ mlx5_flow_tcf_socket_close(&priv->tcf_socket);
ret = mlx5_hrxq_ibv_verify(dev);
if (ret)
DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1138,8 +1137,9 @@
claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
if (vf && config.vf_nl_en)
mlx5_nl_mac_addr_sync(eth_dev);
- priv->mnl_socket = mlx5_flow_tcf_socket_create();
- if (!priv->mnl_socket) {
+ /* Initialize Netlink socket for e-switch control */
+ err = mlx5_flow_tcf_socket_open(&priv->tcf_socket);
+ if (err) {
err = -rte_errno;
DRV_LOG(WARNING,
"flow rules relying on switch offloads will not be"
@@ -1154,16 +1154,15 @@
error.message =
"cannot retrieve network interface index";
} else {
- err = mlx5_flow_tcf_init(priv->mnl_socket, ifindex,
- &error);
+ err = mlx5_flow_tcf_ifindex_init(&priv->tcf_socket,
+ ifindex, &error);
}
if (err) {
DRV_LOG(WARNING,
"flow rules relying on switch offloads will"
" not be supported: %s: %s",
error.message, strerror(rte_errno));
- mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
- priv->mnl_socket = NULL;
+ mlx5_flow_tcf_socket_close(&priv->tcf_socket);
}
}
TAILQ_INIT(&priv->flows);
@@ -1218,8 +1217,7 @@
close(priv->nl_socket_route);
if (priv->nl_socket_rdma >= 0)
close(priv->nl_socket_rdma);
- if (priv->mnl_socket)
- mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+ mlx5_flow_tcf_socket_close(&priv->tcf_socket);
if (own_domain_id)
claim_zero(rte_eth_switch_domain_free(priv->domain_id));
rte_free(priv);
@@ -160,6 +160,11 @@ struct mlx5_drop {
struct mnl_socket;
+struct mlx5_tcf_socket {
+ uint32_t seq; /* Message sequence number. */
+ struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
+};
+
struct priv {
LIST_ENTRY(priv) mem_event_cb; /* Called by memory event callback. */
struct rte_eth_dev_data *dev_data; /* Pointer to device data. */
@@ -220,12 +225,12 @@ struct priv {
int nl_socket_rdma; /* Netlink socket (NETLINK_RDMA). */
int nl_socket_route; /* Netlink socket (NETLINK_ROUTE). */
uint32_t nl_sn; /* Netlink message sequence number. */
+ struct mlx5_tcf_socket tcf_socket; /* Libmnl socket for tcf. */
#ifndef RTE_ARCH_64
rte_spinlock_t uar_lock_cq; /* CQs share a common distinct UAR */
rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
/* UAR same-page access control required in 32bit implementations. */
#endif
- struct mnl_socket *mnl_socket; /* Libmnl socket. */
};
#define PORT_ID(priv) ((priv)->dev_data->port_id)
@@ -348,9 +348,10 @@ int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
/* mlx5_flow_tcf.c */
-int mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
- struct rte_flow_error *error);
-struct mnl_socket *mlx5_flow_tcf_socket_create(void);
-void mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl);
+int mlx5_flow_tcf_ifindex_init(struct mlx5_tcf_socket *tcf,
+ unsigned int ifindex,
+ struct rte_flow_error *error);
+int mlx5_flow_tcf_socket_open(struct mlx5_tcf_socket *tcf);
+void mlx5_flow_tcf_socket_close(struct mlx5_tcf_socket *tcf);
#endif /* RTE_PMD_MLX5_FLOW_H_ */
@@ -1552,8 +1552,8 @@ struct flow_tcf_ptoi {
/**
* Send Netlink message with acknowledgment.
*
- * @param nl
- * Libmnl socket to use.
+ * @param tcf
+ * Libmnl socket context to use.
* @param nlh
* Message to send. This function always raises the NLM_F_ACK flag before
* sending.
@@ -1562,26 +1562,108 @@ struct flow_tcf_ptoi {
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
+flow_tcf_nl_ack(struct mlx5_tcf_socket *tcf, struct nlmsghdr *nlh)
{
alignas(struct nlmsghdr)
- uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
- nlh->nlmsg_len - sizeof(*nlh)];
- uint32_t seq = random();
- int ret;
-
+ uint8_t ans[MNL_SOCKET_BUFFER_SIZE];
+ unsigned int portid = mnl_socket_get_portid(tcf->nl);
+ uint32_t seq = tcf->seq++;
+ struct mnl_socket *nl = tcf->nl;
+ int err, ret;
+
+ assert(nl);
+ if (!seq)
+ seq = tcf->seq++;
nlh->nlmsg_flags |= NLM_F_ACK;
nlh->nlmsg_seq = seq;
ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
- if (ret != -1)
- ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
- if (ret != -1)
- ret = mnl_cb_run
- (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
+	err = (ret <= 0) ? errno : 0;
+ nlh = (struct nlmsghdr *)ans;
+ /*
+ * The following loop postpones non-fatal errors until multipart
+ * messages are complete.
+ */
if (ret > 0)
+ while (true) {
+ ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
+ if (ret < 0) {
+ err = errno;
+ if (err != ENOSPC)
+ break;
+ }
+ if (!err) {
+ ret = mnl_cb_run(nlh, ret, seq, portid,
+ NULL, NULL);
+ if (ret < 0) {
+ err = errno;
+ break;
+ }
+ }
+ /* Will receive till end of multipart message */
+ if (!(nlh->nlmsg_flags & NLM_F_MULTI) ||
+ nlh->nlmsg_type == NLMSG_DONE)
+ break;
+ }
+ if (!err)
return 0;
- rte_errno = errno;
- return -rte_errno;
+ rte_errno = err;
+ return -err;
+}
+
+/**
+ * Initialize ingress qdisc of a given network interface.
+ *
+ * @param tcf
+ * Libmnl socket context object.
+ * @param ifindex
+ * Index of network interface to initialize.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_tcf_ifindex_init(struct mlx5_tcf_socket *tcf, unsigned int ifindex,
+ struct rte_flow_error *error)
+{
+ struct nlmsghdr *nlh;
+ struct tcmsg *tcm;
+ alignas(struct nlmsghdr)
+ uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
+
+ /* Destroy existing ingress qdisc and everything attached to it. */
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = RTM_DELQDISC;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+ tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+ tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm_ifindex = ifindex;
+ tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+ tcm->tcm_parent = TC_H_INGRESS;
+ /* Ignore errors when qdisc is already absent. */
+ if (flow_tcf_nl_ack(tcf, nlh) &&
+ rte_errno != EINVAL && rte_errno != ENOENT)
+ return rte_flow_error_set(error, rte_errno,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "netlink: failed to remove ingress"
+ " qdisc");
+ /* Create fresh ingress qdisc. */
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = RTM_NEWQDISC;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+ tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm_ifindex = ifindex;
+ tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+ tcm->tcm_parent = TC_H_INGRESS;
+ mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
+ if (flow_tcf_nl_ack(tcf, nlh))
+ return rte_flow_error_set(error, rte_errno,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "netlink: failed to create ingress"
+ " qdisc");
+ return 0;
}
/**
@@ -1602,18 +1684,25 @@ struct flow_tcf_ptoi {
struct rte_flow_error *error)
{
struct priv *priv = dev->data->dev_private;
- struct mnl_socket *nl = priv->mnl_socket;
+ struct mlx5_tcf_socket *tcf = &priv->tcf_socket;
struct mlx5_flow *dev_flow;
struct nlmsghdr *nlh;
+ int ret;
dev_flow = LIST_FIRST(&flow->dev_flows);
/* E-Switch flow can't be expanded. */
assert(!LIST_NEXT(dev_flow, next));
+ if (dev_flow->tcf.applied)
+ return 0;
nlh = dev_flow->tcf.nlh;
nlh->nlmsg_type = RTM_NEWTFILTER;
nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
- if (!flow_tcf_nl_ack(nl, nlh))
+ ret = flow_tcf_nl_ack(tcf, nlh);
+ if (!ret) {
+ dev_flow->tcf.applied = 1;
return 0;
+ }
+ DRV_LOG(WARNING, "Failed to create TC rule (%d)", rte_errno);
return rte_flow_error_set(error, rte_errno,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"netlink: failed to create TC flow rule");
@@ -1631,7 +1720,7 @@ struct flow_tcf_ptoi {
flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
{
struct priv *priv = dev->data->dev_private;
- struct mnl_socket *nl = priv->mnl_socket;
+ struct mlx5_tcf_socket *tcf = &priv->tcf_socket;
struct mlx5_flow *dev_flow;
struct nlmsghdr *nlh;
@@ -1645,7 +1734,8 @@ struct flow_tcf_ptoi {
nlh = dev_flow->tcf.nlh;
nlh->nlmsg_type = RTM_DELTFILTER;
nlh->nlmsg_flags = NLM_F_REQUEST;
- flow_tcf_nl_ack(nl, nlh);
+ flow_tcf_nl_ack(tcf, nlh);
+ dev_flow->tcf.applied = 0;
}
/**
@@ -1683,93 +1773,45 @@ struct flow_tcf_ptoi {
};
/**
- * Initialize ingress qdisc of a given network interface.
- *
- * @param nl
- * Libmnl socket of the @p NETLINK_ROUTE kind.
- * @param ifindex
- * Index of network interface to initialize.
- * @param[out] error
- * Perform verbose error reporting if not NULL.
+ * Creates and configures a libmnl socket for Netlink flow rules.
*
+ * @param tcf
+ * tcf socket object to be initialized by function.
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
-mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
- struct rte_flow_error *error)
-{
- struct nlmsghdr *nlh;
- struct tcmsg *tcm;
- alignas(struct nlmsghdr)
- uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
-
- /* Destroy existing ingress qdisc and everything attached to it. */
- nlh = mnl_nlmsg_put_header(buf);
- nlh->nlmsg_type = RTM_DELQDISC;
- nlh->nlmsg_flags = NLM_F_REQUEST;
- tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
- tcm->tcm_family = AF_UNSPEC;
- tcm->tcm_ifindex = ifindex;
- tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
- tcm->tcm_parent = TC_H_INGRESS;
- /* Ignore errors when qdisc is already absent. */
- if (flow_tcf_nl_ack(nl, nlh) &&
- rte_errno != EINVAL && rte_errno != ENOENT)
- return rte_flow_error_set(error, rte_errno,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
- "netlink: failed to remove ingress"
- " qdisc");
- /* Create fresh ingress qdisc. */
- nlh = mnl_nlmsg_put_header(buf);
- nlh->nlmsg_type = RTM_NEWQDISC;
- nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
- tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
- tcm->tcm_family = AF_UNSPEC;
- tcm->tcm_ifindex = ifindex;
- tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
- tcm->tcm_parent = TC_H_INGRESS;
- mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
- if (flow_tcf_nl_ack(nl, nlh))
- return rte_flow_error_set(error, rte_errno,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
- "netlink: failed to create ingress"
- " qdisc");
- return 0;
-}
-
-/**
- * Create and configure a libmnl socket for Netlink flow rules.
- *
- * @return
- * A valid libmnl socket object pointer on success, NULL otherwise and
- * rte_errno is set.
- */
-struct mnl_socket *
-mlx5_flow_tcf_socket_create(void)
+mlx5_flow_tcf_socket_open(struct mlx5_tcf_socket *tcf)
{
struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
+ tcf->nl = NULL;
if (nl) {
mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
sizeof(int));
- if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
- return nl;
+ if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID)) {
+ tcf->nl = nl;
+ tcf->seq = random();
+ return 0;
+ }
}
rte_errno = errno;
if (nl)
mnl_socket_close(nl);
- return NULL;
+ return -rte_errno;
}
/**
- * Destroy a libmnl socket.
+ * Destroys tcf object (closes MNL socket).
*
- * @param nl
- * Libmnl socket of the @p NETLINK_ROUTE kind.
+ * @param tcf
+ * tcf socket object to be destroyed by function.
*/
void
-mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
+mlx5_flow_tcf_socket_close(struct mlx5_tcf_socket *tcf)
{
- mnl_socket_close(nl);
+ if (tcf->nl) {
+ mnl_socket_close(tcf->nl);
+ tcf->nl = NULL;
+ }
}