[v2,7/7] net/mlx5: e-switch VXLAN rule cleanup routines

Message ID 1539612815-47199-8-git-send-email-viacheslavo@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Headers
Series net/mlx5: e-switch VXLAN encap/decap hardware offload |

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

Slava Ovsiienko Oct. 15, 2018, 2:13 p.m. UTC
  The last part of the patchset contains the rule cleanup routines.
These are part of the outer interface initialization performed at
the moment of VXLAN VTEP attaching. These routines query
the list of attached VXLAN devices, the list of local IP
addresses with peer and link scope attributes and the list
of permanent neigh rules; then all of the abovementioned
items found on the specified outer device are flushed.

Suggested-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow_tcf.c | 505 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 499 insertions(+), 6 deletions(-)
  

Comments

Yongseok Koh Oct. 25, 2018, 12:36 a.m. UTC | #1
On Mon, Oct 15, 2018 at 02:13:35PM +0000, Viacheslav Ovsiienko wrote:
> The last part of patchset contains the rule cleanup routines.
> These ones is the part of outer interface initialization at
> the moment of VXLAN VTEP attaching. These routines query
> the list of attached VXLAN devices, the list of local IP
> addresses with peer and link scope attribute and the list
> of permanent neigh rules, then all found abovementioned
> items on the specified outer device are flushed.
> 
> Suggested-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_flow_tcf.c | 505 ++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 499 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> index a1d7733..a3348ea 100644
> --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> @@ -4012,6 +4012,502 @@ static LIST_HEAD(, mlx5_flow_tcf_vtep)
>  }
>  #endif /* HAVE_IFLA_VXLAN_COLLECT_METADATA */
>  
> +#define MNL_REQUEST_SIZE_MIN 256
> +#define MNL_REQUEST_SIZE_MAX 2048
> +#define MNL_REQUEST_SIZE RTE_MIN(RTE_MAX(sysconf(_SC_PAGESIZE), \
> +				 MNL_REQUEST_SIZE_MIN), MNL_REQUEST_SIZE_MAX)
> +
> +/* Data structures used by flow_tcf_xxx_cb() routines. */
> +struct tcf_nlcb_buf {
> +	LIST_ENTRY(tcf_nlcb_buf) next;
> +	uint32_t size;
> +	alignas(struct nlmsghdr)
> +	uint8_t msg[]; /**< Netlink message data. */
> +};
> +
> +struct tcf_nlcb_context {
> +	unsigned int ifindex; /**< Base interface index. */
> +	uint32_t bufsize;
> +	LIST_HEAD(, tcf_nlcb_buf) nlbuf;
> +};
> +
> +/**
> + * Allocate space for netlink command in buffer list
> + *
> + * @param[in, out] ctx
> + *   Pointer to callback context with command buffers list.
> + * @param[in] size
> + *   Required size of data buffer to be allocated.
> + *
> + * @return
> + *   Pointer to allocated memory, aligned as message header.
> + *   NULL if some error occurred.
> + */
> +static struct nlmsghdr *
> +flow_tcf_alloc_nlcmd(struct tcf_nlcb_context *ctx, uint32_t size)
> +{
> +	struct tcf_nlcb_buf *buf;
> +	struct nlmsghdr *nlh;
> +
> +	size = NLMSG_ALIGN(size);
> +	buf = LIST_FIRST(&ctx->nlbuf);
> +	if (buf && (buf->size + size) <= ctx->bufsize) {
> +		nlh = (struct nlmsghdr *)&buf->msg[buf->size];
> +		buf->size += size;
> +		return nlh;
> +	}
> +	if (size > ctx->bufsize) {
> +		DRV_LOG(WARNING, "netlink: too long command buffer requested");
> +		return NULL;
> +	}
> +	buf = rte_malloc(__func__,
> +			ctx->bufsize + sizeof(struct tcf_nlcb_buf),
> +			alignof(struct tcf_nlcb_buf));
> +	if (!buf) {
> +		DRV_LOG(WARNING, "netlink: no memory for command buffer");
> +		return NULL;
> +	}
> +	LIST_INSERT_HEAD(&ctx->nlbuf, buf, next);
> +	buf->size = size;
> +	nlh = (struct nlmsghdr *)&buf->msg[0];
> +	return nlh;
> +}
> +
> +/**
> + * Set NLM_F_ACK flags in the last netlink command in buffer.
> + * Only last command in the buffer will be acked by system.
> + *
> + * @param[in, out] buf
> + *   Pointer to buffer with netlink commands.
> + */
> +static void
> +flow_tcf_setack_nlcmd(struct tcf_nlcb_buf *buf)
> +{
> +	struct nlmsghdr *nlh;
> +	uint32_t size = 0;
> +
> +	assert(buf->size);
> +	do {
> +		nlh = (struct nlmsghdr *)&buf->msg[size];
> +		size += NLMSG_ALIGN(nlh->nlmsg_len);
> +		if (size >= buf->size) {
> +			nlh->nlmsg_flags |= NLM_F_ACK;
> +			break;
> +		}
> +	} while (true);
> +}
> +
> +/**
> + * Send the buffers with prepared netlink commands. Scans the list and
> + * sends all found buffers. Buffers are sent and freed anyway in order
> + * to prevent memory leakage if sending some of the messages fails.
> + *
> + * @param[in] tcf
> + *   Context object initialized by mlx5_flow_tcf_context_create().
> + * @param[in, out] ctx
> + *   Pointer to callback context with command buffers list.
> + *
> + * @return
> + *   Zero value on success, negative errno value otherwise
> + *   and rte_errno is set.
> + */
> +static int
> +flow_tcf_send_nlcmd(struct mlx5_flow_tcf_context *tcf,
> +		    struct tcf_nlcb_context *ctx)
> +{
> +	struct tcf_nlcb_buf *bc, *bn;
> +	struct nlmsghdr *nlh;
> +	int ret = 0;
> +
> +	bc = LIST_FIRST(&ctx->nlbuf);
> +	while (bc) {
> +		int rc;
> +
> +		bn = LIST_NEXT(bc, next);
> +		if (bc->size) {
> +			flow_tcf_setack_nlcmd(bc);
> +			nlh = (struct nlmsghdr *)&bc->msg;
> +			rc = flow_tcf_nl_ack(tcf, nlh, bc->size, NULL, NULL);
> +			if (rc && !ret)
> +				ret = rc;
> +		}
> +		rte_free(bc);
> +		bc = bn;
> +	}
> +	LIST_INIT(&ctx->nlbuf);
> +	return ret;
> +}
> +
> +/**
> + * Collect local IP address rules with scope link attribute  on specified
> + * network device. This is callback routine called by libmnl mnl_cb_run()
> + * in loop for every message in received packet.
> + *
> + * @param[in] nlh
> + *   Pointer to reply header.
> + * @param[in, out] arg
> + *   Opaque data pointer for this callback.
> + *
> + * @return
> + *   A positive, nonzero value on success, negative errno value otherwise
> + *   and rte_errno is set.
> + */
> +static int
> +flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg)
> +{
> +	struct tcf_nlcb_context *ctx = arg;
> +	struct nlmsghdr *cmd;
> +	struct ifaddrmsg *ifa;
> +	struct nlattr *na;
> +	struct nlattr *na_local = NULL;
> +	struct nlattr *na_peer = NULL;
> +	unsigned char family;
> +
> +	if (nlh->nlmsg_type != RTM_NEWADDR) {
> +		rte_errno = EINVAL;
> +		return -rte_errno;
> +	}
> +	ifa = mnl_nlmsg_get_payload(nlh);
> +	family = ifa->ifa_family;
> +	if (ifa->ifa_index != ctx->ifindex ||
> +	    ifa->ifa_scope != RT_SCOPE_LINK ||
> +	    !(ifa->ifa_flags & IFA_F_PERMANENT) ||
> +	    (family != AF_INET && family != AF_INET6))
> +		return 1;
> +	mnl_attr_for_each(na, nlh, sizeof(*ifa)) {
> +		switch (mnl_attr_get_type(na)) {
> +		case IFA_LOCAL:
> +			na_local = na;
> +			break;
> +		case IFA_ADDRESS:
> +			na_peer = na;
> +			break;
> +		}
> +		if (na_local && na_peer)
> +			break;
> +	}
> +	if (!na_local || !na_peer)
> +		return 1;
> +	/* Local rule found with scope link, permanent and assigned peer. */
> +	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) +
> +					MNL_ALIGN(sizeof(struct ifaddrmsg)) +
> +					(family == AF_INET6
> +					? 2 * SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
> +					: 2 * SZ_NLATTR_TYPE_OF(uint32_t)));

Better to use IPV4_ADDR_LEN instead?

> +	if (!cmd) {
> +		rte_errno = ENOMEM;
> +		return -rte_errno;
> +	}
> +	cmd = mnl_nlmsg_put_header(cmd);
> +	cmd->nlmsg_type = RTM_DELADDR;
> +	cmd->nlmsg_flags = NLM_F_REQUEST;
> +	ifa = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifa));
> +	ifa->ifa_flags = IFA_F_PERMANENT;
> +	ifa->ifa_scope = RT_SCOPE_LINK;
> +	ifa->ifa_index = ctx->ifindex;
> +	if (family == AF_INET) {
> +		ifa->ifa_family = AF_INET;
> +		ifa->ifa_prefixlen = 32;
> +		mnl_attr_put_u32(cmd, IFA_LOCAL, mnl_attr_get_u32(na_local));
> +		mnl_attr_put_u32(cmd, IFA_ADDRESS, mnl_attr_get_u32(na_peer));
> +	} else {
> +		ifa->ifa_family = AF_INET6;
> +		ifa->ifa_prefixlen = 128;
> +		mnl_attr_put(cmd, IFA_LOCAL, IPV6_ADDR_LEN,
> +			mnl_attr_get_payload(na_local));
> +		mnl_attr_put(cmd, IFA_ADDRESS, IPV6_ADDR_LEN,
> +			mnl_attr_get_payload(na_peer));
> +	}
> +	return 1;
> +}
> +
> +/**
> + * Cleanup the local IP addresses on outer interface.
> + *
> + * @param[in] tcf
> + *   Context object initialized by mlx5_flow_tcf_context_create().
> + * @param[in] ifindex
> + *   Network interface index to perform cleanup.
> + */
> +static void
> +flow_tcf_encap_local_cleanup(struct mlx5_flow_tcf_context *tcf,
> +			    unsigned int ifindex)
> +{
> +	struct nlmsghdr *nlh;
> +	struct ifaddrmsg *ifa;
> +	struct tcf_nlcb_context ctx = {
> +		.ifindex = ifindex,
> +		.bufsize = MNL_REQUEST_SIZE,
> +		.nlbuf = LIST_HEAD_INITIALIZER(),
> +	};
> +	int ret;
> +
> +	assert(ifindex);
> +	/*
> +	 * Seek and destroy leftovers of local IP addresses with
> +	 * matching properties "scope link".
> +	 */
> +	nlh = mnl_nlmsg_put_header(tcf->buf);
> +	nlh->nlmsg_type = RTM_GETADDR;
> +	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
> +	ifa = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifa));
> +	ifa->ifa_family = AF_UNSPEC;
> +	ifa->ifa_index = ifindex;
> +	ifa->ifa_scope = RT_SCOPE_LINK;
> +	ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_local_cb, &ctx);
> +	if (ret)
> +		DRV_LOG(WARNING, "netlink: query device list error %d", ret);
> +	ret = flow_tcf_send_nlcmd(tcf, &ctx);
> +	if (ret)
> +		DRV_LOG(WARNING, "netlink: device delete error %d", ret);
> +}
> +
> +/**
> + * Collect permanent neigh rules on specified network device.
> + * This is callback routine called by libmnl mnl_cb_run() in loop for
> + * every message in received packet.
> + *
> + * @param[in] nlh
> + *   Pointer to reply header.
> + * @param[in, out] arg
> + *   Opaque data pointer for this callback.
> + *
> + * @return
> + *   A positive, nonzero value on success, negative errno value otherwise
> + *   and rte_errno is set.
> + */
> +static int
> +flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg)
> +{
> +	struct tcf_nlcb_context *ctx = arg;
> +	struct nlmsghdr *cmd;
> +	struct ndmsg *ndm;
> +	struct nlattr *na;
> +	struct nlattr *na_ip = NULL;
> +	struct nlattr *na_mac = NULL;
> +	unsigned char family;
> +
> +	if (nlh->nlmsg_type != RTM_NEWNEIGH) {
> +		rte_errno = EINVAL;
> +		return -rte_errno;
> +	}
> +	ndm = mnl_nlmsg_get_payload(nlh);
> +	family = ndm->ndm_family;
> +	if (ndm->ndm_ifindex != (int)ctx->ifindex ||
> +	   !(ndm->ndm_state & NUD_PERMANENT) ||
> +	   (family != AF_INET && family != AF_INET6))
> +		return 1;
> +	mnl_attr_for_each(na, nlh, sizeof(*ndm)) {
> +		switch (mnl_attr_get_type(na)) {
> +		case NDA_DST:
> +			na_ip = na;
> +			break;
> +		case NDA_LLADDR:
> +			na_mac = na;
> +			break;
> +		}
> +		if (na_mac && na_ip)
> +			break;
> +	}
> +	if (!na_mac || !na_ip)
> +		return 1;
> +	/* Neigh rule with permanent attribute found. */
> +	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) +
> +					MNL_ALIGN(sizeof(struct ndmsg)) +
> +					SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) +
> +					(family == AF_INET6
> +					? SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
> +					: SZ_NLATTR_TYPE_OF(uint32_t)));

Better to use IPV4_ADDR_LEN instead?

> +	if (!cmd) {
> +		rte_errno = ENOMEM;
> +		return -rte_errno;
> +	}
> +	cmd = mnl_nlmsg_put_header(cmd);
> +	cmd->nlmsg_type = RTM_DELNEIGH;
> +	cmd->nlmsg_flags = NLM_F_REQUEST;
> +	ndm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ndm));
> +	ndm->ndm_ifindex = ctx->ifindex;
> +	ndm->ndm_state = NUD_PERMANENT;
> +	ndm->ndm_flags = 0;
> +	ndm->ndm_type = 0;
> +	if (family == AF_INET) {
> +		ndm->ndm_family = AF_INET;
> +		mnl_attr_put_u32(cmd, NDA_DST, mnl_attr_get_u32(na_ip));
> +	} else {
> +		ndm->ndm_family = AF_INET6;
> +		mnl_attr_put(cmd, NDA_DST, IPV6_ADDR_LEN,
> +			     mnl_attr_get_payload(na_ip));
> +	}
> +	mnl_attr_put(cmd, NDA_LLADDR, ETHER_ADDR_LEN,
> +		     mnl_attr_get_payload(na_mac));
> +	return 1;
> +}
> +
> +/**
> + * Cleanup the neigh rules on outer interface.
> + *
> + * @param[in] tcf
> + *   Context object initialized by mlx5_flow_tcf_context_create().
> + * @param[in] ifindex
> + *   Network interface index to perform cleanup.
> + */
> +static void
> +flow_tcf_encap_neigh_cleanup(struct mlx5_flow_tcf_context *tcf,
> +			    unsigned int ifindex)
> +{
> +	struct nlmsghdr *nlh;
> +	struct ndmsg *ndm;
> +	struct tcf_nlcb_context ctx = {
> +		.ifindex = ifindex,
> +		.bufsize = MNL_REQUEST_SIZE,
> +		.nlbuf = LIST_HEAD_INITIALIZER(),
> +	};
> +	int ret;
> +
> +	assert(ifindex);
> +	/* Seek and destroy leftovers of neigh rules. */
> +	nlh = mnl_nlmsg_put_header(tcf->buf);
> +	nlh->nlmsg_type = RTM_GETNEIGH;
> +	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
> +	ndm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ndm));
> +	ndm->ndm_family = AF_UNSPEC;
> +	ndm->ndm_ifindex = ifindex;
> +	ndm->ndm_state = NUD_PERMANENT;
> +	ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_neigh_cb, &ctx);
> +	if (ret)
> +		DRV_LOG(WARNING, "netlink: query device list error %d", ret);
> +	ret = flow_tcf_send_nlcmd(tcf, &ctx);
> +	if (ret)
> +		DRV_LOG(WARNING, "netlink: device delete error %d", ret);
> +}
> +
> +/**
> + * Collect indices of VXLAN encap/decap interfaces associated with device.
> + * This is callback routine called by libmnl mnl_cb_run() in loop for
> + * every message in received packet.
> + *
> + * @param[in] nlh
> + *   Pointer to reply header.
> + * @param[in, out] arg
> + *   Opaque data pointer for this callback.
> + *
> + * @return
> + *   A positive, nonzero value on success, negative errno value otherwise
> + *   and rte_errno is set.
> + */
> +static int
> +flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg)
> +{
> +	struct tcf_nlcb_context *ctx = arg;
> +	struct nlmsghdr *cmd;
> +	struct ifinfomsg *ifm;
> +	struct nlattr *na;
> +	struct nlattr *na_info = NULL;
> +	struct nlattr *na_vxlan = NULL;
> +	bool found = false;
> +	unsigned int vxindex;
> +
> +	if (nlh->nlmsg_type != RTM_NEWLINK) {
> +		rte_errno = EINVAL;
> +		return -rte_errno;
> +	}
> +	ifm = mnl_nlmsg_get_payload(nlh);
> +	if (!ifm->ifi_index) {
> +		rte_errno = EINVAL;
> +		return -rte_errno;
> +	}
> +	mnl_attr_for_each(na, nlh, sizeof(*ifm))
> +		if (mnl_attr_get_type(na) == IFLA_LINKINFO) {
> +			na_info = na;
> +			break;
> +		}
> +	if (!na_info)
> +		return 1;
> +	mnl_attr_for_each_nested(na, na_info) {
> +		switch (mnl_attr_get_type(na)) {
> +		case IFLA_INFO_KIND:
> +			if (!strncmp("vxlan", mnl_attr_get_str(na),
> +				     mnl_attr_get_len(na)))
> +				found = true;
> +			break;
> +		case IFLA_INFO_DATA:
> +			na_vxlan = na;
> +			break;
> +		}
> +		if (found && na_vxlan)
> +			break;
> +	}
> +	if (!found || !na_vxlan)
> +		return 1;
> +	found = false;
> +	mnl_attr_for_each_nested(na, na_vxlan) {
> +		if (mnl_attr_get_type(na) == IFLA_VXLAN_LINK &&
> +		    mnl_attr_get_u32(na) == ctx->ifindex) {
> +			found = true;
> +			break;
> +		}
> +	}
> +	if (!found)
> +		return 1;
> +	/* Attached VXLAN device found, store the command to delete. */
> +	vxindex = ifm->ifi_index;
> +	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) +
> +					MNL_ALIGN(sizeof(struct ifinfomsg)));
> +	if (!nlh) {
> +		rte_errno = ENOMEM;
> +		return -rte_errno;
> +	}
> +	cmd = mnl_nlmsg_put_header(cmd);
> +	cmd->nlmsg_type = RTM_DELLINK;
> +	cmd->nlmsg_flags = NLM_F_REQUEST;
> +	ifm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifm));
> +	ifm->ifi_family = AF_UNSPEC;
> +	ifm->ifi_index = vxindex;
> +	return 1;
> +}
> +
> +/**
> + * Cleanup the outer interface. Removes all found vxlan devices
> + * attached to specified index, flushes the neigh and local IP
> + * database.
> + *
> + * @param[in] tcf
> + *   Context object initialized by mlx5_flow_tcf_context_create().
> + * @param[in] ifindex
> + *   Network interface index to perform cleanup.
> + */
> +static void
> +flow_tcf_encap_iface_cleanup(struct mlx5_flow_tcf_context *tcf,
> +			    unsigned int ifindex)
> +{
> +	struct nlmsghdr *nlh;
> +	struct ifinfomsg *ifm;
> +	struct tcf_nlcb_context ctx = {
> +		.ifindex = ifindex,
> +		.bufsize = MNL_REQUEST_SIZE,
> +		.nlbuf = LIST_HEAD_INITIALIZER(),
> +	};
> +	int ret;
> +
> +	assert(ifindex);
> +	/*
> +	 * Seek and destroy leftover VXLAN encap/decap interfaces with
> +	 * matching properties.
> +	 */
> +	nlh = mnl_nlmsg_put_header(tcf->buf);
> +	nlh->nlmsg_type = RTM_GETLINK;
> +	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
> +	ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
> +	ifm->ifi_family = AF_UNSPEC;
> +	ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_vxlan_cb, &ctx);
> +	if (ret)
> +		DRV_LOG(WARNING, "netlink: query device list error %d", ret);
> +	ret = flow_tcf_send_nlcmd(tcf, &ctx);
> +	if (ret)
> +		DRV_LOG(WARNING, "netlink: device delete error %d", ret);
> +}
> +
> +
>  /**
>   * Create target interface index for VXLAN tunneling decapsulation.
>   * In order to share the UDP port within the other interfaces the
> @@ -4100,12 +4596,9 @@ static LIST_HEAD(, mlx5_flow_tcf_vtep)
>  		uint16_t pcnt;
>  
>  		/* Not found, we should create the new attached VTEP. */
> -/*
> - * TODO: not implemented yet
> - * flow_tcf_encap_iface_cleanup(tcf, ifouter);
> - * flow_tcf_encap_local_cleanup(tcf, ifouter);
> - * flow_tcf_encap_neigh_cleanup(tcf, ifouter);
> - */
> +		flow_tcf_encap_iface_cleanup(tcf, ifouter);
> +		flow_tcf_encap_local_cleanup(tcf, ifouter);
> +		flow_tcf_encap_neigh_cleanup(tcf, ifouter);

I have a fundamental question. Why are these cleanups needed? If I read the
code correctly, it looks like it is cleaning up vtep, ip assignment and neigh entries
which are not created/set by PMD. The reason why we have to clean up things is that
PMD exclusively owns the interface (ifouter). Is my understanding correct?

Thanks,
Yongseok

>  		for (pcnt = 0; pcnt <= (MLX5_VXLAN_PORT_RANGE_MAX
>  				     - MLX5_VXLAN_PORT_RANGE_MIN); pcnt++) {
>  			encap_port++;
>
  
Slava Ovsiienko Oct. 25, 2018, 8:32 p.m. UTC | #2
> -----Original Message-----
> From: Yongseok Koh
> Sent: Thursday, October 25, 2018 3:37
> To: Slava Ovsiienko <viacheslavo@mellanox.com>
> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> Subject: Re: [PATCH v2 7/7] net/mlx5: e-switch VXLAN rule cleanup routines
> 
> On Mon, Oct 15, 2018 at 02:13:35PM +0000, Viacheslav Ovsiienko wrote:
> > The last part of patchset contains the rule cleanup routines.
> > These ones is the part of outer interface initialization at the moment
> > of VXLAN VTEP attaching. These routines query the list of attached
> > VXLAN devices, the list of local IP addresses with peer and link scope
> > attribute and the list of permanent neigh rules, then all found
> > abovementioned items on the specified outer device are flushed.
> >
> > Suggested-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
> > Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> > ---
> >  drivers/net/mlx5/mlx5_flow_tcf.c | 505
> > ++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 499 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c
> > b/drivers/net/mlx5/mlx5_flow_tcf.c
> > index a1d7733..a3348ea 100644
> > --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> > +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> > @@ -4012,6 +4012,502 @@ static LIST_HEAD(, mlx5_flow_tcf_vtep)  }
> > #endif /* HAVE_IFLA_VXLAN_COLLECT_METADATA */
> >
> > +#define MNL_REQUEST_SIZE_MIN 256
> > +#define MNL_REQUEST_SIZE_MAX 2048
> > +#define MNL_REQUEST_SIZE RTE_MIN(RTE_MAX(sysconf(_SC_PAGESIZE),
> \
> > +				 MNL_REQUEST_SIZE_MIN),
> MNL_REQUEST_SIZE_MAX)
> > +
> > +/* Data structures used by flow_tcf_xxx_cb() routines. */ struct
> > +tcf_nlcb_buf {
> > +	LIST_ENTRY(tcf_nlcb_buf) next;
> > +	uint32_t size;
> > +	alignas(struct nlmsghdr)
> > +	uint8_t msg[]; /**< Netlink message data. */ };
> > +
> > +struct tcf_nlcb_context {
> > +	unsigned int ifindex; /**< Base interface index. */
> > +	uint32_t bufsize;
> > +	LIST_HEAD(, tcf_nlcb_buf) nlbuf;
> > +};
> > +
> > +/**
> > + * Allocate space for netlink command in buffer list
> > + *
> > + * @param[in, out] ctx
> > + *   Pointer to callback context with command buffers list.
> > + * @param[in] size
> > + *   Required size of data buffer to be allocated.
> > + *
> > + * @return
> > + *   Pointer to allocated memory, aligned as message header.
> > + *   NULL if some error occurred.
> > + */
> > +static struct nlmsghdr *
> > +flow_tcf_alloc_nlcmd(struct tcf_nlcb_context *ctx, uint32_t size) {
> > +	struct tcf_nlcb_buf *buf;
> > +	struct nlmsghdr *nlh;
> > +
> > +	size = NLMSG_ALIGN(size);
> > +	buf = LIST_FIRST(&ctx->nlbuf);
> > +	if (buf && (buf->size + size) <= ctx->bufsize) {
> > +		nlh = (struct nlmsghdr *)&buf->msg[buf->size];
> > +		buf->size += size;
> > +		return nlh;
> > +	}
> > +	if (size > ctx->bufsize) {
> > +		DRV_LOG(WARNING, "netlink: too long command buffer
> requested");
> > +		return NULL;
> > +	}
> > +	buf = rte_malloc(__func__,
> > +			ctx->bufsize + sizeof(struct tcf_nlcb_buf),
> > +			alignof(struct tcf_nlcb_buf));
> > +	if (!buf) {
> > +		DRV_LOG(WARNING, "netlink: no memory for command
> buffer");
> > +		return NULL;
> > +	}
> > +	LIST_INSERT_HEAD(&ctx->nlbuf, buf, next);
> > +	buf->size = size;
> > +	nlh = (struct nlmsghdr *)&buf->msg[0];
> > +	return nlh;
> > +}
> > +
> > +/**
> > + * Set NLM_F_ACK flags in the last netlink command in buffer.
> > + * Only last command in the buffer will be acked by system.
> > + *
> > + * @param[in, out] buf
> > + *   Pointer to buffer with netlink commands.
> > + */
> > +static void
> > +flow_tcf_setack_nlcmd(struct tcf_nlcb_buf *buf) {
> > +	struct nlmsghdr *nlh;
> > +	uint32_t size = 0;
> > +
> > +	assert(buf->size);
> > +	do {
> > +		nlh = (struct nlmsghdr *)&buf->msg[size];
> > +		size += NLMSG_ALIGN(nlh->nlmsg_len);
> > +		if (size >= buf->size) {
> > +			nlh->nlmsg_flags |= NLM_F_ACK;
> > +			break;
> > +		}
> > +	} while (true);
> > +}
> > +
> > +/**
> > + * Send the buffers with prepared netlink commands. Scans the list
> > +and
> > + * sends all found buffers. Buffers are sent and freed anyway in
> > +order
> > + * to prevent memory leakage if some every message in received packet.
> > + *
> > + * @param[in] tcf
> > + *   Context object initialized by mlx5_flow_tcf_context_create().
> > + * @param[in, out] ctx
> > + *   Pointer to callback context with command buffers list.
> > + *
> > + * @return
> > + *   Zero value on success, negative errno value otherwise
> > + *   and rte_errno is set.
> > + */
> > +static int
> > +flow_tcf_send_nlcmd(struct mlx5_flow_tcf_context *tcf,
> > +		    struct tcf_nlcb_context *ctx)
> > +{
> > +	struct tcf_nlcb_buf *bc, *bn;
> > +	struct nlmsghdr *nlh;
> > +	int ret = 0;
> > +
> > +	bc = LIST_FIRST(&ctx->nlbuf);
> > +	while (bc) {
> > +		int rc;
> > +
> > +		bn = LIST_NEXT(bc, next);
> > +		if (bc->size) {
> > +			flow_tcf_setack_nlcmd(bc);
> > +			nlh = (struct nlmsghdr *)&bc->msg;
> > +			rc = flow_tcf_nl_ack(tcf, nlh, bc->size, NULL, NULL);
> > +			if (rc && !ret)
> > +				ret = rc;
> > +		}
> > +		rte_free(bc);
> > +		bc = bn;
> > +	}
> > +	LIST_INIT(&ctx->nlbuf);
> > +	return ret;
> > +}
> > +
> > +/**
> > + * Collect local IP address rules with scope link attribute  on
> > +specified
> > + * network device. This is callback routine called by libmnl
> > +mnl_cb_run()
> > + * in loop for every message in received packet.
> > + *
> > + * @param[in] nlh
> > + *   Pointer to reply header.
> > + * @param[in, out] arg
> > + *   Opaque data pointer for this callback.
> > + *
> > + * @return
> > + *   A positive, nonzero value on success, negative errno value otherwise
> > + *   and rte_errno is set.
> > + */
> > +static int
> > +flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg) {
> > +	struct tcf_nlcb_context *ctx = arg;
> > +	struct nlmsghdr *cmd;
> > +	struct ifaddrmsg *ifa;
> > +	struct nlattr *na;
> > +	struct nlattr *na_local = NULL;
> > +	struct nlattr *na_peer = NULL;
> > +	unsigned char family;
> > +
> > +	if (nlh->nlmsg_type != RTM_NEWADDR) {
> > +		rte_errno = EINVAL;
> > +		return -rte_errno;
> > +	}
> > +	ifa = mnl_nlmsg_get_payload(nlh);
> > +	family = ifa->ifa_family;
> > +	if (ifa->ifa_index != ctx->ifindex ||
> > +	    ifa->ifa_scope != RT_SCOPE_LINK ||
> > +	    !(ifa->ifa_flags & IFA_F_PERMANENT) ||
> > +	    (family != AF_INET && family != AF_INET6))
> > +		return 1;
> > +	mnl_attr_for_each(na, nlh, sizeof(*ifa)) {
> > +		switch (mnl_attr_get_type(na)) {
> > +		case IFA_LOCAL:
> > +			na_local = na;
> > +			break;
> > +		case IFA_ADDRESS:
> > +			na_peer = na;
> > +			break;
> > +		}
> > +		if (na_local && na_peer)
> > +			break;
> > +	}
> > +	if (!na_local || !na_peer)
> > +		return 1;
> > +	/* Local rule found with scope link, permanent and assigned peer. */
> > +	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct
> nlmsghdr)) +
> > +					MNL_ALIGN(sizeof(struct ifaddrmsg))
> +
> > +					(family == AF_INET6
> > +					? 2 *
> SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
> > +					: 2 *
> SZ_NLATTR_TYPE_OF(uint32_t)));
> 
> Better to use IPV4_ADDR_LEN instead?
> 
OK.

> > +	if (!cmd) {
> > +		rte_errno = ENOMEM;
> > +		return -rte_errno;
> > +	}
> > +	cmd = mnl_nlmsg_put_header(cmd);
> > +	cmd->nlmsg_type = RTM_DELADDR;
> > +	cmd->nlmsg_flags = NLM_F_REQUEST;
> > +	ifa = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifa));
> > +	ifa->ifa_flags = IFA_F_PERMANENT;
> > +	ifa->ifa_scope = RT_SCOPE_LINK;
> > +	ifa->ifa_index = ctx->ifindex;
> > +	if (family == AF_INET) {
> > +		ifa->ifa_family = AF_INET;
> > +		ifa->ifa_prefixlen = 32;
> > +		mnl_attr_put_u32(cmd, IFA_LOCAL,
> mnl_attr_get_u32(na_local));
> > +		mnl_attr_put_u32(cmd, IFA_ADDRESS,
> mnl_attr_get_u32(na_peer));
> > +	} else {
> > +		ifa->ifa_family = AF_INET6;
> > +		ifa->ifa_prefixlen = 128;
> > +		mnl_attr_put(cmd, IFA_LOCAL, IPV6_ADDR_LEN,
> > +			mnl_attr_get_payload(na_local));
> > +		mnl_attr_put(cmd, IFA_ADDRESS, IPV6_ADDR_LEN,
> > +			mnl_attr_get_payload(na_peer));
> > +	}
> > +	return 1;
> > +}
> > +
> > +/**
> > + * Cleanup the local IP addresses on outer interface.
> > + *
> > + * @param[in] tcf
> > + *   Context object initialized by mlx5_flow_tcf_context_create().
> > + * @param[in] ifindex
> > + *   Network interface index to perform cleanup.
> > + */
> > +static void
> > +flow_tcf_encap_local_cleanup(struct mlx5_flow_tcf_context *tcf,
> > +			    unsigned int ifindex)
> > +{
> > +	struct nlmsghdr *nlh;
> > +	struct ifaddrmsg *ifa;
> > +	struct tcf_nlcb_context ctx = {
> > +		.ifindex = ifindex,
> > +		.bufsize = MNL_REQUEST_SIZE,
> > +		.nlbuf = LIST_HEAD_INITIALIZER(),
> > +	};
> > +	int ret;
> > +
> > +	assert(ifindex);
> > +	/*
> > +	 * Seek and destroy leftovers of local IP addresses with
> > +	 * matching properties "scope link".
> > +	 */
> > +	nlh = mnl_nlmsg_put_header(tcf->buf);
> > +	nlh->nlmsg_type = RTM_GETADDR;
> > +	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
> > +	ifa = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifa));
> > +	ifa->ifa_family = AF_UNSPEC;
> > +	ifa->ifa_index = ifindex;
> > +	ifa->ifa_scope = RT_SCOPE_LINK;
> > +	ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_local_cb, &ctx);
> > +	if (ret)
> > +		DRV_LOG(WARNING, "netlink: query device list error %d",
> ret);
> > +	ret = flow_tcf_send_nlcmd(tcf, &ctx);
> > +	if (ret)
> > +		DRV_LOG(WARNING, "netlink: device delete error %d", ret); }
> > +
> > +/**
> > + * Collect permanent neigh rules on specified network device.
> > + * This is callback routine called by libmnl mnl_cb_run() in loop for
> > + * every message in received packet.
> > + *
> > + * @param[in] nlh
> > + *   Pointer to reply header.
> > + * @param[in, out] arg
> > + *   Opaque data pointer for this callback.
> > + *
> > + * @return
> > + *   A positive, nonzero value on success, negative errno value otherwise
> > + *   and rte_errno is set.
> > + */
> > +static int
> > +flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg) {
> > +	struct tcf_nlcb_context *ctx = arg;
> > +	struct nlmsghdr *cmd;
> > +	struct ndmsg *ndm;
> > +	struct nlattr *na;
> > +	struct nlattr *na_ip = NULL;
> > +	struct nlattr *na_mac = NULL;
> > +	unsigned char family;
> > +
> > +	if (nlh->nlmsg_type != RTM_NEWNEIGH) {
> > +		rte_errno = EINVAL;
> > +		return -rte_errno;
> > +	}
> > +	ndm = mnl_nlmsg_get_payload(nlh);
> > +	family = ndm->ndm_family;
> > +	if (ndm->ndm_ifindex != (int)ctx->ifindex ||
> > +	   !(ndm->ndm_state & NUD_PERMANENT) ||
> > +	   (family != AF_INET && family != AF_INET6))
> > +		return 1;
> > +	mnl_attr_for_each(na, nlh, sizeof(*ndm)) {
> > +		switch (mnl_attr_get_type(na)) {
> > +		case NDA_DST:
> > +			na_ip = na;
> > +			break;
> > +		case NDA_LLADDR:
> > +			na_mac = na;
> > +			break;
> > +		}
> > +		if (na_mac && na_ip)
> > +			break;
> > +	}
> > +	if (!na_mac || !na_ip)
> > +		return 1;
> > +	/* Neigh rule with permanent attribute found. */
> > +	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct
> nlmsghdr)) +
> > +					MNL_ALIGN(sizeof(struct ndmsg)) +
> > +
> 	SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) +
> > +					(family == AF_INET6
> > +					?
> SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
> > +					: SZ_NLATTR_TYPE_OF(uint32_t)));
> 
> Better to use IPV4_ADDR_LEN instead?
> 
> > +	if (!cmd) {
> > +		rte_errno = ENOMEM;
> > +		return -rte_errno;
> > +	}
> > +	cmd = mnl_nlmsg_put_header(cmd);
> > +	cmd->nlmsg_type = RTM_DELNEIGH;
> > +	cmd->nlmsg_flags = NLM_F_REQUEST;
> > +	ndm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ndm));
> > +	ndm->ndm_ifindex = ctx->ifindex;
> > +	ndm->ndm_state = NUD_PERMANENT;
> > +	ndm->ndm_flags = 0;
> > +	ndm->ndm_type = 0;
> > +	if (family == AF_INET) {
> > +		ndm->ndm_family = AF_INET;
> > +		mnl_attr_put_u32(cmd, NDA_DST,
> mnl_attr_get_u32(na_ip));
> > +	} else {
> > +		ndm->ndm_family = AF_INET6;
> > +		mnl_attr_put(cmd, NDA_DST, IPV6_ADDR_LEN,
> > +			     mnl_attr_get_payload(na_ip));
> > +	}
> > +	mnl_attr_put(cmd, NDA_LLADDR, ETHER_ADDR_LEN,
> > +		     mnl_attr_get_payload(na_mac));
> > +	return 1;
> > +}
> > +
> > +/**
> > + * Cleanup the neigh rules on outer interface.
> > + *
> > + * @param[in] tcf
> > + *   Context object initialized by mlx5_flow_tcf_context_create().
> > + * @param[in] ifindex
> > + *   Network interface index to perform cleanup.
> > + */
> > +static void
> > +flow_tcf_encap_neigh_cleanup(struct mlx5_flow_tcf_context *tcf,
> > +			    unsigned int ifindex)
> > +{
> > +	struct nlmsghdr *nlh;
> > +	struct ndmsg *ndm;
> > +	struct tcf_nlcb_context ctx = {
> > +		.ifindex = ifindex,
> > +		.bufsize = MNL_REQUEST_SIZE,
> > +		.nlbuf = LIST_HEAD_INITIALIZER(),
> > +	};
> > +	int ret;
> > +
> > +	assert(ifindex);
> > +	/* Seek and destroy leftovers of neigh rules. */
> > +	nlh = mnl_nlmsg_put_header(tcf->buf);
> > +	nlh->nlmsg_type = RTM_GETNEIGH;
> > +	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
> > +	ndm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ndm));
> > +	ndm->ndm_family = AF_UNSPEC;
> > +	ndm->ndm_ifindex = ifindex;
> > +	ndm->ndm_state = NUD_PERMANENT;
> > +	ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_neigh_cb, &ctx);
> > +	if (ret)
> > +		DRV_LOG(WARNING, "netlink: query device list error %d",
> ret);
> > +	ret = flow_tcf_send_nlcmd(tcf, &ctx);
> > +	if (ret)
> > +		DRV_LOG(WARNING, "netlink: device delete error %d", ret); }
> > +
> > +/**
> > + * Collect indices of VXLAN encap/decap interfaces associated with
> device.
> > + * This is callback routine called by libmnl mnl_cb_run() in loop for
> > + * every message in received packet.
> > + *
> > + * @param[in] nlh
> > + *   Pointer to reply header.
> > + * @param[in, out] arg
> > + *   Opaque data pointer for this callback.
> > + *
> > + * @return
> > + *   A positive, nonzero value on success, negative errno value otherwise
> > + *   and rte_errno is set.
> > + */
> > +static int
> > +flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg) {
> > +	struct tcf_nlcb_context *ctx = arg;
> > +	struct nlmsghdr *cmd;
> > +	struct ifinfomsg *ifm;
> > +	struct nlattr *na;
> > +	struct nlattr *na_info = NULL;
> > +	struct nlattr *na_vxlan = NULL;
> > +	bool found = false;
> > +	unsigned int vxindex;
> > +
> > +	if (nlh->nlmsg_type != RTM_NEWLINK) {
> > +		rte_errno = EINVAL;
> > +		return -rte_errno;
> > +	}
> > +	ifm = mnl_nlmsg_get_payload(nlh);
> > +	if (!ifm->ifi_index) {
> > +		rte_errno = EINVAL;
> > +		return -rte_errno;
> > +	}
> > +	mnl_attr_for_each(na, nlh, sizeof(*ifm))
> > +		if (mnl_attr_get_type(na) == IFLA_LINKINFO) {
> > +			na_info = na;
> > +			break;
> > +		}
> > +	if (!na_info)
> > +		return 1;
> > +	mnl_attr_for_each_nested(na, na_info) {
> > +		switch (mnl_attr_get_type(na)) {
> > +		case IFLA_INFO_KIND:
> > +			if (!strncmp("vxlan", mnl_attr_get_str(na),
> > +				     mnl_attr_get_len(na)))
> > +				found = true;
> > +			break;
> > +		case IFLA_INFO_DATA:
> > +			na_vxlan = na;
> > +			break;
> > +		}
> > +		if (found && na_vxlan)
> > +			break;
> > +	}
> > +	if (!found || !na_vxlan)
> > +		return 1;
> > +	found = false;
> > +	mnl_attr_for_each_nested(na, na_vxlan) {
> > +		if (mnl_attr_get_type(na) == IFLA_VXLAN_LINK &&
> > +		    mnl_attr_get_u32(na) == ctx->ifindex) {
> > +			found = true;
> > +			break;
> > +		}
> > +	}
> > +	if (!found)
> > +		return 1;
> > +	/* Attached VXLAN device found, store the command to delete. */
> > +	vxindex = ifm->ifi_index;
> > +	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct
> nlmsghdr)) +
> > +					MNL_ALIGN(sizeof(struct
> ifinfomsg)));
> > +	if (!nlh) {
> > +		rte_errno = ENOMEM;
> > +		return -rte_errno;
> > +	}
> > +	cmd = mnl_nlmsg_put_header(cmd);
> > +	cmd->nlmsg_type = RTM_DELLINK;
> > +	cmd->nlmsg_flags = NLM_F_REQUEST;
> > +	ifm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifm));
> > +	ifm->ifi_family = AF_UNSPEC;
> > +	ifm->ifi_index = vxindex;
> > +	return 1;
> > +}
> > +
> > +/**
> > + * Cleanup the outer interface. Removes all found vxlan devices
> > + * attached to specified index, flushes the meigh and local IP
> > + * datavase.
> > + *
> > + * @param[in] tcf
> > + *   Context object initialized by mlx5_flow_tcf_context_create().
> > + * @param[in] ifindex
> > + *   Network inferface index to perform cleanup.
> > + */
> > +static void
> > +flow_tcf_encap_iface_cleanup(struct mlx5_flow_tcf_context *tcf,
> > +			    unsigned int ifindex)
> > +{
> > +	struct nlmsghdr *nlh;
> > +	struct ifinfomsg *ifm;
> > +	struct tcf_nlcb_context ctx = {
> > +		.ifindex = ifindex,
> > +		.bufsize = MNL_REQUEST_SIZE,
> > +		.nlbuf = LIST_HEAD_INITIALIZER(),
> > +	};
> > +	int ret;
> > +
> > +	assert(ifindex);
> > +	/*
> > +	 * Seek and destroy leftover VXLAN encap/decap interfaces with
> > +	 * matching properties.
> > +	 */
> > +	nlh = mnl_nlmsg_put_header(tcf->buf);
> > +	nlh->nlmsg_type = RTM_GETLINK;
> > +	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
> > +	ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
> > +	ifm->ifi_family = AF_UNSPEC;
> > +	ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_vxlan_cb, &ctx);
> > +	if (ret)
> > +		DRV_LOG(WARNING, "netlink: query device list error %d",
> ret);
> > +	ret = flow_tcf_send_nlcmd(tcf, &ctx);
> > +	if (ret)
> > +		DRV_LOG(WARNING, "netlink: device delete error %d", ret); }
> > +
> > +
> >  /**
> >   * Create target interface index for VXLAN tunneling decapsulation.
> >   * In order to share the UDP port within the other interfaces the @@
> > -4100,12 +4596,9 @@ static LIST_HEAD(, mlx5_flow_tcf_vtep)
> >  		uint16_t pcnt;
> >
> >  		/* Not found, we should create the new attached VTEP. */
> > -/*
> > - * TODO: not implemented yet
> > - * flow_tcf_encap_iface_cleanup(tcf, ifouter);
> > - * flow_tcf_encap_local_cleanup(tcf, ifouter);
> > - * flow_tcf_encap_neigh_cleanup(tcf, ifouter);
> > - */
> > +		flow_tcf_encap_iface_cleanup(tcf, ifouter);
> > +		flow_tcf_encap_local_cleanup(tcf, ifouter);
> > +		flow_tcf_encap_neigh_cleanup(tcf, ifouter);
> 
> I have a fundamental questioin. Why are these cleanups needed? If I read the
> code correctly, it looks like cleaning up vtep, ip assginment and neigh entry
> which are not created/set by PMD. The reason why we have to clean up
> things is that PMD exclusively owns the interface (ifouter). Is my
> understanding correct?

Because this is the most simple approach. I have no guess how
to co-exist with unknown pre-created rules and how to get into account
all their properties and side effects.

While debugging I see the situations when application crashes and
leaves a "leftovers" as VXLAN devices, neigh and local rules. If we run application again -
these leftovers were the sources of errors (EEXIST on rule creations and so on).

With best regards,
Slava
> 
> Thanks,
> Yongseok
> 
> >  		for (pcnt = 0; pcnt <= (MLX5_VXLAN_PORT_RANGE_MAX
> >  				     - MLX5_VXLAN_PORT_RANGE_MIN);
> pcnt++) {
> >  			encap_port++;
> >
  
Yongseok Koh Oct. 26, 2018, 6:30 a.m. UTC | #3
On Thu, Oct 25, 2018 at 01:32:23PM -0700, Slava Ovsiienko wrote:
> > -----Original Message-----
> > From: Yongseok Koh
> > Sent: Thursday, October 25, 2018 3:37
> > To: Slava Ovsiienko <viacheslavo@mellanox.com>
> > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > Subject: Re: [PATCH v2 7/7] net/mlx5: e-switch VXLAN rule cleanup routines
> > 
> > On Mon, Oct 15, 2018 at 02:13:35PM +0000, Viacheslav Ovsiienko wrote:
> > > The last part of patchset contains the rule cleanup routines.
> > > These ones is the part of outer interface initialization at the moment
> > > of VXLAN VTEP attaching. These routines query the list of attached
> > > VXLAN devices, the list of local IP addresses with peer and link scope
> > > attribute and the list of permanent neigh rules, then all found
> > > abovementioned items on the specified outer device are flushed.
> > >
> > > Suggested-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
> > > Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> > > ---
[...]
> > > -4100,12 +4596,9 @@ static LIST_HEAD(, mlx5_flow_tcf_vtep)
> > >  		uint16_t pcnt;
> > >
> > >  		/* Not found, we should create the new attached VTEP. */
> > > -/*
> > > - * TODO: not implemented yet
> > > - * flow_tcf_encap_iface_cleanup(tcf, ifouter);
> > > - * flow_tcf_encap_local_cleanup(tcf, ifouter);
> > > - * flow_tcf_encap_neigh_cleanup(tcf, ifouter);
> > > - */
> > > +		flow_tcf_encap_iface_cleanup(tcf, ifouter);
> > > +		flow_tcf_encap_local_cleanup(tcf, ifouter);
> > > +		flow_tcf_encap_neigh_cleanup(tcf, ifouter);
> > 
> > I have a fundamental questioin. Why are these cleanups needed? If I read the
> > code correctly, it looks like cleaning up vtep, ip assginment and neigh entry
> > which are not created/set by PMD. The reason why we have to clean up
> > things is that PMD exclusively owns the interface (ifouter). Is my
> > understanding correct?
> 
> Because this is the most simple approach. I have no guess how
> to co-exist with unknown pre-created rules and how to get into account
> all their properties and side effects.
> 
> While debugging I see the situations when application crashes and
> leaves a "leftovers" as VXLAN devices, neigh and local rules. If we run application again -
> these leftovers were the sources of errors (EEXIST on rule creations and so on).

Okay, makes sense.
Thanks for clarification.

Yongseok
  

Patch

diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index a1d7733..a3348ea 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -4012,6 +4012,502 @@  static LIST_HEAD(, mlx5_flow_tcf_vtep)
 }
 #endif /* HAVE_IFLA_VXLAN_COLLECT_METADATA */
 
+#define MNL_REQUEST_SIZE_MIN 256 /* Lower bound of command buffer size. */
+#define MNL_REQUEST_SIZE_MAX 2048 /* Upper bound of command buffer size. */
+#define MNL_REQUEST_SIZE RTE_MIN(RTE_MAX(sysconf(_SC_PAGESIZE), \
+				 MNL_REQUEST_SIZE_MIN), MNL_REQUEST_SIZE_MAX)
+
+/* Data structures used by flow_tcf_xxx_cb() routines. */
+struct tcf_nlcb_buf {
+	LIST_ENTRY(tcf_nlcb_buf) next; /**< Link in the context buffer list. */
+	uint32_t size; /**< Bytes of msg[] already occupied by commands. */
+	alignas(struct nlmsghdr)
+	uint8_t msg[]; /**< Netlink message data. */
+};
+
+struct tcf_nlcb_context {
+	unsigned int ifindex; /**< Base interface index. */
+	uint32_t bufsize; /**< Capacity of msg[] in every list buffer. */
+	LIST_HEAD(, tcf_nlcb_buf) nlbuf; /**< Command buffers, newest first. */
+};
+
+/**
+ * Reserve room for one netlink command in the context buffer list.
+ *
+ * @param[in, out] ctx
+ *   Callback context that owns the list of command buffers.
+ * @param[in] size
+ *   Number of bytes wanted, rounded up with NLMSG_ALIGN().
+ *
+ * @return
+ *   Address of the reserved area inside a list buffer, or NULL when the
+ *   request exceeds the buffer capacity or the allocation fails.
+ */
+static struct nlmsghdr *
+flow_tcf_alloc_nlcmd(struct tcf_nlcb_context *ctx, uint32_t size)
+{
+	struct tcf_nlcb_buf *head = LIST_FIRST(&ctx->nlbuf);
+	struct tcf_nlcb_buf *fresh;
+	uint32_t used;
+
+	size = NLMSG_ALIGN(size);
+	if (head && head->size + size <= ctx->bufsize) {
+		/* Enough room is left in the newest buffer, carve from it. */
+		used = head->size;
+		head->size = used + size;
+		return (struct nlmsghdr *)&head->msg[used];
+	}
+	if (size > ctx->bufsize) {
+		DRV_LOG(WARNING, "netlink: too long command buffer requested");
+		return NULL;
+	}
+	fresh = rte_malloc(__func__,
+			   sizeof(struct tcf_nlcb_buf) + ctx->bufsize,
+			   alignof(struct tcf_nlcb_buf));
+	if (!fresh) {
+		DRV_LOG(WARNING, "netlink: no memory for command buffer");
+		return NULL;
+	}
+	fresh->size = size;
+	LIST_INSERT_HEAD(&ctx->nlbuf, fresh, next);
+	return (struct nlmsghdr *)&fresh->msg[0];
+}
+
+/**
+ * Request an acknowledgment for the final netlink command of a buffer.
+ * NLM_F_ACK is raised on the last command only, earlier ones are untouched.
+ *
+ * @param[in, out] buf
+ *   Buffer holding one or more netlink commands placed back to back.
+ */
+static void
+flow_tcf_setack_nlcmd(struct tcf_nlcb_buf *buf)
+{
+	struct nlmsghdr *last;
+	uint32_t offset = 0;
+
+	assert(buf->size);
+	/* Hop from header to header until the final command is reached. */
+	for (;;) {
+		last = (struct nlmsghdr *)&buf->msg[offset];
+		offset += NLMSG_ALIGN(last->nlmsg_len);
+		if (offset >= buf->size)
+			break;
+	}
+	last->nlmsg_flags |= NLM_F_ACK;
+}
+
+/**
+ * Flush the prepared netlink commands. Every non-empty buffer of the list
+ * is sent and every buffer is freed, even after a failure, so that no
+ * memory leaks when some command is rejected.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_flow_tcf_context_create().
+ * @param[in, out] ctx
+ *   Callback context whose command buffer list is sent and emptied.
+ *
+ * @return
+ *   Zero when all sends succeed, otherwise the first nonzero value
+ *   returned by flow_tcf_nl_ack().
+ */
+static int
+flow_tcf_send_nlcmd(struct mlx5_flow_tcf_context *tcf,
+		    struct tcf_nlcb_context *ctx)
+{
+	struct tcf_nlcb_buf *cur = LIST_FIRST(&ctx->nlbuf);
+	struct tcf_nlcb_buf *following;
+	int first_err = 0;
+	int rc;
+
+	for (; cur; cur = following) {
+		/* Fetch the successor before the current buffer is freed. */
+		following = LIST_NEXT(cur, next);
+		if (cur->size) {
+			flow_tcf_setack_nlcmd(cur);
+			rc = flow_tcf_nl_ack(tcf, (struct nlmsghdr *)&cur->msg,
+					     cur->size, NULL, NULL);
+			/* Only the earliest failure is reported. */
+			if (rc && !first_err)
+				first_err = rc;
+		}
+		rte_free(cur);
+	}
+	/* Every buffer is released, leave an empty list behind. */
+	LIST_INIT(&ctx->nlbuf);
+	return first_err;
+}
+
+/**
+ * Queue RTM_DELADDR commands for the permanent, link scope local IP
+ * addresses with a peer found on the specified network device. This is a
+ * callback routine called by libmnl mnl_cb_run() for every reply message.
+ *
+ * @param[in] nlh
+ *   Pointer to reply header, RTM_NEWADDR type is expected.
+ * @param[in, out] arg
+ *   Callback context (struct tcf_nlcb_context) receiving the commands.
+ *
+ * @return
+ *   1 when the message is handled or skipped, negative errno value
+ *   otherwise and rte_errno is set (EINVAL or ENOMEM).
+ */
+static int
+flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg)
+{
+	struct tcf_nlcb_context *ctx = arg;
+	struct nlmsghdr *cmd;
+	struct ifaddrmsg *ifa;
+	struct nlattr *na;
+	struct nlattr *na_local = NULL;
+	struct nlattr *na_peer = NULL;
+	unsigned char family;
+
+	if (nlh->nlmsg_type != RTM_NEWADDR) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	ifa = mnl_nlmsg_get_payload(nlh);
+	family = ifa->ifa_family;
+	if (ifa->ifa_index != ctx->ifindex ||
+	    ifa->ifa_scope != RT_SCOPE_LINK ||
+	    !(ifa->ifa_flags & IFA_F_PERMANENT) ||
+	    (family != AF_INET && family != AF_INET6))
+		return 1; /* Not a cleanup candidate. */
+	mnl_attr_for_each(na, nlh, sizeof(*ifa)) {
+		switch (mnl_attr_get_type(na)) {
+		case IFA_LOCAL:
+			na_local = na;
+			break;
+		case IFA_ADDRESS:
+			na_peer = na;
+			break;
+		}
+		if (na_local && na_peer)
+			break;
+	}
+	if (!na_local || !na_peer)
+		return 1; /* Both local and peer addresses are required. */
+	/* Matching address found, reserve and fill the delete command. */
+	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) +
+					MNL_ALIGN(sizeof(struct ifaddrmsg)) +
+					(family == AF_INET6
+					? 2 * SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
+					: 2 * SZ_NLATTR_TYPE_OF(uint32_t)));
+	if (!cmd) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	cmd = mnl_nlmsg_put_header(cmd);
+	cmd->nlmsg_type = RTM_DELADDR;
+	cmd->nlmsg_flags = NLM_F_REQUEST;
+	ifa = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifa));
+	ifa->ifa_flags = IFA_F_PERMANENT;
+	ifa->ifa_scope = RT_SCOPE_LINK;
+	ifa->ifa_index = ctx->ifindex;
+	if (family == AF_INET) {
+		ifa->ifa_family = AF_INET;
+		ifa->ifa_prefixlen = 32; /* Full-length IPv4 prefix. */
+		mnl_attr_put_u32(cmd, IFA_LOCAL, mnl_attr_get_u32(na_local));
+		mnl_attr_put_u32(cmd, IFA_ADDRESS, mnl_attr_get_u32(na_peer));
+	} else {
+		ifa->ifa_family = AF_INET6;
+		ifa->ifa_prefixlen = 128; /* Full-length IPv6 prefix. */
+		mnl_attr_put(cmd, IFA_LOCAL, IPV6_ADDR_LEN,
+			mnl_attr_get_payload(na_local));
+		mnl_attr_put(cmd, IFA_ADDRESS, IPV6_ADDR_LEN,
+			mnl_attr_get_payload(na_peer));
+	}
+	return 1;
+}
+
+/**
+ * Cleanup the permanent link scope local IP addresses on outer interface.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_flow_tcf_context_create().
+ * @param[in] ifindex
+ *   Network interface index to perform cleanup, must be nonzero.
+ */
+static void
+flow_tcf_encap_local_cleanup(struct mlx5_flow_tcf_context *tcf,
+			    unsigned int ifindex)
+{
+	struct nlmsghdr *nlh;
+	struct ifaddrmsg *ifa;
+	struct tcf_nlcb_context ctx = {
+		.ifindex = ifindex,
+		.bufsize = MNL_REQUEST_SIZE,
+		.nlbuf = LIST_HEAD_INITIALIZER(),
+	};
+	int ret;
+
+	assert(ifindex);
+	/*
+	 * Dump the addresses, the callback queues RTM_DELADDR commands for
+	 * leftovers. Failures are only logged, the cleanup is best effort.
+	 */
+	nlh = mnl_nlmsg_put_header(tcf->buf);
+	nlh->nlmsg_type = RTM_GETADDR;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+	ifa = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifa));
+	ifa->ifa_family = AF_UNSPEC;
+	ifa->ifa_index = ifindex;
+	ifa->ifa_scope = RT_SCOPE_LINK;
+	ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_local_cb, &ctx);
+	if (ret)
+		DRV_LOG(WARNING, "netlink: query address list error %d", ret);
+	ret = flow_tcf_send_nlcmd(tcf, &ctx);
+	if (ret)
+		DRV_LOG(WARNING, "netlink: address delete error %d", ret);
+}
+
+/**
+ * Queue RTM_DELNEIGH commands for the permanent neigh rules found on the
+ * specified network device. This is a callback routine called by libmnl
+ * mnl_cb_run() for every message in the received reply.
+ *
+ * @param[in] nlh
+ *   Pointer to reply header, RTM_NEWNEIGH type is expected.
+ * @param[in, out] arg
+ *   Callback context (struct tcf_nlcb_context) receiving the commands.
+ *
+ * @return
+ *   1 when the message is handled or skipped, negative errno value
+ *   otherwise and rte_errno is set (EINVAL or ENOMEM).
+ */
+static int
+flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg)
+{
+	struct tcf_nlcb_context *ctx = arg;
+	struct nlmsghdr *cmd;
+	struct ndmsg *ndm;
+	struct nlattr *na;
+	struct nlattr *na_ip = NULL;
+	struct nlattr *na_mac = NULL;
+	unsigned char family;
+
+	if (nlh->nlmsg_type != RTM_NEWNEIGH) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	ndm = mnl_nlmsg_get_payload(nlh);
+	family = ndm->ndm_family;
+	if (ndm->ndm_ifindex != (int)ctx->ifindex ||
+	   !(ndm->ndm_state & NUD_PERMANENT) ||
+	   (family != AF_INET && family != AF_INET6))
+		return 1; /* Not a cleanup candidate. */
+	mnl_attr_for_each(na, nlh, sizeof(*ndm)) {
+		switch (mnl_attr_get_type(na)) {
+		case NDA_DST:
+			na_ip = na;
+			break;
+		case NDA_LLADDR:
+			na_mac = na;
+			break;
+		}
+		if (na_mac && na_ip)
+			break;
+	}
+	if (!na_mac || !na_ip)
+		return 1; /* Both IP and MAC addresses are required. */
+	/* Neigh rule with permanent attribute found. */
+	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) +
+					MNL_ALIGN(sizeof(struct ndmsg)) +
+					SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) +
+					(family == AF_INET6
+					? SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
+					: SZ_NLATTR_TYPE_OF(uint32_t)));
+	if (!cmd) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	cmd = mnl_nlmsg_put_header(cmd);
+	cmd->nlmsg_type = RTM_DELNEIGH;
+	cmd->nlmsg_flags = NLM_F_REQUEST;
+	ndm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ndm));
+	ndm->ndm_ifindex = ctx->ifindex;
+	ndm->ndm_state = NUD_PERMANENT;
+	ndm->ndm_flags = 0;
+	ndm->ndm_type = 0;
+	if (family == AF_INET) {
+		ndm->ndm_family = AF_INET;
+		mnl_attr_put_u32(cmd, NDA_DST, mnl_attr_get_u32(na_ip));
+	} else {
+		ndm->ndm_family = AF_INET6;
+		mnl_attr_put(cmd, NDA_DST, IPV6_ADDR_LEN,
+			     mnl_attr_get_payload(na_ip));
+	}
+	mnl_attr_put(cmd, NDA_LLADDR, ETHER_ADDR_LEN,
+		     mnl_attr_get_payload(na_mac));
+	return 1;
+}
+
+/**
+ * Cleanup the permanent neigh rules on outer interface.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_flow_tcf_context_create().
+ * @param[in] ifindex
+ *   Network interface index to perform cleanup, must be nonzero.
+ */
+static void
+flow_tcf_encap_neigh_cleanup(struct mlx5_flow_tcf_context *tcf,
+			    unsigned int ifindex)
+{
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
+	struct tcf_nlcb_context ctx = {
+		.ifindex = ifindex,
+		.bufsize = MNL_REQUEST_SIZE,
+		.nlbuf = LIST_HEAD_INITIALIZER(),
+	};
+	int ret;
+
+	assert(ifindex);
+	/* Seek and destroy leftovers of neigh rules, errors are only logged. */
+	nlh = mnl_nlmsg_put_header(tcf->buf);
+	nlh->nlmsg_type = RTM_GETNEIGH;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+	ndm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ndm));
+	ndm->ndm_family = AF_UNSPEC;
+	ndm->ndm_ifindex = ifindex;
+	ndm->ndm_state = NUD_PERMANENT;
+	ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_neigh_cb, &ctx);
+	if (ret)
+		DRV_LOG(WARNING, "netlink: query neigh list error %d", ret);
+	ret = flow_tcf_send_nlcmd(tcf, &ctx);
+	if (ret)
+		DRV_LOG(WARNING, "netlink: neigh delete error %d", ret);
+}
+
+/**
+ * Queue RTM_DELLINK commands for the VXLAN devices attached to the
+ * specified device. This is a callback routine called by libmnl
+ * mnl_cb_run() for every message in the received reply.
+ *
+ * @param[in] nlh
+ *   Pointer to reply header, RTM_NEWLINK type is expected.
+ * @param[in, out] arg
+ *   Callback context (struct tcf_nlcb_context) receiving the commands.
+ *
+ * @return
+ *   1 when the message is handled or skipped, negative errno value
+ *   otherwise and rte_errno is set (EINVAL or ENOMEM).
+ */
+static int
+flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg)
+{
+	struct tcf_nlcb_context *ctx = arg;
+	struct nlmsghdr *cmd;
+	struct ifinfomsg *ifm;
+	struct nlattr *na;
+	struct nlattr *na_info = NULL;
+	struct nlattr *na_vxlan = NULL;
+	bool found = false;
+	unsigned int vxindex;
+
+	if (nlh->nlmsg_type != RTM_NEWLINK) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	ifm = mnl_nlmsg_get_payload(nlh);
+	if (!ifm->ifi_index) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	mnl_attr_for_each(na, nlh, sizeof(*ifm))
+		if (mnl_attr_get_type(na) == IFLA_LINKINFO) {
+			na_info = na;
+			break;
+		}
+	if (!na_info)
+		return 1; /* No link info, not a VXLAN device. */
+	mnl_attr_for_each_nested(na, na_info) {
+		switch (mnl_attr_get_type(na)) {
+		case IFLA_INFO_KIND: /* Compare within the payload only. */
+			if (!strncmp("vxlan", mnl_attr_get_str(na),
+				     mnl_attr_get_payload_len(na)))
+				found = true;
+			break;
+		case IFLA_INFO_DATA:
+			na_vxlan = na;
+			break;
+		}
+		if (found && na_vxlan)
+			break;
+	}
+	if (!found || !na_vxlan)
+		return 1;
+	found = false;
+	mnl_attr_for_each_nested(na, na_vxlan) {
+		if (mnl_attr_get_type(na) == IFLA_VXLAN_LINK &&
+		    mnl_attr_get_u32(na) == ctx->ifindex) {
+			found = true;
+			break;
+		}
+	}
+	if (!found)
+		return 1; /* Attached to some other device. */
+	/* Attached VXLAN device found, store the command to delete. */
+	vxindex = ifm->ifi_index;
+	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) +
+					MNL_ALIGN(sizeof(struct ifinfomsg)));
+	if (!cmd) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	cmd = mnl_nlmsg_put_header(cmd);
+	cmd->nlmsg_type = RTM_DELLINK;
+	cmd->nlmsg_flags = NLM_F_REQUEST;
+	ifm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifm));
+	ifm->ifi_family = AF_UNSPEC;
+	ifm->ifi_index = vxindex;
+	return 1;
+}
+
+/**
+ * Cleanup the outer interface. Removes all found VXLAN devices
+ * attached to the specified interface index. The neigh and local IP
+ * entries are flushed by separate routines.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_flow_tcf_context_create().
+ * @param[in] ifindex
+ *   Network interface index to perform cleanup.
+ */
+static void
+flow_tcf_encap_iface_cleanup(struct mlx5_flow_tcf_context *tcf,
+			    unsigned int ifindex)
+{
+	struct tcf_nlcb_context ctx = {
+		.ifindex = ifindex,
+		.bufsize = MNL_REQUEST_SIZE,
+		.nlbuf = LIST_HEAD_INITIALIZER(),
+	};
+	struct nlmsghdr *req;
+	struct ifinfomsg *info;
+	int rc;
+
+	assert(ifindex);
+	/*
+	 * Dump all links, the callback queues a removal command for each
+	 * VXLAN device attached to the outer interface.
+	 */
+	req = mnl_nlmsg_put_header(tcf->buf);
+	req->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+	req->nlmsg_type = RTM_GETLINK;
+	info = mnl_nlmsg_put_extra_header(req, sizeof(*info));
+	info->ifi_family = AF_UNSPEC;
+	rc = flow_tcf_nl_ack(tcf, req, 0, flow_tcf_collect_vxlan_cb, &ctx);
+	if (rc)
+		DRV_LOG(WARNING, "netlink: query device list error %d", rc);
+	rc = flow_tcf_send_nlcmd(tcf, &ctx);
+	if (rc)
+		DRV_LOG(WARNING, "netlink: device delete error %d", rc);
+}
+
+
 /**
  * Create target interface index for VXLAN tunneling decapsulation.
  * In order to share the UDP port within the other interfaces the
@@ -4100,12 +4596,9 @@  static LIST_HEAD(, mlx5_flow_tcf_vtep)
 		uint16_t pcnt;
 
 		/* Not found, we should create the new attached VTEP. */
-/*
- * TODO: not implemented yet
- * flow_tcf_encap_iface_cleanup(tcf, ifouter);
- * flow_tcf_encap_local_cleanup(tcf, ifouter);
- * flow_tcf_encap_neigh_cleanup(tcf, ifouter);
- */
+		flow_tcf_encap_iface_cleanup(tcf, ifouter);
+		flow_tcf_encap_local_cleanup(tcf, ifouter);
+		flow_tcf_encap_neigh_cleanup(tcf, ifouter);
 		for (pcnt = 0; pcnt <= (MLX5_VXLAN_PORT_RANGE_MAX
 				     - MLX5_VXLAN_PORT_RANGE_MIN); pcnt++) {
 			encap_port++;