net/mlx5: eswitch-IP address UDP/TCP port rewrite

Message ID 20180925115107.12242-1-jackmin@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Headers
Series net/mlx5: eswitch-IP address UDP/TCP port rewrite |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Xiaoyu Min Sept. 25, 2018, 11:51 a.m. UTC
  Offload the following rte_flow actions by inserting accordingly
E-Switch rules via TC Flower driver

 - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
 - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
 - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
 - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
 - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
 - RTE_FLOW_ACTION_TYPE_SET_TP_DST

Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
---
This patch bases on Rahul Lakkireddy's patchs[1][2] and
Yongseok Koh's patchset [3]

[1] https://patches.dpdk.org/patch/45191/
[2] https://patches.dpdk.org/patch/45192/
[3] https://patches.dpdk.org/project/dpdk/list/?series=1474


 drivers/net/mlx5/Makefile        |   5 +
 drivers/net/mlx5/meson.build     |   2 +
 drivers/net/mlx5/mlx5_flow.h     |   6 +
 drivers/net/mlx5/mlx5_flow_tcf.c | 356 +++++++++++++++++++++++++++++++
 4 files changed, 369 insertions(+)
  

Comments

Yongseok Koh Sept. 28, 2018, 11:03 p.m. UTC | #1
On Tue, Sep 25, 2018 at 07:51:06PM +0800, Xiaoyu Min wrote:
> Offload the following rte_flow actions by inserting accordingly
> E-Switch rules via TC Flower driver
> 
>  - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
>  - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
>  - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
>  - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
>  - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
>  - RTE_FLOW_ACTION_TYPE_SET_TP_DST

Can you put an example command of testpmd for the reference?

> Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
> ---
> This patch bases on Rahul Lakkireddy's patchs[1][2] and
> Yongseok Koh's patchset [3]
> 
> [1] https://patches.dpdk.org/patch/45191/
> [2] https://patches.dpdk.org/patch/45192/
> [3] https://patches.dpdk.org/project/dpdk/list/?series=1474
> 
> 
>  drivers/net/mlx5/Makefile        |   5 +
>  drivers/net/mlx5/meson.build     |   2 +
>  drivers/net/mlx5/mlx5_flow.h     |   6 +
>  drivers/net/mlx5/mlx5_flow_tcf.c | 356 +++++++++++++++++++++++++++++++
>  4 files changed, 369 insertions(+)
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> index ca1de9f21..49b95e78e 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
>  		linux/tc_act/tc_vlan.h \
>  		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
>  		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TC_ACT_PEDIT \
> +		linux/tc_act/tc_pedit.h \
> +		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
> +		$(AUTOCONF_OUTPUT)
>  	$Q sh -- '$<' '$@' \
>  		HAVE_SUPPORTED_40000baseKR4_Full \
>  		/usr/include/linux/ethtool.h \
> diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> index fd93ac162..ef6a85101 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -182,6 +182,8 @@ if build
>  		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
>  		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
>  		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
> +		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
> +		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
>  		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
>  		'RDMA_NL_NLDEV' ],
>  		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 10d700a7f..be182a643 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -87,6 +87,12 @@
>  #define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
>  #define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
>  #define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
> +#define MLX5_ACTION_SET_IPV4_SRC (1u << 11)
> +#define MLX5_ACTION_SET_IPV4_DST (1u << 12)
> +#define MLX5_ACTION_SET_IPV6_SRC (1u << 13)
> +#define MLX5_ACTION_SET_IPV6_DST (1u << 14)
> +#define MLX5_ACTION_SET_TP_SRC (1u << 15)
> +#define MLX5_ACTION_SET_TP_DST (1u << 16)
>  
>  /* possible L3 layers protocols filtering. */
>  #define MLX5_IP_PROTOCOL_TCP 6
> diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> index 14376188e..85c92f369 100644
> --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> @@ -53,6 +53,62 @@ struct tc_vlan {
>  
>  #endif /* HAVE_TC_ACT_VLAN */
>  
> +#ifdef HAVE_TC_ACT_PEDIT
> +
> +#include <linux/tc_act/tc_pedit.h>
> +
> +#else /* HAVE_TC_ACT_VLAN */
> +enum {
> +	TCA_PEDIT_UNSPEC,
> +	TCA_PEDIT_TM,
> +	TCA_PEDIT_PARMS,
> +	TCA_PEDIT_PAD,
> +	TCA_PEDIT_PARMS_EX,
> +	TCA_PEDIT_KEYS_EX,
> +	TCA_PEDIT_KEY_EX,
> +	__TCA_PEDIT_MAX
> +};
> +
> +enum {
> +	TCA_PEDIT_KEY_EX_HTYPE = 1,
> +	TCA_PEDIT_KEY_EX_CMD = 2,
> +	__TCA_PEDIT_KEY_EX_MAX
> +};
> +
> +enum pedit_header_type {
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
> +	__PEDIT_HDR_TYPE_MAX,
> +};
> +
> +enum pedit_cmd {
> +	TCA_PEDIT_KEY_EX_CMD_SET = 0,
> +	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
> +	__PEDIT_CMD_MAX,
> +};
> +
> +struct tc_pedit_key {
> +	__u32           mask;  /* AND */
> +	__u32           val;   /*XOR */
> +	__u32           off;  /*offset */
> +	__u32           at;
> +	__u32           offmask;
> +	__u32           shift;
> +};
> +
> +struct tc_pedit_sel {
> +	tc_gen;
> +	unsigned char           nkeys;
> +	unsigned char           flags;
> +	struct tc_pedit_key     keys[0];
> +};
> +
> +#endif /* HAVE_TC_ACT_VLAN */
> +
>  /* Normally found in linux/netlink.h. */
>  #ifndef NETLINK_CAP_ACK
>  #define NETLINK_CAP_ACK 10
> @@ -153,6 +209,14 @@ struct tc_vlan {
>  #define IPV6_ADDR_LEN 16
>  #endif
>  
> +#ifndef IPV4_ADDR_LEN
> +#define IPV4_ADDR_LEN 4
> +#endif
> +
> +#ifndef TP_PORT_LEN
> +#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
> +#endif
> +
>  /** Empty masks for known item types. */
>  static const union {
>  	struct rte_flow_item_port_id port_id;
> @@ -227,6 +291,220 @@ struct flow_tcf_ptoi {
>  
>  #define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
>  
> +#define IS_MODIFY_ACTION(act_) ({typeof(act_) act = (act_); \
> +		((act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || \
> +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST  || \
> +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC  || \
> +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST  || \
> +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_SRC    || \
> +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_DST) ?    \
> +		1 : 0; })

The reason why you need this complex multi-conditional macro is that
RTE_FLOW_ACTION_TYPE_* isn't a bitmask. But, as this actions will be converted
to MLX5_ACTION_* which is a bitmask, you can use that instead. Then, this
would be enough to be:

#define MLX5_TCF_SET_ACTIONS \
	(MLX5_ACTION_SET_IPV4_SRC | ...)

And in the flow_tcf_validate() below,
	if (action_flags & MLX5_TCF_SET_ACTIONS) {
		...
	}

And please note that I'm going to rename MLX5_ACTION_* to MLX5_FLOW_ACTION_*.
Please refer to "net/mlx5: rename flow macros" in PR #885. You might need to
rebase it again.

> +#define MAX_PEDIT_KEYS (128)
> +#define SZ_PEDIT_KEY_VAL (4)
> +
> +struct pedit_key_ex {
> +	enum pedit_header_type htype;
> +	enum pedit_cmd cmd;
> +};
> +
> +struct pedit_parser {
> +	struct tc_pedit_sel sel;
> +	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
> +	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
> +};
> +
> +static int
> +flow_tcf_calc_pedit_keys(const uint64_t size)

Please add documentation by comment for every funcs you add.
Refer to the other existing ones for formality.

> +{
> +	int keys = (size / SZ_PEDIT_KEY_VAL) +
> +		((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);

Indentation.

> +	return keys;
> +}
> +
> +static void
> +flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
> +				struct pedit_parser *p_parser,
> +				uint64_t item_flags)
> +{
> +	int idx = p_parser->sel.nkeys;
> +
> +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
> +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
> +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
> +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
> +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> +	p_parser->keys[idx].off =
> +		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 2 : 0;

	assert(offsetof(struct tcp_hdr, src_port) ==
	       offsetof(struct udp_hdr, src_port));
	assert(offsetof(struct tcp_hdr, dst_port) ==
	       offsetof(struct udp_hdr, dst_port));
	p_parser->keys[idx].off =
		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
		offsetof(struct tcp_hdr, src_port) :
		offsetof(struct tcp_hdr, dst_port);

assert() is just to be informative.
And how about src first like others below?

> +	p_parser->keys[idx].mask = 0xFFFF0000;
> +	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
> +			actions->conf)->port;

Assigning 2B to 4B big-endian stroage? Doesn't look consistent with the mask
above - 0xffff0000.

> +	p_parser->sel.nkeys = (++idx);
> +}
> +
> +static void
> +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
> +				 struct pedit_parser *p_parser)
> +{
> +	int idx = p_parser->sel.nkeys;
> +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> +	int off_base =
> +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;

offsetof(struct ipv6_hdr, src_addr) :
offsetof(struct ipv6_hdr, dst_addr);

> +	const struct rte_flow_action_set_ipv6 *conf =
> +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
> +
> +	for (int i = 0; i < keys; i++, idx++) {
> +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
> +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
> +		p_parser->keys[idx].mask = ~UINT32_MAX;
> +		memcpy(&p_parser->keys[idx].val,
> +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
> +			SZ_PEDIT_KEY_VAL);
> +	}
> +	p_parser->sel.nkeys += keys;
> +}
> +
> +static void
> +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,

How about getting rte_flow_action_set_ipv4 instead of rte_flow_action?
Same comment for ipv6 and tp_port.

> +				 struct pedit_parser *p_parser)
> +{
> +	int idx = p_parser->sel.nkeys;
> +
> +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
> +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> +	p_parser->keys[idx].off =
> +		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);

offsetof(struct ipv4_hdr, src_addr) :
offsetof(struct ipv4_hdr, dst_addr);

> +	p_parser->keys[idx].mask = ~UINT32_MAX;
> +	p_parser->keys[idx].val =
> +		((const struct rte_flow_action_set_ipv4 *)
> +		 actions->conf)->ipv4_addr;
> +	p_parser->sel.nkeys = (++idx);
> +}
> +
> +static int
> +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
> +			      const struct rte_flow_action **actions,
> +			      uint64_t item_flags)
> +{
> +	struct pedit_parser p_parser;
> +
> +	memset(&p_parser, 0, sizeof(p_parser));
> +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
> +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
> +							    TCA_ACT_OPTIONS);
> +	/* all modify header actions should be in one tc-pedit action */
> +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {

It seems that you want to aggregate all the pedit actions and form a single
na attr. But what if rte_flow_action_set_* are not contiguous? E.g.

flow create ... actions set1 / set2 / port_id / set3 / end

Then, it would have two pedit na attrs. Is that okay?
Or, need to think about another way?

Same will happen in flow_tcf_get_pedit_actions_size().

> +		switch ((*actions)->type) {
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			flow_tcf_pedit_key_set_tp_port(*actions,
> +							&p_parser, item_flags);
> +			break;
> +		default:
> +			goto pedit_mnl_msg_done;
> +		}
> +	}
> +pedit_mnl_msg_done:
> +	p_parser.sel.action = TC_ACT_PIPE;
> +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
> +			sizeof(p_parser.sel) +
> +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
> +			&p_parser);
> +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
> +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
> +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
> +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
> +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
> +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
> +				 p_parser.keys_ex[i].htype);
> +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
> +				 p_parser.keys_ex[i].cmd);
> +		mnl_attr_nest_end(nl, na_pedit_key);
> +	}
> +	mnl_attr_nest_end(nl, na_pedit_keys);
> +	mnl_attr_nest_end(nl, na_act_options);
> +	(*actions)--;
> +	return 0;
> +}
> +
> +/**
> + * Calculate max memory size of one TC-pedit actions.
> + * One TC-pedit action can contain set of keys each defining
> + * a rewrite element (rte_flow action)
> + *
> + * @param[in] actions
> + *   actions specification.
> + * @param[inout] action_flags
> + *   actions flags
> + * @param[inout] size
> + *   accumulated size
> + * @return
> + *   Max memory size of one TC-pedit action
> + */
> +static int
> +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
> +				uint64_t *action_flags)
> +{
> +	int pedit_size = 0;
> +	int keys = 0;
> +	uint64_t flags = 0;
> +
> +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
> +		      SZ_NLATTR_STRZ_OF("pedit") +
> +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
> +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> +		switch ((*actions)->type) {
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> +			flags |= MLX5_ACTION_SET_IPV4_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> +			flags |= MLX5_ACTION_SET_IPV4_DST;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> +			flags |= MLX5_ACTION_SET_IPV6_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> +			flags |= MLX5_ACTION_SET_IPV6_DST;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +			/* TCP is as same as UDP */
> +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> +			flags |= MLX5_ACTION_SET_TP_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			/* TCP is as same as UDP */
> +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> +			flags |= MLX5_ACTION_SET_TP_DST;
> +			break;
> +		default:
> +			goto get_pedit_action_size_done;
> +		}
> +	}
> +get_pedit_action_size_done:
> +	/* TCA_PEDIT_PARAMS_EX */
> +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
> +			keys * sizeof(struct tc_pedit_key));

> +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
> +	pedit_size += keys *
> +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
> +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
> +	(*action_flags) |= flags;
> +	(*actions)--;
> +	return pedit_size;
> +}
> +
>  /**
>   * Retrieve mask for pattern item.
>   *
> @@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>  			of_set_vlan_vid;
>  		const struct rte_flow_action_of_set_vlan_pcp *
>  			of_set_vlan_pcp;
> +		const struct rte_flow_action_set_ipv4 *set_ipv4;
> +		const struct rte_flow_action_set_ipv6 *set_ipv6;
>  	} conf;
>  	uint32_t item_flags = 0;
>  	uint32_t action_flags = 0;
> @@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>  		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
>  			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +			action_flags |= MLX5_ACTION_SET_IPV4_DST;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +			action_flags |= MLX5_ACTION_SET_IPV6_DST;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +			action_flags |= MLX5_ACTION_SET_TP_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			action_flags |= MLX5_ACTION_SET_TP_DST;
> +			break;
>  		default:
>  			return rte_flow_error_set(error, ENOTSUP,
>  						  RTE_FLOW_ERROR_TYPE_ACTION,
>  						  actions,
>  						  "action not supported");
>  		}
> +		if (IS_MODIFY_ACTION(actions->type)) {
> +			if (!actions->conf)
> +				return rte_flow_error_set(error, ENOTSUP,
> +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> +						actions,
> +						"action configuration not set");
> +		}
> +	}
> +	if (action_flags &
> +	   (MLX5_ACTION_SET_IPV4_SRC | MLX5_ACTION_SET_IPV4_DST)) {
> +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "no ipv4 item found in"
> +						  " pattern");
> +	}
> +	if (action_flags &
> +	   (MLX5_ACTION_SET_IPV6_SRC | MLX5_ACTION_SET_IPV6_DST)) {
> +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
> +			return rte_flow_error_set(error, ENOTSUP,
> +				RTE_FLOW_ERROR_TYPE_ACTION,
> +				actions,
> +				"no ipv6 item found in pattern");
> +	}
> +	if (action_flags & (MLX5_ACTION_SET_TP_SRC | MLX5_ACTION_SET_TP_DST)) {
> +		if (!(item_flags &
> +		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
> +		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
> +			return rte_flow_error_set(error, ENOTSUP,
> +						RTE_FLOW_ERROR_TYPE_ACTION,
> +						actions,
> +						"no TCP/UDP item found in"
> +						" pattern");

Isn't this 'set' action compatible with drop action? No point of modifying
packet which will be dropped, isn't it?

>  	}
>  	return 0;
>  }
> @@ -840,6 +1172,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
>  				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
>  				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			size += flow_tcf_get_pedit_actions_size(&actions,
> +								&flags);
> +			break;
>  		default:
>  			DRV_LOG(WARNING,
>  				"unsupported action %p type %d,"
> @@ -998,6 +1339,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  	struct nlattr *na_flower_act;
>  	struct nlattr *na_vlan_id = NULL;
>  	struct nlattr *na_vlan_priority = NULL;
> +	uint64_t item_flags = 0;
>  
>  	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
>  						PTOI_TABLE_SZ_MAX(dev)));
> @@ -1189,6 +1531,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  			}
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_UDP:
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;

Let's add the same to the rest of items like flow_tcf_validate().


Thanks,
Yongseok

>  			mask.udp = flow_tcf_item_mask
>  				(items, &rte_flow_item_udp_mask,
>  				 &flow_tcf_mask_supported.udp,
> @@ -1218,6 +1561,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  			}
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_TCP:
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
>  			mask.tcp = flow_tcf_item_mask
>  				(items, &rte_flow_item_tcp_mask,
>  				 &flow_tcf_mask_supported.tcp,
> @@ -1368,6 +1712,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  					conf.of_set_vlan_pcp->vlan_pcp;
>  			}
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			na_act_index =
> +				mnl_attr_nest_start(nlh, na_act_index_cur++);
> +			flow_tcf_create_pedit_mnl_msg(nlh,
> +						      &actions, item_flags);
> +			mnl_attr_nest_end(nlh, na_act_index);
> +			break;
>  		default:
>  			return rte_flow_error_set(error, ENOTSUP,
>  						  RTE_FLOW_ERROR_TYPE_ACTION,
> -- 
> 2.17.1
>
  
Xiaoyu Min Sept. 30, 2018, 7:21 a.m. UTC | #2
On 18-09-29 07:03:33, Yongseok Koh wrote:
> On Tue, Sep 25, 2018 at 07:51:06PM +0800, Xiaoyu Min wrote:
> > Offload the following rte_flow actions by inserting accordingly
> > E-Switch rules via TC Flower driver
> > 
> >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
> >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
> >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
> >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
> >  - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
> >  - RTE_FLOW_ACTION_TYPE_SET_TP_DST
> 
> Can you put an example command of testpmd for the reference?
> 
Sure, I'll add.

> > Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
> > ---
> > This patch bases on Rahul Lakkireddy's patchs[1][2] and
> > Yongseok Koh's patchset [3]
> > 
> > [1] https://patches.dpdk.org/patch/45191/
> > [2] https://patches.dpdk.org/patch/45192/
> > [3] https://patches.dpdk.org/project/dpdk/list/?series=1474
> > 
> > 
> >  drivers/net/mlx5/Makefile        |   5 +
> >  drivers/net/mlx5/meson.build     |   2 +
> >  drivers/net/mlx5/mlx5_flow.h     |   6 +
> >  drivers/net/mlx5/mlx5_flow_tcf.c | 356 +++++++++++++++++++++++++++++++
> >  4 files changed, 369 insertions(+)
> > 
> > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> > index ca1de9f21..49b95e78e 100644
> > --- a/drivers/net/mlx5/Makefile
> > +++ b/drivers/net/mlx5/Makefile
> > @@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
> >  		linux/tc_act/tc_vlan.h \
> >  		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
> >  		$(AUTOCONF_OUTPUT)
> > +	$Q sh -- '$<' '$@' \
> > +		HAVE_TC_ACT_PEDIT \
> > +		linux/tc_act/tc_pedit.h \
> > +		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
> > +		$(AUTOCONF_OUTPUT)
> >  	$Q sh -- '$<' '$@' \
> >  		HAVE_SUPPORTED_40000baseKR4_Full \
> >  		/usr/include/linux/ethtool.h \
> > diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> > index fd93ac162..ef6a85101 100644
> > --- a/drivers/net/mlx5/meson.build
> > +++ b/drivers/net/mlx5/meson.build
> > @@ -182,6 +182,8 @@ if build
> >  		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
> >  		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
> >  		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
> > +		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
> > +		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
> >  		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
> >  		'RDMA_NL_NLDEV' ],
> >  		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
> > diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> > index 10d700a7f..be182a643 100644
> > --- a/drivers/net/mlx5/mlx5_flow.h
> > +++ b/drivers/net/mlx5/mlx5_flow.h
> > @@ -87,6 +87,12 @@
> >  #define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
> >  #define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
> >  #define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
> > +#define MLX5_ACTION_SET_IPV4_SRC (1u << 11)
> > +#define MLX5_ACTION_SET_IPV4_DST (1u << 12)
> > +#define MLX5_ACTION_SET_IPV6_SRC (1u << 13)
> > +#define MLX5_ACTION_SET_IPV6_DST (1u << 14)
> > +#define MLX5_ACTION_SET_TP_SRC (1u << 15)
> > +#define MLX5_ACTION_SET_TP_DST (1u << 16)
> >  
> >  /* possible L3 layers protocols filtering. */
> >  #define MLX5_IP_PROTOCOL_TCP 6
> > diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> > index 14376188e..85c92f369 100644
> > --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> > +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> > @@ -53,6 +53,62 @@ struct tc_vlan {
> >  
> >  #endif /* HAVE_TC_ACT_VLAN */
> >  
> > +#ifdef HAVE_TC_ACT_PEDIT
> > +
> > +#include <linux/tc_act/tc_pedit.h>
> > +
> > +#else /* HAVE_TC_ACT_VLAN */
> > +enum {
> > +	TCA_PEDIT_UNSPEC,
> > +	TCA_PEDIT_TM,
> > +	TCA_PEDIT_PARMS,
> > +	TCA_PEDIT_PAD,
> > +	TCA_PEDIT_PARMS_EX,
> > +	TCA_PEDIT_KEYS_EX,
> > +	TCA_PEDIT_KEY_EX,
> > +	__TCA_PEDIT_MAX
> > +};
> > +
> > +enum {
> > +	TCA_PEDIT_KEY_EX_HTYPE = 1,
> > +	TCA_PEDIT_KEY_EX_CMD = 2,
> > +	__TCA_PEDIT_KEY_EX_MAX
> > +};
> > +
> > +enum pedit_header_type {
> > +	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
> > +	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
> > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
> > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
> > +	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
> > +	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
> > +	__PEDIT_HDR_TYPE_MAX,
> > +};
> > +
> > +enum pedit_cmd {
> > +	TCA_PEDIT_KEY_EX_CMD_SET = 0,
> > +	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
> > +	__PEDIT_CMD_MAX,
> > +};
> > +
> > +struct tc_pedit_key {
> > +	__u32           mask;  /* AND */
> > +	__u32           val;   /*XOR */
> > +	__u32           off;  /*offset */
> > +	__u32           at;
> > +	__u32           offmask;
> > +	__u32           shift;
> > +};
> > +
> > +struct tc_pedit_sel {
> > +	tc_gen;
> > +	unsigned char           nkeys;
> > +	unsigned char           flags;
> > +	struct tc_pedit_key     keys[0];
> > +};
> > +
> > +#endif /* HAVE_TC_ACT_VLAN */
> > +
> >  /* Normally found in linux/netlink.h. */
> >  #ifndef NETLINK_CAP_ACK
> >  #define NETLINK_CAP_ACK 10
> > @@ -153,6 +209,14 @@ struct tc_vlan {
> >  #define IPV6_ADDR_LEN 16
> >  #endif
> >  
> > +#ifndef IPV4_ADDR_LEN
> > +#define IPV4_ADDR_LEN 4
> > +#endif
> > +
> > +#ifndef TP_PORT_LEN
> > +#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
> > +#endif
> > +
> >  /** Empty masks for known item types. */
> >  static const union {
> >  	struct rte_flow_item_port_id port_id;
> > @@ -227,6 +291,220 @@ struct flow_tcf_ptoi {
> >  
> >  #define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
> >  
> > +#define IS_MODIFY_ACTION(act_) ({typeof(act_) act = (act_); \
> > +		((act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || \
> > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST  || \
> > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC  || \
> > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST  || \
> > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_SRC    || \
> > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_DST) ?    \
> > +		1 : 0; })
> 
> The reason why you need this complex multi-conditional macro is that
> RTE_FLOW_ACTION_TYPE_* isn't a bitmask. But, as this actions will be converted
> to MLX5_ACTION_* which is a bitmask, you can use that instead. Then, this
> would be enough to be:
> 
> #define MLX5_TCF_SET_ACTIONS \
> 	(MLX5_ACTION_SET_IPV4_SRC | ...)
> 
> And in the flow_tcf_validate() below,
> 	if (action_flags & MLX5_TCF_SET_ACTIONS) {
> 		...
> 	}
> 
Well, I did consider using bitmask but action_flags is an _accumulated_ variable,
records all the actions parsed so far.
But, here, I need to know what is the _current_ action and whether it belongs to modify
actions. If using bitmask, Looks like a new variable (i.e current_action) needed (?)

case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
    current_action = MLX5_ACTION_SET_IPV4_SRC;
    .....

if (current_action & MLX5_TCF_SET_ACTIONS) ...
 ...

action_flags |= current_action;

I feel more code change needed or you think it's worth?

> And please note that I'm going to rename MLX5_ACTION_* to MLX5_FLOW_ACTION_*.
> Please refer to "net/mlx5: rename flow macros" in PR #885. You might need to
> rebase it again.
> 
Sure, I'll rebase on it

> > +#define MAX_PEDIT_KEYS (128)
> > +#define SZ_PEDIT_KEY_VAL (4)
> > +
> > +struct pedit_key_ex {
> > +	enum pedit_header_type htype;
> > +	enum pedit_cmd cmd;
> > +};
> > +
> > +struct pedit_parser {
> > +	struct tc_pedit_sel sel;
> > +	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
> > +	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
> > +};
> > +
> > +static int
> > +flow_tcf_calc_pedit_keys(const uint64_t size)
> 
> Please add documentation by comment for every funcs you add.
> Refer to the other existing ones for formality.
> 
Sure!
> > +{
> > +	int keys = (size / SZ_PEDIT_KEY_VAL) +
> > +		((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);
> 
> Indentation.
> 
Will fix it
> > +	return keys;
> > +}
> > +
> > +static void
> > +flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
> > +				struct pedit_parser *p_parser,
> > +				uint64_t item_flags)
> > +{
> > +	int idx = p_parser->sel.nkeys;
> > +
> > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
> > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
> > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
> > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
> > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > +	p_parser->keys[idx].off =
> > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 2 : 0;
> 
> 	assert(offsetof(struct tcp_hdr, src_port) ==
> 	       offsetof(struct udp_hdr, src_port));
> 	assert(offsetof(struct tcp_hdr, dst_port) ==
> 	       offsetof(struct udp_hdr, dst_port));
> 	p_parser->keys[idx].off =
> 		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
> 		offsetof(struct tcp_hdr, src_port) :
> 		offsetof(struct tcp_hdr, dst_port);
> 
> assert() is just to be informative.
> And how about src first like others below?
> 
Yes, I will update above.

> > +	p_parser->keys[idx].mask = 0xFFFF0000;
> > +	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
> > +			actions->conf)->port;
> 
> Assigning 2B to 4B big-endian stroage? Doesn't look consistent with the mask
> above - 0xffff0000.
> 
So it should be as following ?
p_parser->keys[idx].val = (__u32)((const struct rte_flow_action_set_tp *))
                actions->conf)->port;

> > +	p_parser->sel.nkeys = (++idx);
> > +}
> > +
> > +static void
> > +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
> > +				 struct pedit_parser *p_parser)
> > +{
> > +	int idx = p_parser->sel.nkeys;
> > +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > +	int off_base =
> > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;
> 
> offsetof(struct ipv6_hdr, src_addr) :
> offsetof(struct ipv6_hdr, dst_addr);
> 
Got it!
> > +	const struct rte_flow_action_set_ipv6 *conf =
> > +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
> > +
> > +	for (int i = 0; i < keys; i++, idx++) {
> > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
> > +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
> > +		p_parser->keys[idx].mask = ~UINT32_MAX;
> > +		memcpy(&p_parser->keys[idx].val,
> > +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
> > +			SZ_PEDIT_KEY_VAL);
> > +	}
> > +	p_parser->sel.nkeys += keys;
> > +}
> > +
> > +static void
> > +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
> 
> How about getting rte_flow_action_set_ipv4 instead of rte_flow_action?
> Same comment for ipv6 and tp_port.
> 
What's the benefit by using rte_flow_action_set_ipv4 and how I know it's
for src or dst address ?
> > +				 struct pedit_parser *p_parser)
> > +{
> > +	int idx = p_parser->sel.nkeys;
> > +
> > +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
> > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > +	p_parser->keys[idx].off =
> > +		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);
> 
> offsetof(struct ipv4_hdr, src_addr) :
> offsetof(struct ipv4_hdr, dst_addr);
> 
Got it!
> > +	p_parser->keys[idx].mask = ~UINT32_MAX;
> > +	p_parser->keys[idx].val =
> > +		((const struct rte_flow_action_set_ipv4 *)
> > +		 actions->conf)->ipv4_addr;
> > +	p_parser->sel.nkeys = (++idx);
> > +}
> > +
> > +static int
> > +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
> > +			      const struct rte_flow_action **actions,
> > +			      uint64_t item_flags)
> > +{
> > +	struct pedit_parser p_parser;
> > +
> > +	memset(&p_parser, 0, sizeof(p_parser));
> > +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
> > +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
> > +							    TCA_ACT_OPTIONS);
> > +	/* all modify header actions should be in one tc-pedit action */
> > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> 
> It seems that you want to aggregate all the pedit actions and form a single
> na attr. But what if rte_flow_action_set_* are not contiguous? E.g.
> 
> flow create ... actions set1 / set2 / port_id / set3 / end
> 
> Then, it would have two pedit na attrs. Is that okay?
> Or, need to think about another way?
> 
> Same will happen in flow_tcf_get_pedit_actions_size().
> 
It's OK if we have more than one pedit na attrs.
_BUT_ only last pedit take effect based on my experiment

> > +		switch ((*actions)->type) {
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > +			flow_tcf_pedit_key_set_tp_port(*actions,
> > +							&p_parser, item_flags);
> > +			break;
> > +		default:
> > +			goto pedit_mnl_msg_done;
> > +		}
> > +	}
> > +pedit_mnl_msg_done:
> > +	p_parser.sel.action = TC_ACT_PIPE;
> > +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
> > +			sizeof(p_parser.sel) +
> > +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
> > +			&p_parser);
> > +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
> > +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
> > +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
> > +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
> > +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
> > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
> > +				 p_parser.keys_ex[i].htype);
> > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
> > +				 p_parser.keys_ex[i].cmd);
> > +		mnl_attr_nest_end(nl, na_pedit_key);
> > +	}
> > +	mnl_attr_nest_end(nl, na_pedit_keys);
> > +	mnl_attr_nest_end(nl, na_act_options);
> > +	(*actions)--;
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Calculate max memory size of one TC-pedit actions.
> > + * One TC-pedit action can contain set of keys each defining
> > + * a rewrite element (rte_flow action)
> > + *
> > + * @param[in] actions
> > + *   actions specification.
> > + * @param[inout] action_flags
> > + *   actions flags
> > + * @param[inout] size
> > + *   accumulated size
> > + * @return
> > + *   Max memory size of one TC-pedit action
> > + */
> > +static int
> > +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
> > +				uint64_t *action_flags)
> > +{
> > +	int pedit_size = 0;
> > +	int keys = 0;
> > +	uint64_t flags = 0;
> > +
> > +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
> > +		      SZ_NLATTR_STRZ_OF("pedit") +
> > +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
> > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > +		switch ((*actions)->type) {
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > +			flags |= MLX5_ACTION_SET_IPV4_SRC;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > +			flags |= MLX5_ACTION_SET_IPV4_DST;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > +			flags |= MLX5_ACTION_SET_IPV6_SRC;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > +			flags |= MLX5_ACTION_SET_IPV6_DST;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > +			/* TCP is as same as UDP */
> > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > +			flags |= MLX5_ACTION_SET_TP_SRC;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > +			/* TCP is as same as UDP */
> > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > +			flags |= MLX5_ACTION_SET_TP_DST;
> > +			break;
> > +		default:
> > +			goto get_pedit_action_size_done;
> > +		}
> > +	}
> > +get_pedit_action_size_done:
> > +	/* TCA_PEDIT_PARAMS_EX */
> > +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
> > +			keys * sizeof(struct tc_pedit_key));
> 
> > +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
> > +	pedit_size += keys *
> > +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
> > +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
> > +	(*action_flags) |= flags;
> > +	(*actions)--;
> > +	return pedit_size;
> > +}
> > +
> >  /**
> >   * Retrieve mask for pattern item.
> >   *
> > @@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> >  			of_set_vlan_vid;
> >  		const struct rte_flow_action_of_set_vlan_pcp *
> >  			of_set_vlan_pcp;
> > +		const struct rte_flow_action_set_ipv4 *set_ipv4;
> > +		const struct rte_flow_action_set_ipv6 *set_ipv6;
> >  	} conf;
> >  	uint32_t item_flags = 0;
> >  	uint32_t action_flags = 0;
> > @@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> >  		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
> >  			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
> >  			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > +			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > +			action_flags |= MLX5_ACTION_SET_IPV4_DST;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > +			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > +			action_flags |= MLX5_ACTION_SET_IPV6_DST;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > +			action_flags |= MLX5_ACTION_SET_TP_SRC;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > +			action_flags |= MLX5_ACTION_SET_TP_DST;
> > +			break;
> >  		default:
> >  			return rte_flow_error_set(error, ENOTSUP,
> >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> >  						  actions,
> >  						  "action not supported");
> >  		}
> > +		if (IS_MODIFY_ACTION(actions->type)) {
> > +			if (!actions->conf)
> > +				return rte_flow_error_set(error, ENOTSUP,
> > +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> > +						actions,
> > +						"action configuration not set");
> > +		}
> > +	}
> > +	if (action_flags &
> > +	   (MLX5_ACTION_SET_IPV4_SRC | MLX5_ACTION_SET_IPV4_DST)) {
> > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +						  RTE_FLOW_ERROR_TYPE_ACTION,
> > +						  actions,
> > +						  "no ipv4 item found in"
> > +						  " pattern");
> > +	}
> > +	if (action_flags &
> > +	   (MLX5_ACTION_SET_IPV6_SRC | MLX5_ACTION_SET_IPV6_DST)) {
> > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +				RTE_FLOW_ERROR_TYPE_ACTION,
> > +				actions,
> > +				"no ipv6 item found in pattern");
> > +	}
> > +	if (action_flags & (MLX5_ACTION_SET_TP_SRC | MLX5_ACTION_SET_TP_DST)) {
> > +		if (!(item_flags &
> > +		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
> > +		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +						RTE_FLOW_ERROR_TYPE_ACTION,
> > +						actions,
> > +						"no TCP/UDP item found in"
> > +						" pattern");
> 
> Isn't this 'set' action compatible with drop action? No point of modifying
> packet which will be dropped, isn't it?
> 
Yes, you are absolutely right :-)

> >  	}
> >  	return 0;
> >  }
> > @@ -840,6 +1172,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
> >  				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
> >  				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
> >  			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > +			size += flow_tcf_get_pedit_actions_size(&actions,
> > +								&flags);
> > +			break;
> >  		default:
> >  			DRV_LOG(WARNING,
> >  				"unsupported action %p type %d,"
> > @@ -998,6 +1339,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> >  	struct nlattr *na_flower_act;
> >  	struct nlattr *na_vlan_id = NULL;
> >  	struct nlattr *na_vlan_priority = NULL;
> > +	uint64_t item_flags = 0;
> >  
> >  	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
> >  						PTOI_TABLE_SZ_MAX(dev)));
> > @@ -1189,6 +1531,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> >  			}
> >  			break;
> >  		case RTE_FLOW_ITEM_TYPE_UDP:
> > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> 
> Let's add the same to the rest of items like flow_tcf_validate().
> 
OK!
> 
> Thanks,
> Yongseok
> 
> >  			mask.udp = flow_tcf_item_mask
> >  				(items, &rte_flow_item_udp_mask,
> >  				 &flow_tcf_mask_supported.udp,
> > @@ -1218,6 +1561,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> >  			}
> >  			break;
> >  		case RTE_FLOW_ITEM_TYPE_TCP:
> > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
> >  			mask.tcp = flow_tcf_item_mask
> >  				(items, &rte_flow_item_tcp_mask,
> >  				 &flow_tcf_mask_supported.tcp,
> > @@ -1368,6 +1712,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> >  					conf.of_set_vlan_pcp->vlan_pcp;
> >  			}
> >  			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > +			na_act_index =
> > +				mnl_attr_nest_start(nlh, na_act_index_cur++);
> > +			flow_tcf_create_pedit_mnl_msg(nlh,
> > +						      &actions, item_flags);
> > +			mnl_attr_nest_end(nlh, na_act_index);
> > +			break;
> >  		default:
> >  			return rte_flow_error_set(error, ENOTSUP,
> >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > -- 
> > 2.17.1
> >
  
Yongseok Koh Oct. 1, 2018, 8:19 p.m. UTC | #3
On Sun, Sep 30, 2018 at 03:21:04PM +0800, Xiaoyu Min wrote:
> On 18-09-29 07:03:33, Yongseok Koh wrote:
> > On Tue, Sep 25, 2018 at 07:51:06PM +0800, Xiaoyu Min wrote:
> > > Offload the following rte_flow actions by inserting accordingly
> > > E-Switch rules via TC Flower driver
> > > 
> > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
> > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
> > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
> > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
> > >  - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
> > >  - RTE_FLOW_ACTION_TYPE_SET_TP_DST
> > 
> > Can you put an example command of testpmd for the reference?
> > 
> Sure, I'll add.
> 
> > > Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
> > > ---
> > > This patch bases on Rahul Lakkireddy's patchs[1][2] and
> > > Yongseok Koh's patchset [3]
> > > 
> > > [1] https://patches.dpdk.org/patch/45191/
> > > [2] https://patches.dpdk.org/patch/45192/
> > > [3] https://patches.dpdk.org/project/dpdk/list/?series=1474
> > > 
> > > 
> > >  drivers/net/mlx5/Makefile        |   5 +
> > >  drivers/net/mlx5/meson.build     |   2 +
> > >  drivers/net/mlx5/mlx5_flow.h     |   6 +
> > >  drivers/net/mlx5/mlx5_flow_tcf.c | 356 +++++++++++++++++++++++++++++++
> > >  4 files changed, 369 insertions(+)
> > > 
> > > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> > > index ca1de9f21..49b95e78e 100644
> > > --- a/drivers/net/mlx5/Makefile
> > > +++ b/drivers/net/mlx5/Makefile
> > > @@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
> > >  		linux/tc_act/tc_vlan.h \
> > >  		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
> > >  		$(AUTOCONF_OUTPUT)
> > > +	$Q sh -- '$<' '$@' \
> > > +		HAVE_TC_ACT_PEDIT \
> > > +		linux/tc_act/tc_pedit.h \
> > > +		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
> > > +		$(AUTOCONF_OUTPUT)
> > >  	$Q sh -- '$<' '$@' \
> > >  		HAVE_SUPPORTED_40000baseKR4_Full \
> > >  		/usr/include/linux/ethtool.h \
> > > diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> > > index fd93ac162..ef6a85101 100644
> > > --- a/drivers/net/mlx5/meson.build
> > > +++ b/drivers/net/mlx5/meson.build
> > > @@ -182,6 +182,8 @@ if build
> > >  		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
> > >  		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
> > >  		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
> > > +		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
> > > +		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
> > >  		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
> > >  		'RDMA_NL_NLDEV' ],
> > >  		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
> > > diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> > > index 10d700a7f..be182a643 100644
> > > --- a/drivers/net/mlx5/mlx5_flow.h
> > > +++ b/drivers/net/mlx5/mlx5_flow.h
> > > @@ -87,6 +87,12 @@
> > >  #define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
> > >  #define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
> > >  #define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
> > > +#define MLX5_ACTION_SET_IPV4_SRC (1u << 11)
> > > +#define MLX5_ACTION_SET_IPV4_DST (1u << 12)
> > > +#define MLX5_ACTION_SET_IPV6_SRC (1u << 13)
> > > +#define MLX5_ACTION_SET_IPV6_DST (1u << 14)
> > > +#define MLX5_ACTION_SET_TP_SRC (1u << 15)
> > > +#define MLX5_ACTION_SET_TP_DST (1u << 16)
> > >  
> > >  /* possible L3 layers protocols filtering. */
> > >  #define MLX5_IP_PROTOCOL_TCP 6
> > > diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > index 14376188e..85c92f369 100644
> > > --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> > > +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > @@ -53,6 +53,62 @@ struct tc_vlan {
> > >  
> > >  #endif /* HAVE_TC_ACT_VLAN */
> > >  
> > > +#ifdef HAVE_TC_ACT_PEDIT
> > > +
> > > +#include <linux/tc_act/tc_pedit.h>
> > > +
> > > +#else /* HAVE_TC_ACT_VLAN */
> > > +enum {
> > > +	TCA_PEDIT_UNSPEC,
> > > +	TCA_PEDIT_TM,
> > > +	TCA_PEDIT_PARMS,
> > > +	TCA_PEDIT_PAD,
> > > +	TCA_PEDIT_PARMS_EX,
> > > +	TCA_PEDIT_KEYS_EX,
> > > +	TCA_PEDIT_KEY_EX,
> > > +	__TCA_PEDIT_MAX
> > > +};
> > > +
> > > +enum {
> > > +	TCA_PEDIT_KEY_EX_HTYPE = 1,
> > > +	TCA_PEDIT_KEY_EX_CMD = 2,
> > > +	__TCA_PEDIT_KEY_EX_MAX
> > > +};
> > > +
> > > +enum pedit_header_type {
> > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
> > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
> > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
> > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
> > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
> > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
> > > +	__PEDIT_HDR_TYPE_MAX,
> > > +};
> > > +
> > > +enum pedit_cmd {
> > > +	TCA_PEDIT_KEY_EX_CMD_SET = 0,
> > > +	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
> > > +	__PEDIT_CMD_MAX,
> > > +};
> > > +
> > > +struct tc_pedit_key {
> > > +	__u32           mask;  /* AND */
> > > +	__u32           val;   /*XOR */
> > > +	__u32           off;  /*offset */
> > > +	__u32           at;
> > > +	__u32           offmask;
> > > +	__u32           shift;
> > > +};
> > > +
> > > +struct tc_pedit_sel {
> > > +	tc_gen;
> > > +	unsigned char           nkeys;
> > > +	unsigned char           flags;
> > > +	struct tc_pedit_key     keys[0];
> > > +};
> > > +
> > > +#endif /* HAVE_TC_ACT_VLAN */
> > > +
> > >  /* Normally found in linux/netlink.h. */
> > >  #ifndef NETLINK_CAP_ACK
> > >  #define NETLINK_CAP_ACK 10
> > > @@ -153,6 +209,14 @@ struct tc_vlan {
> > >  #define IPV6_ADDR_LEN 16
> > >  #endif
> > >  
> > > +#ifndef IPV4_ADDR_LEN
> > > +#define IPV4_ADDR_LEN 4
> > > +#endif
> > > +
> > > +#ifndef TP_PORT_LEN
> > > +#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
> > > +#endif
> > > +
> > >  /** Empty masks for known item types. */
> > >  static const union {
> > >  	struct rte_flow_item_port_id port_id;
> > > @@ -227,6 +291,220 @@ struct flow_tcf_ptoi {
> > >  
> > >  #define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
> > >  
> > > +#define IS_MODIFY_ACTION(act_) ({typeof(act_) act = (act_); \
> > > +		((act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || \
> > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST  || \
> > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC  || \
> > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST  || \
> > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_SRC    || \
> > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_DST) ?    \
> > > +		1 : 0; })
> > 
> > The reason why you need this complex multi-conditional macro is that
> > RTE_FLOW_ACTION_TYPE_* isn't a bitmask. But, as this actions will be converted
> > to MLX5_ACTION_* which is a bitmask, you can use that instead. Then, this
> > would be enough to be:
> > 
> > #define MLX5_TCF_SET_ACTIONS \
> > 	(MLX5_ACTION_SET_IPV4_SRC | ...)
> > 
> > And in the flow_tcf_validate() below,
> > 	if (action_flags & MLX5_TCF_SET_ACTIONS) {
> > 		...
> > 	}
> > 
> Well, I did consider using bitmask but action_flags is an _accumulated_ variable,
> records all the actions parsed so far.
> But, here, I need to know what is the _current_ action and whether it belongs to modify
> actions. If using bitmask, Looks like a new variable (i.e current_action) needed (?)
> 
> case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
>     current_action = MLX5_ACTION_SET_IPV4_SRC;
>     .....
> 
> if (current_action & MLX5_TCF_SET_ACTIONS) ...
>  ...
> 
> action_flags |= current_action;
> 
> I feel more code change needed or you think it's worth?

Understood what you meant.
Please see my comment below in the flow_tcf_validate().

> > And please note that I'm going to rename MLX5_ACTION_* to MLX5_FLOW_ACTION_*.
> > Please refer to "net/mlx5: rename flow macros" in PR #885. You might need to
> > rebase it again.
> > 
> Sure, I'll rebase on it
> 
> > > +#define MAX_PEDIT_KEYS (128)
> > > +#define SZ_PEDIT_KEY_VAL (4)
> > > +
> > > +struct pedit_key_ex {
> > > +	enum pedit_header_type htype;
> > > +	enum pedit_cmd cmd;
> > > +};
> > > +
> > > +struct pedit_parser {
> > > +	struct tc_pedit_sel sel;
> > > +	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
> > > +	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
> > > +};
> > > +
> > > +static int
> > > +flow_tcf_calc_pedit_keys(const uint64_t size)
> > 
> > Please add documentation by comment for every funcs you add.
> > Refer to the other existing ones for formality.
> > 
> Sure!
> > > +{
> > > +	int keys = (size / SZ_PEDIT_KEY_VAL) +
> > > +		((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);
> > 
> > Indentation.
> > 
> Will fix it
> > > +	return keys;
> > > +}
> > > +
> > > +static void
> > > +flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
> > > +				struct pedit_parser *p_parser,
> > > +				uint64_t item_flags)
> > > +{
> > > +	int idx = p_parser->sel.nkeys;
> > > +
> > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
> > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
> > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
> > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
> > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > +	p_parser->keys[idx].off =
> > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 2 : 0;
> > 
> > 	assert(offsetof(struct tcp_hdr, src_port) ==
> > 	       offsetof(struct udp_hdr, src_port));
> > 	assert(offsetof(struct tcp_hdr, dst_port) ==
> > 	       offsetof(struct udp_hdr, dst_port));
> > 	p_parser->keys[idx].off =
> > 		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
> > 		offsetof(struct tcp_hdr, src_port) :
> > 		offsetof(struct tcp_hdr, dst_port);
> > 
> > assert() is just to be informative.
> > And how about src first like others below?
> > 
> Yes, I will update above.
> 
> > > +	p_parser->keys[idx].mask = 0xFFFF0000;
> > > +	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
> > > +			actions->conf)->port;
> > 
> > Assigning 2B to 4B big-endian stroage? Doesn't look consistent with the mask
> > above - 0xffff0000.
> > 
> So it should be as following ?
> p_parser->keys[idx].val = (__u32)((const struct rte_flow_action_set_tp *))
>                 actions->conf)->port;

You can figure it out by actual tests but I think the following would be right.
	p_parser->keys[idx].val =
		rte_cpu_to_be_32(((const struct rte_flow_action_set_tp *)
				  actions->conf)->port);

Please verify it by testing anyway.

> > > +	p_parser->sel.nkeys = (++idx);
> > > +}
> > > +
> > > +static void
> > > +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
> > > +				 struct pedit_parser *p_parser)
> > > +{
> > > +	int idx = p_parser->sel.nkeys;
> > > +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > +	int off_base =
> > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;
> > 
> > offsetof(struct ipv6_hdr, src_addr) :
> > offsetof(struct ipv6_hdr, dst_addr);
> > 
> Got it!
> > > +	const struct rte_flow_action_set_ipv6 *conf =
> > > +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
> > > +
> > > +	for (int i = 0; i < keys; i++, idx++) {
> > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
> > > +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
> > > +		p_parser->keys[idx].mask = ~UINT32_MAX;
> > > +		memcpy(&p_parser->keys[idx].val,
> > > +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
> > > +			SZ_PEDIT_KEY_VAL);
> > > +	}
> > > +	p_parser->sel.nkeys += keys;
> > > +}
> > > +
> > > +static void
> > > +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
> > 
> > How about getting rte_flow_action_set_ipv4 instead of rte_flow_action?
> > Same comment for ipv6 and tp_port.
> > 
> What's the benefit by using rte_flow_action_set_ipv4 and how I know it's
> for src or dst address ?

Just to make the function neat but I overlooked that you still need
actions->type. Please disregard my previous comment.

> > > +				 struct pedit_parser *p_parser)
> > > +{
> > > +	int idx = p_parser->sel.nkeys;
> > > +
> > > +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
> > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > +	p_parser->keys[idx].off =
> > > +		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);
> > 
> > offsetof(struct ipv4_hdr, src_addr) :
> > offsetof(struct ipv4_hdr, dst_addr);
> > 
> Got it!
> > > +	p_parser->keys[idx].mask = ~UINT32_MAX;
> > > +	p_parser->keys[idx].val =
> > > +		((const struct rte_flow_action_set_ipv4 *)
> > > +		 actions->conf)->ipv4_addr;
> > > +	p_parser->sel.nkeys = (++idx);
> > > +}
> > > +
> > > +static int
> > > +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
> > > +			      const struct rte_flow_action **actions,
> > > +			      uint64_t item_flags)
> > > +{
> > > +	struct pedit_parser p_parser;
> > > +
> > > +	memset(&p_parser, 0, sizeof(p_parser));
> > > +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
> > > +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
> > > +							    TCA_ACT_OPTIONS);
> > > +	/* all modify header actions should be in one tc-pedit action */
> > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > 
> > It seems that you want to aggregate all the pedit actions and form a single
> > na attr. But what if rte_flow_action_set_* are not contiguous? E.g.
> > 
> > flow create ... actions set1 / set2 / port_id / set3 / end
> > 
> > Then, it would have two pedit na attrs. Is that okay?
> > Or, need to think about another way?
> > 
> > Same will happen in flow_tcf_get_pedit_actions_size().
> > 
> It's OK if we have more than one pedit na attrs.
> _BUT_ only last pedit take effect based on my experiment

Then, shouldn't we give some warning to user in validation? So that user can
have right expectation and reorder the actions as their intention like:
	flow create ... actions set1 / set2 / set3 / port_id / end

Otherwise set1 and set2 will be lost according to your comment.

Or, how about making PMD do the right thing. I mean, even if the set actions are
scattered, PMD can collect it and apply in a single na attr?

> > > +		switch ((*actions)->type) {
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > +			flow_tcf_pedit_key_set_tp_port(*actions,
> > > +							&p_parser, item_flags);
> > > +			break;
> > > +		default:
> > > +			goto pedit_mnl_msg_done;
> > > +		}
> > > +	}
> > > +pedit_mnl_msg_done:
> > > +	p_parser.sel.action = TC_ACT_PIPE;
> > > +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
> > > +			sizeof(p_parser.sel) +
> > > +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
> > > +			&p_parser);
> > > +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
> > > +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
> > > +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
> > > +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
> > > +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
> > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
> > > +				 p_parser.keys_ex[i].htype);
> > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
> > > +				 p_parser.keys_ex[i].cmd);
> > > +		mnl_attr_nest_end(nl, na_pedit_key);
> > > +	}
> > > +	mnl_attr_nest_end(nl, na_pedit_keys);
> > > +	mnl_attr_nest_end(nl, na_act_options);
> > > +	(*actions)--;
> > > +	return 0;
> > > +}
> > > +
> > > +/**
> > > + * Calculate max memory size of one TC-pedit actions.
> > > + * One TC-pedit action can contain set of keys each defining
> > > + * a rewrite element (rte_flow action)
> > > + *
> > > + * @param[in] actions
> > > + *   actions specification.
> > > + * @param[inout] action_flags
> > > + *   actions flags
> > > + * @param[inout] size
> > > + *   accumulated size
> > > + * @return
> > > + *   Max memory size of one TC-pedit action
> > > + */
> > > +static int
> > > +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
> > > +				uint64_t *action_flags)
> > > +{
> > > +	int pedit_size = 0;
> > > +	int keys = 0;
> > > +	uint64_t flags = 0;
> > > +
> > > +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
> > > +		      SZ_NLATTR_STRZ_OF("pedit") +
> > > +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
> > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > > +		switch ((*actions)->type) {
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > +			flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > +			flags |= MLX5_ACTION_SET_IPV4_DST;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > +			flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > +			flags |= MLX5_ACTION_SET_IPV6_DST;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > +			/* TCP is as same as UDP */
> > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > +			flags |= MLX5_ACTION_SET_TP_SRC;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > +			/* TCP is as same as UDP */
> > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > +			flags |= MLX5_ACTION_SET_TP_DST;
> > > +			break;
> > > +		default:
> > > +			goto get_pedit_action_size_done;
> > > +		}
> > > +	}
> > > +get_pedit_action_size_done:
> > > +	/* TCA_PEDIT_PARAMS_EX */
> > > +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
> > > +			keys * sizeof(struct tc_pedit_key));
> > 
> > > +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
> > > +	pedit_size += keys *
> > > +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
> > > +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
> > > +	(*action_flags) |= flags;
> > > +	(*actions)--;
> > > +	return pedit_size;
> > > +}
> > > +
> > >  /**
> > >   * Retrieve mask for pattern item.
> > >   *
> > > @@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > >  			of_set_vlan_vid;
> > >  		const struct rte_flow_action_of_set_vlan_pcp *
> > >  			of_set_vlan_pcp;
> > > +		const struct rte_flow_action_set_ipv4 *set_ipv4;
> > > +		const struct rte_flow_action_set_ipv6 *set_ipv6;
> > >  	} conf;
> > >  	uint32_t item_flags = 0;
> > >  	uint32_t action_flags = 0;
> > > @@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > >  		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
> > >  			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
> > >  			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > +			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > +			action_flags |= MLX5_ACTION_SET_IPV4_DST;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > +			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > +			action_flags |= MLX5_ACTION_SET_IPV6_DST;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > +			action_flags |= MLX5_ACTION_SET_TP_SRC;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > +			action_flags |= MLX5_ACTION_SET_TP_DST;
> > > +			break;
> > >  		default:
> > >  			return rte_flow_error_set(error, ENOTSUP,
> > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > >  						  actions,
> > >  						  "action not supported");
> > >  		}
> > > +		if (IS_MODIFY_ACTION(actions->type)) {

This would be a redundant 'if' as classification is already done above. So, how
about adding a goto label at the end of this code - 'err_no_action_conf:', and
use goto above.  E.g.,

		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
			action_flags |= MLX5_ACTION_SET_TP_DST;
			if (!actions->conf)
				goto err_no_action_conf;
			break;

And if I may, can I ask you to add the same to RTE_FLOW_ACTION_TYPE_PORT_ID?

> > > +			if (!actions->conf)
> > > +				return rte_flow_error_set(error, ENOTSUP,
> > > +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> > > +						actions,
> > > +						"action configuration not set");
> > > +		}
> > > +	}
> > > +	if (action_flags &
> > > +	   (MLX5_ACTION_SET_IPV4_SRC | MLX5_ACTION_SET_IPV4_DST)) {
> > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
> > > +			return rte_flow_error_set(error, ENOTSUP,
> > > +						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > +						  actions,
> > > +						  "no ipv4 item found in"
> > > +						  " pattern");
> > > +	}
> > > +	if (action_flags &
> > > +	   (MLX5_ACTION_SET_IPV6_SRC | MLX5_ACTION_SET_IPV6_DST)) {
> > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
> > > +			return rte_flow_error_set(error, ENOTSUP,
> > > +				RTE_FLOW_ERROR_TYPE_ACTION,
> > > +				actions,
> > > +				"no ipv6 item found in pattern");
> > > +	}
> > > +	if (action_flags & (MLX5_ACTION_SET_TP_SRC | MLX5_ACTION_SET_TP_DST)) {
> > > +		if (!(item_flags &
> > > +		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
> > > +		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
> > > +			return rte_flow_error_set(error, ENOTSUP,
> > > +						RTE_FLOW_ERROR_TYPE_ACTION,
> > > +						actions,
> > > +						"no TCP/UDP item found in"
> > > +						" pattern");

All the errors you added, I think EINVAL would be a better fit?

> > Isn't this 'set' action compatible with drop action? No point of modifying
> > packet which will be dropped, isn't it?
> > 
> Yes, you are absolutely right :-)

I believe you'll add a validation code for that in the next version. :-)

> > >  	}
> > >  	return 0;
> > >  }
> > > @@ -840,6 +1172,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
> > >  				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
> > >  				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
> > >  			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > +			size += flow_tcf_get_pedit_actions_size(&actions,
> > > +								&flags);
> > > +			break;
> > >  		default:
> > >  			DRV_LOG(WARNING,
> > >  				"unsupported action %p type %d,"
> > > @@ -998,6 +1339,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > >  	struct nlattr *na_flower_act;
> > >  	struct nlattr *na_vlan_id = NULL;
> > >  	struct nlattr *na_vlan_priority = NULL;
> > > +	uint64_t item_flags = 0;
> > >  
> > >  	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
> > >  						PTOI_TABLE_SZ_MAX(dev)));
> > > @@ -1189,6 +1531,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > >  			}
> > >  			break;
> > >  		case RTE_FLOW_ITEM_TYPE_UDP:
> > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> > 
> > Let's add the same to the rest of items like flow_tcf_validate().
> > 
> OK!
> > 
> > Thanks,
> > Yongseok
> > 
> > >  			mask.udp = flow_tcf_item_mask
> > >  				(items, &rte_flow_item_udp_mask,
> > >  				 &flow_tcf_mask_supported.udp,
> > > @@ -1218,6 +1561,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > >  			}
> > >  			break;
> > >  		case RTE_FLOW_ITEM_TYPE_TCP:
> > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
> > >  			mask.tcp = flow_tcf_item_mask
> > >  				(items, &rte_flow_item_tcp_mask,
> > >  				 &flow_tcf_mask_supported.tcp,
> > > @@ -1368,6 +1712,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > >  					conf.of_set_vlan_pcp->vlan_pcp;
> > >  			}
> > >  			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > +			na_act_index =
> > > +				mnl_attr_nest_start(nlh, na_act_index_cur++);
> > > +			flow_tcf_create_pedit_mnl_msg(nlh,
> > > +						      &actions, item_flags);
> > > +			mnl_attr_nest_end(nlh, na_act_index);
> > > +			break;
> > >  		default:
> > >  			return rte_flow_error_set(error, ENOTSUP,
> > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > -- 
> > > 2.17.1
> > >
  
Xiaoyu Min Oct. 8, 2018, 11:22 a.m. UTC | #4
On 18-10-02 04:19:00, Yongseok Koh wrote:
> On Sun, Sep 30, 2018 at 03:21:04PM +0800, Xiaoyu Min wrote:
> > On 18-09-29 07:03:33, Yongseok Koh wrote:
> > > On Tue, Sep 25, 2018 at 07:51:06PM +0800, Xiaoyu Min wrote:
> > > > Offload the following rte_flow actions by inserting accordingly
> > > > E-Switch rules via TC Flower driver
> > > > 
> > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
> > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
> > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
> > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
> > > >  - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
> > > >  - RTE_FLOW_ACTION_TYPE_SET_TP_DST
> > > 
> > > Can you put an example command of testpmd for the reference?
> > > 
> > Sure, I'll add.
> > 
> > > > Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
> > > > ---
> > > > This patch bases on Rahul Lakkireddy's patchs[1][2] and
> > > > Yongseok Koh's patchset [3]
> > > > 
> > > > [1] https://patches.dpdk.org/patch/45191/
> > > > [2] https://patches.dpdk.org/patch/45192/
> > > > [3] https://patches.dpdk.org/project/dpdk/list/?series=1474
> > > > 
> > > > 
> > > >  drivers/net/mlx5/Makefile        |   5 +
> > > >  drivers/net/mlx5/meson.build     |   2 +
> > > >  drivers/net/mlx5/mlx5_flow.h     |   6 +
> > > >  drivers/net/mlx5/mlx5_flow_tcf.c | 356 +++++++++++++++++++++++++++++++
> > > >  4 files changed, 369 insertions(+)
> > > > 
> > > > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> > > > index ca1de9f21..49b95e78e 100644
> > > > --- a/drivers/net/mlx5/Makefile
> > > > +++ b/drivers/net/mlx5/Makefile
> > > > @@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
> > > >  		linux/tc_act/tc_vlan.h \
> > > >  		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
> > > >  		$(AUTOCONF_OUTPUT)
> > > > +	$Q sh -- '$<' '$@' \
> > > > +		HAVE_TC_ACT_PEDIT \
> > > > +		linux/tc_act/tc_pedit.h \
> > > > +		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
> > > > +		$(AUTOCONF_OUTPUT)
> > > >  	$Q sh -- '$<' '$@' \
> > > >  		HAVE_SUPPORTED_40000baseKR4_Full \
> > > >  		/usr/include/linux/ethtool.h \
> > > > diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> > > > index fd93ac162..ef6a85101 100644
> > > > --- a/drivers/net/mlx5/meson.build
> > > > +++ b/drivers/net/mlx5/meson.build
> > > > @@ -182,6 +182,8 @@ if build
> > > >  		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
> > > >  		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
> > > >  		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
> > > > +		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
> > > > +		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
> > > >  		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
> > > >  		'RDMA_NL_NLDEV' ],
> > > >  		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
> > > > diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> > > > index 10d700a7f..be182a643 100644
> > > > --- a/drivers/net/mlx5/mlx5_flow.h
> > > > +++ b/drivers/net/mlx5/mlx5_flow.h
> > > > @@ -87,6 +87,12 @@
> > > >  #define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
> > > >  #define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
> > > >  #define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
> > > > +#define MLX5_ACTION_SET_IPV4_SRC (1u << 11)
> > > > +#define MLX5_ACTION_SET_IPV4_DST (1u << 12)
> > > > +#define MLX5_ACTION_SET_IPV6_SRC (1u << 13)
> > > > +#define MLX5_ACTION_SET_IPV6_DST (1u << 14)
> > > > +#define MLX5_ACTION_SET_TP_SRC (1u << 15)
> > > > +#define MLX5_ACTION_SET_TP_DST (1u << 16)
> > > >  
> > > >  /* possible L3 layers protocols filtering. */
> > > >  #define MLX5_IP_PROTOCOL_TCP 6
> > > > diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > index 14376188e..85c92f369 100644
> > > > --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > @@ -53,6 +53,62 @@ struct tc_vlan {
> > > >  
> > > >  #endif /* HAVE_TC_ACT_VLAN */
> > > >  
> > > > +#ifdef HAVE_TC_ACT_PEDIT
> > > > +
> > > > +#include <linux/tc_act/tc_pedit.h>
> > > > +
> > > > +#else /* HAVE_TC_ACT_VLAN */
> > > > +enum {
> > > > +	TCA_PEDIT_UNSPEC,
> > > > +	TCA_PEDIT_TM,
> > > > +	TCA_PEDIT_PARMS,
> > > > +	TCA_PEDIT_PAD,
> > > > +	TCA_PEDIT_PARMS_EX,
> > > > +	TCA_PEDIT_KEYS_EX,
> > > > +	TCA_PEDIT_KEY_EX,
> > > > +	__TCA_PEDIT_MAX
> > > > +};
> > > > +
> > > > +enum {
> > > > +	TCA_PEDIT_KEY_EX_HTYPE = 1,
> > > > +	TCA_PEDIT_KEY_EX_CMD = 2,
> > > > +	__TCA_PEDIT_KEY_EX_MAX
> > > > +};
> > > > +
> > > > +enum pedit_header_type {
> > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
> > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
> > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
> > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
> > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
> > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
> > > > +	__PEDIT_HDR_TYPE_MAX,
> > > > +};
> > > > +
> > > > +enum pedit_cmd {
> > > > +	TCA_PEDIT_KEY_EX_CMD_SET = 0,
> > > > +	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
> > > > +	__PEDIT_CMD_MAX,
> > > > +};
> > > > +
> > > > +struct tc_pedit_key {
> > > > +	__u32           mask;  /* AND */
> > > > +	__u32           val;   /*XOR */
> > > > +	__u32           off;  /*offset */
> > > > +	__u32           at;
> > > > +	__u32           offmask;
> > > > +	__u32           shift;
> > > > +};
> > > > +
> > > > +struct tc_pedit_sel {
> > > > +	tc_gen;
> > > > +	unsigned char           nkeys;
> > > > +	unsigned char           flags;
> > > > +	struct tc_pedit_key     keys[0];
> > > > +};
> > > > +
> > > > +#endif /* HAVE_TC_ACT_VLAN */
> > > > +
> > > >  /* Normally found in linux/netlink.h. */
> > > >  #ifndef NETLINK_CAP_ACK
> > > >  #define NETLINK_CAP_ACK 10
> > > > @@ -153,6 +209,14 @@ struct tc_vlan {
> > > >  #define IPV6_ADDR_LEN 16
> > > >  #endif
> > > >  
> > > > +#ifndef IPV4_ADDR_LEN
> > > > +#define IPV4_ADDR_LEN 4
> > > > +#endif
> > > > +
> > > > +#ifndef TP_PORT_LEN
> > > > +#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
> > > > +#endif
> > > > +
> > > >  /** Empty masks for known item types. */
> > > >  static const union {
> > > >  	struct rte_flow_item_port_id port_id;
> > > > @@ -227,6 +291,220 @@ struct flow_tcf_ptoi {
> > > >  
> > > >  #define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
> > > >  
> > > > +#define IS_MODIFY_ACTION(act_) ({typeof(act_) act = (act_); \
> > > > +		((act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || \
> > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST  || \
> > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC  || \
> > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST  || \
> > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_SRC    || \
> > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_DST) ?    \
> > > > +		1 : 0; })
> > > 
> > > The reason why you need this complex multi-conditional macro is that
> > > RTE_FLOW_ACTION_TYPE_* isn't a bitmask. But, as this actions will be converted
> > > to MLX5_ACTION_* which is a bitmask, you can use that instead. Then, this
> > > would be enough to be:
> > > 
> > > #define MLX5_TCF_SET_ACTIONS \
> > > 	(MLX5_ACTION_SET_IPV4_SRC | ...)
> > > 
> > > And in the flow_tcf_validate() below,
> > > 	if (action_flags & MLX5_TCF_SET_ACTIONS) {
> > > 		...
> > > 	}
> > > 
> > Well, I did consider using bitmask but action_flags is an _accumulated_ variable,
> > records all the actions parsed so far.
> > But, here, I need to know what is the _current_ action and whether it belongs to modify
> > actions. If using bitmask, Looks like a new variable (i.e current_action) needed (?)
> > 
> > case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> >     current_action = MLX5_ACTION_SET_IPV4_SRC;
> >     .....
> > 
> > if (current_action & MLX5_TCF_SET_ACTIONS) ...
> >  ...
> > 
> > action_flags |= current_action;
> > 
> > I feel more code change needed or you think it's worth?
> 
> Understood what you meant.
> Please see my comment below in the flow_tcf_validate().
> 
> > > And please note that I'm going to rename MLX5_ACTION_* to MLX5_FLOW_ACTION_*.
> > > Please refer to "net/mlx5: rename flow macros" in PR #885. You might need to
> > > rebase it again.
> > > 
> > Sure, I'll rebase on it
> > 
> > > > +#define MAX_PEDIT_KEYS (128)
> > > > +#define SZ_PEDIT_KEY_VAL (4)
> > > > +
> > > > +struct pedit_key_ex {
> > > > +	enum pedit_header_type htype;
> > > > +	enum pedit_cmd cmd;
> > > > +};
> > > > +
> > > > +struct pedit_parser {
> > > > +	struct tc_pedit_sel sel;
> > > > +	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
> > > > +	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
> > > > +};
> > > > +
> > > > +static int
> > > > +flow_tcf_calc_pedit_keys(const uint64_t size)
> > > 
> > > Please add documentation by comment for every funcs you add.
> > > Refer to the other existing ones for formality.
> > > 
> > Sure!
> > > > +{
> > > > +	int keys = (size / SZ_PEDIT_KEY_VAL) +
> > > > +		((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);
> > > 
> > > Indentation.
> > > 
> > Will fix it
> > > > +	return keys;
> > > > +}
> > > > +
> > > > +static void
> > > > +flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
> > > > +				struct pedit_parser *p_parser,
> > > > +				uint64_t item_flags)
> > > > +{
> > > > +	int idx = p_parser->sel.nkeys;
> > > > +
> > > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
> > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
> > > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
> > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
> > > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > +	p_parser->keys[idx].off =
> > > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 2 : 0;
> > > 
> > > 	assert(offsetof(struct tcp_hdr, src_port) ==
> > > 	       offsetof(struct udp_hdr, src_port));
> > > 	assert(offsetof(struct tcp_hdr, dst_port) ==
> > > 	       offsetof(struct udp_hdr, dst_port));
> > > 	p_parser->keys[idx].off =
> > > 		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
> > > 		offsetof(struct tcp_hdr, src_port) :
> > > 		offsetof(struct tcp_hdr, dst_port);
> > > 
> > > assert() is just to be informative.
> > > And how about src first like others below?
> > > 
> > Yes, I will update above.
> > 
> > > > +	p_parser->keys[idx].mask = 0xFFFF0000;
> > > > +	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
> > > > +			actions->conf)->port;
> > > 
> > > Assigning 2B to 4B big-endian stroage? Doesn't look consistent with the mask
> > > above - 0xffff0000.
> > > 
> > So it should be as following ?
> > p_parser->keys[idx].val = (__u32)((const struct rte_flow_action_set_tp *))
> >                 actions->conf)->port;
> 
> You can figure it out by actual tests but I think the following would be right.
> 	p_parser->keys[idx].val =
> 		rte_cpu_to_be_32(((const struct rte_flow_action_set_tp *)
> 				  actions->conf)->port);
> 
> Please verify it by testing anyway.
> 
No, it doesn't work correctly if it's converted to BE.
As my understanding the Netlink message should be expressed in host-byte order (?)

> > > > +	p_parser->sel.nkeys = (++idx);
> > > > +}
> > > > +
> > > > +static void
> > > > +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
> > > > +				 struct pedit_parser *p_parser)
> > > > +{
> > > > +	int idx = p_parser->sel.nkeys;
> > > > +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > +	int off_base =
> > > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;
> > > 
> > > offsetof(struct ipv6_hdr, src_addr) :
> > > offsetof(struct ipv6_hdr, dst_addr);
> > > 
> > Got it!
> > > > +	const struct rte_flow_action_set_ipv6 *conf =
> > > > +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
> > > > +
> > > > +	for (int i = 0; i < keys; i++, idx++) {
> > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
> > > > +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
> > > > +		p_parser->keys[idx].mask = ~UINT32_MAX;
> > > > +		memcpy(&p_parser->keys[idx].val,
> > > > +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
> > > > +			SZ_PEDIT_KEY_VAL);
> > > > +	}
> > > > +	p_parser->sel.nkeys += keys;
> > > > +}
> > > > +
> > > > +static void
> > > > +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
> > > 
> > > How about getting rte_flow_action_set_ipv4 instead of rte_flow_action?
> > > Same comment for ipv6 and tp_port.
> > > 
> > What's the benefit by using rte_flow_action_set_ipv4 and how I know it's
> > for src or dst address ?
> 
> Just to make the function neat but I overlooked that you still need
> actions->type. Please disregard my previous comment.
> 
OK~
> > > > +				 struct pedit_parser *p_parser)
> > > > +{
> > > > +	int idx = p_parser->sel.nkeys;
> > > > +
> > > > +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
> > > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > +	p_parser->keys[idx].off =
> > > > +		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);
> > > 
> > > offsetof(struct ipv4_hdr, src_addr) :
> > > offsetof(struct ipv4_hdr, dst_addr);
> > > 
> > Got it!
> > > > +	p_parser->keys[idx].mask = ~UINT32_MAX;
> > > > +	p_parser->keys[idx].val =
> > > > +		((const struct rte_flow_action_set_ipv4 *)
> > > > +		 actions->conf)->ipv4_addr;
> > > > +	p_parser->sel.nkeys = (++idx);
> > > > +}
> > > > +
> > > > +static int
> > > > +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
> > > > +			      const struct rte_flow_action **actions,
> > > > +			      uint64_t item_flags)
> > > > +{
> > > > +	struct pedit_parser p_parser;
> > > > +
> > > > +	memset(&p_parser, 0, sizeof(p_parser));
> > > > +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
> > > > +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
> > > > +							    TCA_ACT_OPTIONS);
> > > > +	/* all modify header actions should be in one tc-pedit action */
> > > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > > 
> > > It seems that you want to aggregate all the pedit actions and form a single
> > > na attr. But what if rte_flow_action_set_* are not contiguous? E.g.
> > > 
> > > flow create ... actions set1 / set2 / port_id / set3 / end
> > > 
> > > Then, it would have two pedit na attrs. Is that okay?
> > > Or, need to think about another way?
> > > 
> > > Same will happen in flow_tcf_get_pedit_actions_size().
> > > 
> > It's OK if we have more than one pedit na attrs.
> > _BUT_ only last pedit take effect based on my experiment
> 
> Then, shouldn't we give some warning to user in validation? So that user can
> have right expectation and reorder the actions as their intention like:
> 	flow create ... actions set1 / set2 / set3 / port_id / end
> 
> Otherwise set1 and set2 will be lost according to your comment.
> 
I prefer to give error to user in validation because this is simple.
> Or, how about making PMD do the right thing. I mean, even if the set actions are
> scattered, PMD can collect it and apply in a single na attr?
> 
My feeling is the above approach will be (become) complex. It looks like we introduce
new functionality which re-order all actions, something like rss_expand. 
> > > > +		switch ((*actions)->type) {
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > +			flow_tcf_pedit_key_set_tp_port(*actions,
> > > > +							&p_parser, item_flags);
> > > > +			break;
> > > > +		default:
> > > > +			goto pedit_mnl_msg_done;
> > > > +		}
> > > > +	}
> > > > +pedit_mnl_msg_done:
> > > > +	p_parser.sel.action = TC_ACT_PIPE;
> > > > +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
> > > > +			sizeof(p_parser.sel) +
> > > > +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
> > > > +			&p_parser);
> > > > +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
> > > > +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
> > > > +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
> > > > +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
> > > > +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
> > > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
> > > > +				 p_parser.keys_ex[i].htype);
> > > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
> > > > +				 p_parser.keys_ex[i].cmd);
> > > > +		mnl_attr_nest_end(nl, na_pedit_key);
> > > > +	}
> > > > +	mnl_attr_nest_end(nl, na_pedit_keys);
> > > > +	mnl_attr_nest_end(nl, na_act_options);
> > > > +	(*actions)--;
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +/**
> > > > + * Calculate max memory size of one TC-pedit actions.
> > > > + * One TC-pedit action can contain set of keys each defining
> > > > + * a rewrite element (rte_flow action)
> > > > + *
> > > > + * @param[in] actions
> > > > + *   actions specification.
> > > > + * @param[inout] action_flags
> > > > + *   actions flags
> > > > + * @param[inout] size
> > > > + *   accumulated size
> > > > + * @return
> > > > + *   Max memory size of one TC-pedit action
> > > > + */
> > > > +static int
> > > > +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
> > > > +				uint64_t *action_flags)
> > > > +{
> > > > +	int pedit_size = 0;
> > > > +	int keys = 0;
> > > > +	uint64_t flags = 0;
> > > > +
> > > > +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
> > > > +		      SZ_NLATTR_STRZ_OF("pedit") +
> > > > +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
> > > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > > > +		switch ((*actions)->type) {
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > > +			flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > > +			flags |= MLX5_ACTION_SET_IPV4_DST;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > +			flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > +			flags |= MLX5_ACTION_SET_IPV6_DST;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > +			/* TCP is as same as UDP */
> > > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > > +			flags |= MLX5_ACTION_SET_TP_SRC;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > +			/* TCP is as same as UDP */
> > > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > > +			flags |= MLX5_ACTION_SET_TP_DST;
> > > > +			break;
> > > > +		default:
> > > > +			goto get_pedit_action_size_done;
> > > > +		}
> > > > +	}
> > > > +get_pedit_action_size_done:
> > > > +	/* TCA_PEDIT_PARAMS_EX */
> > > > +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
> > > > +			keys * sizeof(struct tc_pedit_key));
> > > 
> > > > +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
> > > > +	pedit_size += keys *
> > > > +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
> > > > +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
> > > > +	(*action_flags) |= flags;
> > > > +	(*actions)--;
> > > > +	return pedit_size;
> > > > +}
> > > > +
> > > >  /**
> > > >   * Retrieve mask for pattern item.
> > > >   *
> > > > @@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > > >  			of_set_vlan_vid;
> > > >  		const struct rte_flow_action_of_set_vlan_pcp *
> > > >  			of_set_vlan_pcp;
> > > > +		const struct rte_flow_action_set_ipv4 *set_ipv4;
> > > > +		const struct rte_flow_action_set_ipv6 *set_ipv6;
> > > >  	} conf;
> > > >  	uint32_t item_flags = 0;
> > > >  	uint32_t action_flags = 0;
> > > > @@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > > >  		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
> > > >  			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
> > > >  			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > +			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > +			action_flags |= MLX5_ACTION_SET_IPV4_DST;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > +			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > +			action_flags |= MLX5_ACTION_SET_IPV6_DST;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > +			action_flags |= MLX5_ACTION_SET_TP_SRC;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > +			action_flags |= MLX5_ACTION_SET_TP_DST;
> > > > +			break;
> > > >  		default:
> > > >  			return rte_flow_error_set(error, ENOTSUP,
> > > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > >  						  actions,
> > > >  						  "action not supported");
> > > >  		}
> > > > +		if (IS_MODIFY_ACTION(actions->type)) {
> 
> This would be a redundant 'if' as classification is already done above. So, how
> about adding a goto label at the end of this code - 'err_no_action_conf:', and
> use goto above.  E.g.,
> 
> 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> 			action_flags |= MLX5_ACTION_SET_TP_DST;
> 			if (!actions->conf)
> 				goto err_no_action_conf;
> 			break;
> 
In some level I do agree with you it's redundant. But things like this kind of
redundancy is not avoidable. I mean if we use "goto err_no_action_conf", the
"if (!actions->conf) goto err_no_action_conf" has to be repeated in each "case"
which needs to check conf or you think it's acceptable ?
> And if I may, can I ask you to add the same to RTE_FLOW_ACTION_TYPE_PORT_ID?
> 
Yes, I will add.
> > > > +			if (!actions->conf)
> > > > +				return rte_flow_error_set(error, ENOTSUP,
> > > > +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> > > > +						actions,
> > > > +						"action configuration not set");
> > > > +		}
> > > > +	}
> > > > +	if (action_flags &
> > > > +	   (MLX5_ACTION_SET_IPV4_SRC | MLX5_ACTION_SET_IPV4_DST)) {
> > > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
> > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > +						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > +						  actions,
> > > > +						  "no ipv4 item found in"
> > > > +						  " pattern");
> > > > +	}
> > > > +	if (action_flags &
> > > > +	   (MLX5_ACTION_SET_IPV6_SRC | MLX5_ACTION_SET_IPV6_DST)) {
> > > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
> > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > +				RTE_FLOW_ERROR_TYPE_ACTION,
> > > > +				actions,
> > > > +				"no ipv6 item found in pattern");
> > > > +	}
> > > > +	if (action_flags & (MLX5_ACTION_SET_TP_SRC | MLX5_ACTION_SET_TP_DST)) {
> > > > +		if (!(item_flags &
> > > > +		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
> > > > +		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
> > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > +						RTE_FLOW_ERROR_TYPE_ACTION,
> > > > +						actions,
> > > > +						"no TCP/UDP item found in"
> > > > +						" pattern");
> 
> All the errors you added, I think EINVAL would be a better fit?
> 
Yes, EINVAL should be better.
> > > Isn't this 'set' action compatible with drop action? No point of modifying
> > > packet which will be dropped, isn't it?
> > > 
> > Yes, you are absolutely right :-)
> 
> I believe you'll add a validation code for that in the next version. :-)
> 
Of course ;-)
> > > >  	}
> > > >  	return 0;
> > > >  }
> > > > @@ -840,6 +1172,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
> > > >  				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
> > > >  				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
> > > >  			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > +			size += flow_tcf_get_pedit_actions_size(&actions,
> > > > +								&flags);
> > > > +			break;
> > > >  		default:
> > > >  			DRV_LOG(WARNING,
> > > >  				"unsupported action %p type %d,"
> > > > @@ -998,6 +1339,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > >  	struct nlattr *na_flower_act;
> > > >  	struct nlattr *na_vlan_id = NULL;
> > > >  	struct nlattr *na_vlan_priority = NULL;
> > > > +	uint64_t item_flags = 0;
> > > >  
> > > >  	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
> > > >  						PTOI_TABLE_SZ_MAX(dev)));
> > > > @@ -1189,6 +1531,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > >  			}
> > > >  			break;
> > > >  		case RTE_FLOW_ITEM_TYPE_UDP:
> > > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> > > 
> > > Let's add the same to the rest of items like flow_tcf_validate().
> > > 
> > OK!
> > > 
> > > Thanks,
> > > Yongseok
> > > 
> > > >  			mask.udp = flow_tcf_item_mask
> > > >  				(items, &rte_flow_item_udp_mask,
> > > >  				 &flow_tcf_mask_supported.udp,
> > > > @@ -1218,6 +1561,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > >  			}
> > > >  			break;
> > > >  		case RTE_FLOW_ITEM_TYPE_TCP:
> > > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
> > > >  			mask.tcp = flow_tcf_item_mask
> > > >  				(items, &rte_flow_item_tcp_mask,
> > > >  				 &flow_tcf_mask_supported.tcp,
> > > > @@ -1368,6 +1712,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > >  					conf.of_set_vlan_pcp->vlan_pcp;
> > > >  			}
> > > >  			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > +			na_act_index =
> > > > +				mnl_attr_nest_start(nlh, na_act_index_cur++);
> > > > +			flow_tcf_create_pedit_mnl_msg(nlh,
> > > > +						      &actions, item_flags);
> > > > +			mnl_attr_nest_end(nlh, na_act_index);
> > > > +			break;
> > > >  		default:
> > > >  			return rte_flow_error_set(error, ENOTSUP,
> > > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > -- 
> > > > 2.17.1
> > > >
  
Yongseok Koh Oct. 8, 2018, 10:08 p.m. UTC | #5
On Mon, Oct 08, 2018 at 07:22:03PM +0800, Xiaoyu Min wrote:
> On 18-10-02 04:19:00, Yongseok Koh wrote:
> > On Sun, Sep 30, 2018 at 03:21:04PM +0800, Xiaoyu Min wrote:
> > > On 18-09-29 07:03:33, Yongseok Koh wrote:
> > > > On Tue, Sep 25, 2018 at 07:51:06PM +0800, Xiaoyu Min wrote:
> > > > > Offload the following rte_flow actions by inserting accordingly
> > > > > E-Switch rules via TC Flower driver
> > > > > 
> > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
> > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
> > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
> > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
> > > > >  - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
> > > > >  - RTE_FLOW_ACTION_TYPE_SET_TP_DST
> > > > 
> > > > Can you put an example command of testpmd for the reference?
> > > > 
> > > Sure, I'll add.
> > > 
> > > > > Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
> > > > > ---
> > > > > This patch bases on Rahul Lakkireddy's patchs[1][2] and
> > > > > Yongseok Koh's patchset [3]
> > > > > 
> > > > > [1] https://patches.dpdk.org/patch/45191/
> > > > > [2] https://patches.dpdk.org/patch/45192/
> > > > > [3] https://patches.dpdk.org/project/dpdk/list/?series=1474
> > > > > 
> > > > > 
> > > > >  drivers/net/mlx5/Makefile        |   5 +
> > > > >  drivers/net/mlx5/meson.build     |   2 +
> > > > >  drivers/net/mlx5/mlx5_flow.h     |   6 +
> > > > >  drivers/net/mlx5/mlx5_flow_tcf.c | 356 +++++++++++++++++++++++++++++++
> > > > >  4 files changed, 369 insertions(+)
> > > > > 
> > > > > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> > > > > index ca1de9f21..49b95e78e 100644
> > > > > --- a/drivers/net/mlx5/Makefile
> > > > > +++ b/drivers/net/mlx5/Makefile
> > > > > @@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
> > > > >  		linux/tc_act/tc_vlan.h \
> > > > >  		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
> > > > >  		$(AUTOCONF_OUTPUT)
> > > > > +	$Q sh -- '$<' '$@' \
> > > > > +		HAVE_TC_ACT_PEDIT \
> > > > > +		linux/tc_act/tc_pedit.h \
> > > > > +		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
> > > > > +		$(AUTOCONF_OUTPUT)
> > > > >  	$Q sh -- '$<' '$@' \
> > > > >  		HAVE_SUPPORTED_40000baseKR4_Full \
> > > > >  		/usr/include/linux/ethtool.h \
> > > > > diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> > > > > index fd93ac162..ef6a85101 100644
> > > > > --- a/drivers/net/mlx5/meson.build
> > > > > +++ b/drivers/net/mlx5/meson.build
> > > > > @@ -182,6 +182,8 @@ if build
> > > > >  		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
> > > > >  		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
> > > > >  		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
> > > > > +		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
> > > > > +		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
> > > > >  		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
> > > > >  		'RDMA_NL_NLDEV' ],
> > > > >  		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
> > > > > diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> > > > > index 10d700a7f..be182a643 100644
> > > > > --- a/drivers/net/mlx5/mlx5_flow.h
> > > > > +++ b/drivers/net/mlx5/mlx5_flow.h
> > > > > @@ -87,6 +87,12 @@
> > > > >  #define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
> > > > >  #define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
> > > > >  #define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
> > > > > +#define MLX5_ACTION_SET_IPV4_SRC (1u << 11)
> > > > > +#define MLX5_ACTION_SET_IPV4_DST (1u << 12)
> > > > > +#define MLX5_ACTION_SET_IPV6_SRC (1u << 13)
> > > > > +#define MLX5_ACTION_SET_IPV6_DST (1u << 14)
> > > > > +#define MLX5_ACTION_SET_TP_SRC (1u << 15)
> > > > > +#define MLX5_ACTION_SET_TP_DST (1u << 16)
> > > > >  
> > > > >  /* possible L3 layers protocols filtering. */
> > > > >  #define MLX5_IP_PROTOCOL_TCP 6
> > > > > diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > > index 14376188e..85c92f369 100644
> > > > > --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > > +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > > @@ -53,6 +53,62 @@ struct tc_vlan {
> > > > >  
> > > > >  #endif /* HAVE_TC_ACT_VLAN */
> > > > >  
> > > > > +#ifdef HAVE_TC_ACT_PEDIT
> > > > > +
> > > > > +#include <linux/tc_act/tc_pedit.h>
> > > > > +
> > > > > +#else /* HAVE_TC_ACT_VLAN */
> > > > > +enum {
> > > > > +	TCA_PEDIT_UNSPEC,
> > > > > +	TCA_PEDIT_TM,
> > > > > +	TCA_PEDIT_PARMS,
> > > > > +	TCA_PEDIT_PAD,
> > > > > +	TCA_PEDIT_PARMS_EX,
> > > > > +	TCA_PEDIT_KEYS_EX,
> > > > > +	TCA_PEDIT_KEY_EX,
> > > > > +	__TCA_PEDIT_MAX
> > > > > +};
> > > > > +
> > > > > +enum {
> > > > > +	TCA_PEDIT_KEY_EX_HTYPE = 1,
> > > > > +	TCA_PEDIT_KEY_EX_CMD = 2,
> > > > > +	__TCA_PEDIT_KEY_EX_MAX
> > > > > +};
> > > > > +
> > > > > +enum pedit_header_type {
> > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
> > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
> > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
> > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
> > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
> > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
> > > > > +	__PEDIT_HDR_TYPE_MAX,
> > > > > +};
> > > > > +
> > > > > +enum pedit_cmd {
> > > > > +	TCA_PEDIT_KEY_EX_CMD_SET = 0,
> > > > > +	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
> > > > > +	__PEDIT_CMD_MAX,
> > > > > +};
> > > > > +
> > > > > +struct tc_pedit_key {
> > > > > +	__u32           mask;  /* AND */
> > > > > +	__u32           val;   /*XOR */
> > > > > +	__u32           off;  /*offset */
> > > > > +	__u32           at;
> > > > > +	__u32           offmask;
> > > > > +	__u32           shift;
> > > > > +};
> > > > > +
> > > > > +struct tc_pedit_sel {
> > > > > +	tc_gen;
> > > > > +	unsigned char           nkeys;
> > > > > +	unsigned char           flags;
> > > > > +	struct tc_pedit_key     keys[0];
> > > > > +};
> > > > > +
> > > > > +#endif /* HAVE_TC_ACT_VLAN */
> > > > > +
> > > > >  /* Normally found in linux/netlink.h. */
> > > > >  #ifndef NETLINK_CAP_ACK
> > > > >  #define NETLINK_CAP_ACK 10
> > > > > @@ -153,6 +209,14 @@ struct tc_vlan {
> > > > >  #define IPV6_ADDR_LEN 16
> > > > >  #endif
> > > > >  
> > > > > +#ifndef IPV4_ADDR_LEN
> > > > > +#define IPV4_ADDR_LEN 4
> > > > > +#endif
> > > > > +
> > > > > +#ifndef TP_PORT_LEN
> > > > > +#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
> > > > > +#endif
> > > > > +
> > > > >  /** Empty masks for known item types. */
> > > > >  static const union {
> > > > >  	struct rte_flow_item_port_id port_id;
> > > > > @@ -227,6 +291,220 @@ struct flow_tcf_ptoi {
> > > > >  
> > > > >  #define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
> > > > >  
> > > > > +#define IS_MODIFY_ACTION(act_) ({typeof(act_) act = (act_); \
> > > > > +		((act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || \
> > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST  || \
> > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC  || \
> > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST  || \
> > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_SRC    || \
> > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_DST) ?    \
> > > > > +		1 : 0; })
> > > > 
> > > > The reason why you need this complex multi-conditional macro is that
> > > > RTE_FLOW_ACTION_TYPE_* isn't a bitmask. But, as this actions will be converted
> > > > to MLX5_ACTION_* which is a bitmask, you can use that instead. Then, this
> > > > would be enough to be:
> > > > 
> > > > #define MLX5_TCF_SET_ACTIONS \
> > > > 	(MLX5_ACTION_SET_IPV4_SRC | ...)
> > > > 
> > > > And in the flow_tcf_validate() below,
> > > > 	if (action_flags & MLX5_TCF_SET_ACTIONS) {
> > > > 		...
> > > > 	}
> > > > 
> > > Well, I did consider using bitmask but action_flags is an _accumulated_ variable,
> > > records all the actions parsed so far.
> > > But, here, I need to know what is the _current_ action and whether it belongs to modify
> > > actions. If using bitmask, Looks like a new variable (i.e current_action) needed (?)
> > > 
> > > case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > >     current_action = MLX5_ACTION_SET_IPV4_SRC;
> > >     .....
> > > 
> > > if (current_action & MLX5_TCF_SET_ACTIONS) ...
> > >  ...
> > > 
> > > action_flags |= current_action;
> > > 
> > > I feel more code change needed or you think it's worth?
> > 
> > Understood what you meant.
> > Please see my comment below in the flow_tcf_validate().
> > 
> > > > And please note that I'm going to rename MLX5_ACTION_* to MLX5_FLOW_ACTION_*.
> > > > Please refer to "net/mlx5: rename flow macros" in PR #885. You might need to
> > > > rebase it again.
> > > > 
> > > Sure, I'll rebase on it
> > > 
> > > > > +#define MAX_PEDIT_KEYS (128)
> > > > > +#define SZ_PEDIT_KEY_VAL (4)
> > > > > +
> > > > > +struct pedit_key_ex {
> > > > > +	enum pedit_header_type htype;
> > > > > +	enum pedit_cmd cmd;
> > > > > +};
> > > > > +
> > > > > +struct pedit_parser {
> > > > > +	struct tc_pedit_sel sel;
> > > > > +	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
> > > > > +	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
> > > > > +};
> > > > > +
> > > > > +static int
> > > > > +flow_tcf_calc_pedit_keys(const uint64_t size)
> > > > 
> > > > Please add documentation by comment for every funcs you add.
> > > > Refer to the other existing ones for formality.
> > > > 
> > > Sure!
> > > > > +{
> > > > > +	int keys = (size / SZ_PEDIT_KEY_VAL) +
> > > > > +		((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);
> > > > 
> > > > Indentation.
> > > > 
> > > Will fix it
> > > > > +	return keys;
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
> > > > > +				struct pedit_parser *p_parser,
> > > > > +				uint64_t item_flags)
> > > > > +{
> > > > > +	int idx = p_parser->sel.nkeys;
> > > > > +
> > > > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
> > > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
> > > > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
> > > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
> > > > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > > +	p_parser->keys[idx].off =
> > > > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 2 : 0;
> > > > 
> > > > 	assert(offsetof(struct tcp_hdr, src_port) ==
> > > > 	       offsetof(struct udp_hdr, src_port));
> > > > 	assert(offsetof(struct tcp_hdr, dst_port) ==
> > > > 	       offsetof(struct udp_hdr, dst_port));
> > > > 	p_parser->keys[idx].off =
> > > > 		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
> > > > 		offsetof(struct tcp_hdr, src_port) :
> > > > 		offsetof(struct tcp_hdr, dst_port);
> > > > 
> > > > assert() is just to be informative.
> > > > And how about src first like others below?
> > > > 
> > > Yes, I will update above.
> > > 
> > > > > +	p_parser->keys[idx].mask = 0xFFFF0000;
> > > > > +	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
> > > > > +			actions->conf)->port;
> > > > 
> > > > Assigning 2B to 4B big-endian stroage? Doesn't look consistent with the mask
> > > > above - 0xffff0000.
> > > > 
> > > So it should be as following ?
> > > p_parser->keys[idx].val = (__u32)((const struct rte_flow_action_set_tp *))
> > >                 actions->conf)->port;
> > 
> > You can figure it out by actual tests but I think the following would be right.
> > 	p_parser->keys[idx].val =
> > 		rte_cpu_to_be_32(((const struct rte_flow_action_set_tp *)
> > 				  actions->conf)->port);
> > 
> > Please verify it by testing anyway.
> > 
> No, it doesn't work correctly if it's converted to BE.
> As my understanding the Netlink message should be expressed in host-byte order (?)

As far as I know rte_flow takes network-byte order for args and tcf na msg takes
host-byte order. Please refer to the "override_na_vlan_id:" in mlx5_flow_tcf.c,
rte_be_to_cpu_16() is used there.  I'm still confusing why the mask is
0xffff0000 above. Please make sure your code works correctly by multiple test
cases and hopefully I can hear clear explanation what is right and why.

> > > > > +	p_parser->sel.nkeys = (++idx);
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
> > > > > +				 struct pedit_parser *p_parser)
> > > > > +{
> > > > > +	int idx = p_parser->sel.nkeys;
> > > > > +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > > +	int off_base =
> > > > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;
> > > > 
> > > > offsetof(struct ipv6_hdr, src_addr) :
> > > > offsetof(struct ipv6_hdr, dst_addr);
> > > > 
> > > Got it!
> > > > > +	const struct rte_flow_action_set_ipv6 *conf =
> > > > > +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
> > > > > +
> > > > > +	for (int i = 0; i < keys; i++, idx++) {
> > > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
> > > > > +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > > +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
> > > > > +		p_parser->keys[idx].mask = ~UINT32_MAX;
> > > > > +		memcpy(&p_parser->keys[idx].val,
> > > > > +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
> > > > > +			SZ_PEDIT_KEY_VAL);
> > > > > +	}
> > > > > +	p_parser->sel.nkeys += keys;
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
> > > > 
> > > > How about getting rte_flow_action_set_ipv4 instead of rte_flow_action?
> > > > Same comment for ipv6 and tp_port.
> > > > 
> > > What's the benefit by using rte_flow_action_set_ipv4 and how I know it's
> > > for src or dst address ?
> > 
> > Just to make the function neat but I overlooked that you still need
> > actions->type. Please disregard my previous comment.
> > 
> OK~
> > > > > +				 struct pedit_parser *p_parser)
> > > > > +{
> > > > > +	int idx = p_parser->sel.nkeys;
> > > > > +
> > > > > +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
> > > > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > > +	p_parser->keys[idx].off =
> > > > > +		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);
> > > > 
> > > > offsetof(struct ipv4_hdr, src_addr) :
> > > > offsetof(struct ipv4_hdr, dst_addr);
> > > > 
> > > Got it!
> > > > > +	p_parser->keys[idx].mask = ~UINT32_MAX;
> > > > > +	p_parser->keys[idx].val =
> > > > > +		((const struct rte_flow_action_set_ipv4 *)
> > > > > +		 actions->conf)->ipv4_addr;
> > > > > +	p_parser->sel.nkeys = (++idx);
> > > > > +}
> > > > > +
> > > > > +static int
> > > > > +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
> > > > > +			      const struct rte_flow_action **actions,
> > > > > +			      uint64_t item_flags)
> > > > > +{
> > > > > +	struct pedit_parser p_parser;
> > > > > +
> > > > > +	memset(&p_parser, 0, sizeof(p_parser));
> > > > > +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
> > > > > +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
> > > > > +							    TCA_ACT_OPTIONS);
> > > > > +	/* all modify header actions should be in one tc-pedit action */
> > > > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > > > 
> > > > It seems that you want to aggregate all the pedit actions and form a single
> > > > na attr. But what if rte_flow_action_set_* are not contiguous? E.g.
> > > > 
> > > > flow create ... actions set1 / set2 / port_id / set3 / end
> > > > 
> > > > Then, it would have two pedit na attrs. Is that okay?
> > > > Or, need to think about another way?
> > > > 
> > > > Same will happen in flow_tcf_get_pedit_actions_size().
> > > > 
> > > It's OK if we have more than one pedit na attrs.
> > > _BUT_ only last pedit take effect based on my experiment
> > 
> > Then, shouldn't we give some warning to user in validation? So that user can
> > have right expectation and reorder the actions as their intention like:
> > 	flow create ... actions set1 / set2 / set3 / port_id / end
> > 
> > Otherwise set1 and set2 will be lost according to your comment.
> > 
> I prefer to give error to user in validation because this is simple.

Good.

> > Or, how about making PMD do the right thing. I mean, even if the set actions are
> > scattered, PMD can collect it and apply in a single na attr?
> > 
> My feeling is the above approach will be (become) complex. It looks like we introduce
> new functionality which re-order all actions, something like rss_expand. 

+1

> > > > > +		switch ((*actions)->type) {
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > +			flow_tcf_pedit_key_set_tp_port(*actions,
> > > > > +							&p_parser, item_flags);
> > > > > +			break;
> > > > > +		default:
> > > > > +			goto pedit_mnl_msg_done;
> > > > > +		}
> > > > > +	}
> > > > > +pedit_mnl_msg_done:
> > > > > +	p_parser.sel.action = TC_ACT_PIPE;
> > > > > +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
> > > > > +			sizeof(p_parser.sel) +
> > > > > +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
> > > > > +			&p_parser);
> > > > > +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
> > > > > +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
> > > > > +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
> > > > > +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
> > > > > +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
> > > > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
> > > > > +				 p_parser.keys_ex[i].htype);
> > > > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
> > > > > +				 p_parser.keys_ex[i].cmd);
> > > > > +		mnl_attr_nest_end(nl, na_pedit_key);
> > > > > +	}
> > > > > +	mnl_attr_nest_end(nl, na_pedit_keys);
> > > > > +	mnl_attr_nest_end(nl, na_act_options);
> > > > > +	(*actions)--;
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * Calculate max memory size of one TC-pedit actions.
> > > > > + * One TC-pedit action can contain set of keys each defining
> > > > > + * a rewrite element (rte_flow action)
> > > > > + *
> > > > > + * @param[in] actions
> > > > > + *   actions specification.
> > > > > + * @param[inout] action_flags
> > > > > + *   actions flags
> > > > > + * @param[inout] size
> > > > > + *   accumulated size
> > > > > + * @return
> > > > > + *   Max memory size of one TC-pedit action
> > > > > + */
> > > > > +static int
> > > > > +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
> > > > > +				uint64_t *action_flags)
> > > > > +{
> > > > > +	int pedit_size = 0;
> > > > > +	int keys = 0;
> > > > > +	uint64_t flags = 0;
> > > > > +
> > > > > +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
> > > > > +		      SZ_NLATTR_STRZ_OF("pedit") +
> > > > > +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
> > > > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > > > > +		switch ((*actions)->type) {
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > > > +			flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > > > +			flags |= MLX5_ACTION_SET_IPV4_DST;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > > +			flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > > +			flags |= MLX5_ACTION_SET_IPV6_DST;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > +			/* TCP is as same as UDP */
> > > > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > > > +			flags |= MLX5_ACTION_SET_TP_SRC;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > +			/* TCP is as same as UDP */
> > > > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > > > +			flags |= MLX5_ACTION_SET_TP_DST;
> > > > > +			break;
> > > > > +		default:
> > > > > +			goto get_pedit_action_size_done;
> > > > > +		}
> > > > > +	}
> > > > > +get_pedit_action_size_done:
> > > > > +	/* TCA_PEDIT_PARAMS_EX */
> > > > > +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
> > > > > +			keys * sizeof(struct tc_pedit_key));
> > > > 
> > > > > +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
> > > > > +	pedit_size += keys *
> > > > > +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
> > > > > +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
> > > > > +	(*action_flags) |= flags;
> > > > > +	(*actions)--;
> > > > > +	return pedit_size;
> > > > > +}
> > > > > +
> > > > >  /**
> > > > >   * Retrieve mask for pattern item.
> > > > >   *
> > > > > @@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > > > >  			of_set_vlan_vid;
> > > > >  		const struct rte_flow_action_of_set_vlan_pcp *
> > > > >  			of_set_vlan_pcp;
> > > > > +		const struct rte_flow_action_set_ipv4 *set_ipv4;
> > > > > +		const struct rte_flow_action_set_ipv6 *set_ipv6;
> > > > >  	} conf;
> > > > >  	uint32_t item_flags = 0;
> > > > >  	uint32_t action_flags = 0;
> > > > > @@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > > > >  		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
> > > > >  			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
> > > > >  			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > +			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > +			action_flags |= MLX5_ACTION_SET_IPV4_DST;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > +			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > +			action_flags |= MLX5_ACTION_SET_IPV6_DST;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > +			action_flags |= MLX5_ACTION_SET_TP_SRC;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > +			action_flags |= MLX5_ACTION_SET_TP_DST;
> > > > > +			break;
> > > > >  		default:
> > > > >  			return rte_flow_error_set(error, ENOTSUP,
> > > > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > >  						  actions,
> > > > >  						  "action not supported");
> > > > >  		}
> > > > > +		if (IS_MODIFY_ACTION(actions->type)) {
> > 
> > This would be a redundant 'if' as classification is already done above. So, how
> > about adding a goto label at the end of this code - 'err_no_action_conf:', and
> > use goto above.  E.g.,
> > 
> > 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > 			action_flags |= MLX5_ACTION_SET_TP_DST;
> > 			if (!actions->conf)
> > 				goto err_no_action_conf;
> > 			break;
> > 
> In some level I do agree with you it's redundant. But things like this kind of
> redundancy is not avoidable. I mean if we use "goto err_no_action_conf", the
> "if (!actions->conf) goto err_no_action_conf" has to be repeated in each "case"
> which needs to check conf or you think it's acceptable ?

But in your approach, the redundant check (IS_MODIFY_ACTION) is inside a loop
and it has to be checked even if there's no 'set' action in the action list.

> > And if I may, can I ask you to add the same to RTE_FLOW_ACTION_TYPE_PORT_ID?
> > 
> Yes, I will add.
> > > > > +			if (!actions->conf)
> > > > > +				return rte_flow_error_set(error, ENOTSUP,
> > > > > +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> > > > > +						actions,
> > > > > +						"action configuration not set");
> > > > > +		}
> > > > > +	}
> > > > > +	if (action_flags &
> > > > > +	   (MLX5_ACTION_SET_IPV4_SRC | MLX5_ACTION_SET_IPV4_DST)) {
> > > > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
> > > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > > +						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > +						  actions,
> > > > > +						  "no ipv4 item found in"
> > > > > +						  " pattern");
> > > > > +	}
> > > > > +	if (action_flags &
> > > > > +	   (MLX5_ACTION_SET_IPV6_SRC | MLX5_ACTION_SET_IPV6_DST)) {
> > > > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
> > > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > > +				RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > +				actions,
> > > > > +				"no ipv6 item found in pattern");
> > > > > +	}
> > > > > +	if (action_flags & (MLX5_ACTION_SET_TP_SRC | MLX5_ACTION_SET_TP_DST)) {
> > > > > +		if (!(item_flags &
> > > > > +		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
> > > > > +		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
> > > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > > +						RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > +						actions,
> > > > > +						"no TCP/UDP item found in"
> > > > > +						" pattern");
> > 
> > All the errors you added, I think EINVAL would be a better fit?
> > 
> Yes, EINVAL should be better.
> > > > Isn't this 'set' action compatible with drop action? No point of modifying
> > > > packet which will be dropped, isn't it?
> > > > 
> > > Yes, you are absolutely right :-)
> > 
> > I believe you'll add a validation code for that in the next version. :-)
> > 
> Of course ;-)
> > > > >  	}
> > > > >  	return 0;
> > > > >  }
> > > > > @@ -840,6 +1172,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
> > > > >  				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
> > > > >  				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
> > > > >  			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > +			size += flow_tcf_get_pedit_actions_size(&actions,
> > > > > +								&flags);
> > > > > +			break;
> > > > >  		default:
> > > > >  			DRV_LOG(WARNING,
> > > > >  				"unsupported action %p type %d,"
> > > > > @@ -998,6 +1339,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > >  	struct nlattr *na_flower_act;
> > > > >  	struct nlattr *na_vlan_id = NULL;
> > > > >  	struct nlattr *na_vlan_priority = NULL;
> > > > > +	uint64_t item_flags = 0;
> > > > >  
> > > > >  	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
> > > > >  						PTOI_TABLE_SZ_MAX(dev)));
> > > > > @@ -1189,6 +1531,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > >  			}
> > > > >  			break;
> > > > >  		case RTE_FLOW_ITEM_TYPE_UDP:
> > > > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> > > > 
> > > > Let's add the same to the rest of items like flow_tcf_validate().
> > > > 
> > > OK!
> > > > 
> > > > Thanks,
> > > > Yongseok
> > > > 
> > > > >  			mask.udp = flow_tcf_item_mask
> > > > >  				(items, &rte_flow_item_udp_mask,
> > > > >  				 &flow_tcf_mask_supported.udp,
> > > > > @@ -1218,6 +1561,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > >  			}
> > > > >  			break;
> > > > >  		case RTE_FLOW_ITEM_TYPE_TCP:
> > > > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
> > > > >  			mask.tcp = flow_tcf_item_mask
> > > > >  				(items, &rte_flow_item_tcp_mask,
> > > > >  				 &flow_tcf_mask_supported.tcp,
> > > > > @@ -1368,6 +1712,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > >  					conf.of_set_vlan_pcp->vlan_pcp;
> > > > >  			}
> > > > >  			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > +			na_act_index =
> > > > > +				mnl_attr_nest_start(nlh, na_act_index_cur++);
> > > > > +			flow_tcf_create_pedit_mnl_msg(nlh,
> > > > > +						      &actions, item_flags);
> > > > > +			mnl_attr_nest_end(nlh, na_act_index);
> > > > > +			break;
> > > > >  		default:
> > > > >  			return rte_flow_error_set(error, ENOTSUP,
> > > > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > -- 
> > > > > 2.17.1
> > > > >
  
Xiaoyu Min Oct. 9, 2018, 7:03 a.m. UTC | #6
On 18-10-09 06:08:32, Yongseok Koh wrote:
> On Mon, Oct 08, 2018 at 07:22:03PM +0800, Xiaoyu Min wrote:
> > On 18-10-02 04:19:00, Yongseok Koh wrote:
> > > On Sun, Sep 30, 2018 at 03:21:04PM +0800, Xiaoyu Min wrote:
> > > > On 18-09-29 07:03:33, Yongseok Koh wrote:
> > > > > On Tue, Sep 25, 2018 at 07:51:06PM +0800, Xiaoyu Min wrote:
> > > > > > Offload the following rte_flow actions by inserting accordingly
> > > > > > E-Switch rules via TC Flower driver
> > > > > > 
> > > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
> > > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
> > > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
> > > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
> > > > > >  - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
> > > > > >  - RTE_FLOW_ACTION_TYPE_SET_TP_DST
> > > > > 
> > > > > Can you put an example command of testpmd for the reference?
> > > > > 
> > > > Sure, I'll add.
> > > > 
> > > > > > Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
> > > > > > ---
> > > > > > This patch bases on Rahul Lakkireddy's patchs[1][2] and
> > > > > > Yongseok Koh's patchset [3]
> > > > > > 
> > > > > > [1] https://patches.dpdk.org/patch/45191/
> > > > > > [2] https://patches.dpdk.org/patch/45192/
> > > > > > [3] https://patches.dpdk.org/project/dpdk/list/?series=1474
> > > > > > 
> > > > > > 
> > > > > >  drivers/net/mlx5/Makefile        |   5 +
> > > > > >  drivers/net/mlx5/meson.build     |   2 +
> > > > > >  drivers/net/mlx5/mlx5_flow.h     |   6 +
> > > > > >  drivers/net/mlx5/mlx5_flow_tcf.c | 356 +++++++++++++++++++++++++++++++
> > > > > >  4 files changed, 369 insertions(+)
> > > > > > 
> > > > > > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> > > > > > index ca1de9f21..49b95e78e 100644
> > > > > > --- a/drivers/net/mlx5/Makefile
> > > > > > +++ b/drivers/net/mlx5/Makefile
> > > > > > @@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
> > > > > >  		linux/tc_act/tc_vlan.h \
> > > > > >  		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
> > > > > >  		$(AUTOCONF_OUTPUT)
> > > > > > +	$Q sh -- '$<' '$@' \
> > > > > > +		HAVE_TC_ACT_PEDIT \
> > > > > > +		linux/tc_act/tc_pedit.h \
> > > > > > +		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
> > > > > > +		$(AUTOCONF_OUTPUT)
> > > > > >  	$Q sh -- '$<' '$@' \
> > > > > >  		HAVE_SUPPORTED_40000baseKR4_Full \
> > > > > >  		/usr/include/linux/ethtool.h \
> > > > > > diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> > > > > > index fd93ac162..ef6a85101 100644
> > > > > > --- a/drivers/net/mlx5/meson.build
> > > > > > +++ b/drivers/net/mlx5/meson.build
> > > > > > @@ -182,6 +182,8 @@ if build
> > > > > >  		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
> > > > > >  		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
> > > > > >  		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
> > > > > > +		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
> > > > > > +		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
> > > > > >  		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
> > > > > >  		'RDMA_NL_NLDEV' ],
> > > > > >  		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
> > > > > > diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> > > > > > index 10d700a7f..be182a643 100644
> > > > > > --- a/drivers/net/mlx5/mlx5_flow.h
> > > > > > +++ b/drivers/net/mlx5/mlx5_flow.h
> > > > > > @@ -87,6 +87,12 @@
> > > > > >  #define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
> > > > > >  #define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
> > > > > >  #define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
> > > > > > +#define MLX5_ACTION_SET_IPV4_SRC (1u << 11)
> > > > > > +#define MLX5_ACTION_SET_IPV4_DST (1u << 12)
> > > > > > +#define MLX5_ACTION_SET_IPV6_SRC (1u << 13)
> > > > > > +#define MLX5_ACTION_SET_IPV6_DST (1u << 14)
> > > > > > +#define MLX5_ACTION_SET_TP_SRC (1u << 15)
> > > > > > +#define MLX5_ACTION_SET_TP_DST (1u << 16)
> > > > > >  
> > > > > >  /* possible L3 layers protocols filtering. */
> > > > > >  #define MLX5_IP_PROTOCOL_TCP 6
> > > > > > diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > > > index 14376188e..85c92f369 100644
> > > > > > --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > > > +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > > > @@ -53,6 +53,62 @@ struct tc_vlan {
> > > > > >  
> > > > > >  #endif /* HAVE_TC_ACT_VLAN */
> > > > > >  
> > > > > > +#ifdef HAVE_TC_ACT_PEDIT
> > > > > > +
> > > > > > +#include <linux/tc_act/tc_pedit.h>
> > > > > > +
> > > > > > +#else /* HAVE_TC_ACT_VLAN */
> > > > > > +enum {
> > > > > > +	TCA_PEDIT_UNSPEC,
> > > > > > +	TCA_PEDIT_TM,
> > > > > > +	TCA_PEDIT_PARMS,
> > > > > > +	TCA_PEDIT_PAD,
> > > > > > +	TCA_PEDIT_PARMS_EX,
> > > > > > +	TCA_PEDIT_KEYS_EX,
> > > > > > +	TCA_PEDIT_KEY_EX,
> > > > > > +	__TCA_PEDIT_MAX
> > > > > > +};
> > > > > > +
> > > > > > +enum {
> > > > > > +	TCA_PEDIT_KEY_EX_HTYPE = 1,
> > > > > > +	TCA_PEDIT_KEY_EX_CMD = 2,
> > > > > > +	__TCA_PEDIT_KEY_EX_MAX
> > > > > > +};
> > > > > > +
> > > > > > +enum pedit_header_type {
> > > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
> > > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
> > > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
> > > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
> > > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
> > > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
> > > > > > +	__PEDIT_HDR_TYPE_MAX,
> > > > > > +};
> > > > > > +
> > > > > > +enum pedit_cmd {
> > > > > > +	TCA_PEDIT_KEY_EX_CMD_SET = 0,
> > > > > > +	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
> > > > > > +	__PEDIT_CMD_MAX,
> > > > > > +};
> > > > > > +
> > > > > > +struct tc_pedit_key {
> > > > > > +	__u32           mask;  /* AND */
> > > > > > +	__u32           val;   /*XOR */
> > > > > > +	__u32           off;  /*offset */
> > > > > > +	__u32           at;
> > > > > > +	__u32           offmask;
> > > > > > +	__u32           shift;
> > > > > > +};
> > > > > > +
> > > > > > +struct tc_pedit_sel {
> > > > > > +	tc_gen;
> > > > > > +	unsigned char           nkeys;
> > > > > > +	unsigned char           flags;
> > > > > > +	struct tc_pedit_key     keys[0];
> > > > > > +};
> > > > > > +
> > > > > > +#endif /* HAVE_TC_ACT_VLAN */
> > > > > > +
> > > > > >  /* Normally found in linux/netlink.h. */
> > > > > >  #ifndef NETLINK_CAP_ACK
> > > > > >  #define NETLINK_CAP_ACK 10
> > > > > > @@ -153,6 +209,14 @@ struct tc_vlan {
> > > > > >  #define IPV6_ADDR_LEN 16
> > > > > >  #endif
> > > > > >  
> > > > > > +#ifndef IPV4_ADDR_LEN
> > > > > > +#define IPV4_ADDR_LEN 4
> > > > > > +#endif
> > > > > > +
> > > > > > +#ifndef TP_PORT_LEN
> > > > > > +#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
> > > > > > +#endif
> > > > > > +
> > > > > >  /** Empty masks for known item types. */
> > > > > >  static const union {
> > > > > >  	struct rte_flow_item_port_id port_id;
> > > > > > @@ -227,6 +291,220 @@ struct flow_tcf_ptoi {
> > > > > >  
> > > > > >  #define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
> > > > > >  
> > > > > > +#define IS_MODIFY_ACTION(act_) ({typeof(act_) act = (act_); \
> > > > > > +		((act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || \
> > > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST  || \
> > > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC  || \
> > > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST  || \
> > > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_SRC    || \
> > > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_DST) ?    \
> > > > > > +		1 : 0; })
> > > > > 
> > > > > The reason why you need this complex multi-conditional macro is that
> > > > > RTE_FLOW_ACTION_TYPE_* isn't a bitmask. But, as this actions will be converted
> > > > > to MLX5_ACTION_* which is a bitmask, you can use that instead. Then, this
> > > > > would be enough to be:
> > > > > 
> > > > > #define MLX5_TCF_SET_ACTIONS \
> > > > > 	(MLX5_ACTION_SET_IPV4_SRC | ...)
> > > > > 
> > > > > And in the flow_tcf_validate() below,
> > > > > 	if (action_flags & MLX5_TCF_SET_ACTIONS) {
> > > > > 		...
> > > > > 	}
> > > > > 
> > > > Well, I did consider using bitmask but action_flags is an _accumulated_ variable,
> > > > records all the actions parsed so far.
> > > > But, here, I need to know what is the _current_ action and whether it belongs to modify
> > > > actions. If using bitmask, Looks like a new variable (i.e current_action) needed (?)
> > > > 
> > > > case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > >     current_action = MLX5_ACTION_SET_IPV4_SRC;
> > > >     .....
> > > > 
> > > > if (current_action & MLX5_TCF_SET_ACTIONS) ...
> > > >  ...
> > > > 
> > > > action_flags |= current_action;
> > > > 
> > > > I feel more code change needed or you think it's worth?
> > > 
> > > Understood what you meant.
> > > Please see my comment below in the flow_tcf_validate().
> > > 
> > > > > And please note that I'm going to rename MLX5_ACTION_* to MLX5_FLOW_ACTION_*.
> > > > > Please refer to "net/mlx5: rename flow macros" in PR #885. You might need to
> > > > > rebase it again.
> > > > > 
> > > > Sure, I'll rebase on it
> > > > 
> > > > > > +#define MAX_PEDIT_KEYS (128)
> > > > > > +#define SZ_PEDIT_KEY_VAL (4)
> > > > > > +
> > > > > > +struct pedit_key_ex {
> > > > > > +	enum pedit_header_type htype;
> > > > > > +	enum pedit_cmd cmd;
> > > > > > +};
> > > > > > +
> > > > > > +struct pedit_parser {
> > > > > > +	struct tc_pedit_sel sel;
> > > > > > +	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
> > > > > > +	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
> > > > > > +};
> > > > > > +
> > > > > > +static int
> > > > > > +flow_tcf_calc_pedit_keys(const uint64_t size)
> > > > > 
> > > > > Please add documentation by comment for every funcs you add.
> > > > > Refer to the other existing ones for formality.
> > > > > 
> > > > Sure!
> > > > > > +{
> > > > > > +	int keys = (size / SZ_PEDIT_KEY_VAL) +
> > > > > > +		((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);
> > > > > 
> > > > > Indentation.
> > > > > 
> > > > Will fix it
> > > > > > +	return keys;
> > > > > > +}
> > > > > > +
> > > > > > +static void
> > > > > > +flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
> > > > > > +				struct pedit_parser *p_parser,
> > > > > > +				uint64_t item_flags)
> > > > > > +{
> > > > > > +	int idx = p_parser->sel.nkeys;
> > > > > > +
> > > > > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
> > > > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
> > > > > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
> > > > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
> > > > > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > > > +	p_parser->keys[idx].off =
> > > > > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 2 : 0;
> > > > > 
> > > > > 	assert(offsetof(struct tcp_hdr, src_port) ==
> > > > > 	       offsetof(struct udp_hdr, src_port));
> > > > > 	assert(offsetof(struct tcp_hdr, dst_port) ==
> > > > > 	       offsetof(struct udp_hdr, dst_port));
> > > > > 	p_parser->keys[idx].off =
> > > > > 		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
> > > > > 		offsetof(struct tcp_hdr, src_port) :
> > > > > 		offsetof(struct tcp_hdr, dst_port);
> > > > > 
> > > > > assert() is just to be informative.
> > > > > And how about src first like others below?
> > > > > 
> > > > Yes, I will update above.
> > > > 
> > > > > > +	p_parser->keys[idx].mask = 0xFFFF0000;
> > > > > > +	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
> > > > > > +			actions->conf)->port;
> > > > > 
> > > > > Assigning 2B to 4B big-endian stroage? Doesn't look consistent with the mask
> > > > > above - 0xffff0000.
> > > > > 
> > > > So it should be as following ?
> > > > p_parser->keys[idx].val = (__u32)((const struct rte_flow_action_set_tp *))
> > > >                 actions->conf)->port;
> > > 
> > > You can figure it out by actual tests but I think the following would be right.
> > > 	p_parser->keys[idx].val =
> > > 		rte_cpu_to_be_32(((const struct rte_flow_action_set_tp *)
> > > 				  actions->conf)->port);
> > > 
> > > Please verify it by testing anyway.
> > > 
> > No, it doesn't work correctly if it's converted to BE.
> > As my understanding the Netlink message should be expressed in host-byte order (?)
> 
> As far as I know rte_flow takes network-byte order for args and tcf na msg takes
> host-byte order. Please refer to the "override_na_vlan_id:" in mlx5_flow_tcf.c,
> rte_be_to_cpu_16() is used there.  I'm still confusing why the mask is
> 0xffff0000 above. Please make sure your code works correctly by multiple test
> cases and hopefully I can hear clear explanation what is right and why.
> 
Hey Koh,
1. yes, rte_flow takes network-byte order. Here the 'port' is already converted to
BE by testpmd.
If the rte_be_to_cpu_16 is used just like "override_na_vlan_id" does the result
is not correct. Things like TC-pedit takes network-byte order.
2. For the mask, honestly, I'm not very clear. The only reference is from [1] and 
my verification by using TC show command.
[1] https://www.netdevconf.org/2.1/slides/apr7/tc-workshop/vadai-tc-workshop-update.pdf

> > > > > > +	p_parser->sel.nkeys = (++idx);
> > > > > > +}
> > > > > > +
> > > > > > +static void
> > > > > > +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
> > > > > > +				 struct pedit_parser *p_parser)
> > > > > > +{
> > > > > > +	int idx = p_parser->sel.nkeys;
> > > > > > +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > > > +	int off_base =
> > > > > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;
> > > > > 
> > > > > offsetof(struct ipv6_hdr, src_addr) :
> > > > > offsetof(struct ipv6_hdr, dst_addr);
> > > > > 
> > > > Got it!
> > > > > > +	const struct rte_flow_action_set_ipv6 *conf =
> > > > > > +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
> > > > > > +
> > > > > > +	for (int i = 0; i < keys; i++, idx++) {
> > > > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
> > > > > > +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > > > +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
> > > > > > +		p_parser->keys[idx].mask = ~UINT32_MAX;
> > > > > > +		memcpy(&p_parser->keys[idx].val,
> > > > > > +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
> > > > > > +			SZ_PEDIT_KEY_VAL);
> > > > > > +	}
> > > > > > +	p_parser->sel.nkeys += keys;
> > > > > > +}
> > > > > > +
> > > > > > +static void
> > > > > > +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
> > > > > 
> > > > > How about getting rte_flow_action_set_ipv4 instead of rte_flow_action?
> > > > > Same comment for ipv6 and tp_port.
> > > > > 
> > > > What's the benefit by using rte_flow_action_set_ipv4 and how I know it's
> > > > for src or dst address ?
> > > 
> > > Just to make the function neat but I overlooked that you still need
> > > actions->type. Please disregard my previous comment.
> > > 
> > OK~
> > > > > > +				 struct pedit_parser *p_parser)
> > > > > > +{
> > > > > > +	int idx = p_parser->sel.nkeys;
> > > > > > +
> > > > > > +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
> > > > > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > > > +	p_parser->keys[idx].off =
> > > > > > +		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);
> > > > > 
> > > > > offsetof(struct ipv4_hdr, src_addr) :
> > > > > offsetof(struct ipv4_hdr, dst_addr);
> > > > > 
> > > > Got it!
> > > > > > +	p_parser->keys[idx].mask = ~UINT32_MAX;
> > > > > > +	p_parser->keys[idx].val =
> > > > > > +		((const struct rte_flow_action_set_ipv4 *)
> > > > > > +		 actions->conf)->ipv4_addr;
> > > > > > +	p_parser->sel.nkeys = (++idx);
> > > > > > +}
> > > > > > +
> > > > > > +static int
> > > > > > +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
> > > > > > +			      const struct rte_flow_action **actions,
> > > > > > +			      uint64_t item_flags)
> > > > > > +{
> > > > > > +	struct pedit_parser p_parser;
> > > > > > +
> > > > > > +	memset(&p_parser, 0, sizeof(p_parser));
> > > > > > +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
> > > > > > +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
> > > > > > +							    TCA_ACT_OPTIONS);
> > > > > > +	/* all modify header actions should be in one tc-pedit action */
> > > > > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > > > > 
> > > > > It seems that you want to aggregate all the pedit actions and form a single
> > > > > na attr. But what if rte_flow_action_set_* are not contiguous? E.g.
> > > > > 
> > > > > flow create ... actions set1 / set2 / port_id / set3 / end
> > > > > 
> > > > > Then, it would have two pedit na attrs. Is that okay?
> > > > > Or, need to think about another way?
> > > > > 
> > > > > Same will happen in flow_tcf_get_pedit_actions_size().
> > > > > 
> > > > It's OK if we have more than one pedit na attrs.
> > > > _BUT_ only last pedit take effect based on my experiment
> > > 
> > > Then, shouldn't we give some warning to user in validation? So that user can
> > > have right expectation and reorder the actions as their intention like:
> > > 	flow create ... actions set1 / set2 / set3 / port_id / end
> > > 
> > > Otherwise set1 and set2 will be lost according to your comment.
> > > 
> > I prefer to give error to user in validation because this is simple.
> 
> Good.
> 
> > > Or, how about making PMD do the right thing. I mean, even if the set actions are
> > > scattered, PMD can collect it and apply in a single na attr?
> > > 
> > My feeling is the above approach will be (become) complex. It looks like we introduce
> > new functionality which re-order all actions, something like rss_expand. 
> 
> +1
> 
> > > > > > +		switch ((*actions)->type) {
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > > +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > > +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > > +			flow_tcf_pedit_key_set_tp_port(*actions,
> > > > > > +							&p_parser, item_flags);
> > > > > > +			break;
> > > > > > +		default:
> > > > > > +			goto pedit_mnl_msg_done;
> > > > > > +		}
> > > > > > +	}
> > > > > > +pedit_mnl_msg_done:
> > > > > > +	p_parser.sel.action = TC_ACT_PIPE;
> > > > > > +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
> > > > > > +			sizeof(p_parser.sel) +
> > > > > > +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
> > > > > > +			&p_parser);
> > > > > > +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
> > > > > > +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
> > > > > > +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
> > > > > > +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
> > > > > > +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
> > > > > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
> > > > > > +				 p_parser.keys_ex[i].htype);
> > > > > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
> > > > > > +				 p_parser.keys_ex[i].cmd);
> > > > > > +		mnl_attr_nest_end(nl, na_pedit_key);
> > > > > > +	}
> > > > > > +	mnl_attr_nest_end(nl, na_pedit_keys);
> > > > > > +	mnl_attr_nest_end(nl, na_act_options);
> > > > > > +	(*actions)--;
> > > > > > +	return 0;
> > > > > > +}
> > > > > > +
> > > > > > +/**
> > > > > > + * Calculate max memory size of one TC-pedit actions.
> > > > > > + * One TC-pedit action can contain set of keys each defining
> > > > > > + * a rewrite element (rte_flow action)
> > > > > > + *
> > > > > > + * @param[in] actions
> > > > > > + *   actions specification.
> > > > > > + * @param[inout] action_flags
> > > > > > + *   actions flags
> > > > > > + * @param[inout] size
> > > > > > + *   accumulated size
> > > > > > + * @return
> > > > > > + *   Max memory size of one TC-pedit action
> > > > > > + */
> > > > > > +static int
> > > > > > +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
> > > > > > +				uint64_t *action_flags)
> > > > > > +{
> > > > > > +	int pedit_size = 0;
> > > > > > +	int keys = 0;
> > > > > > +	uint64_t flags = 0;
> > > > > > +
> > > > > > +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
> > > > > > +		      SZ_NLATTR_STRZ_OF("pedit") +
> > > > > > +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
> > > > > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > > > > > +		switch ((*actions)->type) {
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > > > > +			flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > > > > +			flags |= MLX5_ACTION_SET_IPV4_DST;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > > > +			flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > > > +			flags |= MLX5_ACTION_SET_IPV6_DST;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > > +			/* TCP is as same as UDP */
> > > > > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > > > > +			flags |= MLX5_ACTION_SET_TP_SRC;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > > +			/* TCP is as same as UDP */
> > > > > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > > > > +			flags |= MLX5_ACTION_SET_TP_DST;
> > > > > > +			break;
> > > > > > +		default:
> > > > > > +			goto get_pedit_action_size_done;
> > > > > > +		}
> > > > > > +	}
> > > > > > +get_pedit_action_size_done:
> > > > > > +	/* TCA_PEDIT_PARAMS_EX */
> > > > > > +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
> > > > > > +			keys * sizeof(struct tc_pedit_key));
> > > > > 
> > > > > > +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
> > > > > > +	pedit_size += keys *
> > > > > > +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
> > > > > > +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
> > > > > > +	(*action_flags) |= flags;
> > > > > > +	(*actions)--;
> > > > > > +	return pedit_size;
> > > > > > +}
> > > > > > +
> > > > > >  /**
> > > > > >   * Retrieve mask for pattern item.
> > > > > >   *
> > > > > > @@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > > > > >  			of_set_vlan_vid;
> > > > > >  		const struct rte_flow_action_of_set_vlan_pcp *
> > > > > >  			of_set_vlan_pcp;
> > > > > > +		const struct rte_flow_action_set_ipv4 *set_ipv4;
> > > > > > +		const struct rte_flow_action_set_ipv6 *set_ipv6;
> > > > > >  	} conf;
> > > > > >  	uint32_t item_flags = 0;
> > > > > >  	uint32_t action_flags = 0;
> > > > > > @@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > > > > >  		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
> > > > > >  			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
> > > > > >  			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > > +			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > > +			action_flags |= MLX5_ACTION_SET_IPV4_DST;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > > +			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > > +			action_flags |= MLX5_ACTION_SET_IPV6_DST;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > > +			action_flags |= MLX5_ACTION_SET_TP_SRC;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > > +			action_flags |= MLX5_ACTION_SET_TP_DST;
> > > > > > +			break;
> > > > > >  		default:
> > > > > >  			return rte_flow_error_set(error, ENOTSUP,
> > > > > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > >  						  actions,
> > > > > >  						  "action not supported");
> > > > > >  		}
> > > > > > +		if (IS_MODIFY_ACTION(actions->type)) {
> > > 
> > > This would be a redundant 'if' as classification is already done above. So, how
> > > about adding a goto label at the end of this code - 'err_no_action_conf:', and
> > > use goto above.  E.g.,
> > > 
> > > 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > 			action_flags |= MLX5_ACTION_SET_TP_DST;
> > > 			if (!actions->conf)
> > > 				goto err_no_action_conf;
> > > 			break;
> > > 
> > In some level I do agree with you it's redundant. But things like this kind of
> > redundancy is not avoidable. I mean if we use "goto err_no_action_conf", the
> > "if (!actions->conf) goto err_no_action_conf" has to be repeated in each "case"
> > which needs to check conf or you think it's acceptable ?
> 
> But in your approach, the redundant check (IS_MODIFY_ACTION) is inside a loop
> and it has to be checked even if there's no 'set' action in the action list.
> 
Yes, it will check very time. But I doubt it will impact performace significantly.

Another thing bother me is, if taking 'goto' approach, beside the 'if (!actions->conf)...',
at this moment, we also need to add 'if (set action is discontinued)' for
each modification case. I'm not sure if this will keep code clear
considering many repeated code there...

> > > And if I may, can I ask you to add the same to RTE_FLOW_ACTION_TYPE_PORT_ID?
> > > 
> > Yes, I will add.
> > > > > > +			if (!actions->conf)
> > > > > > +				return rte_flow_error_set(error, ENOTSUP,
> > > > > > +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> > > > > > +						actions,
> > > > > > +						"action configuration not set");
> > > > > > +		}
> > > > > > +	}
> > > > > > +	if (action_flags &
> > > > > > +	   (MLX5_ACTION_SET_IPV4_SRC | MLX5_ACTION_SET_IPV4_DST)) {
> > > > > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
> > > > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > > > +						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > > +						  actions,
> > > > > > +						  "no ipv4 item found in"
> > > > > > +						  " pattern");
> > > > > > +	}
> > > > > > +	if (action_flags &
> > > > > > +	   (MLX5_ACTION_SET_IPV6_SRC | MLX5_ACTION_SET_IPV6_DST)) {
> > > > > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
> > > > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > > > +				RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > > +				actions,
> > > > > > +				"no ipv6 item found in pattern");
> > > > > > +	}
> > > > > > +	if (action_flags & (MLX5_ACTION_SET_TP_SRC | MLX5_ACTION_SET_TP_DST)) {
> > > > > > +		if (!(item_flags &
> > > > > > +		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
> > > > > > +		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
> > > > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > > > +						RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > > +						actions,
> > > > > > +						"no TCP/UDP item found in"
> > > > > > +						" pattern");
> > > 
> > > All the errors you added, I think EINVAL would be a better fit?
> > > 
> > Yes, EINVAL should be better.
> > > > > Isn't this 'set' action compatible with drop action? No point of modifying
> > > > > packet which will be dropped, isn't it?
> > > > > 
> > > > Yes, you are absolutely right :-)
> > > 
> > > I believe you'll add a validation code for that in the next version. :-)
> > > 
> > Of course ;-)
> > > > > >  	}
> > > > > >  	return 0;
> > > > > >  }
> > > > > > @@ -840,6 +1172,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
> > > > > >  				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
> > > > > >  				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
> > > > > >  			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > > +			size += flow_tcf_get_pedit_actions_size(&actions,
> > > > > > +								&flags);
> > > > > > +			break;
> > > > > >  		default:
> > > > > >  			DRV_LOG(WARNING,
> > > > > >  				"unsupported action %p type %d,"
> > > > > > @@ -998,6 +1339,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > > >  	struct nlattr *na_flower_act;
> > > > > >  	struct nlattr *na_vlan_id = NULL;
> > > > > >  	struct nlattr *na_vlan_priority = NULL;
> > > > > > +	uint64_t item_flags = 0;
> > > > > >  
> > > > > >  	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
> > > > > >  						PTOI_TABLE_SZ_MAX(dev)));
> > > > > > @@ -1189,6 +1531,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > > >  			}
> > > > > >  			break;
> > > > > >  		case RTE_FLOW_ITEM_TYPE_UDP:
> > > > > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> > > > > 
> > > > > Let's add the same to the rest of items like flow_tcf_validate().
> > > > > 
> > > > OK!
> > > > > 
> > > > > Thanks,
> > > > > Yongseok
> > > > > 
> > > > > >  			mask.udp = flow_tcf_item_mask
> > > > > >  				(items, &rte_flow_item_udp_mask,
> > > > > >  				 &flow_tcf_mask_supported.udp,
> > > > > > @@ -1218,6 +1561,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > > >  			}
> > > > > >  			break;
> > > > > >  		case RTE_FLOW_ITEM_TYPE_TCP:
> > > > > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
> > > > > >  			mask.tcp = flow_tcf_item_mask
> > > > > >  				(items, &rte_flow_item_tcp_mask,
> > > > > >  				 &flow_tcf_mask_supported.tcp,
> > > > > > @@ -1368,6 +1712,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > > >  					conf.of_set_vlan_pcp->vlan_pcp;
> > > > > >  			}
> > > > > >  			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > > +			na_act_index =
> > > > > > +				mnl_attr_nest_start(nlh, na_act_index_cur++);
> > > > > > +			flow_tcf_create_pedit_mnl_msg(nlh,
> > > > > > +						      &actions, item_flags);
> > > > > > +			mnl_attr_nest_end(nlh, na_act_index);
> > > > > > +			break;
> > > > > >  		default:
> > > > > >  			return rte_flow_error_set(error, ENOTSUP,
> > > > > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > > -- 
> > > > > > 2.17.1
> > > > > >
  
Yongseok Koh Oct. 9, 2018, 8:55 a.m. UTC | #7
> On Oct 9, 2018, at 12:03 AM, Jack Min <jackmin@mellanox.com> wrote:
> 
> On 18-10-09 06:08:32, Yongseok Koh wrote:
>> On Mon, Oct 08, 2018 at 07:22:03PM +0800, Xiaoyu Min wrote:
>>> On 18-10-02 04:19:00, Yongseok Koh wrote:
>>>> On Sun, Sep 30, 2018 at 03:21:04PM +0800, Xiaoyu Min wrote:
>>>>> On 18-09-29 07:03:33, Yongseok Koh wrote:
>>>>>> On Tue, Sep 25, 2018 at 07:51:06PM +0800, Xiaoyu Min wrote:
[...]
>>>>> 
>>>>>>> +	p_parser->keys[idx].mask = 0xFFFF0000;
>>>>>>> +	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
>>>>>>> +			actions->conf)->port;
>>>>>> 
>>>>>> Assigning 2B to 4B big-endian stroage? Doesn't look consistent with the mask
>>>>>> above - 0xffff0000.
>>>>>> 
>>>>> So it should be as following ?
>>>>> p_parser->keys[idx].val = (__u32)((const struct rte_flow_action_set_tp *))
>>>>>                actions->conf)->port;
>>>> 
>>>> You can figure it out by actual tests but I think the following would be right.
>>>> 	p_parser->keys[idx].val =
>>>> 		rte_cpu_to_be_32(((const struct rte_flow_action_set_tp *)
>>>> 				  actions->conf)->port);
>>>> 
>>>> Please verify it by testing anyway.
>>>> 
>>> No, it doesn't work correctly if it's converted to BE.
>>> As my understanding the Netlink message should be expressed in host-byte order (?)
>> 
>> As far as I know rte_flow takes network-byte order for args and tcf na msg takes
>> host-byte order. Please refer to the "override_na_vlan_id:" in mlx5_flow_tcf.c,
>> rte_be_to_cpu_16() is used there.  I'm still confusing why the mask is
>> 0xffff0000 above. Please make sure your code works correctly by multiple test
>> cases and hopefully I can hear clear explanation what is right and why.
>> 
> Hey Koh,
> 1. yes, rte_flow takes network-byte order. Here the 'port' is already converted to
> BE by testpmd.
> If the rte_be_to_cpu_16 is used just like "override_na_vlan_id" does the result
> is not correct. Things like TC-pedit takes network-byte order.
> 2. For the mask, honestly, I'm not very clear. The only reference is from [1] and 
> my verification by using TC show command.
> [1] https://www.netdevconf.org/2.1/slides/apr7/tc-workshop/vadai-tc-workshop-update.pdf

Interesting.
Please go ahead with your code because you have already verified it.

>>>>>>> +	p_parser->sel.nkeys = (++idx);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void
>>>>>>> +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
>>>>>>> +				 struct pedit_parser *p_parser)
>>>>>>> +{
>>>>>>> +	int idx = p_parser->sel.nkeys;
>>>>>>> +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
>>>>>>> +	int off_base =
>>>>>>> +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;
>>>>>> 
>>>>>> offsetof(struct ipv6_hdr, src_addr) :
>>>>>> offsetof(struct ipv6_hdr, dst_addr);
>>>>>> 
>>>>> Got it!
>>>>>>> +	const struct rte_flow_action_set_ipv6 *conf =
>>>>>>> +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
>>>>>>> +
>>>>>>> +	for (int i = 0; i < keys; i++, idx++) {
>>>>>>> +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
>>>>>>> +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
>>>>>>> +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
>>>>>>> +		p_parser->keys[idx].mask = ~UINT32_MAX;
>>>>>>> +		memcpy(&p_parser->keys[idx].val,
>>>>>>> +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
>>>>>>> +			SZ_PEDIT_KEY_VAL);
>>>>>>> +	}
>>>>>>> +	p_parser->sel.nkeys += keys;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void
>>>>>>> +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
>>>>>> 
>>>>>> How about getting rte_flow_action_set_ipv4 instead of rte_flow_action?
>>>>>> Same comment for ipv6 and tp_port.
>>>>>> 
>>>>> What's the benefit by using rte_flow_action_set_ipv4 and how I know it's
>>>>> for src or dst address ?
>>>> 
>>>> Just to make the function neat but I overlooked that you still need
>>>> actions->type. Please disregard my previous comment.
>>>> 
>>> OK~
>>>>>>> +				 struct pedit_parser *p_parser)
>>>>>>> +{
>>>>>>> +	int idx = p_parser->sel.nkeys;
>>>>>>> +
>>>>>>> +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
>>>>>>> +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
>>>>>>> +	p_parser->keys[idx].off =
>>>>>>> +		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);
>>>>>> 
>>>>>> offsetof(struct ipv4_hdr, src_addr) :
>>>>>> offsetof(struct ipv4_hdr, dst_addr);
>>>>>> 
>>>>> Got it!
>>>>>>> +	p_parser->keys[idx].mask = ~UINT32_MAX;
>>>>>>> +	p_parser->keys[idx].val =
>>>>>>> +		((const struct rte_flow_action_set_ipv4 *)
>>>>>>> +		 actions->conf)->ipv4_addr;
>>>>>>> +	p_parser->sel.nkeys = (++idx);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int
>>>>>>> +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
>>>>>>> +			      const struct rte_flow_action **actions,
>>>>>>> +			      uint64_t item_flags)
>>>>>>> +{
>>>>>>> +	struct pedit_parser p_parser;
>>>>>>> +
>>>>>>> +	memset(&p_parser, 0, sizeof(p_parser));
>>>>>>> +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
>>>>>>> +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
>>>>>>> +							    TCA_ACT_OPTIONS);
>>>>>>> +	/* all modify header actions should be in one tc-pedit action */
>>>>>>> +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
>>>>>> 
>>>>>> It seems that you want to aggregate all the pedit actions and form a single
>>>>>> na attr. But what if rte_flow_action_set_* are not contiguous? E.g.
>>>>>> 
>>>>>> flow create ... actions set1 / set2 / port_id / set3 / end
>>>>>> 
>>>>>> Then, it would have two pedit na attrs. Is that okay?
>>>>>> Or, need to think about another way?
>>>>>> 
>>>>>> Same will happen in flow_tcf_get_pedit_actions_size().
>>>>>> 
>>>>> It's OK if we have more than one pedit na attrs.
>>>>> _BUT_ only last pedit take effect based on my experiment
>>>> 
>>>> Then, shouldn't we give some warning to user in validation? So that user can
>>>> have right expectation and reorder the actions as their intention like:
>>>> 	flow create ... actions set1 / set2 / set3 / port_id / end
>>>> 
>>>> Otherwise set1 and set2 will be lost according to your comment.
>>>> 
>>> I prefer to give error to user in validation because this is simple.
>> 
>> Good.
>> 
>>>> Or, how about making PMD do the right thing. I mean, even if the set actions are
>>>> scattered, PMD can collect it and apply in a single na attr?
>>>> 
>>> My feeling is the above approach will be (become) complex. It looks like we introduce
>>> new functionality which re-order all actions, something like rss_expand. 
>> 
>> +1
>> 
>>>>>>> +		switch ((*actions)->type) {
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
>>>>>>> +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
>>>>>>> +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
>>>>>>> +			flow_tcf_pedit_key_set_tp_port(*actions,
>>>>>>> +							&p_parser, item_flags);
>>>>>>> +			break;
>>>>>>> +		default:
>>>>>>> +			goto pedit_mnl_msg_done;
>>>>>>> +		}
>>>>>>> +	}
>>>>>>> +pedit_mnl_msg_done:
>>>>>>> +	p_parser.sel.action = TC_ACT_PIPE;
>>>>>>> +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
>>>>>>> +			sizeof(p_parser.sel) +
>>>>>>> +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
>>>>>>> +			&p_parser);
>>>>>>> +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
>>>>>>> +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
>>>>>>> +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
>>>>>>> +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
>>>>>>> +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
>>>>>>> +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
>>>>>>> +				 p_parser.keys_ex[i].htype);
>>>>>>> +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
>>>>>>> +				 p_parser.keys_ex[i].cmd);
>>>>>>> +		mnl_attr_nest_end(nl, na_pedit_key);
>>>>>>> +	}
>>>>>>> +	mnl_attr_nest_end(nl, na_pedit_keys);
>>>>>>> +	mnl_attr_nest_end(nl, na_act_options);
>>>>>>> +	(*actions)--;
>>>>>>> +	return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +/**
>>>>>>> + * Calculate max memory size of one TC-pedit actions.
>>>>>>> + * One TC-pedit action can contain set of keys each defining
>>>>>>> + * a rewrite element (rte_flow action)
>>>>>>> + *
>>>>>>> + * @param[in] actions
>>>>>>> + *   actions specification.
>>>>>>> + * @param[inout] action_flags
>>>>>>> + *   actions flags
>>>>>>> + * @param[inout] size
>>>>>>> + *   accumulated size
>>>>>>> + * @return
>>>>>>> + *   Max memory size of one TC-pedit action
>>>>>>> + */
>>>>>>> +static int
>>>>>>> +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
>>>>>>> +				uint64_t *action_flags)
>>>>>>> +{
>>>>>>> +	int pedit_size = 0;
>>>>>>> +	int keys = 0;
>>>>>>> +	uint64_t flags = 0;
>>>>>>> +
>>>>>>> +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
>>>>>>> +		      SZ_NLATTR_STRZ_OF("pedit") +
>>>>>>> +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
>>>>>>> +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
>>>>>>> +		switch ((*actions)->type) {
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
>>>>>>> +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
>>>>>>> +			flags |= MLX5_ACTION_SET_IPV4_SRC;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
>>>>>>> +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
>>>>>>> +			flags |= MLX5_ACTION_SET_IPV4_DST;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
>>>>>>> +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
>>>>>>> +			flags |= MLX5_ACTION_SET_IPV6_SRC;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
>>>>>>> +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
>>>>>>> +			flags |= MLX5_ACTION_SET_IPV6_DST;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
>>>>>>> +			/* TCP is as same as UDP */
>>>>>>> +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
>>>>>>> +			flags |= MLX5_ACTION_SET_TP_SRC;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
>>>>>>> +			/* TCP is as same as UDP */
>>>>>>> +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
>>>>>>> +			flags |= MLX5_ACTION_SET_TP_DST;
>>>>>>> +			break;
>>>>>>> +		default:
>>>>>>> +			goto get_pedit_action_size_done;
>>>>>>> +		}
>>>>>>> +	}
>>>>>>> +get_pedit_action_size_done:
>>>>>>> +	/* TCA_PEDIT_PARAMS_EX */
>>>>>>> +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
>>>>>>> +			keys * sizeof(struct tc_pedit_key));
>>>>>> 
>>>>>>> +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
>>>>>>> +	pedit_size += keys *
>>>>>>> +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
>>>>>>> +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
>>>>>>> +	(*action_flags) |= flags;
>>>>>>> +	(*actions)--;
>>>>>>> +	return pedit_size;
>>>>>>> +}
>>>>>>> +
>>>>>>> /**
>>>>>>>  * Retrieve mask for pattern item.
>>>>>>>  *
>>>>>>> @@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>>>>>>> 			of_set_vlan_vid;
>>>>>>> 		const struct rte_flow_action_of_set_vlan_pcp *
>>>>>>> 			of_set_vlan_pcp;
>>>>>>> +		const struct rte_flow_action_set_ipv4 *set_ipv4;
>>>>>>> +		const struct rte_flow_action_set_ipv6 *set_ipv6;
>>>>>>> 	} conf;
>>>>>>> 	uint32_t item_flags = 0;
>>>>>>> 	uint32_t action_flags = 0;
>>>>>>> @@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>>>>>>> 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
>>>>>>> 			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
>>>>>>> 			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
>>>>>>> +			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
>>>>>>> +			action_flags |= MLX5_ACTION_SET_IPV4_DST;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
>>>>>>> +			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
>>>>>>> +			action_flags |= MLX5_ACTION_SET_IPV6_DST;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
>>>>>>> +			action_flags |= MLX5_ACTION_SET_TP_SRC;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
>>>>>>> +			action_flags |= MLX5_ACTION_SET_TP_DST;
>>>>>>> +			break;
>>>>>>> 		default:
>>>>>>> 			return rte_flow_error_set(error, ENOTSUP,
>>>>>>> 						  RTE_FLOW_ERROR_TYPE_ACTION,
>>>>>>> 						  actions,
>>>>>>> 						  "action not supported");
>>>>>>> 		}
>>>>>>> +		if (IS_MODIFY_ACTION(actions->type)) {
>>>> 
>>>> This would be a redundant 'if' as classification is already done above. So, how
>>>> about adding a goto label at the end of this code - 'err_no_action_conf:', and
>>>> use goto above.  E.g.,
>>>> 
>>>> 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
>>>> 			action_flags |= MLX5_ACTION_SET_TP_DST;
>>>> 			if (!actions->conf)
>>>> 				goto err_no_action_conf;
>>>> 			break;
>>>> 
>>> In some level I do agree with you it's redundant. But things like this kind of
>>> redundancy is not avoidable. I mean if we use "goto err_no_action_conf", the
>>> "if (!actions->conf) goto err_no_action_conf" has to be repeated in each "case"
>>> which needs to check conf or you think it's acceptable ?
>> 
>> But in your approach, the redundant check (IS_MODIFY_ACTION) is inside a loop
>> and it has to be checked even if there's no 'set' action in the action list.
>> 
> Yes, it will check very time. But I doubt it will impact performace significantly.

Not a performance issue but I just wanted to make the code modular as much as possible.

> Another thing bother me is, if taking 'goto' approach, beside the 'if (!actions->conf)...',
> at this moment, we also need to add 'if (set action is discontinued)' for
> each modification case. I'm not sure if this will keep code clear
> considering many repeated code there...

And like I asked, this sanity check is also needed for other actions (PORT_ID and VLAN).
That's why I just wanted to have more generic way.

I'm not sure which way is clearer considering all that.

I don't have a strong objection about your current code but please make sure you add the
same sanity check for other existing acitons in a unified way. Also I hope the macro
(IS_MODIFY_ACTION) has better name.


Thanks,
Yongseok

>>>> 
>>>> And if I may, can I ask you to add the same to RTE_FLOW_ACTION_TYPE_PORT_ID?
>>>> 
>>> Yes, I will add.
>>>>>>> +			if (!actions->conf)
>>>>>>> +				return rte_flow_error_set(error, ENOTSUP,
>>>>>>> +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
>>>>>>> +						actions,
>>>>>>> +						"action configuration not set");
>>>>>>> +		}
>>>>>>> +	}
  

Patch

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index ca1de9f21..49b95e78e 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -346,6 +346,11 @@  mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		linux/tc_act/tc_vlan.h \
 		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
 		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TC_ACT_PEDIT \
+		linux/tc_act/tc_pedit.h \
+		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
+		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
 		HAVE_SUPPORTED_40000baseKR4_Full \
 		/usr/include/linux/ethtool.h \
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index fd93ac162..ef6a85101 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -182,6 +182,8 @@  if build
 		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
 		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
 		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
+		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
+		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
 		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
 		'RDMA_NL_NLDEV' ],
 		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 10d700a7f..be182a643 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -87,6 +87,12 @@ 
 #define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
 #define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
 #define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
+#define MLX5_ACTION_SET_IPV4_SRC (1u << 11)
+#define MLX5_ACTION_SET_IPV4_DST (1u << 12)
+#define MLX5_ACTION_SET_IPV6_SRC (1u << 13)
+#define MLX5_ACTION_SET_IPV6_DST (1u << 14)
+#define MLX5_ACTION_SET_TP_SRC (1u << 15)
+#define MLX5_ACTION_SET_TP_DST (1u << 16)
 
 /* possible L3 layers protocols filtering. */
 #define MLX5_IP_PROTOCOL_TCP 6
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 14376188e..85c92f369 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -53,6 +53,62 @@  struct tc_vlan {
 
 #endif /* HAVE_TC_ACT_VLAN */
 
+#ifdef HAVE_TC_ACT_PEDIT
+
+#include <linux/tc_act/tc_pedit.h>
+
+#else /* HAVE_TC_ACT_VLAN */
+enum {
+	TCA_PEDIT_UNSPEC,
+	TCA_PEDIT_TM,
+	TCA_PEDIT_PARMS,
+	TCA_PEDIT_PAD,
+	TCA_PEDIT_PARMS_EX,
+	TCA_PEDIT_KEYS_EX,
+	TCA_PEDIT_KEY_EX,
+	__TCA_PEDIT_MAX
+};
+
+enum {
+	TCA_PEDIT_KEY_EX_HTYPE = 1,
+	TCA_PEDIT_KEY_EX_CMD = 2,
+	__TCA_PEDIT_KEY_EX_MAX
+};
+
+enum pedit_header_type {
+	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
+	__PEDIT_HDR_TYPE_MAX,
+};
+
+enum pedit_cmd {
+	TCA_PEDIT_KEY_EX_CMD_SET = 0,
+	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
+	__PEDIT_CMD_MAX,
+};
+
+struct tc_pedit_key {
+	__u32           mask;  /* AND */
+	__u32           val;   /*XOR */
+	__u32           off;  /*offset */
+	__u32           at;
+	__u32           offmask;
+	__u32           shift;
+};
+
+struct tc_pedit_sel {
+	tc_gen;
+	unsigned char           nkeys;
+	unsigned char           flags;
+	struct tc_pedit_key     keys[0];
+};
+
+#endif /* HAVE_TC_ACT_VLAN */
+
 /* Normally found in linux/netlink.h. */
 #ifndef NETLINK_CAP_ACK
 #define NETLINK_CAP_ACK 10
@@ -153,6 +209,14 @@  struct tc_vlan {
 #define IPV6_ADDR_LEN 16
 #endif
 
+#ifndef IPV4_ADDR_LEN
+#define IPV4_ADDR_LEN 4
+#endif
+
+#ifndef TP_PORT_LEN
+#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
+#endif
+
 /** Empty masks for known item types. */
 static const union {
 	struct rte_flow_item_port_id port_id;
@@ -227,6 +291,220 @@  struct flow_tcf_ptoi {
 
 #define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
 
+#define IS_MODIFY_ACTION(act_) ({typeof(act_) act = (act_); \
+		((act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || \
+		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST  || \
+		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC  || \
+		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST  || \
+		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_SRC    || \
+		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_DST) ?    \
+		1 : 0; })
+#define MAX_PEDIT_KEYS (128)
+#define SZ_PEDIT_KEY_VAL (4)
+
+struct pedit_key_ex {
+	enum pedit_header_type htype;
+	enum pedit_cmd cmd;
+};
+
+struct pedit_parser {
+	struct tc_pedit_sel sel;
+	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
+	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
+};
+
+static int
+flow_tcf_calc_pedit_keys(const uint64_t size)
+{
+	int keys = (size / SZ_PEDIT_KEY_VAL) +
+		((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);
+	return keys;
+}
+
+static void
+flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
+				struct pedit_parser *p_parser,
+				uint64_t item_flags)
+{
+	int idx = p_parser->sel.nkeys;
+
+	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
+	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
+	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+	p_parser->keys[idx].off =
+		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 2 : 0;
+	p_parser->keys[idx].mask = 0xFFFF0000;
+	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
+			actions->conf)->port;
+	p_parser->sel.nkeys = (++idx);
+}
+
+static void
+flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
+				 struct pedit_parser *p_parser)
+{
+	int idx = p_parser->sel.nkeys;
+	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
+	int off_base =
+		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;
+	const struct rte_flow_action_set_ipv6 *conf =
+		(const struct rte_flow_action_set_ipv6 *)actions->conf;
+
+	for (int i = 0; i < keys; i++, idx++) {
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
+		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
+		p_parser->keys[idx].mask = ~UINT32_MAX;
+		memcpy(&p_parser->keys[idx].val,
+			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
+			SZ_PEDIT_KEY_VAL);
+	}
+	p_parser->sel.nkeys += keys;
+}
+
+static void
+flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
+				 struct pedit_parser *p_parser)
+{
+	int idx = p_parser->sel.nkeys;
+
+	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
+	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+	p_parser->keys[idx].off =
+		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);
+	p_parser->keys[idx].mask = ~UINT32_MAX;
+	p_parser->keys[idx].val =
+		((const struct rte_flow_action_set_ipv4 *)
+		 actions->conf)->ipv4_addr;
+	p_parser->sel.nkeys = (++idx);
+}
+
+static int
+flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
+			      const struct rte_flow_action **actions,
+			      uint64_t item_flags)
+{
+	struct pedit_parser p_parser;
+
+	memset(&p_parser, 0, sizeof(p_parser));
+	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
+	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
+							    TCA_ACT_OPTIONS);
+	/* all modify header actions should be in one tc-pedit action */
+	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+		switch ((*actions)->type) {
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			flow_tcf_pedit_key_set_tp_port(*actions,
+							&p_parser, item_flags);
+			break;
+		default:
+			goto pedit_mnl_msg_done;
+		}
+	}
+pedit_mnl_msg_done:
+	p_parser.sel.action = TC_ACT_PIPE;
+	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
+			sizeof(p_parser.sel) +
+			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
+			&p_parser);
+	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
+					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
+	for (int i = 0; i < p_parser.sel.nkeys; i++) {
+		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
+					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
+		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
+				 p_parser.keys_ex[i].htype);
+		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
+				 p_parser.keys_ex[i].cmd);
+		mnl_attr_nest_end(nl, na_pedit_key);
+	}
+	mnl_attr_nest_end(nl, na_pedit_keys);
+	mnl_attr_nest_end(nl, na_act_options);
+	(*actions)--;
+	return 0;
+}
+
+/**
+ * Calculate max memory size of one TC-pedit actions.
+ * One TC-pedit action can contain set of keys each defining
+ * a rewrite element (rte_flow action)
+ *
+ * @param[in] actions
+ *   actions specification.
+ * @param[inout] action_flags
+ *   actions flags
+ * @param[inout] size
+ *   accumulated size
+ * @return
+ *   Max memory size of one TC-pedit action
+ */
+static int
+flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
+				uint64_t *action_flags)
+{
+	int pedit_size = 0;
+	int keys = 0;
+	uint64_t flags = 0;
+
+	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
+		      SZ_NLATTR_STRZ_OF("pedit") +
+		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
+	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+		switch ((*actions)->type) {
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
+			flags |= MLX5_ACTION_SET_IPV4_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
+			flags |= MLX5_ACTION_SET_IPV4_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
+			flags |= MLX5_ACTION_SET_IPV6_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
+			flags |= MLX5_ACTION_SET_IPV6_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+			/* TCP is as same as UDP */
+			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
+			flags |= MLX5_ACTION_SET_TP_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			/* TCP is as same as UDP */
+			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
+			flags |= MLX5_ACTION_SET_TP_DST;
+			break;
+		default:
+			goto get_pedit_action_size_done;
+		}
+	}
+get_pedit_action_size_done:
+	/* TCA_PEDIT_PARAMS_EX */
+	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
+			keys * sizeof(struct tc_pedit_key));
+	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
+	pedit_size += keys *
+		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
+		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
+	(*action_flags) |= flags;
+	(*actions)--;
+	return pedit_size;
+}
+
 /**
  * Retrieve mask for pattern item.
  *
@@ -430,6 +708,8 @@  flow_tcf_validate(struct rte_eth_dev *dev,
 			of_set_vlan_vid;
 		const struct rte_flow_action_of_set_vlan_pcp *
 			of_set_vlan_pcp;
+		const struct rte_flow_action_set_ipv4 *set_ipv4;
+		const struct rte_flow_action_set_ipv6 *set_ipv6;
 	} conf;
 	uint32_t item_flags = 0;
 	uint32_t action_flags = 0;
@@ -690,12 +970,64 @@  flow_tcf_validate(struct rte_eth_dev *dev,
 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
 			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			action_flags |= MLX5_ACTION_SET_IPV4_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			action_flags |= MLX5_ACTION_SET_IPV6_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+			action_flags |= MLX5_ACTION_SET_TP_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			action_flags |= MLX5_ACTION_SET_TP_DST;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,
 						  actions,
 						  "action not supported");
 		}
+		if (IS_MODIFY_ACTION(actions->type)) {
+			if (!actions->conf)
+				return rte_flow_error_set(error, ENOTSUP,
+						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+						actions,
+						"action configuration not set");
+		}
+	}
+	if (action_flags &
+	   (MLX5_ACTION_SET_IPV4_SRC | MLX5_ACTION_SET_IPV4_DST)) {
+		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "no ipv4 item found in"
+						  " pattern");
+	}
+	if (action_flags &
+	   (MLX5_ACTION_SET_IPV6_SRC | MLX5_ACTION_SET_IPV6_DST)) {
+		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
+			return rte_flow_error_set(error, ENOTSUP,
+				RTE_FLOW_ERROR_TYPE_ACTION,
+				actions,
+				"no ipv6 item found in pattern");
+	}
+	if (action_flags & (MLX5_ACTION_SET_TP_SRC | MLX5_ACTION_SET_TP_DST)) {
+		if (!(item_flags &
+		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
+		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
+			return rte_flow_error_set(error, ENOTSUP,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						actions,
+						"no TCP/UDP item found in"
+						" pattern");
 	}
 	return 0;
 }
@@ -840,6 +1172,15 @@  flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
 				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
 				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
 			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			size += flow_tcf_get_pedit_actions_size(&actions,
+								&flags);
+			break;
 		default:
 			DRV_LOG(WARNING,
 				"unsupported action %p type %d,"
@@ -998,6 +1339,7 @@  flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 	struct nlattr *na_flower_act;
 	struct nlattr *na_vlan_id = NULL;
 	struct nlattr *na_vlan_priority = NULL;
+	uint64_t item_flags = 0;
 
 	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
 						PTOI_TABLE_SZ_MAX(dev)));
@@ -1189,6 +1531,7 @@  flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_UDP:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
 			mask.udp = flow_tcf_item_mask
 				(items, &rte_flow_item_udp_mask,
 				 &flow_tcf_mask_supported.udp,
@@ -1218,6 +1561,7 @@  flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_TCP:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
 			mask.tcp = flow_tcf_item_mask
 				(items, &rte_flow_item_tcp_mask,
 				 &flow_tcf_mask_supported.tcp,
@@ -1368,6 +1712,18 @@  flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 					conf.of_set_vlan_pcp->vlan_pcp;
 			}
 			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			na_act_index =
+				mnl_attr_nest_start(nlh, na_act_index_cur++);
+			flow_tcf_create_pedit_mnl_msg(nlh,
+						      &actions, item_flags);
+			mnl_attr_nest_end(nlh, na_act_index);
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,