[v4] net/mlx5: support metadata as flow rule criteria
Checks
Commit Message
As described in series starting at [1], it adds option to set
metadata value as match pattern when creating a new flow rule.
This patch adds metadata support in mlx5 driver, in two parts:
- Add the validation and setting of metadata value in matcher,
when creating a new flow rule.
- Add the passing of metadata value from mbuf to wqe when
indicated by ol_flag, in different burst functions.
[1] "ethdev: support metadata as flow rule criteria"
http://mails.dpdk.org/archives/dev/2018-October/115469.html
---
v4:
- Rebase.
- Apply code review comments.
v3:
- Update meta item validation.
v2:
- Split the support of egress rules to a different patch.
---
Signed-off-by: Dekel Peled <dekelp@mellanox.com>
---
drivers/net/mlx5/mlx5_flow.c | 2 +-
drivers/net/mlx5/mlx5_flow.h | 8 +++
drivers/net/mlx5/mlx5_flow_dv.c | 109 ++++++++++++++++++++++++++++++++++
drivers/net/mlx5/mlx5_prm.h | 2 +-
drivers/net/mlx5/mlx5_rxtx.c | 33 ++++++++--
drivers/net/mlx5/mlx5_rxtx_vec.c | 38 +++++++++---
drivers/net/mlx5/mlx5_rxtx_vec.h | 1 +
drivers/net/mlx5/mlx5_rxtx_vec_neon.h | 9 ++-
drivers/net/mlx5/mlx5_rxtx_vec_sse.h | 10 ++--
drivers/net/mlx5/mlx5_txq.c | 6 ++
10 files changed, 192 insertions(+), 26 deletions(-)
Comments
On Wed, Oct 17, 2018 at 02:53:37PM +0300, Dekel Peled wrote:
> As described in series starting at [1], it adds option to set
> metadata value as match pattern when creating a new flow rule.
>
> This patch adds metadata support in mlx5 driver, in two parts:
> - Add the validation and setting of metadata value in matcher,
> when creating a new flow rule.
> - Add the passing of metadata value from mbuf to wqe when
> indicated by ol_flag, in different burst functions.
>
> [1] "ethdev: support metadata as flow rule criteria"
> http://mails.dpdk.org/archives/dev/2018-October/115469.html
>
> ---
> v4:
> - Rebase.
> - Apply code review comments.
> v3:
> - Update meta item validation.
> v2:
> - Split the support of egress rules to a different patch.
> ---
>
> Signed-off-by: Dekel Peled <dekelp@mellanox.com>
> ---
> drivers/net/mlx5/mlx5_flow.c | 2 +-
> drivers/net/mlx5/mlx5_flow.h | 8 +++
> drivers/net/mlx5/mlx5_flow_dv.c | 109 ++++++++++++++++++++++++++++++++++
> drivers/net/mlx5/mlx5_prm.h | 2 +-
> drivers/net/mlx5/mlx5_rxtx.c | 33 ++++++++--
> drivers/net/mlx5/mlx5_rxtx_vec.c | 38 +++++++++---
> drivers/net/mlx5/mlx5_rxtx_vec.h | 1 +
> drivers/net/mlx5/mlx5_rxtx_vec_neon.h | 9 ++-
> drivers/net/mlx5/mlx5_rxtx_vec_sse.h | 10 ++--
> drivers/net/mlx5/mlx5_txq.c | 6 ++
> 10 files changed, 192 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index bd70fce..15262f6 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -418,7 +418,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
> * @return
> * 0 on success, a negative errno value otherwise and rte_errno is set.
> */
> -static int
> +int
> mlx5_flow_item_acceptable(const struct rte_flow_item *item,
> const uint8_t *mask,
> const uint8_t *nic_mask,
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 094f666..834a6ed 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -43,6 +43,9 @@
> #define MLX5_FLOW_LAYER_GRE (1u << 14)
> #define MLX5_FLOW_LAYER_MPLS (1u << 15)
>
> +/* General pattern items bits. */
> +#define MLX5_FLOW_ITEM_METADATA (1u << 16)
> +
> /* Outer Masks. */
> #define MLX5_FLOW_LAYER_OUTER_L3 \
> (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
> @@ -307,6 +310,11 @@ int mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
> int mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
> const struct rte_flow_attr *attributes,
> struct rte_flow_error *error);
> +int mlx5_flow_item_acceptable(const struct rte_flow_item *item,
> + const uint8_t *mask,
> + const uint8_t *nic_mask,
> + unsigned int size,
> + struct rte_flow_error *error);
> int mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
> uint64_t item_flags,
> struct rte_flow_error *error);
> diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
> index a013201..bfddfab 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -36,6 +36,69 @@
> #ifdef HAVE_IBV_FLOW_DV_SUPPORT
>
> /**
> + * Validate META item.
> + *
> + * @param[in] dev
> + * Pointer to the rte_eth_dev structure.
> + * @param[in] item
> + * Item specification.
> + * @param[in] attr
> + * Attributes of flow that includes this item.
> + * @param[out] error
> + * Pointer to error structure.
> + *
> + * @return
> + * 0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +flow_dv_validate_item_meta(struct rte_eth_dev *dev,
> + const struct rte_flow_item *item,
> + const struct rte_flow_attr *attr,
> + struct rte_flow_error *error)
> +{
> + const struct rte_flow_item_meta *spec = item->spec;
> + const struct rte_flow_item_meta *mask = item->mask;
> +
No blank line.
> + const struct rte_flow_item_meta nic_mask = {
> + .data = RTE_BE32(UINT32_MAX)
> + };
> +
Ditto.
> + int ret;
> + uint64_t offloads = dev->data->dev_conf.txmode.offloads;
> +
> + if (!(offloads & DEV_TX_OFFLOAD_MATCH_METADATA))
> + return rte_flow_error_set(error, EPERM,
> + RTE_FLOW_ERROR_TYPE_ITEM,
> + NULL,
> + "match on metadata offload "
> + "configuration is off for this port");
> + if (!spec)
> + return rte_flow_error_set(error, EINVAL,
> + RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
> + item->spec,
> + "data cannot be empty");
> + if (!spec->data)
> + return rte_flow_error_set(error, EINVAL,
> + RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
> + NULL,
> + "data cannot be zero");
> + if (!mask)
> + mask = &rte_flow_item_meta_mask;
> + ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
> + (const uint8_t *)&nic_mask,
> + sizeof(struct rte_flow_item_meta),
> + error);
> + if (ret < 0)
> + return ret;
> + if (attr->ingress)
> + return rte_flow_error_set(error, ENOTSUP,
> + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
> + NULL,
> + "pattern not supported for ingress");
> + return 0;
> +}
> +
> +/**
> * Verify the @p attributes will be correctly understood by the NIC and store
> * them in the @p flow if everything is correct.
> *
> @@ -214,6 +277,13 @@
> return ret;
> item_flags |= MLX5_FLOW_LAYER_MPLS;
> break;
> + case RTE_FLOW_ITEM_TYPE_META:
> + ret = flow_dv_validate_item_meta(dev, items, attr,
> + error);
> + if (ret < 0)
> + return ret;
> + item_flags |= MLX5_FLOW_ITEM_METADATA;
> + break;
> default:
> return rte_flow_error_set(error, ENOTSUP,
> RTE_FLOW_ERROR_TYPE_ITEM,
> @@ -855,6 +925,42 @@
> }
>
> /**
> + * Add META item to matcher
> + *
> + * @param[in, out] matcher
> + * Flow matcher.
> + * @param[in, out] key
> + * Flow matcher value.
> + * @param[in] item
> + * Flow pattern to translate.
> + * @param[in] inner
> + * Item is inner pattern.
> + */
> +static void
> +flow_dv_translate_item_meta(void *matcher, void *key,
> + const struct rte_flow_item *item)
> +{
> + const struct rte_flow_item_meta *meta_m;
> + const struct rte_flow_item_meta *meta_v;
> +
> + void *misc2_m =
> + MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters_2);
> + void *misc2_v =
> + MLX5_ADDR_OF(fte_match_param, key, misc_parameters_2);
> +
> + meta_m = (const void *)item->mask;
> + if (!meta_m)
> + meta_m = &rte_flow_item_meta_mask;
> + meta_v = (const void *)item->spec;
> + if (meta_v) {
> + MLX5_SET(fte_match_set_misc2, misc2_m, metadata_reg_a,
> + RTE_BE32(meta_m->data));
Nope. RTE_BE32() is for a built-in constant, not for a variable.
You should use rte_cpu_to_be_32() instead.
> + MLX5_SET(fte_match_set_misc2, misc2_v, metadata_reg_a,
> + RTE_BE32(meta_v->data));
Same here.
> + }
> +}
> +
> +/**
> * Update the matcher and the value based the selected item.
> *
> * @param[in, out] matcher
> @@ -940,6 +1046,9 @@
> flow_dv_translate_item_vxlan(tmatcher->mask.buf, key, item,
> inner);
> break;
> + case RTE_FLOW_ITEM_TYPE_META:
> + flow_dv_translate_item_meta(tmatcher->mask.buf, key, item);
> + break;
> default:
> break;
> }
> diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
> index 69296a0..29742b1 100644
> --- a/drivers/net/mlx5/mlx5_prm.h
> +++ b/drivers/net/mlx5/mlx5_prm.h
> @@ -159,7 +159,7 @@ struct mlx5_wqe_eth_seg_small {
> uint8_t cs_flags;
> uint8_t rsvd1;
> uint16_t mss;
> - uint32_t rsvd2;
> + uint32_t flow_table_metadata;
> uint16_t inline_hdr_sz;
> uint8_t inline_hdr[2];
> } __rte_aligned(MLX5_WQE_DWORD_SIZE);
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 558e6b6..5b4d2fd 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -523,6 +523,7 @@
> uint8_t tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
> uint32_t swp_offsets = 0;
> uint8_t swp_types = 0;
> + uint32_t metadata;
> uint16_t tso_segsz = 0;
> #ifdef MLX5_PMD_SOFT_COUNTERS
> uint32_t total_length = 0;
> @@ -566,6 +567,10 @@
> cs_flags = txq_ol_cksum_to_cs(buf);
> txq_mbuf_to_swp(txq, buf, (uint8_t *)&swp_offsets, &swp_types);
> raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
> + /* Copy metadata from mbuf if valid */
> + metadata = buf->ol_flags & PKT_TX_METADATA ?
> + buf->tx_metadata : 0;
Indentation.
> +
No blank line.
> /* Replace the Ethernet type by the VLAN if necessary. */
> if (buf->ol_flags & PKT_TX_VLAN_PKT) {
> uint32_t vlan = rte_cpu_to_be_32(0x81000000 |
> @@ -781,7 +786,7 @@
> swp_offsets,
> cs_flags | (swp_types << 8) |
> (rte_cpu_to_be_16(tso_segsz) << 16),
> - 0,
> + rte_cpu_to_be_32(metadata),
> (ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
> };
> } else {
> @@ -795,7 +800,7 @@
> wqe->eseg = (rte_v128u32_t){
> swp_offsets,
> cs_flags | (swp_types << 8),
> - 0,
> + rte_cpu_to_be_32(metadata),
> (ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
> };
> }
> @@ -861,7 +866,7 @@
> mpw->wqe->eseg.inline_hdr_sz = 0;
> mpw->wqe->eseg.rsvd0 = 0;
> mpw->wqe->eseg.rsvd1 = 0;
> - mpw->wqe->eseg.rsvd2 = 0;
> + mpw->wqe->eseg.flow_table_metadata = 0;
> mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
> (txq->wqe_ci << 8) |
> MLX5_OPCODE_TSO);
> @@ -948,6 +953,7 @@
> uint32_t length;
> unsigned int segs_n = buf->nb_segs;
> uint32_t cs_flags;
> + uint32_t metadata;
>
> /*
> * Make sure there is enough room to store this packet and
> @@ -964,6 +970,9 @@
> max_elts -= segs_n;
> --pkts_n;
> cs_flags = txq_ol_cksum_to_cs(buf);
> + /* Copy metadata from mbuf if valid */
> + metadata = buf->ol_flags & PKT_TX_METADATA ?
> + buf->tx_metadata : 0;
Indentation.
And no need to change to big-endian? I think it is needed.
> /* Retrieve packet information. */
> length = PKT_LEN(buf);
> assert(length);
> @@ -971,6 +980,7 @@
> if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
> ((mpw.len != length) ||
> (segs_n != 1) ||
> + (mpw.wqe->eseg.flow_table_metadata != metadata) ||
> (mpw.wqe->eseg.cs_flags != cs_flags)))
> mlx5_mpw_close(txq, &mpw);
> if (mpw.state == MLX5_MPW_STATE_CLOSED) {
> @@ -984,6 +994,7 @@
> max_wqe -= 2;
> mlx5_mpw_new(txq, &mpw, length);
> mpw.wqe->eseg.cs_flags = cs_flags;
> + mpw.wqe->eseg.flow_table_metadata = metadata;
> }
> /* Multi-segment packets must be alone in their MPW. */
> assert((segs_n == 1) || (mpw.pkts_n == 0));
> @@ -1082,7 +1093,7 @@
> mpw->wqe->eseg.cs_flags = 0;
> mpw->wqe->eseg.rsvd0 = 0;
> mpw->wqe->eseg.rsvd1 = 0;
> - mpw->wqe->eseg.rsvd2 = 0;
> + mpw->wqe->eseg.flow_table_metadata = 0;
> inl = (struct mlx5_wqe_inl_small *)
> (((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
> mpw->data.raw = (uint8_t *)&inl->raw;
> @@ -1172,6 +1183,7 @@
> uint32_t length;
> unsigned int segs_n = buf->nb_segs;
> uint8_t cs_flags;
> + uint32_t metadata;
>
> /*
> * Make sure there is enough room to store this packet and
> @@ -1193,18 +1205,23 @@
> */
> max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
> cs_flags = txq_ol_cksum_to_cs(buf);
> + /* Copy metadata from mbuf if valid */
> + metadata = buf->ol_flags & PKT_TX_METADATA ?
> + buf->tx_metadata : 0;
Indentation.
And no need to change to big-endian?
> /* Retrieve packet information. */
> length = PKT_LEN(buf);
> /* Start new session if packet differs. */
> if (mpw.state == MLX5_MPW_STATE_OPENED) {
> if ((mpw.len != length) ||
> (segs_n != 1) ||
> + (mpw.wqe->eseg.flow_table_metadata != metadata) ||
> (mpw.wqe->eseg.cs_flags != cs_flags))
> mlx5_mpw_close(txq, &mpw);
> } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
> if ((mpw.len != length) ||
> (segs_n != 1) ||
> (length > inline_room) ||
> + (mpw.wqe->eseg.flow_table_metadata != metadata) ||
> (mpw.wqe->eseg.cs_flags != cs_flags)) {
> mlx5_mpw_inline_close(txq, &mpw);
> inline_room =
> @@ -1224,12 +1241,14 @@
> max_wqe -= 2;
> mlx5_mpw_new(txq, &mpw, length);
> mpw.wqe->eseg.cs_flags = cs_flags;
> + mpw.wqe->eseg.flow_table_metadata = metadata;
> } else {
> if (unlikely(max_wqe < wqe_inl_n))
> break;
> max_wqe -= wqe_inl_n;
> mlx5_mpw_inline_new(txq, &mpw, length);
> mpw.wqe->eseg.cs_flags = cs_flags;
> + mpw.wqe->eseg.flow_table_metadata = metadata;
> }
> }
> /* Multi-segment packets must be alone in their MPW. */
> @@ -1461,6 +1480,7 @@
> unsigned int do_inline = 0; /* Whether inline is possible. */
> uint32_t length;
> uint8_t cs_flags;
> + uint32_t metadata;
>
> /* Multi-segmented packet is handled in slow-path outside. */
> assert(NB_SEGS(buf) == 1);
> @@ -1468,6 +1488,9 @@
> if (max_elts - j == 0)
> break;
> cs_flags = txq_ol_cksum_to_cs(buf);
> + /* Copy metadata from mbuf if valid */
> + metadata = buf->ol_flags & PKT_TX_METADATA ?
> + buf->tx_metadata : 0;
Indentation.
And no need to change to big-endian?
> /* Retrieve packet information. */
> length = PKT_LEN(buf);
> /* Start new session if:
> @@ -1482,6 +1505,7 @@
> (length <= txq->inline_max_packet_sz &&
> inl_pad + sizeof(inl_hdr) + length >
> mpw_room) ||
> + (mpw.wqe->eseg.flow_table_metadata != metadata) ||
> (mpw.wqe->eseg.cs_flags != cs_flags))
> max_wqe -= mlx5_empw_close(txq, &mpw);
> }
> @@ -1505,6 +1529,7 @@
> sizeof(inl_hdr) + length <= mpw_room &&
> !txq->mpw_hdr_dseg;
> mpw.wqe->eseg.cs_flags = cs_flags;
> + mpw.wqe->eseg.flow_table_metadata = metadata;
> } else {
> /* Evaluate whether the next packet can be inlined.
> * Inlininig is possible when:
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
> index 0a4aed8..16a8608 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec.c
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
> @@ -41,6 +41,8 @@
>
> /**
> * Count the number of packets having same ol_flags and calculate cs_flags.
> + * If PKT_TX_METADATA is set in ol_flags, packets must have same metadata
> + * as well.
Packets can have different metadata but we just want to count the number of
packets having the same metadata. Please correct the comment.
> *
> * @param pkts
> * Pointer to array of packets.
> @@ -48,26 +50,41 @@
> * Number of packets.
> * @param cs_flags
> * Pointer of flags to be returned.
> + * @param metadata
> + * Pointer of metadata to be returned.
> + * @param txq_offloads
> + * Offloads enabled on Tx queue
> *
> * @return
> - * Number of packets having same ol_flags.
> + * Number of packets having same ol_flags and metadata, if relevant.
> */
> static inline unsigned int
> -txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags)
> +txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags,
> + uint32_t *metadata, const uint64_t txq_offloads)
> {
> unsigned int pos;
> const uint64_t ol_mask =
> PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
> PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
> - PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
> + PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM | PKT_TX_METADATA;
Shouldn't add PKT_TX_METADATA. As it is for cksum, you might rather want to
change the name, e.g., cksum_ol_mask.
>
> if (!pkts_n)
> return 0;
> /* Count the number of packets having same ol_flags. */
This comment has to be corrected and moved.
> - for (pos = 1; pos < pkts_n; ++pos)
> - if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask)
> + for (pos = 1; pos < pkts_n; ++pos) {
> + if ((txq_offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP) &&
> + ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask))
Indentation.
> break;
> + /* If the metadata ol_flag is set,
> + * metadata must be same in all packets.
> + */
Correct comment. First line should be empty for multi-line comment.
And it can't be 'must'. We are not forcing it but just counting the number of
packets having same metadata like I mentioned above.
> + if ((txq_offloads & DEV_TX_OFFLOAD_MATCH_METADATA) &&
> + (pkts[pos]->ol_flags & PKT_TX_METADATA) &&
> + pkts[0]->tx_metadata != pkts[pos]->tx_metadata)
Disagree. What if pkts[0] doesn't have PKT_TX_METADATA while pkts[1] has it?
And, indentation.
> + break;
> + }
> *cs_flags = txq_ol_cksum_to_cs(pkts[0]);
> + *metadata = rte_cpu_to_be_32(pkts[0]->tx_metadata);
Same here. You should check if pkts[0] has metadata first.
> return pos;
Here's my suggestion for the whole func.
unsigned int pos;
const uint64_t cksum_ol_mask =
PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
uint32_t p0_metadata;
if (!pkts_n)
return 0;
p0_metadata = pkts[0]->ol_flags & PKT_TX_METADATA ?
pkts[0]->tx_metadata : 0;
/* Count the number of packets having same offload parameters. */
for (pos = 1; pos < pkts_n; ++pos) {
/* Check if packet can have same checksum flags. */
if ((txq_offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP) &&
((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & cksum_ol_mask))
break;
/* Check if packet has same metadata. */
if (txq_offloads & DEV_TX_OFFLOAD_MATCH_METADATA) {
const uint32_t p1_metadata =
pkts[pos]->ol_flags & PKT_TX_METADATA ?
pkts[pos]->tx_metadata : 0;
if (p1_metadata != p0_metadata)
break;
}
}
*cs_flags = txq_ol_cksum_to_cs(pkts[0]);
*metadata = rte_cpu_to_be_32(p0_metadata);
return pos;
> }
>
> @@ -96,7 +113,7 @@
> uint16_t ret;
>
> n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
> - ret = txq_burst_v(txq, &pkts[nb_tx], n, 0);
> + ret = txq_burst_v(txq, &pkts[nb_tx], n, 0, 0);
> nb_tx += ret;
> if (!ret)
> break;
> @@ -127,6 +144,7 @@
> uint8_t cs_flags = 0;
> uint16_t n;
> uint16_t ret;
> + uint32_t metadata = 0;
Let's use rte_be32_t instead.
>
> /* Transmit multi-seg packets in the head of pkts list. */
> if ((txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) &&
> @@ -137,9 +155,11 @@
> n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
> if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
> n = txq_count_contig_single_seg(&pkts[nb_tx], n);
> - if (txq->offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP)
> - n = txq_calc_offload(&pkts[nb_tx], n, &cs_flags);
> - ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
> + if (txq->offloads & (MLX5_VEC_TX_CKSUM_OFFLOAD_CAP |
> + DEV_TX_OFFLOAD_MATCH_METADATA))
Indentation.
> + n = txq_calc_offload(&pkts[nb_tx], n,
> + &cs_flags, &metadata, txq->offloads);
Indentation.
> + ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags, metadata);
> nb_tx += ret;
> if (!ret)
> break;
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.h b/drivers/net/mlx5/mlx5_rxtx_vec.h
> index fb884f9..fda7004 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec.h
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec.h
> @@ -22,6 +22,7 @@
> /* HW offload capabilities of vectorized Tx. */
> #define MLX5_VEC_TX_OFFLOAD_CAP \
> (MLX5_VEC_TX_CKSUM_OFFLOAD_CAP | \
> + DEV_TX_OFFLOAD_MATCH_METADATA | \
> DEV_TX_OFFLOAD_MULTI_SEGS)
>
> /*
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> index b37b738..a8a4d7b 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> @@ -201,13 +201,15 @@
> * Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
> * @param cs_flags
> * Checksum offload flags to be written in the descriptor.
> + * @param metadata
> + * Metadata value to be written in the descriptor.
> *
> * @return
> * Number of packets successfully transmitted (<= pkts_n).
> */
> static inline uint16_t
> txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
> - uint8_t cs_flags)
> + uint8_t cs_flags, uint32_t metadata)
Let's use rte_be32_t instead.
> {
> struct rte_mbuf **elts;
> uint16_t elts_head = txq->elts_head;
> @@ -294,10 +296,7 @@
> vst1q_u8((void *)t_wqe, ctrl);
> /* Fill ESEG in the header. */
> vst1q_u8((void *)(t_wqe + 1),
> - ((uint8x16_t) { 0, 0, 0, 0,
> - cs_flags, 0, 0, 0,
> - 0, 0, 0, 0,
> - 0, 0, 0, 0 }));
> + ((uint32x4_t) { 0, cs_flags, metadata, 0 }));
> #ifdef MLX5_PMD_SOFT_COUNTERS
> txq->stats.opackets += pkts_n;
> #endif
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> index 54b3783..31aae4a 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> @@ -202,13 +202,15 @@
> * Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
> * @param cs_flags
> * Checksum offload flags to be written in the descriptor.
> + * @param metadata
> + * Metadata value to be written in the descriptor.
> *
> * @return
> * Number of packets successfully transmitted (<= pkts_n).
> */
> static inline uint16_t
> txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
> - uint8_t cs_flags)
> + uint8_t cs_flags, uint32_t metadata)
Let's use rte_be32_t instead.
> {
> struct rte_mbuf **elts;
> uint16_t elts_head = txq->elts_head;
> @@ -292,11 +294,7 @@
> ctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);
> _mm_store_si128(t_wqe, ctrl);
> /* Fill ESEG in the header. */
> - _mm_store_si128(t_wqe + 1,
> - _mm_set_epi8(0, 0, 0, 0,
> - 0, 0, 0, 0,
> - 0, 0, 0, cs_flags,
> - 0, 0, 0, 0));
> + _mm_store_si128(t_wqe + 1, _mm_set_epi32(0, metadata, cs_flags, 0));
> #ifdef MLX5_PMD_SOFT_COUNTERS
> txq->stats.opackets += pkts_n;
> #endif
> diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
> index f9bc473..7263fb1 100644
> --- a/drivers/net/mlx5/mlx5_txq.c
> +++ b/drivers/net/mlx5/mlx5_txq.c
> @@ -128,6 +128,12 @@
> offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
> DEV_TX_OFFLOAD_GRE_TNL_TSO);
> }
> +
Please no blank line.
> +#ifdef HAVE_IBV_FLOW_DV_SUPPORT
> + if (config->dv_flow_en)
> + offloads |= DEV_TX_OFFLOAD_MATCH_METADATA;
> +#endif
> +
Same here.
> return offloads;
> }
>
> --
> 1.8.3.1
>
Thanks, PSB.
> -----Original Message-----
> From: Yongseok Koh
> Sent: Thursday, October 18, 2018 11:01 AM
> To: Dekel Peled <dekelp@mellanox.com>
> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org; Ori Kam
> <orika@mellanox.com>
> Subject: Re: [PATCH v4] net/mlx5: support metadata as flow rule criteria
>
> On Wed, Oct 17, 2018 at 02:53:37PM +0300, Dekel Peled wrote:
> > As described in series starting at [1], it adds option to set metadata
> > value as match pattern when creating a new flow rule.
> >
> > This patch adds metadata support in mlx5 driver, in two parts:
> > - Add the validation and setting of metadata value in matcher,
> > when creating a new flow rule.
> > - Add the passing of metadata value from mbuf to wqe when
> > indicated by ol_flag, in different burst functions.
> >
> > [1] "ethdev: support metadata as flow rule criteria"
> > http://mails.dpdk.org/archives/dev/2018-October/115469.html
> >
> > ---
> > v4:
> > - Rebase.
> > - Apply code review comments.
> > v3:
> > - Update meta item validation.
> > v2:
> > - Split the support of egress rules to a different patch.
> > ---
> >
> > Signed-off-by: Dekel Peled <dekelp@mellanox.com>
> > ---
> > drivers/net/mlx5/mlx5_flow.c | 2 +-
> > drivers/net/mlx5/mlx5_flow.h | 8 +++
> > drivers/net/mlx5/mlx5_flow_dv.c | 109
> ++++++++++++++++++++++++++++++++++
> > drivers/net/mlx5/mlx5_prm.h | 2 +-
> > drivers/net/mlx5/mlx5_rxtx.c | 33 ++++++++--
> > drivers/net/mlx5/mlx5_rxtx_vec.c | 38 +++++++++---
> > drivers/net/mlx5/mlx5_rxtx_vec.h | 1 +
> > drivers/net/mlx5/mlx5_rxtx_vec_neon.h | 9 ++-
> > drivers/net/mlx5/mlx5_rxtx_vec_sse.h | 10 ++--
> > drivers/net/mlx5/mlx5_txq.c | 6 ++
> > 10 files changed, 192 insertions(+), 26 deletions(-)
> >
> > diff --git a/drivers/net/mlx5/mlx5_flow.c
> > b/drivers/net/mlx5/mlx5_flow.c index bd70fce..15262f6 100644
> > --- a/drivers/net/mlx5/mlx5_flow.c
> > +++ b/drivers/net/mlx5/mlx5_flow.c
> > @@ -418,7 +418,7 @@ uint32_t mlx5_flow_adjust_priority(struct
> rte_eth_dev *dev, int32_t priority,
> > * @return
> > * 0 on success, a negative errno value otherwise and rte_errno is set.
> > */
> > -static int
> > +int
> > mlx5_flow_item_acceptable(const struct rte_flow_item *item,
> > const uint8_t *mask,
> > const uint8_t *nic_mask,
> > diff --git a/drivers/net/mlx5/mlx5_flow.h
> > b/drivers/net/mlx5/mlx5_flow.h index 094f666..834a6ed 100644
> > --- a/drivers/net/mlx5/mlx5_flow.h
> > +++ b/drivers/net/mlx5/mlx5_flow.h
> > @@ -43,6 +43,9 @@
> > #define MLX5_FLOW_LAYER_GRE (1u << 14) #define
> MLX5_FLOW_LAYER_MPLS
> > (1u << 15)
> >
> > +/* General pattern items bits. */
> > +#define MLX5_FLOW_ITEM_METADATA (1u << 16)
> > +
> > /* Outer Masks. */
> > #define MLX5_FLOW_LAYER_OUTER_L3 \
> > (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
> MLX5_FLOW_LAYER_OUTER_L3_IPV6) @@
> > -307,6 +310,11 @@ int mlx5_flow_validate_action_rss(const struct
> > rte_flow_action *action, int mlx5_flow_validate_attributes(struct
> rte_eth_dev *dev,
> > const struct rte_flow_attr *attributes,
> > struct rte_flow_error *error);
> > +int mlx5_flow_item_acceptable(const struct rte_flow_item *item,
> > + const uint8_t *mask,
> > + const uint8_t *nic_mask,
> > + unsigned int size,
> > + struct rte_flow_error *error);
> > int mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
> > uint64_t item_flags,
> > struct rte_flow_error *error);
> > diff --git a/drivers/net/mlx5/mlx5_flow_dv.c
> > b/drivers/net/mlx5/mlx5_flow_dv.c index a013201..bfddfab 100644
> > --- a/drivers/net/mlx5/mlx5_flow_dv.c
> > +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> > @@ -36,6 +36,69 @@
> > #ifdef HAVE_IBV_FLOW_DV_SUPPORT
> >
> > /**
> > + * Validate META item.
> > + *
> > + * @param[in] dev
> > + * Pointer to the rte_eth_dev structure.
> > + * @param[in] item
> > + * Item specification.
> > + * @param[in] attr
> > + * Attributes of flow that includes this item.
> > + * @param[out] error
> > + * Pointer to error structure.
> > + *
> > + * @return
> > + * 0 on success, a negative errno value otherwise and rte_errno is set.
> > + */
> > +static int
> > +flow_dv_validate_item_meta(struct rte_eth_dev *dev,
> > + const struct rte_flow_item *item,
> > + const struct rte_flow_attr *attr,
> > + struct rte_flow_error *error)
> > +{
> > + const struct rte_flow_item_meta *spec = item->spec;
> > + const struct rte_flow_item_meta *mask = item->mask;
> > +
>
> No blank line.
Removed.
>
> > + const struct rte_flow_item_meta nic_mask = {
> > + .data = RTE_BE32(UINT32_MAX)
> > + };
> > +
>
> Ditto.
Removed.
>
> > + int ret;
> > + uint64_t offloads = dev->data->dev_conf.txmode.offloads;
> > +
> > + if (!(offloads & DEV_TX_OFFLOAD_MATCH_METADATA))
> > + return rte_flow_error_set(error, EPERM,
> > + RTE_FLOW_ERROR_TYPE_ITEM,
> > + NULL,
> > + "match on metadata offload "
> > + "configuration is off for this port");
> > + if (!spec)
> > + return rte_flow_error_set(error, EINVAL,
> > +
> RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
> > + item->spec,
> > + "data cannot be empty");
> > + if (!spec->data)
> > + return rte_flow_error_set(error, EINVAL,
> > +
> RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
> > + NULL,
> > + "data cannot be zero");
> > + if (!mask)
> > + mask = &rte_flow_item_meta_mask;
> > + ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
> > + (const uint8_t *)&nic_mask,
> > + sizeof(struct rte_flow_item_meta),
> > + error);
> > + if (ret < 0)
> > + return ret;
> > + if (attr->ingress)
> > + return rte_flow_error_set(error, ENOTSUP,
> > +
> RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
> > + NULL,
> > + "pattern not supported for
> ingress");
> > + return 0;
> > +}
> > +
> > +/**
> > * Verify the @p attributes will be correctly understood by the NIC and
> store
> > * them in the @p flow if everything is correct.
> > *
> > @@ -214,6 +277,13 @@
> > return ret;
> > item_flags |= MLX5_FLOW_LAYER_MPLS;
> > break;
> > + case RTE_FLOW_ITEM_TYPE_META:
> > + ret = flow_dv_validate_item_meta(dev, items, attr,
> > + error);
> > + if (ret < 0)
> > + return ret;
> > + item_flags |= MLX5_FLOW_ITEM_METADATA;
> > + break;
> > default:
> > return rte_flow_error_set(error, ENOTSUP,
> >
> RTE_FLOW_ERROR_TYPE_ITEM,
> > @@ -855,6 +925,42 @@
> > }
> >
> > /**
> > + * Add META item to matcher
> > + *
> > + * @param[in, out] matcher
> > + * Flow matcher.
> > + * @param[in, out] key
> > + * Flow matcher value.
> > + * @param[in] item
> > + * Flow pattern to translate.
> > + * @param[in] inner
> > + * Item is inner pattern.
> > + */
> > +static void
> > +flow_dv_translate_item_meta(void *matcher, void *key,
> > + const struct rte_flow_item *item) {
> > + const struct rte_flow_item_meta *meta_m;
> > + const struct rte_flow_item_meta *meta_v;
> > +
> > + void *misc2_m =
> > + MLX5_ADDR_OF(fte_match_param, matcher,
> misc_parameters_2);
> > + void *misc2_v =
> > + MLX5_ADDR_OF(fte_match_param, key,
> misc_parameters_2);
> > +
> > + meta_m = (const void *)item->mask;
> > + if (!meta_m)
> > + meta_m = &rte_flow_item_meta_mask;
> > + meta_v = (const void *)item->spec;
> > + if (meta_v) {
> > + MLX5_SET(fte_match_set_misc2, misc2_m,
> metadata_reg_a,
> > + RTE_BE32(meta_m->data));
>
> Nope. RTE_BE32() is for builtin constant, not for a variable.
> You should use rte_cpu_to_be_32() instead.
Replaced.
>
> > + MLX5_SET(fte_match_set_misc2, misc2_v, metadata_reg_a,
> > + RTE_BE32(meta_v->data));
>
> Same here.
Replaced.
>
> > + }
> > +}
> > +
> > +/**
> > * Update the matcher and the value based the selected item.
> > *
> > * @param[in, out] matcher
> > @@ -940,6 +1046,9 @@
> > flow_dv_translate_item_vxlan(tmatcher->mask.buf, key,
> item,
> > inner);
> > break;
> > + case RTE_FLOW_ITEM_TYPE_META:
> > + flow_dv_translate_item_meta(tmatcher->mask.buf, key,
> item);
> > + break;
> > default:
> > break;
> > }
> > diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
> > index 69296a0..29742b1 100644
> > --- a/drivers/net/mlx5/mlx5_prm.h
> > +++ b/drivers/net/mlx5/mlx5_prm.h
> > @@ -159,7 +159,7 @@ struct mlx5_wqe_eth_seg_small {
> > uint8_t cs_flags;
> > uint8_t rsvd1;
> > uint16_t mss;
> > - uint32_t rsvd2;
> > + uint32_t flow_table_metadata;
> > uint16_t inline_hdr_sz;
> > uint8_t inline_hdr[2];
> > } __rte_aligned(MLX5_WQE_DWORD_SIZE);
> > diff --git a/drivers/net/mlx5/mlx5_rxtx.c
> > b/drivers/net/mlx5/mlx5_rxtx.c index 558e6b6..5b4d2fd 100644
> > --- a/drivers/net/mlx5/mlx5_rxtx.c
> > +++ b/drivers/net/mlx5/mlx5_rxtx.c
> > @@ -523,6 +523,7 @@
> > uint8_t tso = txq->tso_en && (buf->ol_flags &
> PKT_TX_TCP_SEG);
> > uint32_t swp_offsets = 0;
> > uint8_t swp_types = 0;
> > + uint32_t metadata;
> > uint16_t tso_segsz = 0;
> > #ifdef MLX5_PMD_SOFT_COUNTERS
> > uint32_t total_length = 0;
> > @@ -566,6 +567,10 @@
> > cs_flags = txq_ol_cksum_to_cs(buf);
> > txq_mbuf_to_swp(txq, buf, (uint8_t *)&swp_offsets,
> &swp_types);
> > raw = ((uint8_t *)(uintptr_t)wqe) + 2 *
> MLX5_WQE_DWORD_SIZE;
> > + /* Copy metadata from mbuf if valid */
> > + metadata = buf->ol_flags & PKT_TX_METADATA ?
> > + buf->tx_metadata : 0;
>
> Indentation.
Changed.
>
> > +
>
> No blank line.
Removed.
>
> > /* Replace the Ethernet type by the VLAN if necessary. */
> > if (buf->ol_flags & PKT_TX_VLAN_PKT) {
> > uint32_t vlan = rte_cpu_to_be_32(0x81000000 | @@
> -781,7 +786,7 @@
> > swp_offsets,
> > cs_flags | (swp_types << 8) |
> > (rte_cpu_to_be_16(tso_segsz) << 16),
> > - 0,
> > + rte_cpu_to_be_32(metadata),
> > (ehdr << 16) |
> rte_cpu_to_be_16(tso_header_sz),
> > };
> > } else {
> > @@ -795,7 +800,7 @@
> > wqe->eseg = (rte_v128u32_t){
> > swp_offsets,
> > cs_flags | (swp_types << 8),
> > - 0,
> > + rte_cpu_to_be_32(metadata),
> > (ehdr << 16) |
> rte_cpu_to_be_16(pkt_inline_sz),
> > };
> > }
> > @@ -861,7 +866,7 @@
> > mpw->wqe->eseg.inline_hdr_sz = 0;
> > mpw->wqe->eseg.rsvd0 = 0;
> > mpw->wqe->eseg.rsvd1 = 0;
> > - mpw->wqe->eseg.rsvd2 = 0;
> > + mpw->wqe->eseg.flow_table_metadata = 0;
> > mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW <<
> 24) |
> > (txq->wqe_ci << 8) |
> > MLX5_OPCODE_TSO);
> > @@ -948,6 +953,7 @@
> > uint32_t length;
> > unsigned int segs_n = buf->nb_segs;
> > uint32_t cs_flags;
> > + uint32_t metadata;
> >
> > /*
> > * Make sure there is enough room to store this packet and
> @@
> > -964,6 +970,9 @@
> > max_elts -= segs_n;
> > --pkts_n;
> > cs_flags = txq_ol_cksum_to_cs(buf);
> > + /* Copy metadata from mbuf if valid */
> > + metadata = buf->ol_flags & PKT_TX_METADATA ?
> > + buf->tx_metadata : 0;
>
> Indentation.
Changed.
> And no need to change to big-endian? I think it needs.
Metadata is written to the mbuf by the application in big-endian byte order.
>
> > /* Retrieve packet information. */
> > length = PKT_LEN(buf);
> > assert(length);
> > @@ -971,6 +980,7 @@
> > if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
> > ((mpw.len != length) ||
> > (segs_n != 1) ||
> > + (mpw.wqe->eseg.flow_table_metadata != metadata) ||
> > (mpw.wqe->eseg.cs_flags != cs_flags)))
> > mlx5_mpw_close(txq, &mpw);
> > if (mpw.state == MLX5_MPW_STATE_CLOSED) { @@ -984,6
> +994,7 @@
> > max_wqe -= 2;
> > mlx5_mpw_new(txq, &mpw, length);
> > mpw.wqe->eseg.cs_flags = cs_flags;
> > + mpw.wqe->eseg.flow_table_metadata = metadata;
> > }
> > /* Multi-segment packets must be alone in their MPW. */
> > assert((segs_n == 1) || (mpw.pkts_n == 0)); @@ -1082,7
> +1093,7 @@
> > mpw->wqe->eseg.cs_flags = 0;
> > mpw->wqe->eseg.rsvd0 = 0;
> > mpw->wqe->eseg.rsvd1 = 0;
> > - mpw->wqe->eseg.rsvd2 = 0;
> > + mpw->wqe->eseg.flow_table_metadata = 0;
> > inl = (struct mlx5_wqe_inl_small *)
> > (((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
> > mpw->data.raw = (uint8_t *)&inl->raw; @@ -1172,6 +1183,7 @@
> > uint32_t length;
> > unsigned int segs_n = buf->nb_segs;
> > uint8_t cs_flags;
> > + uint32_t metadata;
> >
> > /*
> > * Make sure there is enough room to store this packet and
> @@
> > -1193,18 +1205,23 @@
> > */
> > max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq-
> >wqe_pi);
> > cs_flags = txq_ol_cksum_to_cs(buf);
> > + /* Copy metadata from mbuf if valid */
> > + metadata = buf->ol_flags & PKT_TX_METADATA ?
> > + buf->tx_metadata : 0;
>
> Indentation.
Changed.
> And no need to change to big-endian?
Metadata is written to the mbuf by the application in big-endian byte order.
>
> > /* Retrieve packet information. */
> > length = PKT_LEN(buf);
> > /* Start new session if packet differs. */
> > if (mpw.state == MLX5_MPW_STATE_OPENED) {
> > if ((mpw.len != length) ||
> > (segs_n != 1) ||
> > + (mpw.wqe->eseg.flow_table_metadata !=
> metadata) ||
> > (mpw.wqe->eseg.cs_flags != cs_flags))
> > mlx5_mpw_close(txq, &mpw);
> > } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
> > if ((mpw.len != length) ||
> > (segs_n != 1) ||
> > (length > inline_room) ||
> > + (mpw.wqe->eseg.flow_table_metadata !=
> metadata) ||
> > (mpw.wqe->eseg.cs_flags != cs_flags)) {
> > mlx5_mpw_inline_close(txq, &mpw);
> > inline_room =
> > @@ -1224,12 +1241,14 @@
> > max_wqe -= 2;
> > mlx5_mpw_new(txq, &mpw, length);
> > mpw.wqe->eseg.cs_flags = cs_flags;
> > + mpw.wqe->eseg.flow_table_metadata =
> metadata;
> > } else {
> > if (unlikely(max_wqe < wqe_inl_n))
> > break;
> > max_wqe -= wqe_inl_n;
> > mlx5_mpw_inline_new(txq, &mpw, length);
> > mpw.wqe->eseg.cs_flags = cs_flags;
> > + mpw.wqe->eseg.flow_table_metadata =
> metadata;
> > }
> > }
> > /* Multi-segment packets must be alone in their MPW. */
> @@ -1461,6
> > +1480,7 @@
> > unsigned int do_inline = 0; /* Whether inline is possible. */
> > uint32_t length;
> > uint8_t cs_flags;
> > + uint32_t metadata;
> >
> > /* Multi-segmented packet is handled in slow-path outside.
> */
> > assert(NB_SEGS(buf) == 1);
> > @@ -1468,6 +1488,9 @@
> > if (max_elts - j == 0)
> > break;
> > cs_flags = txq_ol_cksum_to_cs(buf);
> > + /* Copy metadata from mbuf if valid */
> > + metadata = buf->ol_flags & PKT_TX_METADATA ?
> > + buf->tx_metadata : 0;
>
> Indentation.
Changed.
> And no need to change to big-endian?
Metadata is written to the mbuf by the application in big-endian byte order.
>
> > /* Retrieve packet information. */
> > length = PKT_LEN(buf);
> > /* Start new session if:
> > @@ -1482,6 +1505,7 @@
> > (length <= txq->inline_max_packet_sz &&
> > inl_pad + sizeof(inl_hdr) + length >
> > mpw_room) ||
> > + (mpw.wqe->eseg.flow_table_metadata !=
> metadata) ||
> > (mpw.wqe->eseg.cs_flags != cs_flags))
> > max_wqe -= mlx5_empw_close(txq, &mpw);
> > }
> > @@ -1505,6 +1529,7 @@
> > sizeof(inl_hdr) + length <= mpw_room &&
> > !txq->mpw_hdr_dseg;
> > mpw.wqe->eseg.cs_flags = cs_flags;
> > + mpw.wqe->eseg.flow_table_metadata = metadata;
> > } else {
> > /* Evaluate whether the next packet can be inlined.
> > * Inlininig is possible when:
> > diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c
> > b/drivers/net/mlx5/mlx5_rxtx_vec.c
> > index 0a4aed8..16a8608 100644
> > --- a/drivers/net/mlx5/mlx5_rxtx_vec.c
> > +++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
> > @@ -41,6 +41,8 @@
> >
> > /**
> > * Count the number of packets having same ol_flags and calculate
> cs_flags.
> > + * If PKT_TX_METADATA is set in ol_flags, packets must have same
> > + metadata
> > + * as well.
>
> Packets can have different metadata but we just want to count the number
> of packets having same data. Please correct the comment.
Corrected.
>
> > *
> > * @param pkts
> > * Pointer to array of packets.
> > @@ -48,26 +50,41 @@
> > * Number of packets.
> > * @param cs_flags
> > * Pointer of flags to be returned.
> > + * @param metadata
> > + * Pointer of metadata to be returned.
> > + * @param txq_offloads
> > + * Offloads enabled on Tx queue
> > *
> > * @return
> > - * Number of packets having same ol_flags.
> > + * Number of packets having same ol_flags and metadata, if relevant.
> > */
> > static inline unsigned int
> > -txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t
> > *cs_flags)
> > +txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t
> *cs_flags,
> > + uint32_t *metadata, const uint64_t txq_offloads)
> > {
> > unsigned int pos;
> > const uint64_t ol_mask =
> > PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
> > PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
> > - PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
> > + PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM |
> PKT_TX_METADATA;
>
> Shouldn't add PKT_TX_METADATA. As it is for cksum, you might rather want
> to change the name, e.g., cksum_ol_mask.
>
> >
> > if (!pkts_n)
> > return 0;
> > /* Count the number of packets having same ol_flags. */
>
> This comment has to be corrected and moved.
>
> > - for (pos = 1; pos < pkts_n; ++pos)
> > - if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask)
> > + for (pos = 1; pos < pkts_n; ++pos) {
> > + if ((txq_offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP)
> &&
> > + ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask))
>
> Indentation.
>
> > break;
> > + /* If the metadata ol_flag is set,
> > + * metadata must be same in all packets.
> > + */
>
> Correct comment. First line should be empty for multi-line comment.
> And it can't be 'must'. We are not forcing it but just counting the number of
> packets having same metadata like I mentioned above.
>
> > + if ((txq_offloads & DEV_TX_OFFLOAD_MATCH_METADATA)
> &&
> > + (pkts[pos]->ol_flags & PKT_TX_METADATA) &&
> > + pkts[0]->tx_metadata != pkts[pos]->tx_metadata)
>
> Disagree. What if pkts[0] doesn't have PKT_TXT_METADATA while pkt[1] has
> it?
> And, indentation.
>
> > + break;
> > + }
> > *cs_flags = txq_ol_cksum_to_cs(pkts[0]);
> > + *metadata = rte_cpu_to_be_32(pkts[0]->tx_metadata);
>
> Same here. You should check if pkts[0] has metadata first.
>
> > return pos;
>
> Here's my suggestion for the whole func.
>
> unsigned int pos;
> const uint64_t cksum_ol_mask =
> PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
> PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
> PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
> uint32_t p0_metadata;
>
> if (!pkts_n)
> return 0;
> p0_metadata = pkts[0]->ol_flags & PKT_TX_METADATA ?
> pkts[0]->tx_metadata : 0;
> /* Count the number of packets having same offload parameters. */
> for (pos = 1; pos < pkts_n; ++pos) {
> /* Check if packet can have same checksum flags. */
> if ((txq_offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP)
> &&
> ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) &
> cksum_ol_mask))
> break;
> /* Check if packet has same metadata. */
> if (txq_offloads & DEV_TX_OFFLOAD_MATCH_METADATA) {
> const uint32_t p1_metadata =
> pkts[pos]->ol_flags & PKT_TX_METADATA ?
> pkts[pos]->tx_metadata : 0;
>
> if (p1_metadata != p0_metadata)
> break;
> }
> }
> *cs_flags = txq_ol_cksum_to_cs(pkts[0]);
> *metadata = rte_cpu_to_be_32(p0_metadata);
> return pos;
Modified per your suggestion.
> > }
> >
> > @@ -96,7 +113,7 @@
> > uint16_t ret;
> >
> > n = RTE_MIN((uint16_t)(pkts_n - nb_tx),
> MLX5_VPMD_TX_MAX_BURST);
> > - ret = txq_burst_v(txq, &pkts[nb_tx], n, 0);
> > + ret = txq_burst_v(txq, &pkts[nb_tx], n, 0, 0);
> > nb_tx += ret;
> > if (!ret)
> > break;
> > @@ -127,6 +144,7 @@
> > uint8_t cs_flags = 0;
> > uint16_t n;
> > uint16_t ret;
> > + uint32_t metadata = 0;
>
> Let's use rte_be32_t instead.
Agree.
>
> >
> > /* Transmit multi-seg packets in the head of pkts list. */
> > if ((txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) &&
> @@ -137,9
> > +155,11 @@
> > n = RTE_MIN((uint16_t)(pkts_n - nb_tx),
> MLX5_VPMD_TX_MAX_BURST);
> > if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
> > n = txq_count_contig_single_seg(&pkts[nb_tx], n);
> > - if (txq->offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP)
> > - n = txq_calc_offload(&pkts[nb_tx], n, &cs_flags);
> > - ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
> > + if (txq->offloads & (MLX5_VEC_TX_CKSUM_OFFLOAD_CAP |
> > + DEV_TX_OFFLOAD_MATCH_METADATA))
>
> Indentation.
Changed.
>
> > + n = txq_calc_offload(&pkts[nb_tx], n,
> > + &cs_flags, &metadata, txq-
> >offloads);
>
> Indentation.
Changed.
>
> > + ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags, metadata);
> > nb_tx += ret;
> > if (!ret)
> > break;
> > diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.h
> > b/drivers/net/mlx5/mlx5_rxtx_vec.h
> > index fb884f9..fda7004 100644
> > --- a/drivers/net/mlx5/mlx5_rxtx_vec.h
> > +++ b/drivers/net/mlx5/mlx5_rxtx_vec.h
> > @@ -22,6 +22,7 @@
> > /* HW offload capabilities of vectorized Tx. */ #define
> > MLX5_VEC_TX_OFFLOAD_CAP \
> > (MLX5_VEC_TX_CKSUM_OFFLOAD_CAP | \
> > + DEV_TX_OFFLOAD_MATCH_METADATA | \
> > DEV_TX_OFFLOAD_MULTI_SEGS)
> >
> > /*
> > diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> > b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> > index b37b738..a8a4d7b 100644
> > --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> > +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> > @@ -201,13 +201,15 @@
> > * Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
> > * @param cs_flags
> > * Checksum offload flags to be written in the descriptor.
> > + * @param metadata
> > + * Metadata value to be written in the descriptor.
> > *
> > * @return
> > * Number of packets successfully transmitted (<= pkts_n).
> > */
> > static inline uint16_t
> > txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t
> pkts_n,
> > - uint8_t cs_flags)
> > + uint8_t cs_flags, uint32_t metadata)
>
> Let's use rte_be32_t instead.
Agree.
>
> > {
> > struct rte_mbuf **elts;
> > uint16_t elts_head = txq->elts_head; @@ -294,10 +296,7 @@
> > vst1q_u8((void *)t_wqe, ctrl);
> > /* Fill ESEG in the header. */
> > vst1q_u8((void *)(t_wqe + 1),
> > - ((uint8x16_t) { 0, 0, 0, 0,
> > - cs_flags, 0, 0, 0,
> > - 0, 0, 0, 0,
> > - 0, 0, 0, 0 }));
> > + ((uint32x4_t) { 0, cs_flags, metadata, 0 }));
> > #ifdef MLX5_PMD_SOFT_COUNTERS
> > txq->stats.opackets += pkts_n;
> > #endif
> > diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> > b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> > index 54b3783..31aae4a 100644
> > --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> > +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> > @@ -202,13 +202,15 @@
> > * Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
> > * @param cs_flags
> > * Checksum offload flags to be written in the descriptor.
> > + * @param metadata
> > + * Metadata value to be written in the descriptor.
> > *
> > * @return
> > * Number of packets successfully transmitted (<= pkts_n).
> > */
> > static inline uint16_t
> > txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t
> pkts_n,
> > - uint8_t cs_flags)
> > + uint8_t cs_flags, uint32_t metadata)
>
> Let's use rte_be32_t instead.
Agree.
>
> > {
> > struct rte_mbuf **elts;
> > uint16_t elts_head = txq->elts_head; @@ -292,11 +294,7 @@
> > ctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);
> > _mm_store_si128(t_wqe, ctrl);
> > /* Fill ESEG in the header. */
> > - _mm_store_si128(t_wqe + 1,
> > - _mm_set_epi8(0, 0, 0, 0,
> > - 0, 0, 0, 0,
> > - 0, 0, 0, cs_flags,
> > - 0, 0, 0, 0));
> > + _mm_store_si128(t_wqe + 1, _mm_set_epi32(0, metadata, cs_flags,
> 0));
> > #ifdef MLX5_PMD_SOFT_COUNTERS
> > txq->stats.opackets += pkts_n;
> > #endif
> > diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
> > index f9bc473..7263fb1 100644
> > --- a/drivers/net/mlx5/mlx5_txq.c
> > +++ b/drivers/net/mlx5/mlx5_txq.c
> > @@ -128,6 +128,12 @@
> > offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
> > DEV_TX_OFFLOAD_GRE_TNL_TSO);
> > }
> > +
>
> Please no blank line.
Removed.
>
> > +#ifdef HAVE_IBV_FLOW_DV_SUPPORT
> > + if (config->dv_flow_en)
> > + offloads |= DEV_TX_OFFLOAD_MATCH_METADATA; #endif
> > +
>
> Same here.
Removed.
>
> > return offloads;
> > }
> >
> > --
> > 1.8.3.1
> >
@@ -418,7 +418,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static int
+int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
const uint8_t *mask,
const uint8_t *nic_mask,
@@ -43,6 +43,9 @@
#define MLX5_FLOW_LAYER_GRE (1u << 14)
#define MLX5_FLOW_LAYER_MPLS (1u << 15)
+/* General pattern items bits. */
+#define MLX5_FLOW_ITEM_METADATA (1u << 16)
+
/* Outer Masks. */
#define MLX5_FLOW_LAYER_OUTER_L3 \
(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
@@ -307,6 +310,11 @@ int mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
int mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
const struct rte_flow_attr *attributes,
struct rte_flow_error *error);
+int mlx5_flow_item_acceptable(const struct rte_flow_item *item,
+ const uint8_t *mask,
+ const uint8_t *nic_mask,
+ unsigned int size,
+ struct rte_flow_error *error);
int mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
uint64_t item_flags,
struct rte_flow_error *error);
@@ -36,6 +36,69 @@
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
/**
+ * Validate META item.
+ *
+ * @param[in] dev
+ * Pointer to the rte_eth_dev structure.
+ * @param[in] item
+ * Item specification.
+ * @param[in] attr
+ * Attributes of flow that includes this item.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_item_meta(struct rte_eth_dev *dev,
+ const struct rte_flow_item *item,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error)
+{
+ const struct rte_flow_item_meta *spec = item->spec;
+ const struct rte_flow_item_meta *mask = item->mask;
+
+ const struct rte_flow_item_meta nic_mask = {
+ .data = RTE_BE32(UINT32_MAX)
+ };
+
+ int ret;
+ uint64_t offloads = dev->data->dev_conf.txmode.offloads;
+
+ if (!(offloads & DEV_TX_OFFLOAD_MATCH_METADATA))
+ return rte_flow_error_set(error, EPERM,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ NULL,
+ "match on metadata offload "
+ "configuration is off for this port");
+ if (!spec)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+ item->spec,
+ "data cannot be empty");
+ if (!spec->data)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+ NULL,
+ "data cannot be zero");
+ if (!mask)
+ mask = &rte_flow_item_meta_mask;
+ ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+ (const uint8_t *)&nic_mask,
+ sizeof(struct rte_flow_item_meta),
+ error);
+ if (ret < 0)
+ return ret;
+ if (attr->ingress)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+ NULL,
+ "pattern not supported for ingress");
+ return 0;
+}
+
+/**
* Verify the @p attributes will be correctly understood by the NIC and store
* them in the @p flow if everything is correct.
*
@@ -214,6 +277,13 @@
return ret;
item_flags |= MLX5_FLOW_LAYER_MPLS;
break;
+ case RTE_FLOW_ITEM_TYPE_META:
+ ret = flow_dv_validate_item_meta(dev, items, attr,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= MLX5_FLOW_ITEM_METADATA;
+ break;
default:
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM,
@@ -855,6 +925,42 @@
}
/**
+ * Add META item to matcher
+ *
+ * @param[in, out] matcher
+ * Flow matcher.
+ * @param[in, out] key
+ * Flow matcher value.
+ * @param[in] item
+ * Flow pattern to translate.
+ * @param[in] inner
+ * Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_meta(void *matcher, void *key,
+ const struct rte_flow_item *item)
+{
+ const struct rte_flow_item_meta *meta_m;
+ const struct rte_flow_item_meta *meta_v;
+
+ void *misc2_m =
+ MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters_2);
+ void *misc2_v =
+ MLX5_ADDR_OF(fte_match_param, key, misc_parameters_2);
+
+ meta_m = (const void *)item->mask;
+ if (!meta_m)
+ meta_m = &rte_flow_item_meta_mask;
+ meta_v = (const void *)item->spec;
+ if (meta_v) {
+ MLX5_SET(fte_match_set_misc2, misc2_m, metadata_reg_a,
+ RTE_BE32(meta_m->data));
+ MLX5_SET(fte_match_set_misc2, misc2_v, metadata_reg_a,
+ RTE_BE32(meta_v->data));
+ }
+}
+
+/**
* Update the matcher and the value based the selected item.
*
* @param[in, out] matcher
@@ -940,6 +1046,9 @@
flow_dv_translate_item_vxlan(tmatcher->mask.buf, key, item,
inner);
break;
+ case RTE_FLOW_ITEM_TYPE_META:
+ flow_dv_translate_item_meta(tmatcher->mask.buf, key, item);
+ break;
default:
break;
}
@@ -159,7 +159,7 @@ struct mlx5_wqe_eth_seg_small {
uint8_t cs_flags;
uint8_t rsvd1;
uint16_t mss;
- uint32_t rsvd2;
+ uint32_t flow_table_metadata;
uint16_t inline_hdr_sz;
uint8_t inline_hdr[2];
} __rte_aligned(MLX5_WQE_DWORD_SIZE);
@@ -523,6 +523,7 @@
uint8_t tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
uint32_t swp_offsets = 0;
uint8_t swp_types = 0;
+ uint32_t metadata;
uint16_t tso_segsz = 0;
#ifdef MLX5_PMD_SOFT_COUNTERS
uint32_t total_length = 0;
@@ -566,6 +567,10 @@
cs_flags = txq_ol_cksum_to_cs(buf);
txq_mbuf_to_swp(txq, buf, (uint8_t *)&swp_offsets, &swp_types);
raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
+ /* Copy metadata from mbuf if valid */
+ metadata = buf->ol_flags & PKT_TX_METADATA ?
+ buf->tx_metadata : 0;
+
/* Replace the Ethernet type by the VLAN if necessary. */
if (buf->ol_flags & PKT_TX_VLAN_PKT) {
uint32_t vlan = rte_cpu_to_be_32(0x81000000 |
@@ -781,7 +786,7 @@
swp_offsets,
cs_flags | (swp_types << 8) |
(rte_cpu_to_be_16(tso_segsz) << 16),
- 0,
+ rte_cpu_to_be_32(metadata),
(ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
};
} else {
@@ -795,7 +800,7 @@
wqe->eseg = (rte_v128u32_t){
swp_offsets,
cs_flags | (swp_types << 8),
- 0,
+ rte_cpu_to_be_32(metadata),
(ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
};
}
@@ -861,7 +866,7 @@
mpw->wqe->eseg.inline_hdr_sz = 0;
mpw->wqe->eseg.rsvd0 = 0;
mpw->wqe->eseg.rsvd1 = 0;
- mpw->wqe->eseg.rsvd2 = 0;
+ mpw->wqe->eseg.flow_table_metadata = 0;
mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
(txq->wqe_ci << 8) |
MLX5_OPCODE_TSO);
@@ -948,6 +953,7 @@
uint32_t length;
unsigned int segs_n = buf->nb_segs;
uint32_t cs_flags;
+ uint32_t metadata;
/*
* Make sure there is enough room to store this packet and
@@ -964,6 +970,9 @@
max_elts -= segs_n;
--pkts_n;
cs_flags = txq_ol_cksum_to_cs(buf);
+ /* Copy metadata from mbuf if valid */
+ metadata = buf->ol_flags & PKT_TX_METADATA ?
+ buf->tx_metadata : 0;
/* Retrieve packet information. */
length = PKT_LEN(buf);
assert(length);
@@ -971,6 +980,7 @@
if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
((mpw.len != length) ||
(segs_n != 1) ||
+ (mpw.wqe->eseg.flow_table_metadata != metadata) ||
(mpw.wqe->eseg.cs_flags != cs_flags)))
mlx5_mpw_close(txq, &mpw);
if (mpw.state == MLX5_MPW_STATE_CLOSED) {
@@ -984,6 +994,7 @@
max_wqe -= 2;
mlx5_mpw_new(txq, &mpw, length);
mpw.wqe->eseg.cs_flags = cs_flags;
+ mpw.wqe->eseg.flow_table_metadata = metadata;
}
/* Multi-segment packets must be alone in their MPW. */
assert((segs_n == 1) || (mpw.pkts_n == 0));
@@ -1082,7 +1093,7 @@
mpw->wqe->eseg.cs_flags = 0;
mpw->wqe->eseg.rsvd0 = 0;
mpw->wqe->eseg.rsvd1 = 0;
- mpw->wqe->eseg.rsvd2 = 0;
+ mpw->wqe->eseg.flow_table_metadata = 0;
inl = (struct mlx5_wqe_inl_small *)
(((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
mpw->data.raw = (uint8_t *)&inl->raw;
@@ -1172,6 +1183,7 @@
uint32_t length;
unsigned int segs_n = buf->nb_segs;
uint8_t cs_flags;
+ uint32_t metadata;
/*
* Make sure there is enough room to store this packet and
@@ -1193,18 +1205,23 @@
*/
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
cs_flags = txq_ol_cksum_to_cs(buf);
+ /* Copy metadata from mbuf if valid */
+ metadata = buf->ol_flags & PKT_TX_METADATA ?
+ buf->tx_metadata : 0;
/* Retrieve packet information. */
length = PKT_LEN(buf);
/* Start new session if packet differs. */
if (mpw.state == MLX5_MPW_STATE_OPENED) {
if ((mpw.len != length) ||
(segs_n != 1) ||
+ (mpw.wqe->eseg.flow_table_metadata != metadata) ||
(mpw.wqe->eseg.cs_flags != cs_flags))
mlx5_mpw_close(txq, &mpw);
} else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
if ((mpw.len != length) ||
(segs_n != 1) ||
(length > inline_room) ||
+ (mpw.wqe->eseg.flow_table_metadata != metadata) ||
(mpw.wqe->eseg.cs_flags != cs_flags)) {
mlx5_mpw_inline_close(txq, &mpw);
inline_room =
@@ -1224,12 +1241,14 @@
max_wqe -= 2;
mlx5_mpw_new(txq, &mpw, length);
mpw.wqe->eseg.cs_flags = cs_flags;
+ mpw.wqe->eseg.flow_table_metadata = metadata;
} else {
if (unlikely(max_wqe < wqe_inl_n))
break;
max_wqe -= wqe_inl_n;
mlx5_mpw_inline_new(txq, &mpw, length);
mpw.wqe->eseg.cs_flags = cs_flags;
+ mpw.wqe->eseg.flow_table_metadata = metadata;
}
}
/* Multi-segment packets must be alone in their MPW. */
@@ -1461,6 +1480,7 @@
unsigned int do_inline = 0; /* Whether inline is possible. */
uint32_t length;
uint8_t cs_flags;
+ uint32_t metadata;
/* Multi-segmented packet is handled in slow-path outside. */
assert(NB_SEGS(buf) == 1);
@@ -1468,6 +1488,9 @@
if (max_elts - j == 0)
break;
cs_flags = txq_ol_cksum_to_cs(buf);
+ /* Copy metadata from mbuf if valid */
+ metadata = buf->ol_flags & PKT_TX_METADATA ?
+ buf->tx_metadata : 0;
/* Retrieve packet information. */
length = PKT_LEN(buf);
/* Start new session if:
@@ -1482,6 +1505,7 @@
(length <= txq->inline_max_packet_sz &&
inl_pad + sizeof(inl_hdr) + length >
mpw_room) ||
+ (mpw.wqe->eseg.flow_table_metadata != metadata) ||
(mpw.wqe->eseg.cs_flags != cs_flags))
max_wqe -= mlx5_empw_close(txq, &mpw);
}
@@ -1505,6 +1529,7 @@
sizeof(inl_hdr) + length <= mpw_room &&
!txq->mpw_hdr_dseg;
mpw.wqe->eseg.cs_flags = cs_flags;
+ mpw.wqe->eseg.flow_table_metadata = metadata;
} else {
/* Evaluate whether the next packet can be inlined.
* Inlininig is possible when:
@@ -41,6 +41,8 @@
/**
* Count the number of packets having same ol_flags and calculate cs_flags.
+ * If PKT_TX_METADATA is set in ol_flags, packets must have same metadata
+ * as well.
*
* @param pkts
* Pointer to array of packets.
@@ -48,26 +50,41 @@
* Number of packets.
* @param cs_flags
* Pointer of flags to be returned.
+ * @param metadata
+ * Pointer of metadata to be returned.
+ * @param txq_offloads
+ * Offloads enabled on Tx queue
*
* @return
- * Number of packets having same ol_flags.
+ * Number of packets having same ol_flags and metadata, if relevant.
*/
static inline unsigned int
-txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags)
+txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags,
+ uint32_t *metadata, const uint64_t txq_offloads)
{
unsigned int pos;
const uint64_t ol_mask =
PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
- PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
+ PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM | PKT_TX_METADATA;
if (!pkts_n)
return 0;
/* Count the number of packets having same ol_flags. */
- for (pos = 1; pos < pkts_n; ++pos)
- if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask)
+ for (pos = 1; pos < pkts_n; ++pos) {
+ if ((txq_offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP) &&
+ ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask))
break;
+ /* If the metadata ol_flag is set,
+ * metadata must be same in all packets.
+ */
+ if ((txq_offloads & DEV_TX_OFFLOAD_MATCH_METADATA) &&
+ (pkts[pos]->ol_flags & PKT_TX_METADATA) &&
+ pkts[0]->tx_metadata != pkts[pos]->tx_metadata)
+ break;
+ }
*cs_flags = txq_ol_cksum_to_cs(pkts[0]);
+ *metadata = rte_cpu_to_be_32(pkts[0]->tx_metadata);
return pos;
}
@@ -96,7 +113,7 @@
uint16_t ret;
n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
- ret = txq_burst_v(txq, &pkts[nb_tx], n, 0);
+ ret = txq_burst_v(txq, &pkts[nb_tx], n, 0, 0);
nb_tx += ret;
if (!ret)
break;
@@ -127,6 +144,7 @@
uint8_t cs_flags = 0;
uint16_t n;
uint16_t ret;
+ uint32_t metadata = 0;
/* Transmit multi-seg packets in the head of pkts list. */
if ((txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) &&
@@ -137,9 +155,11 @@
n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
n = txq_count_contig_single_seg(&pkts[nb_tx], n);
- if (txq->offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP)
- n = txq_calc_offload(&pkts[nb_tx], n, &cs_flags);
- ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
+ if (txq->offloads & (MLX5_VEC_TX_CKSUM_OFFLOAD_CAP |
+ DEV_TX_OFFLOAD_MATCH_METADATA))
+ n = txq_calc_offload(&pkts[nb_tx], n,
+ &cs_flags, &metadata, txq->offloads);
+ ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags, metadata);
nb_tx += ret;
if (!ret)
break;
@@ -22,6 +22,7 @@
/* HW offload capabilities of vectorized Tx. */
#define MLX5_VEC_TX_OFFLOAD_CAP \
(MLX5_VEC_TX_CKSUM_OFFLOAD_CAP | \
+ DEV_TX_OFFLOAD_MATCH_METADATA | \
DEV_TX_OFFLOAD_MULTI_SEGS)
/*
@@ -201,13 +201,15 @@
* Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
* @param cs_flags
* Checksum offload flags to be written in the descriptor.
+ * @param metadata
+ * Metadata value to be written in the descriptor.
*
* @return
* Number of packets successfully transmitted (<= pkts_n).
*/
static inline uint16_t
txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
- uint8_t cs_flags)
+ uint8_t cs_flags, uint32_t metadata)
{
struct rte_mbuf **elts;
uint16_t elts_head = txq->elts_head;
@@ -294,10 +296,7 @@
vst1q_u8((void *)t_wqe, ctrl);
/* Fill ESEG in the header. */
vst1q_u8((void *)(t_wqe + 1),
- ((uint8x16_t) { 0, 0, 0, 0,
- cs_flags, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0 }));
+ ((uint32x4_t) { 0, cs_flags, metadata, 0 }));
#ifdef MLX5_PMD_SOFT_COUNTERS
txq->stats.opackets += pkts_n;
#endif
@@ -202,13 +202,15 @@
* Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
* @param cs_flags
* Checksum offload flags to be written in the descriptor.
+ * @param metadata
+ * Metadata value to be written in the descriptor.
*
* @return
* Number of packets successfully transmitted (<= pkts_n).
*/
static inline uint16_t
txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
- uint8_t cs_flags)
+ uint8_t cs_flags, uint32_t metadata)
{
struct rte_mbuf **elts;
uint16_t elts_head = txq->elts_head;
@@ -292,11 +294,7 @@
ctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);
_mm_store_si128(t_wqe, ctrl);
/* Fill ESEG in the header. */
- _mm_store_si128(t_wqe + 1,
- _mm_set_epi8(0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, cs_flags,
- 0, 0, 0, 0));
+ _mm_store_si128(t_wqe + 1, _mm_set_epi32(0, metadata, cs_flags, 0));
#ifdef MLX5_PMD_SOFT_COUNTERS
txq->stats.opackets += pkts_n;
#endif
@@ -128,6 +128,12 @@
offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
DEV_TX_OFFLOAD_GRE_TNL_TSO);
}
+
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ if (config->dv_flow_en)
+ offloads |= DEV_TX_OFFLOAD_MATCH_METADATA;
+#endif
+
return offloads;
}