> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@amd.com>
> Sent: Wednesday, May 31, 2023 1:46 AM
> To: Dong Zhou <dongzhou@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-
> Contact-Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Aman Singh
> <aman.deep.singh@intel.com>; Yuying Zhang <yuying.zhang@intel.com>;
> Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>; Olivier Matz
> <olivier.matz@6wind.com>
> Cc: dev@dpdk.org
> Subject: Re: [PATCH v4] ethdev: add flow item for RoCE infiniband BTH
>
> On 5/30/2023 4:06 AM, Dong Zhou wrote:
> > IB(InfiniBand) is one type of networking used in high-performance
> > computing with high throughput and low latency. Like Ethernet, IB
> > defines a layered protocol (Physical, Link, Network, Transport
> > Layers). IB provides native support for RDMA(Remote DMA), an extension
> > of the DMA that allows direct access to remote host memory without CPU
> > intervention. IB network requires NICs and switches to support the IB
> > protocol.
> >
> > RoCE(RDMA over Converged Ethernet) is a network protocol that allows
> > RDMA to run on Ethernet. RoCE encapsulates IB packets on Ethernet and
> > has two versions, RoCEv1 and RoCEv2. RoCEv1 is an Ethernet link layer
> > protocol, IB packets are encapsulated in the Ethernet layer and use
> > Ethernet type 0x8915. RoCEv2 is an internet layer protocol, IB packets
> > are encapsulated in UDP payload and use a destination port 4791. The
> > format of the RoCEv2 packet is as follows:
> > ETH + IP + UDP(dport 4791) + IB(BTH + ExtHDR + PAYLOAD + CRC)
> >
> > BTH (Base Transport Header) is the IB transport layer header; RoCEv1
> > and RoCEv2 both contain this header. This patch introduces a new RTE
> > item to match the IB BTH in RoCE packets. One use of this match is
> > that the user can monitor RoCEv2's CNP(Congestion Notification
> > Packet) by matching BTH opcode 0x81.
> >
> > This patch also adds the testpmd command line to match the RoCEv2 BTH.
> > Usage example:
> >
> > testpmd> flow create 0 group 1 ingress pattern
> > eth / ipv4 / udp dst is 4791 / ib_bth opcode is 0x81
> > dst_qp is 0xd3 / end actions queue index 0 / end
> >
> > Signed-off-by: Dong Zhou <dongzhou@nvidia.com>
> > Acked-by: Ori Kam <orika@nvidia.com>
> > Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> >
> > v2:
> > - Change "ethernet" name to "Ethernet" in the commit log.
> > - Add "RoCE" and "IB" 2 words to words-case.txt.
> > - Add "rte_byteorder.h" header file in "rte_ib.h" to fix compile errors.
> > - Add "Acked-by" labels in the first ethdev patch.
> >
> > v3:
> > - Do rebase to fix the patch apply failure.
> > - Add "Acked-by" label in the second net/mlx5 patch.
> >
> > v4:
> > - Split this series of patches, only keep the first ethdev patch.
> >
>
> Patch looks good, can you please add a release notes update too?
Sure, will send the v5 patch to update it.
@@ -496,6 +496,11 @@ enum index {
ITEM_QUOTA_STATE_NAME,
ITEM_AGGR_AFFINITY,
ITEM_AGGR_AFFINITY_VALUE,
+ ITEM_IB_BTH,
+ ITEM_IB_BTH_OPCODE,
+ ITEM_IB_BTH_PKEY,
+ ITEM_IB_BTH_DST_QPN,
+ ITEM_IB_BTH_PSN,
/* Validate/create actions. */
ACTIONS,
@@ -1452,6 +1457,7 @@ static const enum index next_item[] = {
ITEM_METER,
ITEM_QUOTA,
ITEM_AGGR_AFFINITY,
+ ITEM_IB_BTH,
END_SET,
ZERO,
};
@@ -1953,6 +1959,15 @@ static const enum index item_aggr_affinity[] = {
ZERO,
};
+static const enum index item_ib_bth[] = { /* Referenced via NEXT() by the "ib_bth" item and each of its field tokens. */
+ ITEM_IB_BTH_OPCODE,
+ ITEM_IB_BTH_PKEY,
+ ITEM_IB_BTH_DST_QPN,
+ ITEM_IB_BTH_PSN,
+ ITEM_NEXT,
+ ZERO,
+};
+
static const enum index next_action[] = {
ACTION_END,
ACTION_VOID,
@@ -5523,6 +5538,46 @@ static const struct token token_list[] = {
.call = parse_quota_state_name,
.comp = comp_quota_state_name
},
+ [ITEM_IB_BTH] = {
+ .name = "ib_bth",
+ .help = "match ib bth fields",
+ .priv = PRIV_ITEM(IB_BTH,
+ sizeof(struct rte_flow_item_ib_bth)),
+ .next = NEXT(item_ib_bth),
+ .call = parse_vc,
+ },
+ [ITEM_IB_BTH_OPCODE] = {
+ .name = "opcode",
+ .help = "match ib bth opcode",
+ .next = NEXT(item_ib_bth, NEXT_ENTRY(COMMON_UNSIGNED),
+ item_param),
+ .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_ib_bth,
+ hdr.opcode)),
+ },
+ [ITEM_IB_BTH_PKEY] = {
+ .name = "pkey",
+ .help = "partition key",
+ .next = NEXT(item_ib_bth, NEXT_ENTRY(COMMON_UNSIGNED),
+ item_param),
+ .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_ib_bth,
+ hdr.pkey)),
+ },
+ [ITEM_IB_BTH_DST_QPN] = {
+ .name = "dst_qp",
+ .help = "destination qp",
+ .next = NEXT(item_ib_bth, NEXT_ENTRY(COMMON_UNSIGNED),
+ item_param),
+ .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_ib_bth,
+ hdr.dst_qp)),
+ },
+ [ITEM_IB_BTH_PSN] = {
+ .name = "psn",
+ .help = "packet sequence number",
+ .next = NEXT(item_ib_bth, NEXT_ENTRY(COMMON_UNSIGNED),
+ item_param),
+ .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_ib_bth,
+ hdr.psn)),
+ },
/* Validate/create actions. */
[ACTIONS] = {
.name = "actions",
@@ -11849,6 +11904,9 @@ flow_item_default_mask(const struct rte_flow_item *item)
case RTE_FLOW_ITEM_TYPE_AGGR_AFFINITY:
mask = &rte_flow_item_aggr_affinity_mask;
break;
+ case RTE_FLOW_ITEM_TYPE_IB_BTH:
+ mask = &rte_flow_item_ib_bth_mask;
+ break;
default:
break;
}
@@ -27,6 +27,7 @@ GENEVE
GTPU
GUID
HW
+IB
ICMP
ID
IO
@@ -74,6 +75,7 @@ QinQ
RDMA
RETA
ROC
+RoCE
RQ
RSS
RVU
@@ -104,6 +104,7 @@ gtpc =
gtpu =
gtp_psc =
higig2 =
+ib_bth =
icmp =
icmp6 =
icmp6_echo_request =
@@ -1551,6 +1551,13 @@ Matches flow quota state set by quota action.
- ``state``: Flow quota state
+Item: ``IB_BTH``
+^^^^^^^^^^^^^^^^
+
+Matches an InfiniBand base transport header in a RoCE packet.
+
+- ``hdr``: InfiniBand base transport header definition (``rte_ib.h``).
+
Actions
~~~~~~~
@@ -3781,6 +3781,13 @@ This section lists supported pattern items and their attributes, if any.
- ``send_to_kernel``: send packets to kernel.
+- ``ib_bth``: match InfiniBand BTH (base transport header).
+
+ - ``opcode {unsigned}``: Opcode.
+ - ``pkey {unsigned}``: Partition key.
+ - ``dst_qp {unsigned}``: Destination Queue Pair.
+ - ``psn {unsigned}``: Packet Sequence Number.
+
Actions list
^^^^^^^^^^^^
@@ -164,6 +164,7 @@ static const struct rte_flow_desc_data rte_flow_desc_item[] = {
MK_FLOW_ITEM(IPV6_ROUTING_EXT, sizeof(struct rte_flow_item_ipv6_routing_ext)),
MK_FLOW_ITEM(QUOTA, sizeof(struct rte_flow_item_quota)),
MK_FLOW_ITEM(AGGR_AFFINITY, sizeof(struct rte_flow_item_aggr_affinity)),
+ MK_FLOW_ITEM(IB_BTH, sizeof(struct rte_flow_item_ib_bth)),
};
/** Generate flow_action[] entry. */
@@ -38,6 +38,7 @@
#include <rte_ppp.h>
#include <rte_gre.h>
#include <rte_macsec.h>
+#include <rte_ib.h>
#ifdef __cplusplus
extern "C" {
@@ -672,6 +673,13 @@ enum rte_flow_item_type {
* @see struct rte_flow_item_aggr_affinity.
*/
RTE_FLOW_ITEM_TYPE_AGGR_AFFINITY,
+
+ /**
+ * Matches an InfiniBand base transport header in a RoCE packet.
+ *
+ * @see struct rte_flow_item_ib_bth.
+ */
+ RTE_FLOW_ITEM_TYPE_IB_BTH,
};
/**
@@ -2260,6 +2268,25 @@ rte_flow_item_aggr_affinity_mask = {
};
#endif
+/**
+ * RTE_FLOW_ITEM_TYPE_IB_BTH.
+ *
+ * Matches an InfiniBand base transport header in a RoCE packet.
+ */
+struct rte_flow_item_ib_bth {
+ struct rte_ib_bth hdr; /**< InfiniBand base transport header (defined in rte_ib.h). */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_IB_BTH: full opcode and dst_qp, other fields zero. */
+#ifndef __cplusplus
+static const struct rte_flow_item_ib_bth rte_flow_item_ib_bth_mask = {
+ .hdr = {
+ .opcode = 0xff,
+ .dst_qp = "\xff\xff\xff", /* fills all 3 bytes; the literal's NUL does not fit and is dropped */
+ },
+};
+#endif
+
/**
* Action types.
*
@@ -22,6 +22,7 @@ headers = files(
'rte_geneve.h',
'rte_l2tpv2.h',
'rte_ppp.h',
+ 'rte_ib.h',
)
sources = files(
new file mode 100644
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 NVIDIA Corporation & Affiliates
+ */
+
+#ifndef RTE_IB_H
+#define RTE_IB_H
+
+/**
+ * @file
+ *
+ * InfiniBand header definitions
+ *
+ * The InfiniBand headers are used by RoCE (RDMA over Converged Ethernet).
+ */
+
+#include <stdint.h>
+
+#include <rte_byteorder.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * InfiniBand Base Transport Header (BTH) according to IB Specification
+ * Vol 1-Release-1.4. Bit-field declaration order follows RTE_BYTE_ORDER.
+ */
+__extension__
+struct rte_ib_bth {
+ uint8_t opcode; /**< Opcode. */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ uint8_t tver:4; /**< Transport Header Version. */
+ uint8_t padcnt:2; /**< Pad Count. */
+ uint8_t m:1; /**< MigReq. */
+ uint8_t se:1; /**< Solicited Event. */
+#elif RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ uint8_t se:1; /**< Solicited Event. */
+ uint8_t m:1; /**< MigReq. */
+ uint8_t padcnt:2; /**< Pad Count. */
+ uint8_t tver:4; /**< Transport Header Version. */
+#endif
+ rte_be16_t pkey; /**< Partition key. */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ uint8_t rsvd0:6; /**< Reserved. */
+ uint8_t b:1; /**< BECN. */
+ uint8_t f:1; /**< FECN. */
+#elif RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ uint8_t f:1; /**< FECN. */
+ uint8_t b:1; /**< BECN. */
+ uint8_t rsvd0:6; /**< Reserved. */
+#endif
+ uint8_t dst_qp[3]; /**< Destination QP (3 bytes). */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ uint8_t rsvd1:7; /**< Reserved. */
+ uint8_t a:1; /**< Acknowledge Request. */
+#elif RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ uint8_t a:1; /**< Acknowledge Request. */
+ uint8_t rsvd1:7; /**< Reserved. */
+#endif
+ uint8_t psn[3]; /**< Packet Sequence Number (3 bytes). */
+} __rte_packed;
+
+/** RoCEv2 default UDP destination port. */
+#define RTE_ROCEV2_DEFAULT_PORT 4791
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_IB_H */