[RFC,v2,1/3] ethdev: introduce protocol type based header split

Message ID 20220322035629.18756-2-xuan.ding@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series ethdev: introduce protocol type based header split |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Ding, Xuan March 22, 2022, 3:56 a.m. UTC
  From: Xuan Ding <xuan.ding@intel.com>

Header split consists of splitting a received packet into two separate
regions based on the packet content. The split happens after the
packet header and before the packet payload. Splitting is usually between
the packet header that can be posted to a dedicated buffer and the packet
payload that can be posted to a different buffer.

Currently, Rx buffer split supports length and offset based packet split.
Although header split is a subset of buffer split, configuring buffer split
based on length and offset is not suitable for NICs that split based on
header protocol types. Moreover, tunneling makes the conversion from offset
to protocol impossible.

This patch extends the current buffer split to support protocol based
header split. A new proto field is introduced in the rte_eth_rxseg_split
structure reserved field to specify header protocol type. With Rx offload
flag RTE_ETH_RX_OFFLOAD_HEADER_SPLIT enabled and protocol type configured,
PMD will split the ingress packets into two separate regions. Currently,
both inner and outer L2/L3/L4 level header split can be supported.

For example, let's suppose we configured the Rx queue with the
following segments:
    seg0 - pool0
    seg1 - pool1

With the header split type configured as RTE_ETH_RX_HEADER_SPLIT_UDP,
a packet consisting of MAC_IP_UDP_PAYLOAD will be split as follows:
    seg0 - pool0, udp_header
    seg1 - pool1, payload

The memory attributes for the split parts may differ as well - for example,
mempool0 and mempool1 may belong to DPDK memory and external memory,
respectively.

Signed-off-by: Xuan Ding <xuan.ding@intel.com>
Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
---
 lib/ethdev/rte_ethdev.c | 24 +++++++++++++----------
 lib/ethdev/rte_ethdev.h | 43 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 55 insertions(+), 12 deletions(-)
  

Comments

Qi Zhang March 22, 2022, 7:14 a.m. UTC | #1
> -----Original Message-----
> From: Ding, Xuan <xuan.ding@intel.com>
> Sent: Tuesday, March 22, 2022 11:56 AM
> To: thomas@monjalon.net; Yigit, Ferruh <ferruh.yigit@intel.com>;
> andrew.rybchenko@oktetlabs.ru
> Cc: dev@dpdk.org; stephen@networkplumber.org;
> mb@smartsharesystems.com; viacheslavo@nvidia.com; Zhang, Qi Z
> <qi.z.zhang@intel.com>; Yu, Ping <ping.yu@intel.com>; Wu, WenxuanX
> <wenxuanx.wu@intel.com>; Ding, Xuan <xuan.ding@intel.com>; Wang,
> YuanX <yuanx.wang@intel.com>
> Subject: [RFC,v2 1/3] ethdev: introduce protocol type based header split
> 
> From: Xuan Ding <xuan.ding@intel.com>
> 
> Header split consists of splitting a received packet into two separate regions
> based on the packet content. The split happens after the packet header and
> before the packet payload. Splitting is usually between the packet header
> that can be posted to a dedicated buffer and the packet payload that can be
> posted to a different buffer.
> 
> Currently, Rx buffer split supports length and offset based packet split.
> Although header split is a subset of buffer split, configure buffer split based
> on length and offset is not suitable for NICs that do split based on header
> protocol types. And tunneling makes the conversion from offset to protocol
> impossible.
> 
> This patch extends the current buffer split to support protocol based header
> split. A new proto field is introduced in the rte_eth_rxseg_split structure
> reserved field to specify header protocol type. With Rx offload flag
> RTE_ETH_RX_OFFLOAD_HEADER_SPLIT enabled and protocol type
> configured, PMD will split the ingress packets into two separate regions.
> Currently, both inner and outer L2/L3/L4 level header split can be supported.
> 
> For example, let's suppose we configured the Rx queue with the following
> segments:
>     seg0 - pool0
>     seg1 - pool1
> 
> With header split type configured with RTE_ETH_RX_HEADER_SPLIT_UDP,
> the packet consists of MAC_IP_UDP_PAYLOAD will be split like following:
>     seg0 - pool0, udp_header
>     seg1 - pool1, payload
> 
> The memory attributes for the split parts may differ either - for example the
> mempool0 and mempool1 belong to dpdk memory and external memory,
> respectively.
> 
> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
> ---
>  lib/ethdev/rte_ethdev.c | 24 +++++++++++++----------
> lib/ethdev/rte_ethdev.h | 43
> +++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 55 insertions(+), 12 deletions(-)
> 
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index
> 70c850a2f1..49c8fef1c3 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -1661,6 +1661,7 @@ rte_eth_rx_queue_check_split(const struct
> rte_eth_rxseg_split *rx_seg,
>  		struct rte_mempool *mpl = rx_seg[seg_idx].mp;
>  		uint32_t length = rx_seg[seg_idx].length;
>  		uint32_t offset = rx_seg[seg_idx].offset;
> +		uint16_t proto = rx_seg[seg_idx].proto;
> 
>  		if (mpl == NULL) {
>  			RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
> @@ -1692,15 +1693,17 @@ rte_eth_rx_queue_check_split(const struct
> rte_eth_rxseg_split *rx_seg,
>  					(struct rte_pktmbuf_pool_private));
>  			return -ENOSPC;
>  		}
> -		offset += seg_idx != 0 ? 0 : RTE_PKTMBUF_HEADROOM;
> -		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
> -		length = length != 0 ? length : *mbp_buf_size;
> -		if (*mbp_buf_size < length + offset) {
> -			RTE_ETHDEV_LOG(ERR,
> -				       "%s mbuf_data_room_size %u < %u
> (segment length=%u + segment offset=%u)\n",
> -				       mpl->name, *mbp_buf_size,
> -				       length + offset, length, offset);
> -			return -EINVAL;
> +		if (proto == 0) {
> +			offset += seg_idx != 0 ? 0 :
> RTE_PKTMBUF_HEADROOM;
> +			*mbp_buf_size =
> rte_pktmbuf_data_room_size(mpl);
> +			length = length != 0 ? length : *mbp_buf_size;
> +			if (*mbp_buf_size < length + offset) {
> +				RTE_ETHDEV_LOG(ERR,
> +					"%s mbuf_data_room_size %u < %u
> (segment length=%u + segment offset=%u)\n",
> +					mpl->name, *mbp_buf_size,
> +					length + offset, length, offset);
> +				return -EINVAL;
> +			}
>  		}

Since length and proto are mutually exclusive, it would be better to also check the length when proto != 0.

.....

> @@ -1197,12 +1197,26 @@ struct rte_eth_txmode {
>   *     - pool from the last valid element
>   *     - the buffer size from this pool
>   *     - zero offset
> + *
> + * Header split is a subset of buffer split. The split happens after
> + the
> + * packet header and before the packet payload. For PMDs that do not
> + * support header split configuration by length and offset, the
> + location
> + * of the split needs to be specified by the header protocol type.
> + While for
> + * buffer split, this field should not be configured.
> + *
> + * If RTE_ETH_RX_OFFLOAD_HEADER_SPLIT flag is set in offloads field,
> + * the PMD will split the received packets into two separate regions:
> + * - The header buffer will be allocated from the memory pool,
> + *   specified in the first array element, the second buffer, from the
> + *   pool in the second element.
> + * - The length and offset do not need to be configured in header split.

We do not necessarily need to ignore the offset configuration for header split, as there is no conflict; a driver can still support copying a split header to a specific mbuf offset.
And if we support offset with header split, an offset boundary check can also be considered in rte_eth_rx_queue_check_split.

Regards
Qi
  
Ding, Xuan March 22, 2022, 7:43 a.m. UTC | #2
Hi Qi,

> -----Original Message-----
> From: Zhang, Qi Z <qi.z.zhang@intel.com>
> Sent: Tuesday, March 22, 2022 3:14 PM
> To: Ding, Xuan <xuan.ding@intel.com>; thomas@monjalon.net; Yigit, Ferruh
> <ferruh.yigit@intel.com>; andrew.rybchenko@oktetlabs.ru
> Cc: dev@dpdk.org; stephen@networkplumber.org;
> mb@smartsharesystems.com; viacheslavo@nvidia.com; Yu, Ping
> <ping.yu@intel.com>; Wu, WenxuanX <wenxuanx.wu@intel.com>; Wang,
> YuanX <yuanx.wang@intel.com>
> Subject: RE: [RFC,v2 1/3] ethdev: introduce protocol type based header split
> 
> 
> 
> > -----Original Message-----
> > From: Ding, Xuan <xuan.ding@intel.com>
> > Sent: Tuesday, March 22, 2022 11:56 AM
> > To: thomas@monjalon.net; Yigit, Ferruh <ferruh.yigit@intel.com>;
> > andrew.rybchenko@oktetlabs.ru
> > Cc: dev@dpdk.org; stephen@networkplumber.org;
> > mb@smartsharesystems.com; viacheslavo@nvidia.com; Zhang, Qi Z
> > <qi.z.zhang@intel.com>; Yu, Ping <ping.yu@intel.com>; Wu, WenxuanX
> > <wenxuanx.wu@intel.com>; Ding, Xuan <xuan.ding@intel.com>; Wang,
> YuanX
> > <yuanx.wang@intel.com>
> > Subject: [RFC,v2 1/3] ethdev: introduce protocol type based header
> > split
> >
> > From: Xuan Ding <xuan.ding@intel.com>
> >
> > Header split consists of splitting a received packet into two separate
> > regions based on the packet content. The split happens after the
> > packet header and before the packet payload. Splitting is usually
> > between the packet header that can be posted to a dedicated buffer and
> > the packet payload that can be posted to a different buffer.
> >
> > Currently, Rx buffer split supports length and offset based packet split.
> > Although header split is a subset of buffer split, configure buffer
> > split based on length and offset is not suitable for NICs that do
> > split based on header protocol types. And tunneling makes the
> > conversion from offset to protocol impossible.
> >
> > This patch extends the current buffer split to support protocol based
> > header split. A new proto field is introduced in the
> > rte_eth_rxseg_split structure reserved field to specify header
> > protocol type. With Rx offload flag RTE_ETH_RX_OFFLOAD_HEADER_SPLIT
> > enabled and protocol type configured, PMD will split the ingress packets
> into two separate regions.
> > Currently, both inner and outer L2/L3/L4 level header split can be
> supported.
> >
> > For example, let's suppose we configured the Rx queue with the
> > following
> > segments:
> >     seg0 - pool0
> >     seg1 - pool1
> >
> > With header split type configured with RTE_ETH_RX_HEADER_SPLIT_UDP,
> > the packet consists of MAC_IP_UDP_PAYLOAD will be split like following:
> >     seg0 - pool0, udp_header
> >     seg1 - pool1, payload
> >
> > The memory attributes for the split parts may differ either - for
> > example the
> > mempool0 and mempool1 belong to dpdk memory and external memory,
> > respectively.
> >
> > Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> > Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
> > ---
> >  lib/ethdev/rte_ethdev.c | 24 +++++++++++++----------
> > lib/ethdev/rte_ethdev.h | 43
> > +++++++++++++++++++++++++++++++++++++++--
> >  2 files changed, 55 insertions(+), 12 deletions(-)
> >
> > diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index
> > 70c850a2f1..49c8fef1c3 100644
> > --- a/lib/ethdev/rte_ethdev.c
> > +++ b/lib/ethdev/rte_ethdev.c
> > @@ -1661,6 +1661,7 @@ rte_eth_rx_queue_check_split(const struct
> > rte_eth_rxseg_split *rx_seg,  struct rte_mempool *mpl =
> > rx_seg[seg_idx].mp;  uint32_t length = rx_seg[seg_idx].length;
> > uint32_t offset = rx_seg[seg_idx].offset;
> > +uint16_t proto = rx_seg[seg_idx].proto;
> >
> >  if (mpl == NULL) {
> >  RTE_ETHDEV_LOG(ERR, "null mempool pointer\n"); @@ -1692,15
> +1693,17
> > @@ rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split
> > *rx_seg,  (struct rte_pktmbuf_pool_private));  return -ENOSPC;  }
> > -offset += seg_idx != 0 ? 0 : RTE_PKTMBUF_HEADROOM; -*mbp_buf_size =
> > rte_pktmbuf_data_room_size(mpl); -length = length != 0 ? length :
> > *mbp_buf_size; -if (*mbp_buf_size < length + offset) {
> > -RTE_ETHDEV_LOG(ERR,
> > -       "%s mbuf_data_room_size %u < %u
> > (segment length=%u + segment offset=%u)\n",
> > -       mpl->name, *mbp_buf_size,
> > -       length + offset, length, offset);
> > -return -EINVAL;
> > +if (proto == 0) {
> > +offset += seg_idx != 0 ? 0 :
> > RTE_PKTMBUF_HEADROOM;
> > +*mbp_buf_size =
> > rte_pktmbuf_data_room_size(mpl);
> > +length = length != 0 ? length : *mbp_buf_size; if (*mbp_buf_size <
> > +length + offset) { RTE_ETHDEV_LOG(ERR, "%s mbuf_data_room_size %u <
> > +%u
> > (segment length=%u + segment offset=%u)\n",
> > +mpl->name, *mbp_buf_size,
> > +length + offset, length, offset);
> > +return -EINVAL;
> > +}
> >  }
> 
> As the length and proto is exclusive, it better also check the length when
> proto!=0

Thanks for your comments, will fix it in next version.

> 
> .....
> 
> > @@ -1197,12 +1197,26 @@ struct rte_eth_txmode {
> >   *     - pool from the last valid element
> >   *     - the buffer size from this pool
> >   *     - zero offset
> > + *
> > + * Header split is a subset of buffer split. The split happens after
> > + the
> > + * packet header and before the packet payload. For PMDs that do not
> > + * support header split configuration by length and offset, the
> > + location
> > + * of the split needs to be specified by the header protocol type.
> > + While for
> > + * buffer split, this field should not be configured.
> > + *
> > + * If RTE_ETH_RX_OFFLOAD_HEADER_SPLIT flag is set in offloads field,
> > + * the PMD will split the received packets into two separate regions:
> > + * - The header buffer will be allocated from the memory pool,
> > + *   specified in the first array element, the second buffer, from the
> > + *   pool in the second element.
> > + * - The length and offset do not need to be configured in header split.
> 
> We may not necessarily ignore the offset configure for header split as there
> is no confliction, a driver still can support copying a split header to a specific
> mbuf offset And if we support offset with header split, offset boundary check
> can also be considered in rte_eth_rx_queue_check_split

You are right. Only length and proto are mutually exclusive between buffer split and header split.
Will update in the next version.

Thanks,
Xuan

> 
> Regards
> Qi
> 
>
  

Patch

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 70c850a2f1..49c8fef1c3 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1661,6 +1661,7 @@  rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 		struct rte_mempool *mpl = rx_seg[seg_idx].mp;
 		uint32_t length = rx_seg[seg_idx].length;
 		uint32_t offset = rx_seg[seg_idx].offset;
+		uint16_t proto = rx_seg[seg_idx].proto;
 
 		if (mpl == NULL) {
 			RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
@@ -1692,15 +1693,17 @@  rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 					(struct rte_pktmbuf_pool_private));
 			return -ENOSPC;
 		}
-		offset += seg_idx != 0 ? 0 : RTE_PKTMBUF_HEADROOM;
-		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
-		length = length != 0 ? length : *mbp_buf_size;
-		if (*mbp_buf_size < length + offset) {
-			RTE_ETHDEV_LOG(ERR,
-				       "%s mbuf_data_room_size %u < %u (segment length=%u + segment offset=%u)\n",
-				       mpl->name, *mbp_buf_size,
-				       length + offset, length, offset);
-			return -EINVAL;
+		if (proto == 0) {
+			offset += seg_idx != 0 ? 0 : RTE_PKTMBUF_HEADROOM;
+			*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
+			length = length != 0 ? length : *mbp_buf_size;
+			if (*mbp_buf_size < length + offset) {
+				RTE_ETHDEV_LOG(ERR,
+					"%s mbuf_data_room_size %u < %u (segment length=%u + segment offset=%u)\n",
+					mpl->name, *mbp_buf_size,
+					length + offset, length, offset);
+				return -EINVAL;
+			}
 		}
 	}
 	return 0;
@@ -1778,7 +1781,8 @@  rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
 		n_seg = rx_conf->rx_nseg;
 
-		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT ||
+			rx_conf->offloads & RTE_ETH_RX_OFFLOAD_HEADER_SPLIT) {
 			ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
 							   &mbp_buf_size,
 							   &dev_info);
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index c2d1f9a972..6d66de316c 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1197,12 +1197,26 @@  struct rte_eth_txmode {
  *     - pool from the last valid element
  *     - the buffer size from this pool
  *     - zero offset
+ *
+ * Header split is a subset of buffer split. The split happens after the
+ * packet header and before the packet payload. For PMDs that do not
+ * support header split configuration by length and offset, the location
+ * of the split needs to be specified by the header protocol type. While for
+ * buffer split, this field should not be configured.
+ *
+ * If RTE_ETH_RX_OFFLOAD_HEADER_SPLIT flag is set in offloads field,
+ * the PMD will split the received packets into two separate regions:
+ * - The header buffer will be allocated from the memory pool,
+ *   specified in the first array element, the second buffer, from the
+ *   pool in the second element.
+ * - The length and offset do not need to be configured in header split.
  */
 struct rte_eth_rxseg_split {
 	struct rte_mempool *mp; /**< Memory pool to allocate segment from. */
 	uint16_t length; /**< Segment data length, configures split point. */
 	uint16_t offset; /**< Data offset from beginning of mbuf data buffer. */
-	uint32_t reserved; /**< Reserved field. */
+	uint16_t proto; /**< header protocol type, configures header split point. */
+	uint16_t reserved; /**< Reserved field. */
 };
 
 /**
@@ -1212,7 +1226,7 @@  struct rte_eth_rxseg_split {
  * A common structure used to describe Rx packet segment properties.
  */
 union rte_eth_rxseg {
-	/* The settings for buffer split offload. */
+	/* The settings for buffer split and header split offload. */
 	struct rte_eth_rxseg_split split;
 	/* The other features settings should be added here. */
 };
@@ -1664,6 +1678,31 @@  struct rte_eth_conf {
 			     RTE_ETH_RX_OFFLOAD_QINQ_STRIP)
 #define DEV_RX_OFFLOAD_VLAN RTE_DEPRECATED(DEV_RX_OFFLOAD_VLAN) RTE_ETH_RX_OFFLOAD_VLAN
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this enumeration may change without prior notice.
+ * This enum indicates the header split protocol type
+ */
+enum rte_eth_rx_header_split_protocol_type {
+	RTE_ETH_RX_HEADER_SPLIT_NONE = 0,
+	RTE_ETH_RX_HEADER_SPLIT_MAC,
+	RTE_ETH_RX_HEADER_SPLIT_IPV4,
+	RTE_ETH_RX_HEADER_SPLIT_IPV6,
+	RTE_ETH_RX_HEADER_SPLIT_L3,
+	RTE_ETH_RX_HEADER_SPLIT_TCP,
+	RTE_ETH_RX_HEADER_SPLIT_UDP,
+	RTE_ETH_RX_HEADER_SPLIT_SCTP,
+	RTE_ETH_RX_HEADER_SPLIT_L4,
+	RTE_ETH_RX_HEADER_SPLIT_INNER_MAC,
+	RTE_ETH_RX_HEADER_SPLIT_INNER_IPV4,
+	RTE_ETH_RX_HEADER_SPLIT_INNER_IPV6,
+	RTE_ETH_RX_HEADER_SPLIT_INNER_L3,
+	RTE_ETH_RX_HEADER_SPLIT_INNER_TCP,
+	RTE_ETH_RX_HEADER_SPLIT_INNER_UDP,
+	RTE_ETH_RX_HEADER_SPLIT_INNER_SCTP,
+	RTE_ETH_RX_HEADER_SPLIT_INNER_L4,
+};
+
 /*
  * If new Rx offload capabilities are defined, they also must be
  * mentioned in rte_rx_offload_names in rte_ethdev.c file.