[v4,16/29] node: add ethdev Rx node

Message ID 20200405085613.1336841-17-jerinj@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Series: graph: introduce graph subsystem

Checks

Context               Check     Description
ci/checkpatch         success   coding style OK
ci/Intel-compilation  success   Compilation OK

Commit Message

Jerin Jacob Kollanukkaran April 5, 2020, 8:56 a.m. UTC
  From: Nithin Dabilpuram <ndabilpuram@marvell.com>

Add the ethdev_rx source rte_node process function and register
it. This node is a source node that will be called periodically
and, when called, performs rte_eth_rx_burst() on a specific
(port, queue) pair and enqueues the received packets as a stream
of objects to the next node.

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>
---
 lib/librte_node/Makefile         |   3 +-
 lib/librte_node/ethdev_rx.c      | 221 +++++++++++++++++++++++++++++++
 lib/librte_node/ethdev_rx_priv.h |  81 +++++++++++
 lib/librte_node/meson.build      |   4 +-
 4 files changed, 306 insertions(+), 3 deletions(-)
 create mode 100644 lib/librte_node/ethdev_rx.c
 create mode 100644 lib/librte_node/ethdev_rx_priv.h
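
Not part of this patch, but for orientation: a minimal sketch of how a
source node such as ethdev_rx is typically pulled into a graph and driven
on a worker core. It assumes ports/queues are already configured and that
the node's per-(port, queue) context is populated by the ctrl API added
later in this series; names like "worker_0" and graph_main_loop are
illustrative only.

#include <rte_common.h>
#include <rte_memory.h>
#include <rte_graph.h>
#include <rte_graph_worker.h>

static const char *node_patterns[] = {"ethdev_rx", "ip4_lookup"};

static int
graph_main_loop(void *arg)
{
	struct rte_graph_param prm = {
		.socket_id = SOCKET_ID_ANY,
		.nb_node_patterns = RTE_DIM(node_patterns),
		.node_patterns = node_patterns,
	};
	struct rte_graph *graph;

	RTE_SET_USED(arg);

	/* Instantiate a graph containing the nodes matching the patterns */
	if (rte_graph_create("worker_0", &prm) == RTE_GRAPH_ID_INVALID)
		return -1;

	graph = rte_graph_lookup("worker_0");
	/* Source nodes (RTE_NODE_SOURCE_F) are invoked on every walk */
	while (1)
		rte_graph_walk(graph);
	return 0;
}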
  

Comments

Andrzej Ostruszka April 9, 2020, 11:05 p.m. UTC | #1
On 4/5/20 10:56 AM, jerinj@marvell.com wrote:
> From: Nithin Dabilpuram <ndabilpuram@marvell.com>
> 
> Add the ethdev_rx source rte_node process function and register
> it. This node is a source node that will be called periodically
> and, when called, performs rte_eth_rx_burst() on a specific
> (port, queue) pair and enqueues the received packets as a stream
> of objects to the next node.
> 
> Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>
[...]
> +/* Callback for soft ptype parsing */
> +static uint16_t
> +eth_pkt_parse_cb(uint16_t port, uint16_t queue, struct rte_mbuf **mbufs,
> +		 uint16_t nb_pkts, uint16_t max_pkts, void *user_param)
> +{
> +	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3;
> +	struct rte_ether_hdr *eth_hdr;
> +	uint16_t etype, n_left;
> +	struct rte_mbuf **pkts;
> +
> +	RTE_SET_USED(port);
> +	RTE_SET_USED(queue);
> +	RTE_SET_USED(max_pkts);
> +	RTE_SET_USED(user_param);
> +
> +	pkts = mbufs;
> +	n_left = nb_pkts;
> +	while (n_left >= 12) {
> +
> +		/* Prefetch next-next mbufs */
> +		rte_prefetch0(pkts[8]);
> +		rte_prefetch0(pkts[9]);
> +		rte_prefetch0(pkts[10]);
> +		rte_prefetch0(pkts[11]);
> +
> +		/* Prefetch next mbuf data */
> +		rte_prefetch0(
> +			rte_pktmbuf_mtod(pkts[4], struct rte_ether_hdr *));
> +		rte_prefetch0(
> +			rte_pktmbuf_mtod(pkts[5], struct rte_ether_hdr *));
> +		rte_prefetch0(
> +			rte_pktmbuf_mtod(pkts[6], struct rte_ether_hdr *));
> +		rte_prefetch0(
> +			rte_pktmbuf_mtod(pkts[7], struct rte_ether_hdr *));

I know this is a software fallback only (and not likely to be used), but is
this aggressive prefetching always beneficial?  I guess you tested this
on octeon and it works, but if this is supposed to be the standard Rx node
then maybe this is not always good?

On the other hand, if other platforms find it detrimental, they can
submit some improvements later :)

> +
> +		mbuf0 = pkts[0];
> +		mbuf1 = pkts[1];
> +		mbuf2 = pkts[2];
> +		mbuf3 = pkts[3];
> +		pkts += 4;
> +		n_left -= 4;
> +
> +		/* Extract ptype of mbuf0 */
> +		eth_hdr = rte_pktmbuf_mtod(mbuf0, struct rte_ether_hdr *);
> +		etype = eth_hdr->ether_type;
> +		mbuf0->packet_type = l3_ptype(etype, 0);
> +
> +		/* Extract ptype of mbuf1 */
> +		eth_hdr = rte_pktmbuf_mtod(mbuf1, struct rte_ether_hdr *);
> +		etype = eth_hdr->ether_type;
> +		mbuf1->packet_type = l3_ptype(etype, 0);
> +
> +		/* Extract ptype of mbuf2 */
> +		eth_hdr = rte_pktmbuf_mtod(mbuf2, struct rte_ether_hdr *);
> +		etype = eth_hdr->ether_type;
> +		mbuf2->packet_type = l3_ptype(etype, 0);
> +
> +		/* Extract ptype of mbuf3 */
> +		eth_hdr = rte_pktmbuf_mtod(mbuf3, struct rte_ether_hdr *);
> +		etype = eth_hdr->ether_type;
> +		mbuf3->packet_type = l3_ptype(etype, 0);
> +	}
> +
> +	while (n_left > 0) {
> +		mbuf0 = pkts[0];
> +
> +		pkts += 1;
> +		n_left -= 1;
> +
> +		/* Extract ptype of mbuf0 */
> +		eth_hdr = rte_pktmbuf_mtod(mbuf0, struct rte_ether_hdr *);
> +		etype = eth_hdr->ether_type;
> +		mbuf0->packet_type = l3_ptype(etype, 0);
> +	}
> +
> +	return nb_pkts;
> +}
[...]

With regards
Andrzej Ostruszka
  
Nithin Dabilpuram April 10, 2020, 7 a.m. UTC | #2
On Fri, Apr 10, 2020 at 01:05:08AM +0200, Andrzej Ostruszka wrote:
> On 4/5/20 10:56 AM, jerinj@marvell.com wrote:
> > From: Nithin Dabilpuram <ndabilpuram@marvell.com>
> > 
> > Add the ethdev_rx source rte_node process function and register
> > it. This node is a source node that will be called periodically
> > and, when called, performs rte_eth_rx_burst() on a specific
> > (port, queue) pair and enqueues the received packets as a stream
> > of objects to the next node.
> > 
> > Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>
> [...]
> > +/* Callback for soft ptype parsing */
> > +static uint16_t
> > +eth_pkt_parse_cb(uint16_t port, uint16_t queue, struct rte_mbuf **mbufs,
> > +		 uint16_t nb_pkts, uint16_t max_pkts, void *user_param)
> > +{
> > +	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3;
> > +	struct rte_ether_hdr *eth_hdr;
> > +	uint16_t etype, n_left;
> > +	struct rte_mbuf **pkts;
> > +
> > +	RTE_SET_USED(port);
> > +	RTE_SET_USED(queue);
> > +	RTE_SET_USED(max_pkts);
> > +	RTE_SET_USED(user_param);
> > +
> > +	pkts = mbufs;
> > +	n_left = nb_pkts;
> > +	while (n_left >= 12) {
> > +
> > +		/* Prefetch next-next mbufs */
> > +		rte_prefetch0(pkts[8]);
> > +		rte_prefetch0(pkts[9]);
> > +		rte_prefetch0(pkts[10]);
> > +		rte_prefetch0(pkts[11]);
> > +
> > +		/* Prefetch next mbuf data */
> > +		rte_prefetch0(
> > +			rte_pktmbuf_mtod(pkts[4], struct rte_ether_hdr *));
> > +		rte_prefetch0(
> > +			rte_pktmbuf_mtod(pkts[5], struct rte_ether_hdr *));
> > +		rte_prefetch0(
> > +			rte_pktmbuf_mtod(pkts[6], struct rte_ether_hdr *));
> > +		rte_prefetch0(
> > +			rte_pktmbuf_mtod(pkts[7], struct rte_ether_hdr *));
> 
> I know this is a software fallback only (and not likely to be used), but is
> this aggressive prefetching always beneficial?  I guess you tested this
> on octeon and it works, but if this is supposed to be the standard Rx node
> then maybe this is not always good?
> 
> On the other hand, if other platforms find it detrimental, they can
> submit some improvements later :)

I tested it now on octeon and there is a 6% improvement when using the
prefetch-based while loop instead of the non-prefetch-based while loop.

Yes, it is not intended to be used normally, but I just followed the
principle of prefetching ahead of use. It could be changed later if needed
for other platforms, or split into a platform-dependent implementation like
the lookup node.
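
For reference, the non-prefetch baseline being compared against is
presumably just the patch's scalar tail loop applied to the whole burst;
a sketch (hypothetical function name, meant as a drop-in inside
ethdev_rx.c where l3_ptype() is defined):

static uint16_t
eth_pkt_parse_cb_scalar(uint16_t port, uint16_t queue,
			struct rte_mbuf **mbufs, uint16_t nb_pkts,
			uint16_t max_pkts, void *user_param)
{
	struct rte_ether_hdr *eth_hdr;
	uint16_t i;

	RTE_SET_USED(port);
	RTE_SET_USED(queue);
	RTE_SET_USED(max_pkts);
	RTE_SET_USED(user_param);

	/* One packet at a time, no software prefetch */
	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(mbufs[i], struct rte_ether_hdr *);
		mbufs[i]->packet_type = l3_ptype(eth_hdr->ether_type, 0);
	}

	return nb_pkts;
}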

> 
> > +
> > +		mbuf0 = pkts[0];
> > +		mbuf1 = pkts[1];
> > +		mbuf2 = pkts[2];
> > +		mbuf3 = pkts[3];
> > +		pkts += 4;
> > +		n_left -= 4;
> > +
> > +		/* Extract ptype of mbuf0 */
> > +		eth_hdr = rte_pktmbuf_mtod(mbuf0, struct rte_ether_hdr *);
> > +		etype = eth_hdr->ether_type;
> > +		mbuf0->packet_type = l3_ptype(etype, 0);
> > +
> > +		/* Extract ptype of mbuf1 */
> > +		eth_hdr = rte_pktmbuf_mtod(mbuf1, struct rte_ether_hdr *);
> > +		etype = eth_hdr->ether_type;
> > +		mbuf1->packet_type = l3_ptype(etype, 0);
> > +
> > +		/* Extract ptype of mbuf2 */
> > +		eth_hdr = rte_pktmbuf_mtod(mbuf2, struct rte_ether_hdr *);
> > +		etype = eth_hdr->ether_type;
> > +		mbuf2->packet_type = l3_ptype(etype, 0);
> > +
> > +		/* Extract ptype of mbuf3 */
> > +		eth_hdr = rte_pktmbuf_mtod(mbuf3, struct rte_ether_hdr *);
> > +		etype = eth_hdr->ether_type;
> > +		mbuf3->packet_type = l3_ptype(etype, 0);
> > +	}
> > +
> > +	while (n_left > 0) {
> > +		mbuf0 = pkts[0];
> > +
> > +		pkts += 1;
> > +		n_left -= 1;
> > +
> > +		/* Extract ptype of mbuf0 */
> > +		eth_hdr = rte_pktmbuf_mtod(mbuf0, struct rte_ether_hdr *);
> > +		etype = eth_hdr->ether_type;
> > +		mbuf0->packet_type = l3_ptype(etype, 0);
> > +	}
> > +
> > +	return nb_pkts;
> > +}
> [...]
> 
> With regards
> Andrzej Ostruszka
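
Background on the mechanism used for the soft ptype fallback: callbacks
registered with rte_eth_add_rx_callback() are run from inside
rte_eth_rx_burst(), after the PMD has filled the mbuf array. A minimal
sketch of registering one (hypothetical handler name; the patch registers
eth_pkt_parse_cb the same way):

#include <rte_common.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

static uint16_t
my_rx_cb(uint16_t port, uint16_t queue, struct rte_mbuf **pkts,
	 uint16_t nb_pkts, uint16_t max_pkts, void *user_param)
{
	RTE_SET_USED(port);
	RTE_SET_USED(queue);
	RTE_SET_USED(max_pkts);
	RTE_SET_USED(user_param);

	/* Inspect or modify pkts[0..nb_pkts) here */
	return nb_pkts; /* Count handed back to the rx_burst caller */
}

static int
rx_cb_setup(uint16_t port, uint16_t queue)
{
	/* Returns an opaque handle on success, NULL on failure */
	if (rte_eth_add_rx_callback(port, queue, my_rx_cb, NULL) == NULL)
		return -1;
	return 0;
}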
  

Patch

diff --git a/lib/librte_node/Makefile b/lib/librte_node/Makefile
index dbc8e1d44..314149385 100644
--- a/lib/librte_node/Makefile
+++ b/lib/librte_node/Makefile
@@ -11,12 +11,13 @@  CFLAGS += -O3 -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS)
 # Strict-aliasing rules are violated by uint8_t[] to context size casts.
 CFLAGS += -fno-strict-aliasing
-LDLIBS += -lrte_eal -lrte_graph
+LDLIBS += -lrte_eal -lrte_graph -lrte_ethdev
 
 EXPORT_MAP := rte_node_version.map
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_NODE) += null.c
 SRCS-$(CONFIG_RTE_LIBRTE_NODE) += log.c
+SRCS-$(CONFIG_RTE_LIBRTE_NODE) += ethdev_rx.c
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_node/ethdev_rx.c b/lib/librte_node/ethdev_rx.c
new file mode 100644
index 000000000..5cc736598
--- /dev/null
+++ b/lib/librte_node/ethdev_rx.c
@@ -0,0 +1,221 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#include <rte_debug.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_graph.h>
+#include <rte_graph_worker.h>
+#include <rte_mbuf.h>
+
+#include "ethdev_rx_priv.h"
+#include "node_private.h"
+
+static struct ethdev_rx_node_main ethdev_rx_main;
+
+static __rte_always_inline uint16_t
+ethdev_rx_node_process_inline(struct rte_graph *graph, struct rte_node *node,
+			      uint16_t port, uint16_t queue)
+{
+	uint16_t count, next_index = ETHDEV_RX_NEXT_IP4_LOOKUP;
+
+	/* Get pkts from port */
+	count = rte_eth_rx_burst(port, queue, (struct rte_mbuf **)node->objs,
+				 RTE_GRAPH_BURST_SIZE);
+
+	if (!count)
+		return 0;
+	node->idx = count;
+	/* Enqueue to next node */
+	rte_node_next_stream_move(graph, node, next_index);
+
+	return count;
+}
+
+static __rte_always_inline uint16_t
+ethdev_rx_node_process(struct rte_graph *graph, struct rte_node *node,
+		       void **objs, uint16_t cnt)
+{
+	ethdev_rx_node_ctx_t *ctx = (ethdev_rx_node_ctx_t *)node->ctx;
+	uint16_t n_pkts = 0;
+
+	RTE_SET_USED(objs);
+	RTE_SET_USED(cnt);
+
+	n_pkts = ethdev_rx_node_process_inline(graph, node, ctx->port_id,
+					       ctx->queue_id);
+	return n_pkts;
+}
+
+static inline uint32_t
+l3_ptype(uint16_t etype, uint32_t ptype)
+{
+	ptype = ptype & ~RTE_PTYPE_L3_MASK;
+	if (etype == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
+		ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
+	else if (etype == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6))
+		ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
+	return ptype;
+}
+
+/* Callback for soft ptype parsing */
+static uint16_t
+eth_pkt_parse_cb(uint16_t port, uint16_t queue, struct rte_mbuf **mbufs,
+		 uint16_t nb_pkts, uint16_t max_pkts, void *user_param)
+{
+	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3;
+	struct rte_ether_hdr *eth_hdr;
+	uint16_t etype, n_left;
+	struct rte_mbuf **pkts;
+
+	RTE_SET_USED(port);
+	RTE_SET_USED(queue);
+	RTE_SET_USED(max_pkts);
+	RTE_SET_USED(user_param);
+
+	pkts = mbufs;
+	n_left = nb_pkts;
+	while (n_left >= 12) {
+
+		/* Prefetch next-next mbufs */
+		rte_prefetch0(pkts[8]);
+		rte_prefetch0(pkts[9]);
+		rte_prefetch0(pkts[10]);
+		rte_prefetch0(pkts[11]);
+
+		/* Prefetch next mbuf data */
+		rte_prefetch0(
+			rte_pktmbuf_mtod(pkts[4], struct rte_ether_hdr *));
+		rte_prefetch0(
+			rte_pktmbuf_mtod(pkts[5], struct rte_ether_hdr *));
+		rte_prefetch0(
+			rte_pktmbuf_mtod(pkts[6], struct rte_ether_hdr *));
+		rte_prefetch0(
+			rte_pktmbuf_mtod(pkts[7], struct rte_ether_hdr *));
+
+		mbuf0 = pkts[0];
+		mbuf1 = pkts[1];
+		mbuf2 = pkts[2];
+		mbuf3 = pkts[3];
+		pkts += 4;
+		n_left -= 4;
+
+		/* Extract ptype of mbuf0 */
+		eth_hdr = rte_pktmbuf_mtod(mbuf0, struct rte_ether_hdr *);
+		etype = eth_hdr->ether_type;
+		mbuf0->packet_type = l3_ptype(etype, 0);
+
+		/* Extract ptype of mbuf1 */
+		eth_hdr = rte_pktmbuf_mtod(mbuf1, struct rte_ether_hdr *);
+		etype = eth_hdr->ether_type;
+		mbuf1->packet_type = l3_ptype(etype, 0);
+
+		/* Extract ptype of mbuf2 */
+		eth_hdr = rte_pktmbuf_mtod(mbuf2, struct rte_ether_hdr *);
+		etype = eth_hdr->ether_type;
+		mbuf2->packet_type = l3_ptype(etype, 0);
+
+		/* Extract ptype of mbuf3 */
+		eth_hdr = rte_pktmbuf_mtod(mbuf3, struct rte_ether_hdr *);
+		etype = eth_hdr->ether_type;
+		mbuf3->packet_type = l3_ptype(etype, 0);
+	}
+
+	while (n_left > 0) {
+		mbuf0 = pkts[0];
+
+		pkts += 1;
+		n_left -= 1;
+
+		/* Extract ptype of mbuf0 */
+		eth_hdr = rte_pktmbuf_mtod(mbuf0, struct rte_ether_hdr *);
+		etype = eth_hdr->ether_type;
+		mbuf0->packet_type = l3_ptype(etype, 0);
+	}
+
+	return nb_pkts;
+}
+
+#define MAX_PTYPES 16
+static int
+ethdev_ptype_setup(uint16_t port, uint16_t queue)
+{
+	uint8_t l3_ipv4 = 0, l3_ipv6 = 0;
+	uint32_t ptypes[MAX_PTYPES];
+	int i, rc;
+
+	/* Check IPv4 & IPv6 ptype support */
+	rc = rte_eth_dev_get_supported_ptypes(port, RTE_PTYPE_L3_MASK, ptypes,
+					      MAX_PTYPES);
+	for (i = 0; i < rc; i++) {
+		if (ptypes[i] & RTE_PTYPE_L3_IPV4)
+			l3_ipv4 = 1;
+		if (ptypes[i] & RTE_PTYPE_L3_IPV6)
+			l3_ipv6 = 1;
+	}
+
+	if (!l3_ipv4 || !l3_ipv6) {
+		node_info("ethdev_rx",
+			  "Enabling ptype callback for required ptypes on port %u\n",
+			  port);
+
+		if (!rte_eth_add_rx_callback(port, queue, eth_pkt_parse_cb,
+					     NULL)) {
+			node_err("ethdev_rx",
+				 "Failed to add rx ptype cb: port=%d, queue=%d\n",
+				 port, queue);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int
+ethdev_rx_node_init(const struct rte_graph *graph, struct rte_node *node)
+{
+	ethdev_rx_node_ctx_t *ctx = (ethdev_rx_node_ctx_t *)node->ctx;
+	ethdev_rx_node_elem_t *elem = ethdev_rx_main.head;
+
+	RTE_SET_USED(graph);
+
+	while (elem) {
+		if (elem->nid == node->id) {
+			/* Update node specific context */
+			memcpy(ctx, &elem->ctx, sizeof(ethdev_rx_node_ctx_t));
+			break;
+		}
+		elem = elem->next;
+	}
+
+	RTE_VERIFY(elem != NULL);
+
+	/* Check and setup ptype */
+	return ethdev_ptype_setup(ctx->port_id, ctx->queue_id);
+}
+
+struct ethdev_rx_node_main *
+ethdev_rx_get_node_data_get(void)
+{
+	return &ethdev_rx_main;
+}
+
+static struct rte_node_register ethdev_rx_node_base = {
+	.process = ethdev_rx_node_process,
+	.flags = RTE_NODE_SOURCE_F,
+	.name = "ethdev_rx",
+
+	.init = ethdev_rx_node_init,
+
+	.nb_edges = ETHDEV_RX_NEXT_MAX,
+	.next_nodes = {[ETHDEV_RX_NEXT_IP4_LOOKUP] = "ip4_lookup"},
+};
+
+struct rte_node_register *
+ethdev_rx_node_get(void)
+{
+	return &ethdev_rx_node_base;
+}
+
+RTE_NODE_REGISTER(ethdev_rx_node_base);
diff --git a/lib/librte_node/ethdev_rx_priv.h b/lib/librte_node/ethdev_rx_priv.h
new file mode 100644
index 000000000..2d7195a36
--- /dev/null
+++ b/lib/librte_node/ethdev_rx_priv.h
@@ -0,0 +1,81 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+#ifndef __INCLUDE_ETHDEV_RX_PRIV_H__
+#define __INCLUDE_ETHDEV_RX_PRIV_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+
+struct ethdev_rx_node_elem;
+struct ethdev_rx_node_ctx;
+typedef struct ethdev_rx_node_elem ethdev_rx_node_elem_t;
+typedef struct ethdev_rx_node_ctx ethdev_rx_node_ctx_t;
+
+/**
+ * @internal
+ *
+ * Ethernet device Rx node context structure.
+ */
+struct ethdev_rx_node_ctx {
+	uint16_t port_id;  /**< Port identifier of the Rx node. */
+	uint16_t queue_id; /**< Queue identifier of the Rx node. */
+};
+
+/**
+ * @internal
+ *
+ * Ethernet device Rx node list element structure.
+ */
+struct ethdev_rx_node_elem {
+	struct ethdev_rx_node_elem *next;
+	/**< Pointer to the next Rx node element. */
+	struct ethdev_rx_node_ctx ctx;
+	/**< Rx node context. */
+	rte_node_t nid;
+	/**< Node identifier of the Rx node. */
+};
+
+enum ethdev_rx_next_nodes {
+	ETHDEV_RX_NEXT_IP4_LOOKUP,
+	ETHDEV_RX_NEXT_MAX,
+};
+
+/**
+ * @internal
+ *
+ * Ethernet Rx node main structure.
+ */
+struct ethdev_rx_node_main {
+	ethdev_rx_node_elem_t *head;
+	/**< Pointer to the head Rx node element. */
+};
+
+/**
+ * @internal
+ *
+ * Get the Ethernet Rx node data.
+ *
+ * @return
+ *   Pointer to Ethernet Rx node data.
+ */
+struct ethdev_rx_node_main *ethdev_rx_get_node_data_get(void);
+
+/**
+ * @internal
+ *
+ * Get the Ethernet Rx node.
+ *
+ * @return
+ *   Pointer to the Ethernet Rx node.
+ */
+struct rte_node_register *ethdev_rx_node_get(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_ETHDEV_RX_PRIV_H__ */
diff --git a/lib/librte_node/meson.build b/lib/librte_node/meson.build
index a97813ad4..94caa6c23 100644
--- a/lib/librte_node/meson.build
+++ b/lib/librte_node/meson.build
@@ -1,8 +1,8 @@ 
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(C) 2020 Marvell International Ltd.
 
-sources = files('null.c', 'log.c')
+sources = files('null.c', 'log.c', 'ethdev_rx.c')
 allow_experimental_apis = true
 # Strict-aliasing rules are violated by uint8_t[] to context size casts.
 cflags += '-fno-strict-aliasing'
-deps += ['graph']
+deps += ['graph', 'ethdev']