Message ID | 20220617074241.3260496-1-rbhansali@marvell.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | akhil goyal |
Headers | show |
Series | [v2,1/2] examples/l3fwd: common packet group functionality | expand |
Context | Check | Description |
---|---|---|
ci/checkpatch | success | coding style OK |
CC: Konstantin Ananyev > -----Original Message----- > From: Rahul Bhansali <rbhansali@marvell.com> > Sent: Friday, June 17, 2022 1:13 PM > To: dev@dpdk.org; Ruifeng Wang <ruifeng.wang@arm.com> > Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Rahul Bhansali > <rbhansali@marvell.com> > Subject: [PATCH v2 1/2] examples/l3fwd: common packet group functionality > > This will make the packet grouping function common, so that other examples > can utilize as per need. > > Signed-off-by: Rahul Bhansali <rbhansali@marvell.com> > --- > Changes in v2: New patch to address review comment. > > examples/common/neon_common.h | 50 ++++++++++++ > examples/common/pkt_group.h | 139 > ++++++++++++++++++++++++++++++++++ > examples/l3fwd/Makefile | 5 +- > examples/l3fwd/l3fwd.h | 2 - > examples/l3fwd/l3fwd_common.h | 129 +------------------------------ > examples/l3fwd/l3fwd_neon.h | 43 +---------- > examples/meson.build | 2 +- > 7 files changed, 198 insertions(+), 172 deletions(-) create mode 100644 > examples/common/neon_common.h create mode 100644 > examples/common/pkt_group.h > > diff --git a/examples/common/neon_common.h > b/examples/common/neon_common.h new file mode 100644 index > 0000000000..f01b5ab6bc > --- /dev/null > +++ b/examples/common/neon_common.h > @@ -0,0 +1,50 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2016-2018 Intel Corporation. > + * Copyright(c) 2017-2018 Linaro Limited. > + * Copyright(C) 2022 Marvell. > + */ > + > +#ifndef _NEON_COMMON_H_ > +#define _NEON_COMMON_H_ > + > +#include "pkt_group.h" > + > +/* > + * Group consecutive packets with the same destination port in bursts of 4. > + * Suppose we have array of destination ports: > + * dst_port[] = {a, b, c, d,, e, ... } > + * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. > + * We doing 4 comparisons at once and the result is 4 bit mask. > + * This mask is used as an index into prebuild array of pnum values. > + */ > +static inline uint16_t * > +neon_port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1, > + uint16x8_t dp2) > +{ > + union { > + uint16_t u16[FWDSTEP + 1]; > + uint64_t u64; > + } *pnum = (void *)pn; > + > + uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0}; > + int32_t v; > + > + dp1 = vceqq_u16(dp1, dp2); > + dp1 = vandq_u16(dp1, mask); > + v = vaddvq_u16(dp1); > + > + /* update last port counter. */ > + lp[0] += gptbl[v].lpv; > + rte_compiler_barrier(); > + > + /* if dest port value has changed. */ > + if (v != GRPMSK) { > + pnum->u64 = gptbl[v].pnum; > + pnum->u16[FWDSTEP] = 1; > + lp = pnum->u16 + gptbl[v].idx; > + } > + > + return lp; > +} > + > +#endif /* _NEON_COMMON_H_ */ > diff --git a/examples/common/pkt_group.h b/examples/common/pkt_group.h > new file mode 100644 index 0000000000..8b26d9380f > --- /dev/null > +++ b/examples/common/pkt_group.h > @@ -0,0 +1,139 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2016-2018 Intel Corporation. > + * Copyright(c) 2017-2018 Linaro Limited. > + * Copyright(C) 2022 Marvell. > + */ > + > +#ifndef _PKT_GROUP_H_ > +#define _PKT_GROUP_H_ > + > +#define FWDSTEP 4 > + > +/* > + * Group consecutive packets with the same destination port into one burst. > + * To avoid extra latency this is done together with some other packet > + * processing, but after we made a final decision about packet's destination. > + * To do this we maintain: > + * pnum - array of number of consecutive packets with the same dest > +port for > + * each packet in the input burst. > + * lp - pointer to the last updated element in the pnum. > + * dlp - dest port value lp corresponds to. > + */ > + > +#define GRPSZ (1 << FWDSTEP) > +#define GRPMSK (GRPSZ - 1) > + > +#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ > + if (likely((dlp) == (dcp)[(idx)])) { \ > + (lp)[0]++; \ > + } else { \ > + (dlp) = (dcp)[idx]; \ > + (lp) = (pn) + (idx); \ > + (lp)[0] = 1; \ > + } \ > +} while (0) > + > +static const struct { > + uint64_t pnum; /* prebuild 4 values for pnum[]. */ > + int32_t idx; /* index for new last updated elemnet. */ > + uint16_t lpv; /* add value to the last updated element. */ } > +gptbl[GRPSZ] = { > + { > + /* 0: a != b, b != c, c != d, d != e */ > + .pnum = UINT64_C(0x0001000100010001), > + .idx = 4, > + .lpv = 0, > + }, > + { > + /* 1: a == b, b != c, c != d, d != e */ > + .pnum = UINT64_C(0x0001000100010002), > + .idx = 4, > + .lpv = 1, > + }, > + { > + /* 2: a != b, b == c, c != d, d != e */ > + .pnum = UINT64_C(0x0001000100020001), > + .idx = 4, > + .lpv = 0, > + }, > + { > + /* 3: a == b, b == c, c != d, d != e */ > + .pnum = UINT64_C(0x0001000100020003), > + .idx = 4, > + .lpv = 2, > + }, > + { > + /* 4: a != b, b != c, c == d, d != e */ > + .pnum = UINT64_C(0x0001000200010001), > + .idx = 4, > + .lpv = 0, > + }, > + { > + /* 5: a == b, b != c, c == d, d != e */ > + .pnum = UINT64_C(0x0001000200010002), > + .idx = 4, > + .lpv = 1, > + }, > + { > + /* 6: a != b, b == c, c == d, d != e */ > + .pnum = UINT64_C(0x0001000200030001), > + .idx = 4, > + .lpv = 0, > + }, > + { > + /* 7: a == b, b == c, c == d, d != e */ > + .pnum = UINT64_C(0x0001000200030004), > + .idx = 4, > + .lpv = 3, > + }, > + { > + /* 8: a != b, b != c, c != d, d == e */ > + .pnum = UINT64_C(0x0002000100010001), > + .idx = 3, > + .lpv = 0, > + }, > + { > + /* 9: a == b, b != c, c != d, d == e */ > + .pnum = UINT64_C(0x0002000100010002), > + .idx = 3, > + .lpv = 1, > + }, > + { > + /* 0xa: a != b, b == c, c != d, d == e */ > + .pnum = UINT64_C(0x0002000100020001), > + .idx = 3, > + .lpv = 0, > + }, > + { > + /* 0xb: a == b, b == c, c != d, d == e */ > + .pnum = UINT64_C(0x0002000100020003), > + .idx = 3, > + .lpv = 2, > + }, > + { > + /* 0xc: a != b, b != c, c == d, d == e */ > + .pnum = UINT64_C(0x0002000300010001), > + .idx = 2, > + .lpv = 0, > + }, > + { > + /* 0xd: a == b, b != c, c == d, d == e */ > + .pnum = UINT64_C(0x0002000300010002), > + .idx = 2, > + .lpv = 1, > + }, > + { > + /* 0xe: a != b, b == c, c == d, d == e */ > + .pnum = UINT64_C(0x0002000300040001), > + .idx = 1, > + .lpv = 0, > + }, > + { > + /* 0xf: a == b, b == c, c == d, d == e */ > + .pnum = UINT64_C(0x0002000300040005), > + .idx = 0, > + .lpv = 4, > + }, > +}; > + > +#endif /* _PKT_GROUP_H_ */ > diff --git a/examples/l3fwd/Makefile b/examples/l3fwd/Makefile index > 8efe6378e2..8dbe85c2e6 100644 > --- a/examples/l3fwd/Makefile > +++ b/examples/l3fwd/Makefile > @@ -22,6 +22,7 @@ shared: build/$(APP)-shared > static: build/$(APP)-static > ln -sf $(APP)-static build/$(APP) > > +INCLUDES =-I../common > PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) CFLAGS += -O3 > $(shell $(PKGCONF) --cflags libdpdk) # Added for 'rte_eth_link_to_str()' > @@ -38,10 +39,10 @@ endif > endif > > build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build > - $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED) > + $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) > +$(LDFLAGS_SHARED) > > build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build > - $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC) > + $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) > +$(LDFLAGS_STATIC) > > build: > @mkdir -p $@ > diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index > 8a52c90755..40b5f32a9e 100644 > --- a/examples/l3fwd/l3fwd.h > +++ b/examples/l3fwd/l3fwd.h > @@ -44,8 +44,6 @@ > /* Used to mark destination port as 'invalid'. */ > #define BAD_PORT ((uint16_t)-1) > > -#define FWDSTEP 4 > - > /* replace first 12B of the ethernet header. */ > #define MASK_ETH 0x3f > > diff --git a/examples/l3fwd/l3fwd_common.h > b/examples/l3fwd/l3fwd_common.h index 8e4c27218f..224b1c08e8 100644 > --- a/examples/l3fwd/l3fwd_common.h > +++ b/examples/l3fwd/l3fwd_common.h > @@ -7,6 +7,8 @@ > #ifndef _L3FWD_COMMON_H_ > #define _L3FWD_COMMON_H_ > > +#include "pkt_group.h" > + > #ifdef DO_RFC_1812_CHECKS > > #define IPV4_MIN_VER_IHL 0x45 > @@ -50,133 +52,6 @@ rfc1812_process(struct rte_ipv4_hdr *ipv4_hdr, uint16_t > *dp, uint32_t ptype) > #define rfc1812_process(mb, dp, ptype) do { } while (0) > #endif /* DO_RFC_1812_CHECKS */ > > -/* > - * We group consecutive packets with the same destination port into one burst. > - * To avoid extra latency this is done together with some other packet > - * processing, but after we made a final decision about packet's destination. > - * To do this we maintain: > - * pnum - array of number of consecutive packets with the same dest port for > - * each packet in the input burst. > - * lp - pointer to the last updated element in the pnum. > - * dlp - dest port value lp corresponds to. > - */ > - > -#define GRPSZ (1 << FWDSTEP) > -#define GRPMSK (GRPSZ - 1) > - > -#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ > - if (likely((dlp) == (dcp)[(idx)])) { \ > - (lp)[0]++; \ > - } else { \ > - (dlp) = (dcp)[idx]; \ > - (lp) = (pn) + (idx); \ > - (lp)[0] = 1; \ > - } \ > -} while (0) > - > -static const struct { > - uint64_t pnum; /* prebuild 4 values for pnum[]. */ > - int32_t idx; /* index for new last updated element. */ > - uint16_t lpv; /* add value to the last updated element. */ > -} gptbl[GRPSZ] = { > - { > - /* 0: a != b, b != c, c != d, d != e */ > - .pnum = UINT64_C(0x0001000100010001), > - .idx = 4, > - .lpv = 0, > - }, > - { > - /* 1: a == b, b != c, c != d, d != e */ > - .pnum = UINT64_C(0x0001000100010002), > - .idx = 4, > - .lpv = 1, > - }, > - { > - /* 2: a != b, b == c, c != d, d != e */ > - .pnum = UINT64_C(0x0001000100020001), > - .idx = 4, > - .lpv = 0, > - }, > - { > - /* 3: a == b, b == c, c != d, d != e */ > - .pnum = UINT64_C(0x0001000100020003), > - .idx = 4, > - .lpv = 2, > - }, > - { > - /* 4: a != b, b != c, c == d, d != e */ > - .pnum = UINT64_C(0x0001000200010001), > - .idx = 4, > - .lpv = 0, > - }, > - { > - /* 5: a == b, b != c, c == d, d != e */ > - .pnum = UINT64_C(0x0001000200010002), > - .idx = 4, > - .lpv = 1, > - }, > - { > - /* 6: a != b, b == c, c == d, d != e */ > - .pnum = UINT64_C(0x0001000200030001), > - .idx = 4, > - .lpv = 0, > - }, > - { > - /* 7: a == b, b == c, c == d, d != e */ > - .pnum = UINT64_C(0x0001000200030004), > - .idx = 4, > - .lpv = 3, > - }, > - { > - /* 8: a != b, b != c, c != d, d == e */ > - .pnum = UINT64_C(0x0002000100010001), > - .idx = 3, > - .lpv = 0, > - }, > - { > - /* 9: a == b, b != c, c != d, d == e */ > - .pnum = UINT64_C(0x0002000100010002), > - .idx = 3, > - .lpv = 1, > - }, > - { > - /* 0xa: a != b, b == c, c != d, d == e */ > - .pnum = UINT64_C(0x0002000100020001), > - .idx = 3, > - .lpv = 0, > - }, > - { > - /* 0xb: a == b, b == c, c != d, d == e */ > - .pnum = UINT64_C(0x0002000100020003), > - .idx = 3, > - .lpv = 2, > - }, > - { > - /* 0xc: a != b, b != c, c == d, d == e */ > - .pnum = UINT64_C(0x0002000300010001), > - .idx = 2, > - .lpv = 0, > - }, > - { > - /* 0xd: a == b, b != c, c == d, d == e */ > - .pnum = UINT64_C(0x0002000300010002), > - .idx = 2, > - .lpv = 1, > - }, > - { > - /* 0xe: a != b, b == c, c == d, d == e */ > - .pnum = UINT64_C(0x0002000300040001), > - .idx = 1, > - .lpv = 0, > - }, > - { > - /* 0xf: a == b, b == c, c == d, d == e */ > - .pnum = UINT64_C(0x0002000300040005), > - .idx = 0, > - .lpv = 4, > - }, > -}; > - > static __rte_always_inline void > send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[], > uint32_t num) > diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h > index e3d33a5229..5fa765b640 100644 > --- a/examples/l3fwd/l3fwd_neon.h > +++ b/examples/l3fwd/l3fwd_neon.h > @@ -7,6 +7,7 @@ > #define _L3FWD_NEON_H_ > > #include "l3fwd.h" > +#include "neon_common.h" > #include "l3fwd_common.h" > > /* > @@ -62,44 +63,6 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], > uint16_t dst_port[FWDSTEP]) > &dst_port[3], pkt[3]->packet_type); > } > > -/* > - * Group consecutive packets with the same destination port in bursts of 4. > - * Suppose we have array of destination ports: > - * dst_port[] = {a, b, c, d,, e, ... } > - * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. > - * We doing 4 comparisons at once and the result is 4 bit mask. > - * This mask is used as an index into prebuild array of pnum values. > - */ > -static inline uint16_t * > -port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1, > - uint16x8_t dp2) > -{ > - union { > - uint16_t u16[FWDSTEP + 1]; > - uint64_t u64; > - } *pnum = (void *)pn; > - > - int32_t v; > - uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0}; > - > - dp1 = vceqq_u16(dp1, dp2); > - dp1 = vandq_u16(dp1, mask); > - v = vaddvq_u16(dp1); > - > - /* update last port counter. */ > - lp[0] += gptbl[v].lpv; > - rte_compiler_barrier(); > - > - /* if dest port value has changed. */ > - if (v != GRPMSK) { > - pnum->u64 = gptbl[v].pnum; > - pnum->u16[FWDSTEP] = 1; > - lp = pnum->u16 + gptbl[v].idx; > - } > - > - return lp; > -} > - > /** > * Process one packet: > * Update source and destination MAC addresses in the ethernet header. > @@ -161,7 +124,7 @@ send_packets_multi(struct lcore_conf *qconf, struct > rte_mbuf **pkts_burst, > * <d[j-3], d[j-2], d[j-1], d[j], ... > > */ > dp2 = vld1q_u16(&dst_port[j - FWDSTEP + 1]); > - lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); > + lp = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, > dp2); > > /* > * dp1: > @@ -175,7 +138,7 @@ send_packets_multi(struct lcore_conf *qconf, struct > rte_mbuf **pkts_burst, > */ > dp2 = vextq_u16(dp1, dp1, 1); > dp2 = vsetq_lane_u16(vgetq_lane_u16(dp2, 2), dp2, 3); > - lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); > + lp = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); > > /* > * remove values added by the last repeated diff --git > a/examples/meson.build b/examples/meson.build index > 78de0e1f37..81e93799f2 100644 > --- a/examples/meson.build > +++ b/examples/meson.build > @@ -97,7 +97,7 @@ foreach example: examples > ldflags = default_ldflags > > ext_deps = [] > - includes = [include_directories(example)] > + includes = [include_directories(example, 'common')] > deps = ['eal', 'mempool', 'net', 'mbuf', 'ethdev', 'cmdline'] > subdir(example) > > -- > 2.25.1
> This will make the packet grouping function common, so > that other examples can utilize as per need. > > Signed-off-by: Rahul Bhansali <rbhansali@marvell.com> > --- Adding more people for review. @thomas@monjalon.net: Can this patch be taken in next-crypto as the patch is primarily for ipsec-secgw? > Changes in v2: New patch to address review comment. > > examples/common/neon_common.h | 50 ++++++++++++ > examples/common/pkt_group.h | 139 > ++++++++++++++++++++++++++++++++++ > examples/l3fwd/Makefile | 5 +- > examples/l3fwd/l3fwd.h | 2 - > examples/l3fwd/l3fwd_common.h | 129 +------------------------------ > examples/l3fwd/l3fwd_neon.h | 43 +---------- > examples/meson.build | 2 +- > 7 files changed, 198 insertions(+), 172 deletions(-) > create mode 100644 examples/common/neon_common.h > create mode 100644 examples/common/pkt_group.h
20/06/2022 09:49, Akhil Goyal: > @thomas@monjalon.net: Can this patch be taken in next-crypto as the patch is > primarily for ipsec-secgw? Yes that's fine.
17/06/2022 08:50, Rahul Bhansali пишет: > CC: Konstantin Ananyev > >> -----Original Message----- >> From: Rahul Bhansali <rbhansali@marvell.com> >> Sent: Friday, June 17, 2022 1:13 PM >> To: dev@dpdk.org; Ruifeng Wang <ruifeng.wang@arm.com> >> Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Rahul Bhansali >> <rbhansali@marvell.com> >> Subject: [PATCH v2 1/2] examples/l3fwd: common packet group functionality >> >> This will make the packet grouping function common, so that other examples >> can utilize as per need. >> >> Signed-off-by: Rahul Bhansali <rbhansali@marvell.com> >> --- >> Changes in v2: New patch to address review comment. >> >> examples/common/neon_common.h | 50 ++++++++++++ >> examples/common/pkt_group.h | 139 >> ++++++++++++++++++++++++++++++++++ >> examples/l3fwd/Makefile | 5 +- >> examples/l3fwd/l3fwd.h | 2 - >> examples/l3fwd/l3fwd_common.h | 129 +------------------------------ >> examples/l3fwd/l3fwd_neon.h | 43 +---------- >> examples/meson.build | 2 +- >> 7 files changed, 198 insertions(+), 172 deletions(-) create mode 100644 >> examples/common/neon_common.h create mode 100644 >> examples/common/pkt_group.h >> >> diff --git a/examples/common/neon_common.h >> b/examples/common/neon_common.h new file mode 100644 index >> 0000000000..f01b5ab6bc >> --- /dev/null >> +++ b/examples/common/neon_common.h >> @@ -0,0 +1,50 @@ >> +/* SPDX-License-Identifier: BSD-3-Clause >> + * Copyright(c) 2016-2018 Intel Corporation. >> + * Copyright(c) 2017-2018 Linaro Limited. >> + * Copyright(C) 2022 Marvell. >> + */ >> + >> +#ifndef _NEON_COMMON_H_ >> +#define _NEON_COMMON_H_ >> + >> +#include "pkt_group.h" >> + >> +/* >> + * Group consecutive packets with the same destination port in bursts of 4. >> + * Suppose we have array of destination ports: >> + * dst_port[] = {a, b, c, d,, e, ... } >> + * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. >> + * We doing 4 comparisons at once and the result is 4 bit mask. >> + * This mask is used as an index into prebuild array of pnum values. >> + */ >> +static inline uint16_t * >> +neon_port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1, >> + uint16x8_t dp2) >> +{ >> + union { >> + uint16_t u16[FWDSTEP + 1]; >> + uint64_t u64; >> + } *pnum = (void *)pn; >> + >> + uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0}; >> + int32_t v; >> + >> + dp1 = vceqq_u16(dp1, dp2); >> + dp1 = vandq_u16(dp1, mask); >> + v = vaddvq_u16(dp1); >> + >> + /* update last port counter. */hh >> + lp[0] += gptbl[v].lpv; >> + rte_compiler_barrier(); >> + >> + /* if dest port value has changed. */ >> + if (v != GRPMSK) { >> + pnum->u64 = gptbl[v].pnum; >> + pnum->u16[FWDSTEP] = 1; >> + lp = pnum->u16 + gptbl[v].idx; >> + } >> + >> + return lp; >> +} Thanks for the effort. As I can see this function: port_groupx4() is nearly identical for all 3 platforms: sse/nenon/altivec (except of course built-in arch-specific instincts). In fact, even comemnts are identical. I wonder can we have something like: examples/common/<arch>/port_group.h and for each arch will have defined port_groupx4(...) ? >> + >> +#endif /* _NEON_COMMON_H_ */ >> diff --git a/examples/common/pkt_group.h b/examples/common/pkt_group.h >> new file mode 100644 index 0000000000..8b26d9380f >> --- /dev/null >> +++ b/examples/common/pkt_group.h >> @@ -0,0 +1,139 @@ >> +/* SPDX-License-Identifier: BSD-3-Clause >> + * Copyright(c) 2016-2018 Intel Corporation. >> + * Copyright(c) 2017-2018 Linaro Limited. >> + * Copyright(C) 2022 Marvell. >> + */ >> + >> +#ifndef _PKT_GROUP_H_ >> +#define _PKT_GROUP_H_ >> + >> +#define FWDSTEP 4 >> + >> +/* >> + * Group consecutive packets with the same destination port into one burst. >> + * To avoid extra latency this is done together with some other packet >> + * processing, but after we made a final decision about packet's destination. >> + * To do this we maintain: >> + * pnum - array of number of consecutive packets with the same dest >> +port for >> + * each packet in the input burst. >> + * lp - pointer to the last updated element in the pnum. >> + * dlp - dest port value lp corresponds to. >> + */ >> + >> +#define GRPSZ (1 << FWDSTEP) >> +#define GRPMSK (GRPSZ - 1) >> + >> +#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ >> + if (likely((dlp) == (dcp)[(idx)])) { \ >> + (lp)[0]++; \ >> + } else { \ >> + (dlp) = (dcp)[idx]; \ >> + (lp) = (pn) + (idx); \ >> + (lp)[0] = 1; \ >> + } \ >> +} while (0) >> + >> +static const struct { >> + uint64_t pnum; /* prebuild 4 values for pnum[]. */ >> + int32_t idx; /* index for new last updated elemnet. */ >> + uint16_t lpv; /* add value to the last updated element. */ } >> +gptbl[GRPSZ] = { >> + { >> + /* 0: a != b, b != c, c != d, d != e */ >> + .pnum = UINT64_C(0x0001000100010001), >> + .idx = 4, >> + .lpv = 0, >> + }, >> + { >> + /* 1: a == b, b != c, c != d, d != e */ >> + .pnum = UINT64_C(0x0001000100010002), >> + .idx = 4, >> + .lpv = 1, >> + }, >> + { >> + /* 2: a != b, b == c, c != d, d != e */ >> + .pnum = UINT64_C(0x0001000100020001), >> + .idx = 4, >> + .lpv = 0, >> + }, >> + { >> + /* 3: a == b, b == c, c != d, d != e */ >> + .pnum = UINT64_C(0x0001000100020003), >> + .idx = 4, >> + .lpv = 2, >> + }, >> + { >> + /* 4: a != b, b != c, c == d, d != e */ >> + .pnum = UINT64_C(0x0001000200010001), >> + .idx = 4, >> + .lpv = 0, >> + }, >> + { >> + /* 5: a == b, b != c, c == d, d != e */ >> + .pnum = UINT64_C(0x0001000200010002), >> + .idx = 4, >> + .lpv = 1, >> + }, >> + { >> + /* 6: a != b, b == c, c == d, d != e */ >> + .pnum = UINT64_C(0x0001000200030001), >> + .idx = 4, >> + .lpv = 0, >> + }, >> + { >> + /* 7: a == b, b == c, c == d, d != e */ >> + .pnum = UINT64_C(0x0001000200030004), >> + .idx = 4, >> + .lpv = 3, >> + }, >> + { >> + /* 8: a != b, b != c, c != d, d == e */ >> + .pnum = UINT64_C(0x0002000100010001), >> + .idx = 3, >> + .lpv = 0, >> + }, >> + { >> + /* 9: a == b, b != c, c != d, d == e */ >> + .pnum = UINT64_C(0x0002000100010002), >> + .idx = 3, >> + .lpv = 1, >> + }, >> + { >> + /* 0xa: a != b, b == c, c != d, d == e */ >> + .pnum = UINT64_C(0x0002000100020001), >> + .idx = 3, >> + .lpv = 0, >> + }, >> + { >> + /* 0xb: a == b, b == c, c != d, d == e */ >> + .pnum = UINT64_C(0x0002000100020003), >> + .idx = 3, >> + .lpv = 2, >> + }, >> + { >> + /* 0xc: a != b, b != c, c == d, d == e */ >> + .pnum = UINT64_C(0x0002000300010001), >> + .idx = 2, >> + .lpv = 0, >> + }, >> + { >> + /* 0xd: a == b, b != c, c == d, d == e */ >> + .pnum = UINT64_C(0x0002000300010002), >> + .idx = 2, >> + .lpv = 1, >> + }, >> + { >> + /* 0xe: a != b, b == c, c == d, d == e */ >> + .pnum = UINT64_C(0x0002000300040001), >> + .idx = 1, >> + .lpv = 0, >> + }, >> + { >> + /* 0xf: a == b, b == c, c == d, d == e */ >> + .pnum = UINT64_C(0x0002000300040005), >> + .idx = 0, >> + .lpv = 4, >> + }, >> +}; >> + >> +#endif /* _PKT_GROUP_H_ */ >> diff --git a/examples/l3fwd/Makefile b/examples/l3fwd/Makefile index >> 8efe6378e2..8dbe85c2e6 100644 >> --- a/examples/l3fwd/Makefile >> +++ b/examples/l3fwd/Makefile >> @@ -22,6 +22,7 @@ shared: build/$(APP)-shared >> static: build/$(APP)-static >> ln -sf $(APP)-static build/$(APP) >> >> +INCLUDES =-I../common >> PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) CFLAGS += -O3 >> $(shell $(PKGCONF) --cflags libdpdk) # Added for 'rte_eth_link_to_str()' >> @@ -38,10 +39,10 @@ endif >> endif >> >> build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build >> - $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED) >> + $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) >> +$(LDFLAGS_SHARED) >> >> build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build >> - $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC) >> + $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) >> +$(LDFLAGS_STATIC) >> >> build: >> @mkdir -p $@ >> diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index >> 8a52c90755..40b5f32a9e 100644 >> --- a/examples/l3fwd/l3fwd.h >> +++ b/examples/l3fwd/l3fwd.h >> @@ -44,8 +44,6 @@ >> /* Used to mark destination port as 'invalid'. */ >> #define BAD_PORT ((uint16_t)-1) >> >> -#define FWDSTEP 4 >> - >> /* replace first 12B of the ethernet header. */ >> #define MASK_ETH 0x3f >> >> diff --git a/examples/l3fwd/l3fwd_common.h >> b/examples/l3fwd/l3fwd_common.h index 8e4c27218f..224b1c08e8 100644 >> --- a/examples/l3fwd/l3fwd_common.h >> +++ b/examples/l3fwd/l3fwd_common.h >> @@ -7,6 +7,8 @@ >> #ifndef _L3FWD_COMMON_H_ >> #define _L3FWD_COMMON_H_ >> >> +#include "pkt_group.h" >> + >> #ifdef DO_RFC_1812_CHECKS >> >> #define IPV4_MIN_VER_IHL 0x45 >> @@ -50,133 +52,6 @@ rfc1812_process(struct rte_ipv4_hdr *ipv4_hdr, uint16_t >> *dp, uint32_t ptype) >> #define rfc1812_process(mb, dp, ptype) do { } while (0) >> #endif /* DO_RFC_1812_CHECKS */ >> >> -/* >> - * We group consecutive packets with the same destination port into one burst. >> - * To avoid extra latency this is done together with some other packet >> - * processing, but after we made a final decision about packet's destination. >> - * To do this we maintain: >> - * pnum - array of number of consecutive packets with the same dest port for >> - * each packet in the input burst. >> - * lp - pointer to the last updated element in the pnum. >> - * dlp - dest port value lp corresponds to. >> - */ >> - >> -#define GRPSZ (1 << FWDSTEP) >> -#define GRPMSK (GRPSZ - 1) >> - >> -#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ >> - if (likely((dlp) == (dcp)[(idx)])) { \ >> - (lp)[0]++; \ >> - } else { \ >> - (dlp) = (dcp)[idx]; \ >> - (lp) = (pn) + (idx); \ >> - (lp)[0] = 1; \ >> - } \ >> -} while (0) >> - >> -static const struct { >> - uint64_t pnum; /* prebuild 4 values for pnum[]. */ >> - int32_t idx; /* index for new last updated element. */ >> - uint16_t lpv; /* add value to the last updated element. */ >> -} gptbl[GRPSZ] = { >> - { >> - /* 0: a != b, b != c, c != d, d != e */ >> - .pnum = UINT64_C(0x0001000100010001), >> - .idx = 4, >> - .lpv = 0, >> - }, >> - { >> - /* 1: a == b, b != c, c != d, d != e */ >> - .pnum = UINT64_C(0x0001000100010002), >> - .idx = 4, >> - .lpv = 1, >> - }, >> - { >> - /* 2: a != b, b == c, c != d, d != e */ >> - .pnum = UINT64_C(0x0001000100020001), >> - .idx = 4, >> - .lpv = 0, >> - }, >> - { >> - /* 3: a == b, b == c, c != d, d != e */ >> - .pnum = UINT64_C(0x0001000100020003), >> - .idx = 4, >> - .lpv = 2, >> - }, >> - { >> - /* 4: a != b, b != c, c == d, d != e */ >> - .pnum = UINT64_C(0x0001000200010001), >> - .idx = 4, >> - .lpv = 0, >> - }, >> - { >> - /* 5: a == b, b != c, c == d, d != e */ >> - .pnum = UINT64_C(0x0001000200010002), >> - .idx = 4, >> - .lpv = 1, >> - }, >> - { >> - /* 6: a != b, b == c, c == d, d != e */ >> - .pnum = UINT64_C(0x0001000200030001), >> - .idx = 4, >> - .lpv = 0, >> - }, >> - { >> - /* 7: a == b, b == c, c == d, d != e */ >> - .pnum = UINT64_C(0x0001000200030004), >> - .idx = 4, >> - .lpv = 3, >> - }, >> - { >> - /* 8: a != b, b != c, c != d, d == e */ >> - .pnum = UINT64_C(0x0002000100010001), >> - .idx = 3, >> - .lpv = 0, >> - }, >> - { >> - /* 9: a == b, b != c, c != d, d == e */ >> - .pnum = UINT64_C(0x0002000100010002), >> - .idx = 3, >> - .lpv = 1, >> - }, >> - { >> - /* 0xa: a != b, b == c, c != d, d == e */ >> - .pnum = UINT64_C(0x0002000100020001), >> - .idx = 3, >> - .lpv = 0, >> - }, >> - { >> - /* 0xb: a == b, b == c, c != d, d == e */ >> - .pnum = UINT64_C(0x0002000100020003), >> - .idx = 3, >> - .lpv = 2, >> - }, >> - { >> - /* 0xc: a != b, b != c, c == d, d == e */ >> - .pnum = UINT64_C(0x0002000300010001), >> - .idx = 2, >> - .lpv = 0, >> - }, >> - { >> - /* 0xd: a == b, b != c, c == d, d == e */ >> - .pnum = UINT64_C(0x0002000300010002), >> - .idx = 2, >> - .lpv = 1, >> - }, >> - { >> - /* 0xe: a != b, b == c, c == d, d == e */ >> - .pnum = UINT64_C(0x0002000300040001), >> - .idx = 1, >> - .lpv = 0, >> - }, >> - { >> - /* 0xf: a == b, b == c, c == d, d == e */ >> - .pnum = UINT64_C(0x0002000300040005), >> - .idx = 0, >> - .lpv = 4, >> - }, >> -}; >> - >> static __rte_always_inline void >> send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[], >> uint32_t num) >> diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h >> index e3d33a5229..5fa765b640 100644 >> --- a/examples/l3fwd/l3fwd_neon.h >> +++ b/examples/l3fwd/l3fwd_neon.h >> @@ -7,6 +7,7 @@ >> #define _L3FWD_NEON_H_ >> >> #include "l3fwd.h" >> +#include "neon_common.h" >> #include "l3fwd_common.h" >> >> /* >> @@ -62,44 +63,6 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], >> uint16_t dst_port[FWDSTEP]) >> &dst_port[3], pkt[3]->packet_type); >> } >> >> -/* >> - * Group consecutive packets with the same destination port in bursts of 4. >> - * Suppose we have array of destination ports: >> - * dst_port[] = {a, b, c, d,, e, ... } >> - * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. >> - * We doing 4 comparisons at once and the result is 4 bit mask. >> - * This mask is used as an index into prebuild array of pnum values. >> - */ >> -static inline uint16_t * >> -port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1, >> - uint16x8_t dp2) >> -{ >> - union { >> - uint16_t u16[FWDSTEP + 1]; >> - uint64_t u64; >> - } *pnum = (void *)pn; >> - >> - int32_t v; >> - uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0}; >> - >> - dp1 = vceqq_u16(dp1, dp2); >> - dp1 = vandq_u16(dp1, mask); >> - v = vaddvq_u16(dp1); >> - >> - /* update last port counter. */ >> - lp[0] += gptbl[v].lpv; >> - rte_compiler_barrier(); >> - >> - /* if dest port value has changed. */ >> - if (v != GRPMSK) { >> - pnum->u64 = gptbl[v].pnum; >> - pnum->u16[FWDSTEP] = 1; >> - lp = pnum->u16 + gptbl[v].idx; >> - } >> - >> - return lp; >> -} >> - >> /** >> * Process one packet: >> * Update source and destination MAC addresses in the ethernet header. >> @@ -161,7 +124,7 @@ send_packets_multi(struct lcore_conf *qconf, struct >> rte_mbuf **pkts_burst, >> * <d[j-3], d[j-2], d[j-1], d[j], ... > >> */ >> dp2 = vld1q_u16(&dst_port[j - FWDSTEP + 1]); >> - lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); >> + lp = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, >> dp2); >> >> /* >> * dp1: >> @@ -175,7 +138,7 @@ send_packets_multi(struct lcore_conf *qconf, struct >> rte_mbuf **pkts_burst, >> */ >> dp2 = vextq_u16(dp1, dp1, 1); >> dp2 = vsetq_lane_u16(vgetq_lane_u16(dp2, 2), dp2, 3); >> - lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); >> + lp = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); >> >> /* >> * remove values added by the last repeated diff --git >> a/examples/meson.build b/examples/meson.build index >> 78de0e1f37..81e93799f2 100644 >> --- a/examples/meson.build >> +++ b/examples/meson.build >> @@ -97,7 +97,7 @@ foreach example: examples >> ldflags = default_ldflags >> >> ext_deps = [] >> - includes = [include_directories(example)] >> + includes = [include_directories(example, 'common')] >> deps = ['eal', 'mempool', 'net', 'mbuf', 'ethdev', 'cmdline'] >> subdir(example) >> >> -- >> 2.25.1 >
> > > This will make the packet grouping function common, so > > that other examples can utilize as per need. > > > > Signed-off-by: Rahul Bhansali <rbhansali@marvell.com> > > --- Acked-by: Akhil Goyal <gakhil@marvell.com>
> -----Original Message----- > From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru> > Sent: Tuesday, June 21, 2022 4:43 AM > To: Rahul Bhansali <rbhansali@marvell.com>; dev@dpdk.org; Ruifeng Wang > <ruifeng.wang@arm.com> > Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com> > Subject: [EXT] Re: [PATCH v2 1/2] examples/l3fwd: common packet group > functionality > > External Email > > ---------------------------------------------------------------------- > 17/06/2022 08:50, Rahul Bhansali пишет: > > CC: Konstantin Ananyev > > > >> -----Original Message----- > >> From: Rahul Bhansali <rbhansali@marvell.com> > >> Sent: Friday, June 17, 2022 1:13 PM > >> To: dev@dpdk.org; Ruifeng Wang <ruifeng.wang@arm.com> > >> Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Rahul Bhansali > >> <rbhansali@marvell.com> > >> Subject: [PATCH v2 1/2] examples/l3fwd: common packet group > >> functionality > >> > >> This will make the packet grouping function common, so that other > >> examples can utilize as per need. > >> > >> Signed-off-by: Rahul Bhansali <rbhansali@marvell.com> > >> --- > >> Changes in v2: New patch to address review comment. > >> > >> examples/common/neon_common.h | 50 ++++++++++++ > >> examples/common/pkt_group.h | 139 > >> ++++++++++++++++++++++++++++++++++ > >> examples/l3fwd/Makefile | 5 +- > >> examples/l3fwd/l3fwd.h | 2 - > >> examples/l3fwd/l3fwd_common.h | 129 +------------------------------ > >> examples/l3fwd/l3fwd_neon.h | 43 +---------- > >> examples/meson.build | 2 +- > >> 7 files changed, 198 insertions(+), 172 deletions(-) create mode > >> 100644 examples/common/neon_common.h create mode 100644 > >> examples/common/pkt_group.h > >> > >> diff --git a/examples/common/neon_common.h > >> b/examples/common/neon_common.h new file mode 100644 index > >> 0000000000..f01b5ab6bc > >> --- /dev/null > >> +++ b/examples/common/neon_common.h > >> @@ -0,0 +1,50 @@ > >> +/* SPDX-License-Identifier: BSD-3-Clause > >> + * Copyright(c) 2016-2018 Intel Corporation. > >> + * Copyright(c) 2017-2018 Linaro Limited. > >> + * Copyright(C) 2022 Marvell. > >> + */ > >> + > >> +#ifndef _NEON_COMMON_H_ > >> +#define _NEON_COMMON_H_ > >> + > >> +#include "pkt_group.h" > >> + > >> +/* > >> + * Group consecutive packets with the same destination port in bursts of 4. > >> + * Suppose we have array of destination ports: > >> + * dst_port[] = {a, b, c, d,, e, ... } > >> + * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. > >> + * We doing 4 comparisons at once and the result is 4 bit mask. > >> + * This mask is used as an index into prebuild array of pnum values. > >> + */ > >> +static inline uint16_t * > >> +neon_port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t > dp1, > >> + uint16x8_t dp2) > >> +{ > >> + union { > >> + uint16_t u16[FWDSTEP + 1]; > >> + uint64_t u64; > >> + } *pnum = (void *)pn; > >> + > >> + uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0}; > >> + int32_t v; > >> + > >> + dp1 = vceqq_u16(dp1, dp2); > >> + dp1 = vandq_u16(dp1, mask); > >> + v = vaddvq_u16(dp1); > >> + > >> + /* update last port counter. */hh > >> + lp[0] += gptbl[v].lpv; > >> + rte_compiler_barrier(); > >> + > >> + /* if dest port value has changed. */ > >> + if (v != GRPMSK) { > >> + pnum->u64 = gptbl[v].pnum; > >> + pnum->u16[FWDSTEP] = 1; > >> + lp = pnum->u16 + gptbl[v].idx; > >> + } > >> + > >> + return lp; > >> +} > > Thanks for the effort. > As I can see this function: port_groupx4() is nearly identical for all 3 > platforms: sse/nenon/altivec (except of course built-in arch-specific instincts). > In fact, even comemnts are identical. > I wonder can we have something like: > examples/common/<arch>/port_group.h > and for each arch will have defined port_groupx4(...) ? > Yes, It’s a good point. I was thinking to have arch in file name itself. But we can have arch specific directory and have different header files. Do you want me to make changes for all 3 sse/neon/altivec or just neon ? I can check compilation for all but functionality/perf validate for Neon only. > >> + > >> +#endif /* _NEON_COMMON_H_ */ > >> diff --git a/examples/common/pkt_group.h > >> b/examples/common/pkt_group.h new file mode 100644 index > >> 0000000000..8b26d9380f > >> --- /dev/null > >> +++ b/examples/common/pkt_group.h > >> @@ -0,0 +1,139 @@ > >> +/* SPDX-License-Identifier: BSD-3-Clause > >> + * Copyright(c) 2016-2018 Intel Corporation. > >> + * Copyright(c) 2017-2018 Linaro Limited. > >> + * Copyright(C) 2022 Marvell. > >> + */ > >> + > >> +#ifndef _PKT_GROUP_H_ > >> +#define _PKT_GROUP_H_ > >> + > >> +#define FWDSTEP 4 > >> + > >> +/* > >> + * Group consecutive packets with the same destination port into one burst. > >> + * To avoid extra latency this is done together with some other > >> +packet > >> + * processing, but after we made a final decision about packet's destination. > >> + * To do this we maintain: > >> + * pnum - array of number of consecutive packets with the same dest > >> +port for > >> + * each packet in the input burst. > >> + * lp - pointer to the last updated element in the pnum. > >> + * dlp - dest port value lp corresponds to. > >> + */ > >> + > >> +#define GRPSZ (1 << FWDSTEP) > >> +#define GRPMSK (GRPSZ - 1) > >> + > >> +#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ > >> + if (likely((dlp) == (dcp)[(idx)])) { \ > >> + (lp)[0]++; \ > >> + } else { \ > >> + (dlp) = (dcp)[idx]; \ > >> + (lp) = (pn) + (idx); \ > >> + (lp)[0] = 1; \ > >> + } \ > >> +} while (0) > >> + > >> +static const struct { > >> + uint64_t pnum; /* prebuild 4 values for pnum[]. */ > >> + int32_t idx; /* index for new last updated elemnet. */ > >> + uint16_t lpv; /* add value to the last updated element. */ } > >> +gptbl[GRPSZ] = { > >> + { > >> + /* 0: a != b, b != c, c != d, d != e */ > >> + .pnum = UINT64_C(0x0001000100010001), > >> + .idx = 4, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 1: a == b, b != c, c != d, d != e */ > >> + .pnum = UINT64_C(0x0001000100010002), > >> + .idx = 4, > >> + .lpv = 1, > >> + }, > >> + { > >> + /* 2: a != b, b == c, c != d, d != e */ > >> + .pnum = UINT64_C(0x0001000100020001), > >> + .idx = 4, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 3: a == b, b == c, c != d, d != e */ > >> + .pnum = UINT64_C(0x0001000100020003), > >> + .idx = 4, > >> + .lpv = 2, > >> + }, > >> + { > >> + /* 4: a != b, b != c, c == d, d != e */ > >> + .pnum = UINT64_C(0x0001000200010001), > >> + .idx = 4, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 5: a == b, b != c, c == d, d != e */ > >> + .pnum = UINT64_C(0x0001000200010002), > >> + .idx = 4, > >> + .lpv = 1, > >> + }, > >> + { > >> + /* 6: a != b, b == c, c == d, d != e */ > >> + .pnum = UINT64_C(0x0001000200030001), > >> + .idx = 4, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 7: a == b, b == c, c == d, d != e */ > >> + .pnum = UINT64_C(0x0001000200030004), > >> + .idx = 4, > >> + .lpv = 3, > >> + }, > >> + { > >> + /* 8: a != b, b != c, c != d, d == e */ > >> + .pnum = UINT64_C(0x0002000100010001), > >> + .idx = 3, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 9: a == b, b != c, c != d, d == e */ > >> + .pnum = UINT64_C(0x0002000100010002), > >> + .idx = 3, > >> + .lpv = 1, > >> + }, > >> + { > >> + /* 0xa: a != b, b == c, c != d, d == e */ > >> + .pnum = UINT64_C(0x0002000100020001), > >> + .idx = 3, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 0xb: a == b, b == c, c != d, d == e */ > >> + .pnum = UINT64_C(0x0002000100020003), > >> + .idx = 3, > >> + .lpv = 2, > >> + }, > >> + { > >> + /* 0xc: a != b, b != c, c == d, d == e */ > >> + .pnum = UINT64_C(0x0002000300010001), > >> + .idx = 2, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 0xd: a == b, b != c, c == d, d == e */ > >> + .pnum = UINT64_C(0x0002000300010002), > >> + .idx = 2, > >> + .lpv = 1, > >> + }, > >> + { > >> + /* 0xe: a != b, b == c, c == d, d == e */ > >> + .pnum = UINT64_C(0x0002000300040001), > >> + .idx = 1, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 0xf: a == b, b == c, c == d, d == e */ > >> + .pnum = UINT64_C(0x0002000300040005), > >> + .idx = 0, > >> + .lpv = 4, > >> + }, > >> +}; > >> + > >> +#endif /* _PKT_GROUP_H_ */ > >> diff --git a/examples/l3fwd/Makefile b/examples/l3fwd/Makefile index > >> 8efe6378e2..8dbe85c2e6 100644 > >> --- a/examples/l3fwd/Makefile > >> +++ b/examples/l3fwd/Makefile > >> @@ -22,6 +22,7 @@ shared: build/$(APP)-shared > >> static: build/$(APP)-static > >> ln -sf $(APP)-static build/$(APP) > >> > >> +INCLUDES =-I../common > >> PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) CFLAGS > >> += -O3 $(shell $(PKGCONF) --cflags libdpdk) # Added for > 'rte_eth_link_to_str()' > >> @@ -38,10 +39,10 @@ endif > >> endif > >> > >> build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build > >> - $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED) > >> + $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) > >> +$(LDFLAGS_SHARED) > >> > >> build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build > >> - $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC) > >> + $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) > >> +$(LDFLAGS_STATIC) > >> > >> build: > >> @mkdir -p $@ > >> diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index > >> 8a52c90755..40b5f32a9e 100644 > >> --- a/examples/l3fwd/l3fwd.h > >> +++ b/examples/l3fwd/l3fwd.h > >> @@ -44,8 +44,6 @@ > >> /* Used to mark destination port as 'invalid'. */ > >> #define BAD_PORT ((uint16_t)-1) > >> > >> -#define FWDSTEP 4 > >> - > >> /* replace first 12B of the ethernet header. */ > >> #define MASK_ETH 0x3f > >> > >> diff --git a/examples/l3fwd/l3fwd_common.h > >> b/examples/l3fwd/l3fwd_common.h index 8e4c27218f..224b1c08e8 100644 > >> --- a/examples/l3fwd/l3fwd_common.h > >> +++ b/examples/l3fwd/l3fwd_common.h > >> @@ -7,6 +7,8 @@ > >> #ifndef _L3FWD_COMMON_H_ > >> #define _L3FWD_COMMON_H_ > >> > >> +#include "pkt_group.h" > >> + > >> #ifdef DO_RFC_1812_CHECKS > >> > >> #define IPV4_MIN_VER_IHL 0x45 > >> @@ -50,133 +52,6 @@ rfc1812_process(struct rte_ipv4_hdr *ipv4_hdr, > >> uint16_t *dp, uint32_t ptype) > >> #define rfc1812_process(mb, dp, ptype) do { } while (0) > >> #endif /* DO_RFC_1812_CHECKS */ > >> > >> -/* > >> - * We group consecutive packets with the same destination port into one > burst. > >> - * To avoid extra latency this is done together with some other > >> packet > >> - * processing, but after we made a final decision about packet's destination. > >> - * To do this we maintain: > >> - * pnum - array of number of consecutive packets with the same dest > >> port for > >> - * each packet in the input burst. > >> - * lp - pointer to the last updated element in the pnum. > >> - * dlp - dest port value lp corresponds to. > >> - */ > >> - > >> -#define GRPSZ (1 << FWDSTEP) > >> -#define GRPMSK (GRPSZ - 1) > >> - > >> -#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ > >> - if (likely((dlp) == (dcp)[(idx)])) { \ > >> - (lp)[0]++; \ > >> - } else { \ > >> - (dlp) = (dcp)[idx]; \ > >> - (lp) = (pn) + (idx); \ > >> - (lp)[0] = 1; \ > >> - } \ > >> -} while (0) > >> - > >> -static const struct { > >> - uint64_t pnum; /* prebuild 4 values for pnum[]. */ > >> - int32_t idx; /* index for new last updated element. */ > >> - uint16_t lpv; /* add value to the last updated element. */ > >> -} gptbl[GRPSZ] = { > >> - { > >> - /* 0: a != b, b != c, c != d, d != e */ > >> - .pnum = UINT64_C(0x0001000100010001), > >> - .idx = 4, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 1: a == b, b != c, c != d, d != e */ > >> - .pnum = UINT64_C(0x0001000100010002), > >> - .idx = 4, > >> - .lpv = 1, > >> - }, > >> - { > >> - /* 2: a != b, b == c, c != d, d != e */ > >> - .pnum = UINT64_C(0x0001000100020001), > >> - .idx = 4, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 3: a == b, b == c, c != d, d != e */ > >> - .pnum = UINT64_C(0x0001000100020003), > >> - .idx = 4, > >> - .lpv = 2, > >> - }, > >> - { > >> - /* 4: a != b, b != c, c == d, d != e */ > >> - .pnum = UINT64_C(0x0001000200010001), > >> - .idx = 4, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 5: a == b, b != c, c == d, d != e */ > >> - .pnum = UINT64_C(0x0001000200010002), > >> - .idx = 4, > >> - .lpv = 1, > >> - }, > >> - { > >> - /* 6: a != b, b == c, c == d, d != e */ > >> - .pnum = UINT64_C(0x0001000200030001), > >> - .idx = 4, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 7: a == b, b == c, c == d, d != e */ > >> - .pnum = UINT64_C(0x0001000200030004), > >> - .idx = 4, > >> - .lpv = 3, > >> - }, > >> - { > >> - /* 8: a != b, b != c, c != d, d == e */ > >> - .pnum = UINT64_C(0x0002000100010001), > >> - .idx = 3, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 9: a == b, b != c, c != d, d == e */ > >> - .pnum = UINT64_C(0x0002000100010002), > >> - .idx = 3, > >> - .lpv = 1, > >> - }, > >> - { > >> - /* 0xa: a != b, b == c, c != d, d == e */ > >> - .pnum = UINT64_C(0x0002000100020001), > >> - .idx = 3, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 0xb: a == b, b == c, c != d, d == e */ > >> - .pnum = UINT64_C(0x0002000100020003), > >> - .idx = 3, > >> - .lpv = 2, > >> - }, > >> - { > >> - /* 0xc: a != b, b != c, c == d, d == e */ > >> - .pnum = UINT64_C(0x0002000300010001), > >> - .idx = 2, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 0xd: a == b, b != c, c == d, d == e */ > >> - .pnum = UINT64_C(0x0002000300010002), > >> - .idx = 2, > >> - .lpv = 1, > >> - }, > >> - { > >> - /* 0xe: a != b, b == c, c == d, d == e */ > >> - .pnum = UINT64_C(0x0002000300040001), > >> - .idx = 1, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 0xf: a == b, b == c, c == d, d == e */ > >> - .pnum = UINT64_C(0x0002000300040005), > >> - .idx = 0, > >> - .lpv = 4, > >> - }, > >> -}; > >> - > >> static __rte_always_inline void > >> send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf > *m[], > >> uint32_t num) > >> diff --git a/examples/l3fwd/l3fwd_neon.h > >> b/examples/l3fwd/l3fwd_neon.h index e3d33a5229..5fa765b640 100644 > >> --- a/examples/l3fwd/l3fwd_neon.h > >> +++ b/examples/l3fwd/l3fwd_neon.h > >> @@ -7,6 +7,7 @@ > >> #define _L3FWD_NEON_H_ > >> > >> #include "l3fwd.h" > >> +#include "neon_common.h" > >> #include "l3fwd_common.h" > >> > >> /* > >> @@ -62,44 +63,6 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], > >> uint16_t dst_port[FWDSTEP]) > >> &dst_port[3], pkt[3]->packet_type); > >> } > >> > >> -/* > >> - * Group consecutive packets with the same destination port in bursts of 4. > >> - * Suppose we have array of destination ports: > >> - * dst_port[] = {a, b, c, d,, e, ... } > >> - * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. > >> - * We doing 4 comparisons at once and the result is 4 bit mask. > >> - * This mask is used as an index into prebuild array of pnum values. > >> - */ > >> -static inline uint16_t * > >> -port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1, > >> - uint16x8_t dp2) > >> -{ > >> - union { > >> - uint16_t u16[FWDSTEP + 1]; > >> - uint64_t u64; > >> - } *pnum = (void *)pn; > >> - > >> - int32_t v; > >> - uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0}; > >> - > >> - dp1 = vceqq_u16(dp1, dp2); > >> - dp1 = vandq_u16(dp1, mask); > >> - v = vaddvq_u16(dp1); > >> - > >> - /* update last port counter. */ > >> - lp[0] += gptbl[v].lpv; > >> - rte_compiler_barrier(); > >> - > >> - /* if dest port value has changed. */ > >> - if (v != GRPMSK) { > >> - pnum->u64 = gptbl[v].pnum; > >> - pnum->u16[FWDSTEP] = 1; > >> - lp = pnum->u16 + gptbl[v].idx; > >> - } > >> - > >> - return lp; > >> -} > >> - > >> /** > >> * Process one packet: > >> * Update source and destination MAC addresses in the ethernet header. > >> @@ -161,7 +124,7 @@ send_packets_multi(struct lcore_conf *qconf, > >> struct rte_mbuf **pkts_burst, > >> * <d[j-3], d[j-2], d[j-1], d[j], ... > > >> */ > >> dp2 = vld1q_u16(&dst_port[j - FWDSTEP + 1]); > >> - lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); > >> + lp = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, > >> dp2); > >> > >> /* > >> * dp1: > >> @@ -175,7 +138,7 @@ send_packets_multi(struct lcore_conf *qconf, > >> struct rte_mbuf **pkts_burst, > >> */ > >> dp2 = vextq_u16(dp1, dp1, 1); > >> dp2 = vsetq_lane_u16(vgetq_lane_u16(dp2, 2), dp2, 3); > >> - lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); > >> + lp = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); > >> > >> /* > >> * remove values added by the last repeated diff --git > >> a/examples/meson.build b/examples/meson.build index > >> 78de0e1f37..81e93799f2 100644 > >> --- a/examples/meson.build > >> +++ b/examples/meson.build > >> @@ -97,7 +97,7 @@ foreach example: examples > >> ldflags = default_ldflags > >> > >> ext_deps = [] > >> - includes = [include_directories(example)] > >> + includes = [include_directories(example, 'common')] > >> deps = ['eal', 'mempool', 'net', 'mbuf', 'ethdev', 'cmdline'] > >> subdir(example) > >> > >> -- > >> 2.25.1 > >
21/06/2022 17:50, Rahul Bhansali пишет: > > >> -----Original Message----- >> From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru> >> Sent: Tuesday, June 21, 2022 4:43 AM >> To: Rahul Bhansali <rbhansali@marvell.com>; dev@dpdk.org; Ruifeng Wang >> <ruifeng.wang@arm.com> >> Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com> >> Subject: [EXT] Re: [PATCH v2 1/2] examples/l3fwd: common packet group >> functionality >> >> External Email >> >> ---------------------------------------------------------------------- >> 17/06/2022 08:50, Rahul Bhansali пишет: >>> CC: Konstantin Ananyev >>> >>>> -----Original Message----- >>>> From: Rahul Bhansali <rbhansali@marvell.com> >>>> Sent: Friday, June 17, 2022 1:13 PM >>>> To: dev@dpdk.org; Ruifeng Wang <ruifeng.wang@arm.com> >>>> Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Rahul Bhansali >>>> <rbhansali@marvell.com> >>>> Subject: [PATCH v2 1/2] examples/l3fwd: common packet group >>>> functionality >>>> >>>> This will make the packet grouping function common, so that other >>>> examples can utilize as per need. >>>> >>>> Signed-off-by: Rahul Bhansali <rbhansali@marvell.com> >>>> --- >>>> Changes in v2: New patch to address review comment. >>>> >>>> examples/common/neon_common.h | 50 ++++++++++++ >>>> examples/common/pkt_group.h | 139 >>>> ++++++++++++++++++++++++++++++++++ >>>> examples/l3fwd/Makefile | 5 +- >>>> examples/l3fwd/l3fwd.h | 2 - >>>> examples/l3fwd/l3fwd_common.h | 129 +------------------------------ >>>> examples/l3fwd/l3fwd_neon.h | 43 +---------- >>>> examples/meson.build | 2 +- >>>> 7 files changed, 198 insertions(+), 172 deletions(-) create mode >>>> 100644 examples/common/neon_common.h create mode 100644 >>>> examples/common/pkt_group.h >>>> >>>> diff --git a/examples/common/neon_common.h >>>> b/examples/common/neon_common.h new file mode 100644 index >>>> 0000000000..f01b5ab6bc >>>> --- /dev/null >>>> +++ b/examples/common/neon_common.h >>>> @@ -0,0 +1,50 @@ >>>> +/* SPDX-License-Identifier: BSD-3-Clause >>>> + * Copyright(c) 2016-2018 Intel Corporation. >>>> + * Copyright(c) 2017-2018 Linaro Limited. >>>> + * Copyright(C) 2022 Marvell. >>>> + */ >>>> + >>>> +#ifndef _NEON_COMMON_H_ >>>> +#define _NEON_COMMON_H_ >>>> + >>>> +#include "pkt_group.h" >>>> + >>>> +/* >>>> + * Group consecutive packets with the same destination port in bursts of 4. >>>> + * Suppose we have array of destination ports: >>>> + * dst_port[] = {a, b, c, d,, e, ... } >>>> + * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. >>>> + * We doing 4 comparisons at once and the result is 4 bit mask. >>>> + * This mask is used as an index into prebuild array of pnum values. >>>> + */ >>>> +static inline uint16_t * >>>> +neon_port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t >> dp1, >>>> + uint16x8_t dp2) >>>> +{ >>>> + union { >>>> + uint16_t u16[FWDSTEP + 1]; >>>> + uint64_t u64; >>>> + } *pnum = (void *)pn; >>>> + >>>> + uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0}; >>>> + int32_t v; >>>> + >>>> + dp1 = vceqq_u16(dp1, dp2); >>>> + dp1 = vandq_u16(dp1, mask); >>>> + v = vaddvq_u16(dp1); >>>> + >>>> + /* update last port counter. */hh >>>> + lp[0] += gptbl[v].lpv; >>>> + rte_compiler_barrier(); >>>> + >>>> + /* if dest port value has changed. */ >>>> + if (v != GRPMSK) { >>>> + pnum->u64 = gptbl[v].pnum; >>>> + pnum->u16[FWDSTEP] = 1; >>>> + lp = pnum->u16 + gptbl[v].idx; >>>> + } >>>> + >>>> + return lp; >>>> +} >> >> Thanks for the effort. >> As I can see this function: port_groupx4() is nearly identical for all 3 >> platforms: sse/nenon/altivec (except of course built-in arch-specific instincts). >> In fact, even comemnts are identical. >> I wonder can we have something like: >> examples/common/<arch>/port_group.h >> and for each arch will have defined port_groupx4(...) ? >> > Yes, It’s a good point. I was thinking to have arch in file name itself. But we can have arch specific directory and have different header files. > Do you want me to make changes for all 3 sse/neon/altivec or just neon ? My thought was to move headers for all archs. > I can check compilation for all but functionality/perf validate for Neon only. I can do quick functional test for x86. Plus I think l3fwd is part of release cycle testing anyway. Thanks Konstantin >>>> + >>>> +#endif /* _NEON_COMMON_H_ */ >>>> diff --git a/examples/common/pkt_group.h >>>> b/examples/common/pkt_group.h new file mode 100644 index >>>> 0000000000..8b26d9380f >>>> --- /dev/null >>>> +++ b/examples/common/pkt_group.h >>>> @@ -0,0 +1,139 @@ >>>> +/* SPDX-License-Identifier: BSD-3-Clause >>>> + * Copyright(c) 2016-2018 Intel Corporation. >>>> + * Copyright(c) 2017-2018 Linaro Limited. >>>> + * Copyright(C) 2022 Marvell. >>>> + */ >>>> + >>>> +#ifndef _PKT_GROUP_H_ >>>> +#define _PKT_GROUP_H_ >>>> + >>>> +#define FWDSTEP 4 >>>> + >>>> +/* >>>> + * Group consecutive packets with the same destination port into one burst. >>>> + * To avoid extra latency this is done together with some other >>>> +packet >>>> + * processing, but after we made a final decision about packet's destination. >>>> + * To do this we maintain: >>>> + * pnum - array of number of consecutive packets with the same dest >>>> +port for >>>> + * each packet in the input burst. >>>> + * lp - pointer to the last updated element in the pnum. >>>> + * dlp - dest port value lp corresponds to. >>>> + */ >>>> + >>>> +#define GRPSZ (1 << FWDSTEP) >>>> +#define GRPMSK (GRPSZ - 1) >>>> + >>>> +#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ >>>> + if (likely((dlp) == (dcp)[(idx)])) { \ >>>> + (lp)[0]++; \ >>>> + } else { \ >>>> + (dlp) = (dcp)[idx]; \ >>>> + (lp) = (pn) + (idx); \ >>>> + (lp)[0] = 1; \ >>>> + } \ >>>> +} while (0) >>>> + >>>> +static const struct { >>>> + uint64_t pnum; /* prebuild 4 values for pnum[]. */ >>>> + int32_t idx; /* index for new last updated elemnet. */ >>>> + uint16_t lpv; /* add value to the last updated element. */ } >>>> +gptbl[GRPSZ] = { >>>> + { >>>> + /* 0: a != b, b != c, c != d, d != e */ >>>> + .pnum = UINT64_C(0x0001000100010001), >>>> + .idx = 4, >>>> + .lpv = 0, >>>> + }, >>>> + { >>>> + /* 1: a == b, b != c, c != d, d != e */ >>>> + .pnum = UINT64_C(0x0001000100010002), >>>> + .idx = 4, >>>> + .lpv = 1, >>>> + }, >>>> + { >>>> + /* 2: a != b, b == c, c != d, d != e */ >>>> + .pnum = UINT64_C(0x0001000100020001), >>>> + .idx = 4, >>>> + .lpv = 0, >>>> + }, >>>> + { >>>> + /* 3: a == b, b == c, c != d, d != e */ >>>> + .pnum = UINT64_C(0x0001000100020003), >>>> + .idx = 4, >>>> + .lpv = 2, >>>> + }, >>>> + { >>>> + /* 4: a != b, b != c, c == d, d != e */ >>>> + .pnum = UINT64_C(0x0001000200010001), >>>> + .idx = 4, >>>> + .lpv = 0, >>>> + }, >>>> + { >>>> + /* 5: a == b, b != c, c == d, d != e */ >>>> + .pnum = UINT64_C(0x0001000200010002), >>>> + .idx = 4, >>>> + .lpv = 1, >>>> + }, >>>> + { >>>> + /* 6: a != b, b == c, c == d, d != e */ >>>> + .pnum = UINT64_C(0x0001000200030001), >>>> + .idx = 4, >>>> + .lpv = 0, >>>> + }, >>>> + { >>>> + /* 7: a == b, b == c, c == d, d != e */ >>>> + .pnum = UINT64_C(0x0001000200030004), >>>> + .idx = 4, >>>> + .lpv = 3, >>>> + }, >>>> + { >>>> + /* 8: a != b, b != c, c != d, d == e */ >>>> + .pnum = UINT64_C(0x0002000100010001), >>>> + .idx = 3, >>>> + .lpv = 0, >>>> + }, >>>> + { >>>> + /* 9: a == b, b != c, c != d, d == e */ >>>> + .pnum = UINT64_C(0x0002000100010002), >>>> + .idx = 3, >>>> + .lpv = 1, >>>> + }, >>>> + { >>>> + /* 0xa: a != b, b == c, c != d, d == e */ >>>> + .pnum = UINT64_C(0x0002000100020001), >>>> + .idx = 3, >>>> + .lpv = 0, >>>> + }, >>>> + { >>>> + /* 0xb: a == b, b == c, c != d, d == e */ >>>> + .pnum = UINT64_C(0x0002000100020003), >>>> + .idx = 3, >>>> + .lpv = 2, >>>> + }, >>>> + { >>>> + /* 0xc: a != b, b != c, c == d, d == e */ >>>> + .pnum = UINT64_C(0x0002000300010001), >>>> + .idx = 2, >>>> + .lpv = 0, >>>> + }, >>>> + { >>>> + /* 0xd: a == b, b != c, c == d, d == e */ >>>> + .pnum = UINT64_C(0x0002000300010002), >>>> + .idx = 2, >>>> + .lpv = 1, >>>> + }, >>>> + { >>>> + /* 0xe: a != b, b == c, c == d, d == e */ >>>> + .pnum = UINT64_C(0x0002000300040001), >>>> + .idx = 1, >>>> + .lpv = 0, >>>> + }, >>>> + { >>>> + /* 0xf: a == b, b == c, c == d, d == e */ >>>> + .pnum = UINT64_C(0x0002000300040005), >>>> + .idx = 0, >>>> + .lpv = 4, >>>> + }, >>>> +}; >>>> + >>>> +#endif /* _PKT_GROUP_H_ */ >>>> diff --git a/examples/l3fwd/Makefile b/examples/l3fwd/Makefile index >>>> 8efe6378e2..8dbe85c2e6 100644 >>>> --- a/examples/l3fwd/Makefile >>>> +++ b/examples/l3fwd/Makefile >>>> @@ -22,6 +22,7 @@ shared: build/$(APP)-shared >>>> static: build/$(APP)-static >>>> ln -sf $(APP)-static build/$(APP) >>>> >>>> +INCLUDES =-I../common >>>> PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) CFLAGS >>>> += -O3 $(shell $(PKGCONF) --cflags libdpdk) # Added for >> 'rte_eth_link_to_str()' >>>> @@ -38,10 +39,10 @@ endif >>>> endif >>>> >>>> build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build >>>> - $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED) >>>> + $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) >>>> +$(LDFLAGS_SHARED) >>>> >>>> build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build >>>> - $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC) >>>> + $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) >>>> +$(LDFLAGS_STATIC) >>>> >>>> build: >>>> @mkdir -p $@ >>>> diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index >>>> 8a52c90755..40b5f32a9e 100644 >>>> --- a/examples/l3fwd/l3fwd.h >>>> +++ b/examples/l3fwd/l3fwd.h >>>> @@ -44,8 +44,6 @@ >>>> /* Used to mark destination port as 'invalid'. */ >>>> #define BAD_PORT ((uint16_t)-1) >>>> >>>> -#define FWDSTEP 4 >>>> - >>>> /* replace first 12B of the ethernet header. */ >>>> #define MASK_ETH 0x3f >>>> >>>> diff --git a/examples/l3fwd/l3fwd_common.h >>>> b/examples/l3fwd/l3fwd_common.h index 8e4c27218f..224b1c08e8 100644 >>>> --- a/examples/l3fwd/l3fwd_common.h >>>> +++ b/examples/l3fwd/l3fwd_common.h >>>> @@ -7,6 +7,8 @@ >>>> #ifndef _L3FWD_COMMON_H_ >>>> #define _L3FWD_COMMON_H_ >>>> >>>> +#include "pkt_group.h" >>>> + >>>> #ifdef DO_RFC_1812_CHECKS >>>> >>>> #define IPV4_MIN_VER_IHL 0x45 >>>> @@ -50,133 +52,6 @@ rfc1812_process(struct rte_ipv4_hdr *ipv4_hdr, >>>> uint16_t *dp, uint32_t ptype) >>>> #define rfc1812_process(mb, dp, ptype) do { } while (0) >>>> #endif /* DO_RFC_1812_CHECKS */ >>>> >>>> -/* >>>> - * We group consecutive packets with the same destination port into one >> burst. >>>> - * To avoid extra latency this is done together with some other >>>> packet >>>> - * processing, but after we made a final decision about packet's destination. >>>> - * To do this we maintain: >>>> - * pnum - array of number of consecutive packets with the same dest >>>> port for >>>> - * each packet in the input burst. >>>> - * lp - pointer to the last updated element in the pnum. >>>> - * dlp - dest port value lp corresponds to. >>>> - */ >>>> - >>>> -#define GRPSZ (1 << FWDSTEP) >>>> -#define GRPMSK (GRPSZ - 1) >>>> - >>>> -#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ >>>> - if (likely((dlp) == (dcp)[(idx)])) { \ >>>> - (lp)[0]++; \ >>>> - } else { \ >>>> - (dlp) = (dcp)[idx]; \ >>>> - (lp) = (pn) + (idx); \ >>>> - (lp)[0] = 1; \ >>>> - } \ >>>> -} while (0) >>>> - >>>> -static const struct { >>>> - uint64_t pnum; /* prebuild 4 values for pnum[]. */ >>>> - int32_t idx; /* index for new last updated element. */ >>>> - uint16_t lpv; /* add value to the last updated element. */ >>>> -} gptbl[GRPSZ] = { >>>> - { >>>> - /* 0: a != b, b != c, c != d, d != e */ >>>> - .pnum = UINT64_C(0x0001000100010001), >>>> - .idx = 4, >>>> - .lpv = 0, >>>> - }, >>>> - { >>>> - /* 1: a == b, b != c, c != d, d != e */ >>>> - .pnum = UINT64_C(0x0001000100010002), >>>> - .idx = 4, >>>> - .lpv = 1, >>>> - }, >>>> - { >>>> - /* 2: a != b, b == c, c != d, d != e */ >>>> - .pnum = UINT64_C(0x0001000100020001), >>>> - .idx = 4, >>>> - .lpv = 0, >>>> - }, >>>> - { >>>> - /* 3: a == b, b == c, c != d, d != e */ >>>> - .pnum = UINT64_C(0x0001000100020003), >>>> - .idx = 4, >>>> - .lpv = 2, >>>> - }, >>>> - { >>>> - /* 4: a != b, b != c, c == d, d != e */ >>>> - .pnum = UINT64_C(0x0001000200010001), >>>> - .idx = 4, >>>> - .lpv = 0, >>>> - }, >>>> - { >>>> - /* 5: a == b, b != c, c == d, d != e */ >>>> - .pnum = UINT64_C(0x0001000200010002), >>>> - .idx = 4, >>>> - .lpv = 1, >>>> - }, >>>> - { >>>> - /* 6: a != b, b == c, c == d, d != e */ >>>> - .pnum = UINT64_C(0x0001000200030001), >>>> - .idx = 4, >>>> - .lpv = 0, >>>> - }, >>>> - { >>>> - /* 7: a == b, b == c, c == d, d != e */ >>>> - .pnum = UINT64_C(0x0001000200030004), >>>> - .idx = 4, >>>> - .lpv = 3, >>>> - }, >>>> - { >>>> - /* 8: a != b, b != c, c != d, d == e */ >>>> - .pnum = UINT64_C(0x0002000100010001), >>>> - .idx = 3, >>>> - .lpv = 0, >>>> - }, >>>> - { >>>> - /* 9: a == b, b != c, c != d, d == e */ >>>> - .pnum = UINT64_C(0x0002000100010002), >>>> - .idx = 3, >>>> - .lpv = 1, >>>> - }, >>>> - { >>>> - /* 0xa: a != b, b == c, c != d, d == e */ >>>> - .pnum = UINT64_C(0x0002000100020001), >>>> - .idx = 3, >>>> - .lpv = 0, >>>> - }, >>>> - { >>>> - /* 0xb: a == b, b == c, c != d, d == e */ >>>> - .pnum = UINT64_C(0x0002000100020003), >>>> - .idx = 3, >>>> - .lpv = 2, >>>> - }, >>>> - { >>>> - /* 0xc: a != b, b != c, c == d, d == e */ >>>> - .pnum = UINT64_C(0x0002000300010001), >>>> - .idx = 2, >>>> - .lpv = 0, >>>> - }, >>>> - { >>>> - /* 0xd: a == b, b != c, c == d, d == e */ >>>> - .pnum = UINT64_C(0x0002000300010002), >>>> - .idx = 2, >>>> - .lpv = 1, >>>> - }, >>>> - { >>>> - /* 0xe: a != b, b == c, c == d, d == e */ >>>> - .pnum = UINT64_C(0x0002000300040001), >>>> - .idx = 1, >>>> - .lpv = 0, >>>> - }, >>>> - { >>>> - /* 0xf: a == b, b == c, c == d, d == e */ >>>> - .pnum = UINT64_C(0x0002000300040005), >>>> - .idx = 0, >>>> - .lpv = 4, >>>> - }, >>>> -}; >>>> - >>>> static __rte_always_inline void >>>> send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf >> *m[], >>>> uint32_t num) >>>> diff --git a/examples/l3fwd/l3fwd_neon.h >>>> b/examples/l3fwd/l3fwd_neon.h index e3d33a5229..5fa765b640 100644 >>>> --- a/examples/l3fwd/l3fwd_neon.h >>>> +++ b/examples/l3fwd/l3fwd_neon.h >>>> @@ -7,6 +7,7 @@ >>>> #define _L3FWD_NEON_H_ >>>> >>>> #include "l3fwd.h" >>>> +#include "neon_common.h" >>>> #include "l3fwd_common.h" >>>> >>>> /* >>>> @@ -62,44 +63,6 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], >>>> uint16_t dst_port[FWDSTEP]) >>>> &dst_port[3], pkt[3]->packet_type); >>>> } >>>> >>>> -/* >>>> - * Group consecutive packets with the same destination port in bursts of 4. >>>> - * Suppose we have array of destination ports: >>>> - * dst_port[] = {a, b, c, d,, e, ... } >>>> - * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. >>>> - * We doing 4 comparisons at once and the result is 4 bit mask. >>>> - * This mask is used as an index into prebuild array of pnum values. >>>> - */ >>>> -static inline uint16_t * >>>> -port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1, >>>> - uint16x8_t dp2) >>>> -{ >>>> - union { >>>> - uint16_t u16[FWDSTEP + 1]; >>>> - uint64_t u64; >>>> - } *pnum = (void *)pn; >>>> - >>>> - int32_t v; >>>> - uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0}; >>>> - >>>> - dp1 = vceqq_u16(dp1, dp2); >>>> - dp1 = vandq_u16(dp1, mask); >>>> - v = vaddvq_u16(dp1); >>>> - >>>> - /* update last port counter. */ >>>> - lp[0] += gptbl[v].lpv; >>>> - rte_compiler_barrier(); >>>> - >>>> - /* if dest port value has changed. */ >>>> - if (v != GRPMSK) { >>>> - pnum->u64 = gptbl[v].pnum; >>>> - pnum->u16[FWDSTEP] = 1; >>>> - lp = pnum->u16 + gptbl[v].idx; >>>> - } >>>> - >>>> - return lp; >>>> -} >>>> - >>>> /** >>>> * Process one packet: >>>> * Update source and destination MAC addresses in the ethernet header. >>>> @@ -161,7 +124,7 @@ send_packets_multi(struct lcore_conf *qconf, >>>> struct rte_mbuf **pkts_burst, >>>> * <d[j-3], d[j-2], d[j-1], d[j], ... > >>>> */ >>>> dp2 = vld1q_u16(&dst_port[j - FWDSTEP + 1]); >>>> - lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); >>>> + lp = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, >>>> dp2); >>>> >>>> /* >>>> * dp1: >>>> @@ -175,7 +138,7 @@ send_packets_multi(struct lcore_conf *qconf, >>>> struct rte_mbuf **pkts_burst, >>>> */ >>>> dp2 = vextq_u16(dp1, dp1, 1); >>>> dp2 = vsetq_lane_u16(vgetq_lane_u16(dp2, 2), dp2, 3); >>>> - lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); >>>> + lp = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); >>>> >>>> /* >>>> * remove values added by the last repeated diff --git >>>> a/examples/meson.build b/examples/meson.build index >>>> 78de0e1f37..81e93799f2 100644 >>>> --- a/examples/meson.build >>>> +++ b/examples/meson.build >>>> @@ -97,7 +97,7 @@ foreach example: examples >>>> ldflags = default_ldflags >>>> >>>> ext_deps = [] >>>> - includes = [include_directories(example)] >>>> + includes = [include_directories(example, 'common')] >>>> deps = ['eal', 'mempool', 'net', 'mbuf', 'ethdev', 'cmdline'] >>>> subdir(example) >>>> >>>> -- >>>> 2.25.1 >>> >
diff --git a/examples/common/neon_common.h b/examples/common/neon_common.h new file mode 100644 index 0000000000..f01b5ab6bc --- /dev/null +++ b/examples/common/neon_common.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2018 Intel Corporation. + * Copyright(c) 2017-2018 Linaro Limited. + * Copyright(C) 2022 Marvell. + */ + +#ifndef _NEON_COMMON_H_ +#define _NEON_COMMON_H_ + +#include "pkt_group.h" + +/* + * Group consecutive packets with the same destination port in bursts of 4. + * Suppose we have array of destination ports: + * dst_port[] = {a, b, c, d,, e, ... } + * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. + * We doing 4 comparisons at once and the result is 4 bit mask. + * This mask is used as an index into prebuild array of pnum values. + */ +static inline uint16_t * +neon_port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1, + uint16x8_t dp2) +{ + union { + uint16_t u16[FWDSTEP + 1]; + uint64_t u64; + } *pnum = (void *)pn; + + uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0}; + int32_t v; + + dp1 = vceqq_u16(dp1, dp2); + dp1 = vandq_u16(dp1, mask); + v = vaddvq_u16(dp1); + + /* update last port counter. */ + lp[0] += gptbl[v].lpv; + rte_compiler_barrier(); + + /* if dest port value has changed. */ + if (v != GRPMSK) { + pnum->u64 = gptbl[v].pnum; + pnum->u16[FWDSTEP] = 1; + lp = pnum->u16 + gptbl[v].idx; + } + + return lp; +} + +#endif /* _NEON_COMMON_H_ */ diff --git a/examples/common/pkt_group.h b/examples/common/pkt_group.h new file mode 100644 index 0000000000..8b26d9380f --- /dev/null +++ b/examples/common/pkt_group.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2018 Intel Corporation. + * Copyright(c) 2017-2018 Linaro Limited. + * Copyright(C) 2022 Marvell. + */ + +#ifndef _PKT_GROUP_H_ +#define _PKT_GROUP_H_ + +#define FWDSTEP 4 + +/* + * Group consecutive packets with the same destination port into one burst. + * To avoid extra latency this is done together with some other packet + * processing, but after we made a final decision about packet's destination. + * To do this we maintain: + * pnum - array of number of consecutive packets with the same dest port for + * each packet in the input burst. + * lp - pointer to the last updated element in the pnum. + * dlp - dest port value lp corresponds to. + */ + +#define GRPSZ (1 << FWDSTEP) +#define GRPMSK (GRPSZ - 1) + +#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ + if (likely((dlp) == (dcp)[(idx)])) { \ + (lp)[0]++; \ + } else { \ + (dlp) = (dcp)[idx]; \ + (lp) = (pn) + (idx); \ + (lp)[0] = 1; \ + } \ +} while (0) + +static const struct { + uint64_t pnum; /* prebuild 4 values for pnum[]. */ + int32_t idx; /* index for new last updated elemnet. */ + uint16_t lpv; /* add value to the last updated element. */ +} gptbl[GRPSZ] = { + { + /* 0: a != b, b != c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100010001), + .idx = 4, + .lpv = 0, + }, + { + /* 1: a == b, b != c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100010002), + .idx = 4, + .lpv = 1, + }, + { + /* 2: a != b, b == c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100020001), + .idx = 4, + .lpv = 0, + }, + { + /* 3: a == b, b == c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100020003), + .idx = 4, + .lpv = 2, + }, + { + /* 4: a != b, b != c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200010001), + .idx = 4, + .lpv = 0, + }, + { + /* 5: a == b, b != c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200010002), + .idx = 4, + .lpv = 1, + }, + { + /* 6: a != b, b == c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200030001), + .idx = 4, + .lpv = 0, + }, + { + /* 7: a == b, b == c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200030004), + .idx = 4, + .lpv = 3, + }, + { + /* 8: a != b, b != c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100010001), + .idx = 3, + .lpv = 0, + }, + { + /* 9: a == b, b != c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100010002), + .idx = 3, + .lpv = 1, + }, + { + /* 0xa: a != b, b == c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100020001), + .idx = 3, + .lpv = 0, + }, + { + /* 0xb: a == b, b == c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100020003), + .idx = 3, + .lpv = 2, + }, + { + /* 0xc: a != b, b != c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300010001), + .idx = 2, + .lpv = 0, + }, + { + /* 0xd: a == b, b != c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300010002), + .idx = 2, + .lpv = 1, + }, + { + /* 0xe: a != b, b == c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300040001), + .idx = 1, + .lpv = 0, + }, + { + /* 0xf: a == b, b == c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300040005), + .idx = 0, + .lpv = 4, + }, +}; + +#endif /* _PKT_GROUP_H_ */ diff --git a/examples/l3fwd/Makefile b/examples/l3fwd/Makefile index 8efe6378e2..8dbe85c2e6 100644 --- a/examples/l3fwd/Makefile +++ b/examples/l3fwd/Makefile @@ -22,6 +22,7 @@ shared: build/$(APP)-shared static: build/$(APP)-static ln -sf $(APP)-static build/$(APP) +INCLUDES =-I../common PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) # Added for 'rte_eth_link_to_str()' @@ -38,10 +39,10 @@ endif endif build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build - $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED) + $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED) build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build - $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC) + $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC) build: @mkdir -p $@ diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index 8a52c90755..40b5f32a9e 100644 --- a/examples/l3fwd/l3fwd.h +++ b/examples/l3fwd/l3fwd.h @@ -44,8 +44,6 @@ /* Used to mark destination port as 'invalid'. */ #define BAD_PORT ((uint16_t)-1) -#define FWDSTEP 4 - /* replace first 12B of the ethernet header. */ #define MASK_ETH 0x3f diff --git a/examples/l3fwd/l3fwd_common.h b/examples/l3fwd/l3fwd_common.h index 8e4c27218f..224b1c08e8 100644 --- a/examples/l3fwd/l3fwd_common.h +++ b/examples/l3fwd/l3fwd_common.h @@ -7,6 +7,8 @@ #ifndef _L3FWD_COMMON_H_ #define _L3FWD_COMMON_H_ +#include "pkt_group.h" + #ifdef DO_RFC_1812_CHECKS #define IPV4_MIN_VER_IHL 0x45 @@ -50,133 +52,6 @@ rfc1812_process(struct rte_ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) #define rfc1812_process(mb, dp, ptype) do { } while (0) #endif /* DO_RFC_1812_CHECKS */ -/* - * We group consecutive packets with the same destination port into one burst. - * To avoid extra latency this is done together with some other packet - * processing, but after we made a final decision about packet's destination. - * To do this we maintain: - * pnum - array of number of consecutive packets with the same dest port for - * each packet in the input burst. - * lp - pointer to the last updated element in the pnum. - * dlp - dest port value lp corresponds to. - */ - -#define GRPSZ (1 << FWDSTEP) -#define GRPMSK (GRPSZ - 1) - -#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ - if (likely((dlp) == (dcp)[(idx)])) { \ - (lp)[0]++; \ - } else { \ - (dlp) = (dcp)[idx]; \ - (lp) = (pn) + (idx); \ - (lp)[0] = 1; \ - } \ -} while (0) - -static const struct { - uint64_t pnum; /* prebuild 4 values for pnum[]. */ - int32_t idx; /* index for new last updated element. */ - uint16_t lpv; /* add value to the last updated element. */ -} gptbl[GRPSZ] = { - { - /* 0: a != b, b != c, c != d, d != e */ - .pnum = UINT64_C(0x0001000100010001), - .idx = 4, - .lpv = 0, - }, - { - /* 1: a == b, b != c, c != d, d != e */ - .pnum = UINT64_C(0x0001000100010002), - .idx = 4, - .lpv = 1, - }, - { - /* 2: a != b, b == c, c != d, d != e */ - .pnum = UINT64_C(0x0001000100020001), - .idx = 4, - .lpv = 0, - }, - { - /* 3: a == b, b == c, c != d, d != e */ - .pnum = UINT64_C(0x0001000100020003), - .idx = 4, - .lpv = 2, - }, - { - /* 4: a != b, b != c, c == d, d != e */ - .pnum = UINT64_C(0x0001000200010001), - .idx = 4, - .lpv = 0, - }, - { - /* 5: a == b, b != c, c == d, d != e */ - .pnum = UINT64_C(0x0001000200010002), - .idx = 4, - .lpv = 1, - }, - { - /* 6: a != b, b == c, c == d, d != e */ - .pnum = UINT64_C(0x0001000200030001), - .idx = 4, - .lpv = 0, - }, - { - /* 7: a == b, b == c, c == d, d != e */ - .pnum = UINT64_C(0x0001000200030004), - .idx = 4, - .lpv = 3, - }, - { - /* 8: a != b, b != c, c != d, d == e */ - .pnum = UINT64_C(0x0002000100010001), - .idx = 3, - .lpv = 0, - }, - { - /* 9: a == b, b != c, c != d, d == e */ - .pnum = UINT64_C(0x0002000100010002), - .idx = 3, - .lpv = 1, - }, - { - /* 0xa: a != b, b == c, c != d, d == e */ - .pnum = UINT64_C(0x0002000100020001), - .idx = 3, - .lpv = 0, - }, - { - /* 0xb: a == b, b == c, c != d, d == e */ - .pnum = UINT64_C(0x0002000100020003), - .idx = 3, - .lpv = 2, - }, - { - /* 0xc: a != b, b != c, c == d, d == e */ - .pnum = UINT64_C(0x0002000300010001), - .idx = 2, - .lpv = 0, - }, - { - /* 0xd: a == b, b != c, c == d, d == e */ - .pnum = UINT64_C(0x0002000300010002), - .idx = 2, - .lpv = 1, - }, - { - /* 0xe: a != b, b == c, c == d, d == e */ - .pnum = UINT64_C(0x0002000300040001), - .idx = 1, - .lpv = 0, - }, - { - /* 0xf: a == b, b == c, c == d, d == e */ - .pnum = UINT64_C(0x0002000300040005), - .idx = 0, - .lpv = 4, - }, -}; - static __rte_always_inline void send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[], uint32_t num) diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h index e3d33a5229..5fa765b640 100644 --- a/examples/l3fwd/l3fwd_neon.h +++ b/examples/l3fwd/l3fwd_neon.h @@ -7,6 +7,7 @@ #define _L3FWD_NEON_H_ #include "l3fwd.h" +#include "neon_common.h" #include "l3fwd_common.h" /* @@ -62,44 +63,6 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) &dst_port[3], pkt[3]->packet_type); } -/* - * Group consecutive packets with the same destination port in bursts of 4. - * Suppose we have array of destination ports: - * dst_port[] = {a, b, c, d,, e, ... } - * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. - * We doing 4 comparisons at once and the result is 4 bit mask. - * This mask is used as an index into prebuild array of pnum values. - */ -static inline uint16_t * -port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1, - uint16x8_t dp2) -{ - union { - uint16_t u16[FWDSTEP + 1]; - uint64_t u64; - } *pnum = (void *)pn; - - int32_t v; - uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0}; - - dp1 = vceqq_u16(dp1, dp2); - dp1 = vandq_u16(dp1, mask); - v = vaddvq_u16(dp1); - - /* update last port counter. */ - lp[0] += gptbl[v].lpv; - rte_compiler_barrier(); - - /* if dest port value has changed. */ - if (v != GRPMSK) { - pnum->u64 = gptbl[v].pnum; - pnum->u16[FWDSTEP] = 1; - lp = pnum->u16 + gptbl[v].idx; - } - - return lp; -} - /** * Process one packet: * Update source and destination MAC addresses in the ethernet header. @@ -161,7 +124,7 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, * <d[j-3], d[j-2], d[j-1], d[j], ... > */ dp2 = vld1q_u16(&dst_port[j - FWDSTEP + 1]); - lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); + lp = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); /* * dp1: @@ -175,7 +138,7 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, */ dp2 = vextq_u16(dp1, dp1, 1); dp2 = vsetq_lane_u16(vgetq_lane_u16(dp2, 2), dp2, 3); - lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); + lp = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); /* * remove values added by the last repeated diff --git a/examples/meson.build b/examples/meson.build index 78de0e1f37..81e93799f2 100644 --- a/examples/meson.build +++ b/examples/meson.build @@ -97,7 +97,7 @@ foreach example: examples ldflags = default_ldflags ext_deps = [] - includes = [include_directories(example)] + includes = [include_directories(example, 'common')] deps = ['eal', 'mempool', 'net', 'mbuf', 'ethdev', 'cmdline'] subdir(example)
This will make the packet grouping function common, so that other examples can utilize as per need. Signed-off-by: Rahul Bhansali <rbhansali@marvell.com> --- Changes in v2: New patch to address review comment. examples/common/neon_common.h | 50 ++++++++++++ examples/common/pkt_group.h | 139 ++++++++++++++++++++++++++++++++++ examples/l3fwd/Makefile | 5 +- examples/l3fwd/l3fwd.h | 2 - examples/l3fwd/l3fwd_common.h | 129 +------------------------------ examples/l3fwd/l3fwd_neon.h | 43 +---------- examples/meson.build | 2 +- 7 files changed, 198 insertions(+), 172 deletions(-) create mode 100644 examples/common/neon_common.h create mode 100644 examples/common/pkt_group.h -- 2.25.1