[1/1] hash: add SVE support for bulk key lookup
Checks
Commit Message
From: Harjot Singh <harjot.singh@arm.com>
- Implemented Vector Length Agnostic SVE code for comparing signatures
in bulk lookup.
- Added defines in the code to enable SVE support.
- The new optimised SVE code is 1-2 CPU cycles slower than NEON on the N2
processor.
Performance numbers from hash_perf_autotest:
Elements in Primary or Secondary Location
Results (in CPU cycles/operation)
-----------------------------------
Operations without data
Without pre-computed hash values
Keysize    Add/Lookup/Lookup_bulk
           Neon            SVE
4          93/71/26        93/71/27
8          93/70/26        93/70/27
9          94/74/27        94/74/28
13         100/80/31       100/79/32
16         100/78/30       100/78/31
32         109/110/38      108/110/39
With pre-computed hash values
Keysize    Add/Lookup/Lookup_bulk
           Neon            SVE
4          83/58/27        83/58/29
8          83/57/27        83/57/28
9          83/60/28        83/60/29
13         84/60/28        83/60/29
16         83/58/27        83/58/29
32         84/68/31        84/68/32
Signed-off-by: Harjot Singh <harjot.singh@arm.com>
Reviewed-by: Nathan Brown <nathan.brown@arm.com>
Reviewed-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Jieqiang Wang <jieqiang.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
.mailmap | 1 +
lib/hash/rte_cuckoo_hash.c | 37 ++++++++++++++++++++++++++++++++++++-
lib/hash/rte_cuckoo_hash.h | 1 +
3 files changed, 38 insertions(+), 1 deletion(-)
Comments
On Thu, Aug 17, 2023 at 11:24 PM Harjot Singh <Harjot.Singh@arm.com> wrote:
>
> From: Harjot Singh <harjot.singh@arm.com>
>
> - Implemented Vector Length Agnostic SVE code for comparing signatures
> in bulk lookup.
> - Added Defines in code for SVE code support.
> - New Optimised SVE code is 1-2 CPU cycle slower than NEON for N2
> processor.
>
> Performance Numbers from hash_perf_autotest :
>
> Elements in Primary or Secondary Location
>
> Results (in CPU cycles/operation)
> -----------------------------------
> Operations without data
>
> Without pre-computed hash values
>
> Keysize Add/Lookup/Lookup_bulk
> Neon SVE
> 4 93/71/26 93/71/27
> 8 93/70/26 93/70/27
> 9 94/74/27 94/74/28
> 13 100/80/31 100/79/32
> 16 100/78/30 100/78/31
> 32 109/110/38 108/110/39
>
> With pre-computed hash values
>
> Keysize Add/Lookup/Lookup_bulk
> Neon SVE
> 4 83/58/27 83/58/29
> 8 83/57/27 83/57/28
> 9 83/60/28 83/60/29
> 13 84/60/28 83/60/29
> 16 83/58/27 83/58/29
> 32 84/68/31 84/68/32
>
> Signed-off-by: Harjot Singh <harjot.singh@arm.com>
> Reviewed-by: Nathan Brown <nathan.brown@arm.com>
> Reviewed-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Jieqiang Wang <jieqiang.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Thanks for the patch, please update the release notes.
@@ -481,6 +481,7 @@ Hari Kumar Vemula <hari.kumarx.vemula@intel.com>
Harini Ramakrishnan <harini.ramakrishnan@microsoft.com>
Hariprasad Govindharajan <hariprasad.govindharajan@intel.com>
Harish Patil <harish.patil@cavium.com> <harish.patil@qlogic.com>
+Harjot Singh <harjot.singh@arm.com>
Harman Kalra <hkalra@marvell.com>
Harneet Singh <harneet.singh@intel.com>
Harold Huang <baymaxhuang@gmail.com>
@@ -435,8 +435,11 @@ rte_hash_create(const struct rte_hash_parameters *params)
h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
else
#elif defined(RTE_ARCH_ARM64)
- if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
h->sig_cmp_fn = RTE_HASH_COMPARE_NEON;
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SVE))
+ h->sig_cmp_fn = RTE_HASH_COMPARE_SVE;
+ }
else
#endif
h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
@@ -1892,6 +1895,38 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
*sec_hash_matches = (uint32_t)(vaddvq_u16(x));
}
break;
+#if defined(RTE_HAS_SVE_ACLE)
+ case RTE_HASH_COMPARE_SVE: {
+ svuint16_t vsign, shift, sv_prim_matches, sv_sec_matches;
+ svbool_t pred, p_match, s_match;
+ int i = 0;
+ uint64_t vl = svcnth();
+
+ vsign = svdup_u16(sig);
+ shift = svindex_u16(0, 2);
+ do {
+ pred = svwhilelt_b16(i, RTE_HASH_BUCKET_ENTRIES);
+ /* Compare all signatures in the primary bucket */
+ p_match = svcmpeq_u16(pred, vsign, svld1_u16(pred,
+ &prim_bkt->sig_current[i]));
+ if (svptest_any(svptrue_b16(), p_match)) {
+ sv_prim_matches = svdup_u16_z(p_match, 1);
+ sv_prim_matches = svlsl_u16_z(pred, sv_prim_matches, shift);
+ *prim_hash_matches |= svorv_u16(pred, sv_prim_matches);
+ }
+ /* Compare all signatures in the secondary bucket */
+ s_match = svcmpeq_u16(pred, vsign, svld1_u16(pred,
+ &sec_bkt->sig_current[i]));
+ if (svptest_any(svptrue_b16(), s_match)) {
+ sv_sec_matches = svdup_u16_z(s_match, 1);
+ sv_sec_matches = svlsl_u16_z(pred, sv_sec_matches, shift);
+ *sec_hash_matches |= svorv_u16(pred, sv_sec_matches);
+ }
+ i += vl;
+ } while (i < RTE_HASH_BUCKET_ENTRIES);
+ }
+ break;
+#endif
#endif
default:
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
@@ -148,6 +148,7 @@ enum rte_hash_sig_compare_function {
RTE_HASH_COMPARE_SCALAR = 0,
RTE_HASH_COMPARE_SSE,
RTE_HASH_COMPARE_NEON,
+ RTE_HASH_COMPARE_SVE,
RTE_HASH_COMPARE_NUM
};