diff mbox series

[v7,2/4] hash: add bulk toeplitz hash implementation

Message ID 1635351408-168116-3-git-send-email-vladimir.medvedkin@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers show
Series optimized Toeplitz hash implementation | expand

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Vladimir Medvedkin Oct. 27, 2021, 4:16 p.m. UTC
This patch adds a bulk version for the Toeplitz hash implemented
with Galios Fields New Instructions (GFNI).

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 app/test/test_thash.c                       | 67 ++++++++++++++++++++++++++++-
 doc/guides/prog_guide/toeplitz_hash_lib.rst | 20 ++++++---
 lib/hash/rte_thash_gfni.h                   | 33 ++++++++++++++
 lib/hash/rte_thash_x86_gfni.h               | 39 +++++++++++++++++
 4 files changed, 153 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/app/test/test_thash.c b/app/test/test_thash.c
index 22d784e..a625306 100644
--- a/app/test/test_thash.c
+++ b/app/test/test_thash.c
@@ -230,6 +230,8 @@  enum {
 	SCALAR_DATA_BUF_2_HASH_IDX,
 	GFNI_DATA_BUF_1_HASH_IDX,
 	GFNI_DATA_BUF_2_HASH_IDX,
+	GFNI_BULK_DATA_BUF_1_HASH_IDX,
+	GFNI_BULK_DATA_BUF_2_HASH_IDX,
 	HASH_IDXES
 };
 
@@ -241,6 +243,7 @@  test_toeplitz_hash_rand_data(void)
 	uint32_t hash[HASH_IDXES] = { 0 };
 	uint64_t rss_key_matrixes[RTE_DIM(default_rss_key)];
 	int i, j;
+	uint8_t *bulk_data[2];
 
 	if (!rte_thash_gfni_supported())
 		return TEST_SKIPPED;
@@ -248,6 +251,9 @@  test_toeplitz_hash_rand_data(void)
 	rte_thash_complete_matrix(rss_key_matrixes, default_rss_key,
 		RTE_DIM(default_rss_key));
 
+	for (i = 0; i < 2; i++)
+		bulk_data[i] = (uint8_t *)data[i];
+
 	for (i = 0; i < ITER; i++) {
 		for (j = 0; j < DATA_SZ; j++) {
 			data[0][j] = rte_rand();
@@ -266,11 +272,18 @@  test_toeplitz_hash_rand_data(void)
 		hash[GFNI_DATA_BUF_2_HASH_IDX] = rte_thash_gfni(
 			rss_key_matrixes, (uint8_t *)data[1],
 			DATA_SZ * sizeof(uint32_t));
+		rte_thash_gfni_bulk(rss_key_matrixes,
+			DATA_SZ * sizeof(uint32_t), bulk_data,
+			&hash[GFNI_BULK_DATA_BUF_1_HASH_IDX], 2);
 
 		if ((hash[SCALAR_DATA_BUF_1_HASH_IDX] !=
 				hash[GFNI_DATA_BUF_1_HASH_IDX]) ||
+				(hash[SCALAR_DATA_BUF_1_HASH_IDX] !=
+				hash[GFNI_BULK_DATA_BUF_1_HASH_IDX]) ||
 				(hash[SCALAR_DATA_BUF_2_HASH_IDX] !=
-				hash[GFNI_DATA_BUF_2_HASH_IDX]))
+				hash[GFNI_DATA_BUF_2_HASH_IDX]) ||
+				(hash[SCALAR_DATA_BUF_2_HASH_IDX] !=
+				hash[GFNI_BULK_DATA_BUF_2_HASH_IDX]))
 
 			return -TEST_FAILED;
 	}
@@ -284,6 +297,57 @@  enum {
 };
 
 static int
+test_toeplitz_hash_gfni_bulk(void)
+{
+	uint32_t i, j;
+	union rte_thash_tuple tuple[2];
+	uint8_t *tuples[2];
+	uint32_t rss[2] = { 0 };
+	uint64_t rss_key_matrixes[RTE_DIM(default_rss_key)];
+
+	if (!rte_thash_gfni_supported())
+		return TEST_SKIPPED;
+
+	/* Convert RSS key into matrixes */
+	rte_thash_complete_matrix(rss_key_matrixes, default_rss_key,
+		RTE_DIM(default_rss_key));
+
+	for (i = 0; i < RTE_DIM(tuples); i++) {
+		/* allocate memory enough for a biggest tuple */
+		tuples[i] = rte_zmalloc(NULL, RTE_THASH_V6_L4_LEN * 4, 0);
+		if (tuples[i] == NULL)
+			return -TEST_FAILED;
+	}
+
+	for (i = 0; i < RTE_MIN(RTE_DIM(v4_tbl), RTE_DIM(v6_tbl)); i++) {
+		/*Load IPv4 headers and copy it into the corresponding tuple*/
+		tuple[0].v4.src_addr = rte_cpu_to_be_32(v4_tbl[i].src_ip);
+		tuple[0].v4.dst_addr = rte_cpu_to_be_32(v4_tbl[i].dst_ip);
+		tuple[0].v4.sport = rte_cpu_to_be_16(v4_tbl[i].dst_port);
+		tuple[0].v4.dport = rte_cpu_to_be_16(v4_tbl[i].src_port);
+		rte_memcpy(tuples[0], &tuple[0], RTE_THASH_V4_L4_LEN * 4);
+
+		/*Load IPv6 headers and copy it into the corresponding tuple*/
+		for (j = 0; j < RTE_DIM(tuple[1].v6.src_addr); j++)
+			tuple[1].v6.src_addr[j] = v6_tbl[i].src_ip[j];
+		for (j = 0; j < RTE_DIM(tuple[1].v6.dst_addr); j++)
+			tuple[1].v6.dst_addr[j] = v6_tbl[i].dst_ip[j];
+		tuple[1].v6.sport = rte_cpu_to_be_16(v6_tbl[i].dst_port);
+		tuple[1].v6.dport = rte_cpu_to_be_16(v6_tbl[i].src_port);
+		rte_memcpy(tuples[1], &tuple[1], RTE_THASH_V6_L4_LEN * 4);
+
+		rte_thash_gfni_bulk(rss_key_matrixes, RTE_THASH_V6_L4_LEN * 4,
+			tuples, rss, 2);
+
+		if ((rss[RSS_V4_IDX] != v4_tbl[i].hash_l3l4) ||
+				(rss[RSS_V6_IDX] != v6_tbl[i].hash_l3l4))
+			return -TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+static int
 test_big_tuple_gfni(void)
 {
 	uint32_t arr[16];
@@ -748,6 +812,7 @@  static struct unit_test_suite thash_tests = {
 	TEST_CASE(test_toeplitz_hash_calc),
 	TEST_CASE(test_toeplitz_hash_gfni),
 	TEST_CASE(test_toeplitz_hash_rand_data),
+	TEST_CASE(test_toeplitz_hash_gfni_bulk),
 	TEST_CASE(test_big_tuple_gfni),
 	TEST_CASE(test_create_invalid),
 	TEST_CASE(test_multiple_create),
diff --git a/doc/guides/prog_guide/toeplitz_hash_lib.rst b/doc/guides/prog_guide/toeplitz_hash_lib.rst
index acdd8c3..61eaafd 100644
--- a/doc/guides/prog_guide/toeplitz_hash_lib.rst
+++ b/doc/guides/prog_guide/toeplitz_hash_lib.rst
@@ -19,11 +19,12 @@  to calculate the RSS hash sum to spread the traffic among the queues.
 Toeplitz hash function API
 --------------------------
 
-There are three functions that provide calculation of the Toeplitz hash sum:
+There are four functions that provide calculation of the Toeplitz hash sum:
 
 * ``rte_softrss()``
 * ``rte_softrss_be()``
 * ``rte_thash_gfni()``
+* ``rte_thash_gfni_bulk()``
 
 First two functions are scalar implementation and take the parameters:
 
@@ -38,11 +39,12 @@  to be exactly the same as the one installed on the NIC.
 The ``rte_softrss_be`` function is a faster implementation,
 but it expects ``rss_key`` to be converted to the host byte order.
 
-The last function is vectorized implementation using
-Galois Fields New Instructions. Could be used if ``rte_thash_gfni_supported`` returns true.
-It expects the tuple to be in network byte order.
+The last two functions are vectorized implementations using
+Galois Fields New Instructions. Could be used if ``rte_thash_gfni_supported`` is true.
+They expect the tuple to be in network byte order.
 
-``rte_thash_gfni()`` calculates the hash value for a single tuple
+``rte_thash_gfni()`` calculates the hash value for a single tuple, and
+``rte_thash_gfni_bulk()`` bulk implementation of the rte_thash_gfni().
 
 ``rte_thash_gfni()`` takes the parameters:
 
@@ -50,6 +52,14 @@  It expects the tuple to be in network byte order.
 * A pointer to the tuple.
 * A length of the tuple in bytes.
 
+``rte_thash_gfni_bulk()`` takes the parameters:
+
+* A pointer to the matrices derived from the RSS hash key using ``rte_thash_complete_matrix()``.
+* A length of the longest tuple in bytes.
+* Array of the pointers on data to be hashed.
+* Array of ``uint32_t`` where to put calculated Toeplitz hash values
+* Number of tuples in a bulk.
+
 ``rte_thash_complete_matrix()`` is a function that calculates matrices required by
 GFNI implementations from the RSS hash key. It takes the parameters:
 
diff --git a/lib/hash/rte_thash_gfni.h b/lib/hash/rte_thash_gfni.h
index bbacd41..e97d912 100644
--- a/lib/hash/rte_thash_gfni.h
+++ b/lib/hash/rte_thash_gfni.h
@@ -45,6 +45,39 @@  rte_thash_gfni(const uint64_t *mtrx __rte_unused,
 	return 0;
 }
 
+/**
+ * Bulk implementation for Toeplitz hash.
+ * Dummy implementation.
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * @param m
+ *  Pointer to the matrices generated from the corresponding
+ *  RSS hash key using rte_thash_complete_matrix().
+ * @param len
+ *  Length of the largest data buffer to be hashed.
+ * @param tuple
+ *  Array of the pointers on data to be hashed.
+ *  Data must be in network byte order.
+ * @param val
+ *  Array of uint32_t where to put calculated Toeplitz hash values
+ * @param num
+ *  Number of tuples to hash.
+ */
+__rte_experimental
+static inline void
+rte_thash_gfni_bulk(const uint64_t *mtrx __rte_unused,
+	int len __rte_unused, uint8_t *tuple[] __rte_unused,
+	uint32_t val[], uint32_t num)
+{
+	unsigned int i;
+
+	RTE_LOG(ERR, HASH, "%s is undefined under given arch\n", __func__);
+	for (i = 0; i < num; i++)
+		val[i] = 0;
+}
+
 #endif /* RTE_THASH_GFNI_DEFINED */
 
 #ifdef __cplusplus
diff --git a/lib/hash/rte_thash_x86_gfni.h b/lib/hash/rte_thash_x86_gfni.h
index 1cb7353..7bfb937 100644
--- a/lib/hash/rte_thash_x86_gfni.h
+++ b/lib/hash/rte_thash_x86_gfni.h
@@ -173,6 +173,45 @@  rte_thash_gfni(const uint64_t *m, const uint8_t *tuple, int len)
 	return val;
 }
 
+/**
+ * Bulk implementation for Toeplitz hash.
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * @param m
+ *  Pointer to the matrices generated from the corresponding
+ *  RSS hash key using rte_thash_complete_matrix().
+ * @param len
+ *  Length of the largest data buffer to be hashed.
+ * @param tuple
+ *  Array of the pointers on data to be hashed.
+ *  Data must be in network byte order.
+ * @param val
+ *  Array of uint32_t where to put calculated Toeplitz hash values
+ * @param num
+ *  Number of tuples to hash.
+ */
+__rte_experimental
+static inline void
+rte_thash_gfni_bulk(const uint64_t *mtrx, int len, uint8_t *tuple[],
+	uint32_t val[], uint32_t num)
+{
+	uint32_t i;
+	uint32_t val_zero;
+	__m512i xor_acc;
+
+	for (i = 0; i != (num & ~1); i += 2) {
+		xor_acc = __rte_thash_gfni(mtrx, tuple[i], tuple[i + 1], len);
+		__rte_thash_xor_reduce(xor_acc, val + i, val + i + 1);
+	}
+
+	if (num & 1) {
+		xor_acc = __rte_thash_gfni(mtrx, tuple[i], NULL, len);
+		__rte_thash_xor_reduce(xor_acc, val + i, &val_zero);
+	}
+}
+
 #endif /* _GFNI_ */
 
 #ifdef __cplusplus