[4/4] pipeline: add instruction support for moving large structure fields

Message ID 20220812095445.1253138-5-cristian.dumitrescu@intel.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Headers
Series pipeline: support large structure fields

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/github-robot: build success github build: passed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/intel-Testing success Testing PASS

Commit Message

Cristian Dumitrescu Aug. 12, 2022, 9:54 a.m. UTC
  Add support to the move instruction for operands bigger than 64 bits.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Signed-off-by: Harshad Suresh Narayane <harshad.suresh.narayane@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 71 +++++++++++++++++----
 lib/pipeline/rte_swx_pipeline_internal.h | 78 ++++++++++++++++++++++--
 2 files changed, 131 insertions(+), 18 deletions(-)
  

Patch

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 48b9df0fef..2cac4caa95 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2969,20 +2969,28 @@  instr_mov_translate(struct rte_swx_pipeline *p,
 
 	fdst = struct_field_parse(p, NULL, dst, &dst_struct_id);
 	CHECK(fdst, EINVAL);
-	CHECK(!fdst->var_size && (fdst->n_bits <= 64), EINVAL);
+	CHECK(!fdst->var_size, EINVAL);
 
-	/* MOV, MOV_MH, MOV_HM or MOV_HH. */
+	/* MOV, MOV_MH, MOV_HM, MOV_HH, MOV_128, MOV_DMA. */
 	fsrc = struct_field_parse(p, action, src, &src_struct_id);
 	if (fsrc) {
-		CHECK(!fsrc->var_size && (fsrc->n_bits <= 64), EINVAL);
+		CHECK(!fsrc->var_size, EINVAL);
+
+		if (fdst->n_bits <= 64 && fsrc->n_bits <= 64) {
+			instr->type = INSTR_MOV;
+			if (dst[0] != 'h' && src[0] == 'h')
+				instr->type = INSTR_MOV_MH;
+			if (dst[0] == 'h' && src[0] != 'h')
+				instr->type = INSTR_MOV_HM;
+			if (dst[0] == 'h' && src[0] == 'h')
+				instr->type = INSTR_MOV_HH;
+		} else {
+			CHECK(fdst->n_bits == fsrc->n_bits, EINVAL);
 
-		instr->type = INSTR_MOV;
-		if (dst[0] != 'h' && src[0] == 'h')
-			instr->type = INSTR_MOV_MH;
-		if (dst[0] == 'h' && src[0] != 'h')
-			instr->type = INSTR_MOV_HM;
-		if (dst[0] == 'h' && src[0] == 'h')
-			instr->type = INSTR_MOV_HH;
+			instr->type = INSTR_MOV_DMA;
+			if (fdst->n_bits == 128)
+				instr->type = INSTR_MOV_128;
+		}
 
 		instr->mov.dst.struct_id = (uint8_t)dst_struct_id;
 		instr->mov.dst.n_bits = fdst->n_bits;
@@ -2994,6 +3002,7 @@  instr_mov_translate(struct rte_swx_pipeline *p,
 	}
 
 	/* MOV_I. */
+	CHECK(fdst->n_bits <= 64, EINVAL);
 	src_val = strtoull(src, &src, 0);
 	CHECK(!src[0], EINVAL);
 
@@ -3056,6 +3065,30 @@  instr_mov_hh_exec(struct rte_swx_pipeline *p)
 	thread_ip_inc(p);
 }
 
+static inline void
+instr_mov_dma_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_mov_dma_exec(p, t, ip);
+
+	/* Thread. */
+	thread_ip_inc(p);
+}
+
+static inline void
+instr_mov_128_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_mov_128_exec(p, t, ip);
+
+	/* Thread. */
+	thread_ip_inc(p);
+}
+
 static inline void
 instr_mov_i_exec(struct rte_swx_pipeline *p)
 {
@@ -6781,12 +6814,14 @@  instr_pattern_validate_mov_all_search(struct rte_swx_pipeline *p,
 	if (!a || !a->st)
 		return 0;
 
-	/* First instruction: HDR_VALIDATE. Second instruction: MOV_HM. */
+	/* First instruction: HDR_VALIDATE. Second instruction: MOV_HM, MOV_DMA or MOV_128. */
 	if (data[0].invalid ||
 	    (instr[0].type != INSTR_HDR_VALIDATE) ||
 	    (n_instr < 2) ||
 	    data[1].invalid ||
-	    (instr[1].type != INSTR_MOV_HM) ||
+	    (instr[1].type != INSTR_MOV_HM &&
+	     instr[1].type != INSTR_MOV_DMA &&
+	     instr[1].type != INSTR_MOV_128) ||
 	    instr[1].mov.src.struct_id)
 		return 0;
 
@@ -6807,7 +6842,9 @@  instr_pattern_validate_mov_all_search(struct rte_swx_pipeline *p,
 	for (i = 0; i < h->st->n_fields; i++)
 		if (data[1 + i].invalid ||
 		    data[1 + i].n_users ||
-		    (instr[1 + i].type != INSTR_MOV_HM) ||
+		    (instr[1 + i].type != INSTR_MOV_HM &&
+		     instr[1 + i].type != INSTR_MOV_DMA &&
+		     instr[1 + i].type != INSTR_MOV_128) ||
 		    (instr[1 + i].mov.dst.struct_id != h->struct_id) ||
 		    (instr[1 + i].mov.dst.offset != h->st->fields[i].offset / 8) ||
 		    (instr[1 + i].mov.dst.n_bits != h->st->fields[i].n_bits) ||
@@ -7147,6 +7184,8 @@  static instr_exec_t instruction_table[] = {
 	[INSTR_MOV_MH] = instr_mov_mh_exec,
 	[INSTR_MOV_HM] = instr_mov_hm_exec,
 	[INSTR_MOV_HH] = instr_mov_hh_exec,
+	[INSTR_MOV_DMA] = instr_mov_dma_exec,
+	[INSTR_MOV_128] = instr_mov_128_exec,
 	[INSTR_MOV_I] = instr_mov_i_exec,
 
 	[INSTR_DMA_HT] = instr_dma_ht_exec,
@@ -10950,6 +10989,8 @@  instr_type_to_name(struct instruction *instr)
 	case INSTR_MOV_MH: return "INSTR_MOV_MH";
 	case INSTR_MOV_HM: return "INSTR_MOV_HM";
 	case INSTR_MOV_HH: return "INSTR_MOV_HH";
+	case INSTR_MOV_DMA: return "INSTR_MOV_DMA";
+	case INSTR_MOV_128: return "INSTR_MOV_128";
 	case INSTR_MOV_I: return "INSTR_MOV_I";
 
 	case INSTR_DMA_HT: return "INSTR_DMA_HT";
@@ -11938,6 +11979,8 @@  static instruction_export_t export_table[] = {
 	[INSTR_MOV_MH] = instr_mov_export,
 	[INSTR_MOV_HM] = instr_mov_export,
 	[INSTR_MOV_HH] = instr_mov_export,
+	[INSTR_MOV_DMA] = instr_mov_export,
+	[INSTR_MOV_128] = instr_mov_export,
 	[INSTR_MOV_I] = instr_mov_export,
 
 	[INSTR_DMA_HT]  = instr_dma_ht_export,
@@ -12162,6 +12205,8 @@  instr_type_to_func(struct instruction *instr)
 	case INSTR_MOV_MH: return "__instr_mov_mh_exec";
 	case INSTR_MOV_HM: return "__instr_mov_hm_exec";
 	case INSTR_MOV_HH: return "__instr_mov_hh_exec";
+	case INSTR_MOV_DMA: return "__instr_mov_dma_exec";
+	case INSTR_MOV_128: return "__instr_mov_128_exec";
 	case INSTR_MOV_I: return "__instr_mov_i_exec";
 
 	case INSTR_DMA_HT: return "__instr_dma_ht_exec";
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 588cad62b5..6d65b635c6 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -307,11 +307,13 @@  enum instruction_type {
 	 * dst = src
 	 * dst = HMEF, src = HMEFTI
 	 */
-	INSTR_MOV,    /* dst = MEF, src = MEFT */
-	INSTR_MOV_MH, /* dst = MEF, src = H */
-	INSTR_MOV_HM, /* dst = H, src = MEFT */
-	INSTR_MOV_HH, /* dst = H, src = H */
-	INSTR_MOV_I,  /* dst = HMEF, src = I */
+	INSTR_MOV,     /* dst = MEF, src = MEFT; size(dst) <= 64 bits, size(src) <= 64 bits. */
+	INSTR_MOV_MH,  /* dst = MEF, src = H; size(dst) <= 64 bits, size(src) <= 64 bits. */
+	INSTR_MOV_HM,  /* dst = H, src = MEFT; size(dst) <= 64 bits, size(src) <= 64 bits. */
+	INSTR_MOV_HH,  /* dst = H, src = H; size(dst) <= 64 bits, size(src) <= 64 bits. */
+	INSTR_MOV_DMA, /* dst = HMEF, src = HMEF; size(dst) = size(src) > 64 bits, NBO format. */
+	INSTR_MOV_128, /* dst = HMEF, src = HMEF; size(dst) = size(src) = 128 bits, NBO format. */
+	INSTR_MOV_I,   /* dst = HMEF, src = I; size(dst) <= 64 bits. */
 
 	/* dma h.header t.field
 	 * memcpy(h.header, t.field, sizeof(h.header))
@@ -2485,6 +2487,72 @@  __instr_mov_hh_exec(struct rte_swx_pipeline *p __rte_unused,
 	MOV_HH(t, ip);
 }
 
+static inline void
+__instr_mov_dma_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	uint8_t *dst_struct = t->structs[ip->mov.dst.struct_id];
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->mov.dst.offset];
+	uint32_t *dst32_ptr;
+	uint16_t *dst16_ptr;
+	uint8_t *dst8_ptr;
+
+	uint8_t *src_struct = t->structs[ip->mov.src.struct_id];
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->mov.src.offset];
+	uint32_t *src32_ptr;
+	uint16_t *src16_ptr;
+	uint8_t *src8_ptr;
+
+	uint32_t n = ip->mov.dst.n_bits >> 3, i;
+
+	TRACE("[Thread %2u] mov (dma) %u bytes\n", p->thread_id, n);
+
+	/* 8-byte transfers. */
+	for (i = 0; i < n >> 3; i++)
+		*dst64_ptr++ = *src64_ptr++;
+
+	/* 4-byte transfers. */
+	n &= 7;
+	dst32_ptr = (uint32_t *)dst64_ptr;
+	src32_ptr = (uint32_t *)src64_ptr;
+
+	for (i = 0; i < n >> 2; i++)
+		*dst32_ptr++ = *src32_ptr++;
+
+	/* 2-byte transfers. */
+	n &= 3;
+	dst16_ptr = (uint16_t *)dst32_ptr;
+	src16_ptr = (uint16_t *)src32_ptr;
+
+	for (i = 0; i < n >> 1; i++)
+		*dst16_ptr++ = *src16_ptr++;
+
+	/* 1-byte transfer. */
+	n &= 1;
+	dst8_ptr = (uint8_t *)dst16_ptr;
+	src8_ptr = (uint8_t *)src16_ptr;
+	if (n)
+		*dst8_ptr = *src8_ptr;
+}
+
+static inline void
+__instr_mov_128_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	uint8_t *dst_struct = t->structs[ip->mov.dst.struct_id];
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->mov.dst.offset];
+
+	uint8_t *src_struct = t->structs[ip->mov.src.struct_id];
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->mov.src.offset];
+
+	TRACE("[Thread %2u] mov (128)\n", p->thread_id);
+
+	dst64_ptr[0] = src64_ptr[0];
+	dst64_ptr[1] = src64_ptr[1];
+}
+
 static inline void
 __instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
 		   struct thread *t,