@@ -3218,11 +3218,24 @@ instr_mov_translate(struct rte_swx_pipeline *p,
if (dst[0] == 'h' && src[0] == 'h')
instr->type = INSTR_MOV_HH;
} else {
- CHECK(fdst->n_bits == fsrc->n_bits, EINVAL);
+ /* The big fields (field with size > 64 bits) are always expected in NBO,
+ * regardless of their type (H or MEFT). In case a big field is involved as
+ * either dst or src, the other field must also be NBO.
+ *
+ * In case the dst field is big, the src field must be either a big field
+ * (of the same or different size as dst) or a small H field. Similarly,
+ * in case the src field is big, the dst field must be either a big field
+ * (of the same or different size as src) or a small H field. Any other case
+ * involving a big field as either dst or src is rejected.
+ */
+ CHECK(fdst->n_bits > 64 || dst[0] == 'h', EINVAL);
+ CHECK(fsrc->n_bits > 64 || src[0] == 'h', EINVAL);
instr->type = INSTR_MOV_DMA;
- if (fdst->n_bits == 128)
+ if (fdst->n_bits == 128 && fsrc->n_bits == 128)
instr->type = INSTR_MOV_128;
+ if (fdst->n_bits == 128 && fsrc->n_bits == 32)
+ instr->type = INSTR_MOV_128_32;
}
instr->mov.dst.struct_id = (uint8_t)dst_struct_id;
@@ -3322,6 +3335,18 @@ instr_mov_128_exec(struct rte_swx_pipeline *p)
thread_ip_inc(p);
}
+static inline void
+instr_mov_128_32_exec(struct rte_swx_pipeline *p)
+{
+ struct thread *t = &p->threads[p->thread_id];
+ struct instruction *ip = t->ip;
+
+ __instr_mov_128_32_exec(p, t, ip);
+
+ /* Thread. */
+ thread_ip_inc(p);
+}
+
static inline void
instr_mov_i_exec(struct rte_swx_pipeline *p)
{
@@ -7435,6 +7460,7 @@ static instr_exec_t instruction_table[] = {
[INSTR_MOV_HH] = instr_mov_hh_exec,
[INSTR_MOV_DMA] = instr_mov_dma_exec,
[INSTR_MOV_128] = instr_mov_128_exec,
+ [INSTR_MOV_128_32] = instr_mov_128_32_exec,
[INSTR_MOV_I] = instr_mov_i_exec,
[INSTR_DMA_HT] = instr_dma_ht_exec,
@@ -11757,6 +11783,7 @@ instr_type_to_name(struct instruction *instr)
case INSTR_MOV_HH: return "INSTR_MOV_HH";
case INSTR_MOV_DMA: return "INSTR_MOV_DMA";
case INSTR_MOV_128: return "INSTR_MOV_128";
+ case INSTR_MOV_128_32: return "INSTR_MOV_128_32";
case INSTR_MOV_I: return "INSTR_MOV_I";
case INSTR_DMA_HT: return "INSTR_DMA_HT";
@@ -12797,6 +12824,7 @@ static instruction_export_t export_table[] = {
[INSTR_MOV_HH] = instr_mov_export,
[INSTR_MOV_DMA] = instr_mov_export,
[INSTR_MOV_128] = instr_mov_export,
+ [INSTR_MOV_128_32] = instr_mov_export,
[INSTR_MOV_I] = instr_mov_export,
[INSTR_DMA_HT] = instr_dma_ht_export,
@@ -13025,6 +13053,7 @@ instr_type_to_func(struct instruction *instr)
case INSTR_MOV_HH: return "__instr_mov_hh_exec";
case INSTR_MOV_DMA: return "__instr_mov_dma_exec";
case INSTR_MOV_128: return "__instr_mov_128_exec";
+ case INSTR_MOV_128_32: return "__instr_mov_128_32_exec";
case INSTR_MOV_I: return "__instr_mov_i_exec";
case INSTR_DMA_HT: return "__instr_dma_ht_exec";
@@ -327,8 +327,9 @@ enum instruction_type {
INSTR_MOV_MH, /* dst = MEF, src = H; size(dst) <= 64 bits, size(src) <= 64 bits. */
INSTR_MOV_HM, /* dst = H, src = MEFT; size(dst) <= 64 bits, size(src) <= 64 bits. */
INSTR_MOV_HH, /* dst = H, src = H; size(dst) <= 64 bits, size(src) <= 64 bits. */
- INSTR_MOV_DMA, /* dst = HMEF, src = HMEF; size(dst) = size(src) > 64 bits, NBO format. */
- INSTR_MOV_128, /* dst = HMEF, src = HMEF; size(dst) = size(src) = 128 bits, NBO format. */
+ INSTR_MOV_DMA, /* dst and src in NBO format. */
+ INSTR_MOV_128, /* dst and src in NBO format, size(dst) = size(src) = 128 bits. */
+ INSTR_MOV_128_32, /* dst, src in NBO format; size(dst) = 128 bits, size(src) = 32 bits. */
INSTR_MOV_I, /* dst = HMEF, src = I; size(dst) <= 64 bits. */
/* dma h.header t.field
@@ -2611,48 +2612,31 @@ __instr_mov_dma_exec(struct rte_swx_pipeline *p __rte_unused,
struct thread *t,
const struct instruction *ip)
{
- uint8_t *dst_struct = t->structs[ip->mov.dst.struct_id];
- uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->mov.dst.offset];
- uint32_t *dst32_ptr;
- uint16_t *dst16_ptr;
- uint8_t *dst8_ptr;
+ uint8_t *dst = t->structs[ip->mov.dst.struct_id] + ip->mov.dst.offset;
+ uint8_t *src = t->structs[ip->mov.src.struct_id] + ip->mov.src.offset;
- uint8_t *src_struct = t->structs[ip->mov.src.struct_id];
- uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->mov.src.offset];
- uint32_t *src32_ptr;
- uint16_t *src16_ptr;
- uint8_t *src8_ptr;
-
- uint32_t n = ip->mov.dst.n_bits >> 3, i;
+ uint32_t n_dst = ip->mov.dst.n_bits >> 3;
+ uint32_t n_src = ip->mov.src.n_bits >> 3;
- TRACE("[Thread %2u] mov (dma) %u bytes\n", p->thread_id, n);
+ TRACE("[Thread %2u] mov (dma) %u bytes\n", p->thread_id, n_dst);
- /* 8-byte transfers. */
- for (i = 0; i < n >> 3; i++)
- *dst64_ptr++ = *src64_ptr++;
-
- /* 4-byte transfers. */
- n &= 7;
- dst32_ptr = (uint32_t *)dst64_ptr;
- src32_ptr = (uint32_t *)src64_ptr;
+ /* Both dst and src are in NBO format. */
+ if (n_dst > n_src) {
+ uint32_t n_dst_zero = n_dst - n_src;
- for (i = 0; i < n >> 2; i++)
- *dst32_ptr++ = *src32_ptr++;
+ /* Zero padding the most significant bytes in dst. */
+ memset(dst, 0, n_dst_zero);
+ dst += n_dst_zero;
- /* 2-byte transfers. */
- n &= 3;
- dst16_ptr = (uint16_t *)dst32_ptr;
- src16_ptr = (uint16_t *)src32_ptr;
+ /* Copy src to dst. */
+ memcpy(dst, src, n_src);
+ } else {
+ uint32_t n_src_skipped = n_src - n_dst;
- for (i = 0; i < n >> 1; i++)
- *dst16_ptr++ = *src16_ptr++;
-
- /* 1-byte transfer. */
- n &= 1;
- dst8_ptr = (uint8_t *)dst16_ptr;
- src8_ptr = (uint8_t *)src16_ptr;
- if (n)
- *dst8_ptr = *src8_ptr;
+ /* Copy src to dst. */
+ src += n_src_skipped;
+ memcpy(dst, src, n_dst);
+ }
}
static inline void
@@ -2672,6 +2656,25 @@ __instr_mov_128_exec(struct rte_swx_pipeline *p __rte_unused,
dst64_ptr[1] = src64_ptr[1];
}
+static inline void
+__instr_mov_128_32_exec(struct rte_swx_pipeline *p __rte_unused,
+ struct thread *t,
+ const struct instruction *ip)
+{
+ uint8_t *dst = t->structs[ip->mov.dst.struct_id] + ip->mov.dst.offset;
+ uint8_t *src = t->structs[ip->mov.src.struct_id] + ip->mov.src.offset;
+
+ uint32_t *dst32 = (uint32_t *)dst;
+ uint32_t *src32 = (uint32_t *)src;
+
+ TRACE("[Thread %2u] mov (128 <- 32)\n", p->thread_id);
+
+ dst32[0] = 0;
+ dst32[1] = 0;
+ dst32[2] = 0;
+ dst32[3] = src32[0];
+}
+
static inline void
__instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
struct thread *t,