[2/2] app/testeventdev: resolve issues with crypto producer

Message ID 20221103175347.651579-3-vfialko@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Jerin Jacob
Headers
Series app/test-eventdev: crypto producer fixes |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/iol-intel-Functional success Functional Testing PASS
ci/intel-Testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/github-robot: build success github build: passed

Commit Message

Volodymyr Fialko Nov. 3, 2022, 5:53 p.m. UTC
  Resolve issues with crypto producer in configuration with multiple stages.

1) With the symmetric crypto producer and `--fwd_latency` enabled, an
rte_mbuf was treated as a perf_elt, which leads to rte_mbuf header
corruption. Use the rte_mbuf data area to store the timestamp information instead.

2) For the asymmetric crypto producer, the event-type check in
`process_crypto_request` will not pass in configurations with multiple
stages, because the event type is overwritten during event forwarding.
Use the producer type to dispatch instead.

Signed-off-by: Volodymyr Fialko <vfialko@marvell.com>
---
 app/test-eventdev/test_perf_atq.c    |  74 +++++------------
 app/test-eventdev/test_perf_common.c |  48 ++++++++---
 app/test-eventdev/test_perf_common.h | 116 +++++++++++++++++++++------
 app/test-eventdev/test_perf_queue.c  |  81 ++++++-------------
 4 files changed, 173 insertions(+), 146 deletions(-)
  

Comments

Jerin Jacob Nov. 4, 2022, 8:44 a.m. UTC | #1
On Thu, Nov 3, 2022 at 11:24 PM Volodymyr Fialko <vfialko@marvell.com> wrote:
>
> Resolve issues with crypto producer in configuration with multiple stages.
>
> 1) With symmetric crypto producer and enabled `--fwd_latency` we will
> treat rte_mbuf as perf_elt which will lead to rte_mbuf header
> corruption. Use rte_mbuf data to store time stamp information.
>
> 2) For asymmetric crypto producer check for event type in
> `process_crypto_request` will not pass in case of multiple stages, due
> to overwrite of event type during event forward. Use producer type to
> dispatch.

Please split as two patches, each patch for each issue.

>
> Signed-off-by: Volodymyr Fialko <vfialko@marvell.com>
> ---
>  app/test-eventdev/test_perf_atq.c    |  74 +++++------------
>  app/test-eventdev/test_perf_common.c |  48 ++++++++---
>  app/test-eventdev/test_perf_common.h | 116 +++++++++++++++++++++------
>  app/test-eventdev/test_perf_queue.c  |  81 ++++++-------------
>  4 files changed, 173 insertions(+), 146 deletions(-)
>
> diff --git a/app/test-eventdev/test_perf_atq.c b/app/test-eventdev/test_perf_atq.c
> index 8326f54045..9d30081117 100644
> --- a/app/test-eventdev/test_perf_atq.c
> +++ b/app/test-eventdev/test_perf_atq.c
> @@ -14,16 +14,6 @@ atq_nb_event_queues(struct evt_options *opt)
>                 rte_eth_dev_count_avail() : evt_nr_active_lcores(opt->plcores);
>  }
>
> -static __rte_always_inline void
> -atq_mark_fwd_latency(struct rte_event *const ev)
> -{
> -       if (unlikely(ev->sub_event_type == 0)) {
> -               struct perf_elt *const m = ev->event_ptr;
> -
> -               m->timestamp = rte_get_timer_cycles();
> -       }
> -}
> -
>  static __rte_always_inline void
>  atq_fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
>                 const uint8_t nb_stages)
> @@ -37,9 +27,11 @@ atq_fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
>  static int
>  perf_atq_worker(void *arg, const int enable_fwd_latency)
>  {
> +       struct perf_elt *pe = NULL;
>         uint16_t enq = 0, deq = 0;
>         struct rte_event ev;
>         PERF_WORKER_INIT;
> +       uint8_t stage;
>
>         while (t->done == false) {
>                 deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
> @@ -49,35 +41,23 @@ perf_atq_worker(void *arg, const int enable_fwd_latency)
>                         continue;
>                 }
>
> -               if (prod_crypto_type &&
> -                   (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
> -                       struct rte_crypto_op *op = ev.event_ptr;
> -
> -                       if (op->status == RTE_CRYPTO_OP_STATUS_SUCCESS) {
> -                               if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
> -                                       if (op->sym->m_dst == NULL)
> -                                               ev.event_ptr = op->sym->m_src;
> -                                       else
> -                                               ev.event_ptr = op->sym->m_dst;
> -                                       rte_crypto_op_free(op);
> -                               }
> -                       } else {
> -                               rte_crypto_op_free(op);
> +               if (prod_crypto_type && (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
> +                       if (perf_handle_crypto_ev(&ev, &pe, enable_fwd_latency))
>                                 continue;
> -                       }
>                 }
>
> -               if (enable_fwd_latency && !prod_timer_type)
> +               stage = ev.sub_event_type % nb_stages;
> +               if (enable_fwd_latency && !prod_timer_type && stage == 0)
>                 /* first stage in pipeline, mark ts to compute fwd latency */
> -                       atq_mark_fwd_latency(&ev);
> +                       perf_mark_fwd_latency(ev.event_ptr);
>
>                 /* last stage in pipeline */
> -               if (unlikely((ev.sub_event_type % nb_stages) == laststage)) {
> +               if (unlikely(stage == laststage)) {
>                         if (enable_fwd_latency)
> -                               cnt = perf_process_last_stage_latency(pool,
> +                               cnt = perf_process_last_stage_latency(pool, prod_crypto_type,
>                                         &ev, w, bufs, sz, cnt);
>                         else
> -                               cnt = perf_process_last_stage(pool, &ev, w,
> +                               cnt = perf_process_last_stage(pool, prod_crypto_type, &ev, w,
>                                          bufs, sz, cnt);
>                 } else {
>                         atq_fwd_event(&ev, sched_type_list, nb_stages);
> @@ -99,7 +79,9 @@ perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
>         /* +1 to avoid prefetch out of array check */
>         struct rte_event ev[BURST_SIZE + 1];
>         uint16_t enq = 0, nb_rx = 0;
> +       struct perf_elt *pe = NULL;
>         PERF_WORKER_INIT;
> +       uint8_t stage;
>         uint16_t i;
>
>         while (t->done == false) {
> @@ -111,40 +93,26 @@ perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
>                 }
>
>                 for (i = 0; i < nb_rx; i++) {
> -                       if (prod_crypto_type &&
> -                           (ev[i].event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
> -                               struct rte_crypto_op *op = ev[i].event_ptr;
> -
> -                               if (op->status ==
> -                                   RTE_CRYPTO_OP_STATUS_SUCCESS) {
> -                                       if (op->sym->m_dst == NULL)
> -                                               ev[i].event_ptr =
> -                                                       op->sym->m_src;
> -                                       else
> -                                               ev[i].event_ptr =
> -                                                       op->sym->m_dst;
> -                                       rte_crypto_op_free(op);
> -                               } else {
> -                                       rte_crypto_op_free(op);
> +                       if (prod_crypto_type && (ev[i].event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
> +                               if (perf_handle_crypto_ev(&ev[i], &pe, enable_fwd_latency))
>                                         continue;
> -                               }
>                         }
>
> -                       if (enable_fwd_latency && !prod_timer_type) {
> +                       stage = ev[i].sub_event_type % nb_stages;
> +                       if (enable_fwd_latency && !prod_timer_type && stage == 0) {
>                                 rte_prefetch0(ev[i+1].event_ptr);
>                                 /* first stage in pipeline.
>                                  * mark time stamp to compute fwd latency
>                                  */
> -                               atq_mark_fwd_latency(&ev[i]);
> +                               perf_mark_fwd_latency(ev[i].event_ptr);
>                         }
>                         /* last stage in pipeline */
> -                       if (unlikely((ev[i].sub_event_type % nb_stages)
> -                                               == laststage)) {
> +                       if (unlikely(stage == laststage)) {
>                                 if (enable_fwd_latency)
> -                                       cnt = perf_process_last_stage_latency(
> -                                               pool, &ev[i], w, bufs, sz, cnt);
> +                                       cnt = perf_process_last_stage_latency(pool,
> +                                               prod_crypto_type, &ev[i], w, bufs, sz, cnt);
>                                 else
> -                                       cnt = perf_process_last_stage(pool,
> +                                       cnt = perf_process_last_stage(pool, prod_crypto_type,
>                                                 &ev[i], w, bufs, sz, cnt);
>
>                                 ev[i].op = RTE_EVENT_OP_RELEASE;
> diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c
> index 6aae18fddb..6d04a5265c 100644
> --- a/app/test-eventdev/test_perf_common.c
> +++ b/app/test-eventdev/test_perf_common.c
> @@ -370,16 +370,17 @@ crypto_adapter_enq_op_new(struct prod_data *p)
>         uint64_t alloc_failures = 0;
>         uint32_t flow_counter = 0;
>         struct rte_crypto_op *op;
> +       uint16_t len, offset;
>         struct rte_mbuf *m;
>         uint64_t count = 0;
> -       uint16_t len;
>
>         if (opt->verbose_level > 1)
>                 printf("%s(): lcore %d queue %d cdev_id %u cdev_qp_id %u\n",
>                        __func__, rte_lcore_id(), p->queue_id, p->ca.cdev_id,
>                        p->ca.cdev_qp_id);
>
> -       len = opt->mbuf_sz ? opt->mbuf_sz : RTE_ETHER_MIN_LEN;
> +       offset = sizeof(struct perf_elt);
> +       len = RTE_MAX(RTE_ETHER_MIN_LEN + offset, opt->mbuf_sz);
>
>         while (count < nb_pkts && t->done == false) {
>                 if (opt->crypto_op_type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
> @@ -402,19 +403,24 @@ crypto_adapter_enq_op_new(struct prod_data *p)
>                         rte_pktmbuf_append(m, len);
>                         sym_op = op->sym;
>                         sym_op->m_src = m;
> -                       sym_op->cipher.data.offset = 0;
> -                       sym_op->cipher.data.length = len;
> +                       sym_op->cipher.data.offset = offset;
> +                       sym_op->cipher.data.length = len - offset;
>                         rte_crypto_op_attach_sym_session(
>                                 op, p->ca.crypto_sess[flow_counter++ % nb_flows]);
>                 } else {
>                         struct rte_crypto_asym_op *asym_op;
> -                       uint8_t *result = rte_zmalloc(NULL,
> -                                       modex_test_case.result_len, 0);
> +                       uint8_t *result;
> +
> +                       if (rte_mempool_get(pool, (void **)&result)) {
> +                               alloc_failures++;
> +                               continue;
> +                       }
>
>                         op = rte_crypto_op_alloc(t->ca_op_pool,
>                                          RTE_CRYPTO_OP_TYPE_ASYMMETRIC);
>                         if (unlikely(op == NULL)) {
>                                 alloc_failures++;
> +                               rte_mempool_put(pool, result);
>                                 continue;
>                         }
>
> @@ -451,10 +457,10 @@ crypto_adapter_enq_op_fwd(struct prod_data *p)
>         uint64_t alloc_failures = 0;
>         uint32_t flow_counter = 0;
>         struct rte_crypto_op *op;
> +       uint16_t len, offset;
>         struct rte_event ev;
>         struct rte_mbuf *m;
>         uint64_t count = 0;
> -       uint16_t len;
>
>         if (opt->verbose_level > 1)
>                 printf("%s(): lcore %d port %d queue %d cdev_id %u cdev_qp_id %u\n",
> @@ -466,7 +472,9 @@ crypto_adapter_enq_op_fwd(struct prod_data *p)
>         ev.queue_id = p->queue_id;
>         ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
>         ev.event_type = RTE_EVENT_TYPE_CPU;
> -       len = opt->mbuf_sz ? opt->mbuf_sz : RTE_ETHER_MIN_LEN;
> +
> +       offset = sizeof(struct perf_elt);
> +       len = RTE_MAX(RTE_ETHER_MIN_LEN + offset, opt->mbuf_sz);
>
>         while (count < nb_pkts && t->done == false) {
>                 if (opt->crypto_op_type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
> @@ -489,19 +497,24 @@ crypto_adapter_enq_op_fwd(struct prod_data *p)
>                         rte_pktmbuf_append(m, len);
>                         sym_op = op->sym;
>                         sym_op->m_src = m;
> -                       sym_op->cipher.data.offset = 0;
> -                       sym_op->cipher.data.length = len;
> +                       sym_op->cipher.data.offset = offset;
> +                       sym_op->cipher.data.length = len - offset;
>                         rte_crypto_op_attach_sym_session(
>                                 op, p->ca.crypto_sess[flow_counter++ % nb_flows]);
>                 } else {
>                         struct rte_crypto_asym_op *asym_op;
> -                       uint8_t *result = rte_zmalloc(NULL,
> -                                       modex_test_case.result_len, 0);
> +                       uint8_t *result;
> +
> +                       if (rte_mempool_get(pool, (void **)&result)) {
> +                               alloc_failures++;
> +                               continue;
> +                       }
>
>                         op = rte_crypto_op_alloc(t->ca_op_pool,
>                                          RTE_CRYPTO_OP_TYPE_ASYMMETRIC);
>                         if (unlikely(op == NULL)) {
>                                 alloc_failures++;
> +                               rte_mempool_put(pool, result);
>                                 continue;
>                         }
>
> @@ -1360,6 +1373,7 @@ perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt)
>                 return -ENODEV;
>         }
>
> +
>         t->ca_op_pool = rte_crypto_op_pool_create(
>                 "crypto_op_pool", opt->crypto_op_type, opt->pool_sz,
>                 128, sizeof(union rte_event_crypto_metadata),
> @@ -1510,6 +1524,16 @@ perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
>                                 0, NULL, NULL,
>                                 perf_elt_init, /* obj constructor */
>                                 NULL, opt->socket_id, 0); /* flags */
> +       } else if (opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR &&
> +                       opt->crypto_op_type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC)  {
> +               t->pool = rte_mempool_create(test->name, /* mempool name */
> +                               opt->pool_sz, /* number of elements*/
> +                               sizeof(struct perf_elt) + modex_test_case.result_len,
> +                               /* element size*/
> +                               512, /* cache size*/
> +                               0, NULL, NULL,
> +                               NULL, /* obj constructor */
> +                               NULL, opt->socket_id, 0); /* flags */
>         } else {
>                 t->pool = rte_pktmbuf_pool_create(test->name, /* mempool name */
>                                 opt->pool_sz, /* number of elements*/
> diff --git a/app/test-eventdev/test_perf_common.h b/app/test-eventdev/test_perf_common.h
> index d06d52cdf8..503b6aa1db 100644
> --- a/app/test-eventdev/test_perf_common.h
> +++ b/app/test-eventdev/test_perf_common.h
> @@ -107,11 +107,50 @@ struct perf_elt {
>                 printf("%s(): lcore %d dev_id %d port=%d\n", __func__,\
>                                 rte_lcore_id(), dev, port)
>
> +static __rte_always_inline void
> +perf_mark_fwd_latency(struct perf_elt *const pe)
> +{
> +       pe->timestamp = rte_get_timer_cycles();
> +}
> +
> +static __rte_always_inline int
> +perf_handle_crypto_ev(struct rte_event *ev, struct perf_elt **pe, int enable_fwd_latency)
> +{
> +       struct rte_crypto_op *op = ev->event_ptr;
> +       struct rte_mbuf *m;
> +
> +
> +       if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
> +               rte_crypto_op_free(op);
> +               return op->status;
> +       }
> +
> +       /* Forward latency not enabled - perf data will not be accessed */
> +       if (!enable_fwd_latency)
> +               return 0;
> +
> +       /* Get pointer to perf data */
> +       if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
> +               if (op->sym->m_dst == NULL)
> +                       m = op->sym->m_src;
> +               else
> +                       m = op->sym->m_dst;
> +               *pe = rte_pktmbuf_mtod(m, struct perf_elt *);
> +       } else {
> +               *pe = RTE_PTR_ADD(op->asym->modex.result.data, op->asym->modex.result.length);
> +       }
> +
> +       return 0;
> +}
> +
> +
>  static __rte_always_inline int
> -perf_process_last_stage(struct rte_mempool *const pool,
> +perf_process_last_stage(struct rte_mempool *const pool, uint8_t prod_crypto_type,
>                 struct rte_event *const ev, struct worker_data *const w,
>                 void *bufs[], int const buf_sz, uint8_t count)
>  {
> +       void *to_free_in_bulk;
> +
>         /* release fence here ensures event_prt is
>          * stored before updating the number of
>          * processed packets for worker lcores
> @@ -119,30 +158,42 @@ perf_process_last_stage(struct rte_mempool *const pool,
>         rte_atomic_thread_fence(__ATOMIC_RELEASE);
>         w->processed_pkts++;
>
> -       if (ev->event_type == RTE_EVENT_TYPE_CRYPTODEV &&
> -                       ((struct rte_crypto_op *)ev->event_ptr)->type ==
> -                               RTE_CRYPTO_OP_TYPE_ASYMMETRIC) {
> +       if (prod_crypto_type) {
>                 struct rte_crypto_op *op = ev->event_ptr;
> +               struct rte_mbuf *m;
> +
> +               if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
> +                       if (op->sym->m_dst == NULL)
> +                               m = op->sym->m_src;
> +                       else
> +                               m = op->sym->m_dst;
>
> -               rte_free(op->asym->modex.result.data);
> +                       to_free_in_bulk = m;
> +               } else {
> +                       to_free_in_bulk = op->asym->modex.result.data;
> +               }
>                 rte_crypto_op_free(op);
>         } else {
> -               bufs[count++] = ev->event_ptr;
> -               if (unlikely(count == buf_sz)) {
> -                       count = 0;
> -                       rte_mempool_put_bulk(pool, bufs, buf_sz);
> -               }
> +               to_free_in_bulk = ev->event_ptr;
>         }
> +
> +       bufs[count++] = to_free_in_bulk;
> +       if (unlikely(count == buf_sz)) {
> +               count = 0;
> +               rte_mempool_put_bulk(pool, bufs, buf_sz);
> +       }
> +
>         return count;
>  }
>
>  static __rte_always_inline uint8_t
> -perf_process_last_stage_latency(struct rte_mempool *const pool,
> +perf_process_last_stage_latency(struct rte_mempool *const pool, uint8_t prod_crypto_type,
>                 struct rte_event *const ev, struct worker_data *const w,
>                 void *bufs[], int const buf_sz, uint8_t count)
>  {
>         uint64_t latency;
> -       struct perf_elt *const m = ev->event_ptr;
> +       struct perf_elt *pe;
> +       void *to_free_in_bulk;
>
>         /* release fence here ensures event_prt is
>          * stored before updating the number of
> @@ -151,23 +202,38 @@ perf_process_last_stage_latency(struct rte_mempool *const pool,
>         rte_atomic_thread_fence(__ATOMIC_RELEASE);
>         w->processed_pkts++;
>
> -       if (ev->event_type == RTE_EVENT_TYPE_CRYPTODEV &&
> -                       ((struct rte_crypto_op *)m)->type ==
> -                               RTE_CRYPTO_OP_TYPE_ASYMMETRIC) {
> -               rte_free(((struct rte_crypto_op *)m)->asym->modex.result.data);
> -               rte_crypto_op_free((struct rte_crypto_op *)m);
> -       } else {
> -               bufs[count++] = ev->event_ptr;
> -               if (unlikely(count == buf_sz)) {
> -                       count = 0;
> -                       latency = rte_get_timer_cycles() - m->timestamp;
> -                       rte_mempool_put_bulk(pool, bufs, buf_sz);
> +       if (prod_crypto_type) {
> +               struct rte_crypto_op *op = ev->event_ptr;
> +               struct rte_mbuf *m;
> +
> +               if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
> +                       if (op->sym->m_dst == NULL)
> +                               m = op->sym->m_src;
> +                       else
> +                               m = op->sym->m_dst;
> +
> +                       to_free_in_bulk = m;
> +                       pe = rte_pktmbuf_mtod(m, struct perf_elt *);
>                 } else {
> -                       latency = rte_get_timer_cycles() - m->timestamp;
> +                       pe = RTE_PTR_ADD(op->asym->modex.result.data,
> +                                        op->asym->modex.result.length);
> +                       to_free_in_bulk = op->asym->modex.result.data;
>                 }
> +               rte_crypto_op_free(op);
> +       } else {
> +               pe = ev->event_ptr;
> +               to_free_in_bulk = pe;
> +       }
>
> -               w->latency += latency;
> +       latency = rte_get_timer_cycles() - pe->timestamp;
> +       w->latency += latency;
> +
> +       bufs[count++] = to_free_in_bulk;
> +       if (unlikely(count == buf_sz)) {
> +               count = 0;
> +               rte_mempool_put_bulk(pool, bufs, buf_sz);
>         }
> +
>         return count;
>  }
>
> diff --git a/app/test-eventdev/test_perf_queue.c b/app/test-eventdev/test_perf_queue.c
> index 814ab9f9bd..69ef0ebbac 100644
> --- a/app/test-eventdev/test_perf_queue.c
> +++ b/app/test-eventdev/test_perf_queue.c
> @@ -15,17 +15,6 @@ perf_queue_nb_event_queues(struct evt_options *opt)
>         return nb_prod * opt->nb_stages;
>  }
>
> -static __rte_always_inline void
> -mark_fwd_latency(struct rte_event *const ev,
> -               const uint8_t nb_stages)
> -{
> -       if (unlikely((ev->queue_id % nb_stages) == 0)) {
> -               struct perf_elt *const m = ev->event_ptr;
> -
> -               m->timestamp = rte_get_timer_cycles();
> -       }
> -}
> -
>  static __rte_always_inline void
>  fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
>                 const uint8_t nb_stages)
> @@ -39,9 +28,12 @@ fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
>  static int
>  perf_queue_worker(void *arg, const int enable_fwd_latency)
>  {
> +       struct perf_elt *pe = NULL;
>         uint16_t enq = 0, deq = 0;
>         struct rte_event ev;
>         PERF_WORKER_INIT;
> +       uint8_t stage;
> +
>
>         while (t->done == false) {
>                 deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
> @@ -51,41 +43,30 @@ perf_queue_worker(void *arg, const int enable_fwd_latency)
>                         continue;
>                 }
>
> -               if (prod_crypto_type &&
> -                   (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
> -                       struct rte_crypto_op *op = ev.event_ptr;
> -
> -                       if (op->status == RTE_CRYPTO_OP_STATUS_SUCCESS) {
> -                               if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
> -                                       if (op->sym->m_dst == NULL)
> -                                               ev.event_ptr = op->sym->m_src;
> -                                       else
> -                                               ev.event_ptr = op->sym->m_dst;
> -                                       rte_crypto_op_free(op);
> -                               }
> -                       } else {
> -                               rte_crypto_op_free(op);
> +               if (prod_crypto_type && (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
> +                       if (perf_handle_crypto_ev(&ev, &pe, enable_fwd_latency))
>                                 continue;
> -                       }
> +               } else {
> +                       pe = ev.event_ptr;
>                 }
>
> -               if (enable_fwd_latency && !prod_timer_type)
> +               stage = ev.queue_id % nb_stages;
> +               if (enable_fwd_latency && !prod_timer_type && stage == 0)
>                 /* first q in pipeline, mark timestamp to compute fwd latency */
> -                       mark_fwd_latency(&ev, nb_stages);
> +                       perf_mark_fwd_latency(pe);
>
>                 /* last stage in pipeline */
> -               if (unlikely((ev.queue_id % nb_stages) == laststage)) {
> +               if (unlikely(stage == laststage)) {
>                         if (enable_fwd_latency)
> -                               cnt = perf_process_last_stage_latency(pool,
> +                               cnt = perf_process_last_stage_latency(pool, prod_crypto_type,
>                                         &ev, w, bufs, sz, cnt);
>                         else
> -                               cnt = perf_process_last_stage(pool,
> +                               cnt = perf_process_last_stage(pool, prod_crypto_type,
>                                         &ev, w, bufs, sz, cnt);
>                 } else {
>                         fwd_event(&ev, sched_type_list, nb_stages);
>                         do {
> -                               enq = rte_event_enqueue_burst(dev, port, &ev,
> -                                                             1);
> +                               enq = rte_event_enqueue_burst(dev, port, &ev, 1);
>                         } while (!enq && !t->done);
>                 }
>         }
> @@ -101,7 +82,9 @@ perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
>         /* +1 to avoid prefetch out of array check */
>         struct rte_event ev[BURST_SIZE + 1];
>         uint16_t enq = 0, nb_rx = 0;
> +       struct perf_elt *pe = NULL;
>         PERF_WORKER_INIT;
> +       uint8_t stage;
>         uint16_t i;
>
>         while (t->done == false) {
> @@ -113,40 +96,26 @@ perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
>                 }
>
>                 for (i = 0; i < nb_rx; i++) {
> -                       if (prod_crypto_type &&
> -                           (ev[i].event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
> -                               struct rte_crypto_op *op = ev[i].event_ptr;
> -
> -                               if (op->status ==
> -                                   RTE_CRYPTO_OP_STATUS_SUCCESS) {
> -                                       if (op->sym->m_dst == NULL)
> -                                               ev[i].event_ptr =
> -                                                       op->sym->m_src;
> -                                       else
> -                                               ev[i].event_ptr =
> -                                                       op->sym->m_dst;
> -                                       rte_crypto_op_free(op);
> -                               } else {
> -                                       rte_crypto_op_free(op);
> +                       if (prod_crypto_type && (ev[i].event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
> +                               if (perf_handle_crypto_ev(&ev[i], &pe, enable_fwd_latency))
>                                         continue;
> -                               }
>                         }
>
> -                       if (enable_fwd_latency && !prod_timer_type) {
> +                       stage = ev[i].queue_id % nb_stages;
> +                       if (enable_fwd_latency && !prod_timer_type && stage == 0) {
>                                 rte_prefetch0(ev[i+1].event_ptr);
>                                 /* first queue in pipeline.
>                                  * mark time stamp to compute fwd latency
>                                  */
> -                               mark_fwd_latency(&ev[i], nb_stages);
> +                               perf_mark_fwd_latency(ev[i].event_ptr);
>                         }
>                         /* last stage in pipeline */
> -                       if (unlikely((ev[i].queue_id % nb_stages) ==
> -                                                laststage)) {
> +                       if (unlikely(stage == laststage)) {
>                                 if (enable_fwd_latency)
> -                                       cnt = perf_process_last_stage_latency(
> -                                               pool, &ev[i], w, bufs, sz, cnt);
> +                                       cnt = perf_process_last_stage_latency(pool,
> +                                               prod_crypto_type, &ev[i], w, bufs, sz, cnt);
>                                 else
> -                                       cnt = perf_process_last_stage(pool,
> +                                       cnt = perf_process_last_stage(pool, prod_crypto_type,
>                                                 &ev[i], w, bufs, sz, cnt);
>
>                                 ev[i].op = RTE_EVENT_OP_RELEASE;
> --
> 2.25.1
>
  

Patch

diff --git a/app/test-eventdev/test_perf_atq.c b/app/test-eventdev/test_perf_atq.c
index 8326f54045..9d30081117 100644
--- a/app/test-eventdev/test_perf_atq.c
+++ b/app/test-eventdev/test_perf_atq.c
@@ -14,16 +14,6 @@  atq_nb_event_queues(struct evt_options *opt)
 		rte_eth_dev_count_avail() : evt_nr_active_lcores(opt->plcores);
 }
 
-static __rte_always_inline void
-atq_mark_fwd_latency(struct rte_event *const ev)
-{
-	if (unlikely(ev->sub_event_type == 0)) {
-		struct perf_elt *const m = ev->event_ptr;
-
-		m->timestamp = rte_get_timer_cycles();
-	}
-}
-
 static __rte_always_inline void
 atq_fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
 		const uint8_t nb_stages)
@@ -37,9 +27,11 @@  atq_fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
 static int
 perf_atq_worker(void *arg, const int enable_fwd_latency)
 {
+	struct perf_elt *pe = NULL;
 	uint16_t enq = 0, deq = 0;
 	struct rte_event ev;
 	PERF_WORKER_INIT;
+	uint8_t stage;
 
 	while (t->done == false) {
 		deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
@@ -49,35 +41,23 @@  perf_atq_worker(void *arg, const int enable_fwd_latency)
 			continue;
 		}
 
-		if (prod_crypto_type &&
-		    (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
-			struct rte_crypto_op *op = ev.event_ptr;
-
-			if (op->status == RTE_CRYPTO_OP_STATUS_SUCCESS) {
-				if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
-					if (op->sym->m_dst == NULL)
-						ev.event_ptr = op->sym->m_src;
-					else
-						ev.event_ptr = op->sym->m_dst;
-					rte_crypto_op_free(op);
-				}
-			} else {
-				rte_crypto_op_free(op);
+		if (prod_crypto_type && (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
+			if (perf_handle_crypto_ev(&ev, &pe, enable_fwd_latency))
 				continue;
-			}
 		}
 
-		if (enable_fwd_latency && !prod_timer_type)
+		stage = ev.sub_event_type % nb_stages;
+		if (enable_fwd_latency && !prod_timer_type && stage == 0)
 		/* first stage in pipeline, mark ts to compute fwd latency */
-			atq_mark_fwd_latency(&ev);
+			perf_mark_fwd_latency(ev.event_ptr);
 
 		/* last stage in pipeline */
-		if (unlikely((ev.sub_event_type % nb_stages) == laststage)) {
+		if (unlikely(stage == laststage)) {
 			if (enable_fwd_latency)
-				cnt = perf_process_last_stage_latency(pool,
+				cnt = perf_process_last_stage_latency(pool, prod_crypto_type,
 					&ev, w, bufs, sz, cnt);
 			else
-				cnt = perf_process_last_stage(pool, &ev, w,
+				cnt = perf_process_last_stage(pool, prod_crypto_type, &ev, w,
 					 bufs, sz, cnt);
 		} else {
 			atq_fwd_event(&ev, sched_type_list, nb_stages);
@@ -99,7 +79,9 @@  perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
 	/* +1 to avoid prefetch out of array check */
 	struct rte_event ev[BURST_SIZE + 1];
 	uint16_t enq = 0, nb_rx = 0;
+	struct perf_elt *pe = NULL;
 	PERF_WORKER_INIT;
+	uint8_t stage;
 	uint16_t i;
 
 	while (t->done == false) {
@@ -111,40 +93,26 @@  perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
 		}
 
 		for (i = 0; i < nb_rx; i++) {
-			if (prod_crypto_type &&
-			    (ev[i].event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
-				struct rte_crypto_op *op = ev[i].event_ptr;
-
-				if (op->status ==
-				    RTE_CRYPTO_OP_STATUS_SUCCESS) {
-					if (op->sym->m_dst == NULL)
-						ev[i].event_ptr =
-							op->sym->m_src;
-					else
-						ev[i].event_ptr =
-							op->sym->m_dst;
-					rte_crypto_op_free(op);
-				} else {
-					rte_crypto_op_free(op);
+			if (prod_crypto_type && (ev[i].event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
+				if (perf_handle_crypto_ev(&ev[i], &pe, enable_fwd_latency))
 					continue;
-				}
 			}
 
-			if (enable_fwd_latency && !prod_timer_type) {
+			stage = ev[i].sub_event_type % nb_stages;
+			if (enable_fwd_latency && !prod_timer_type && stage == 0) {
 				rte_prefetch0(ev[i+1].event_ptr);
 				/* first stage in pipeline.
 				 * mark time stamp to compute fwd latency
 				 */
-				atq_mark_fwd_latency(&ev[i]);
+				perf_mark_fwd_latency(ev[i].event_ptr);
 			}
 			/* last stage in pipeline */
-			if (unlikely((ev[i].sub_event_type % nb_stages)
-						== laststage)) {
+			if (unlikely(stage == laststage)) {
 				if (enable_fwd_latency)
-					cnt = perf_process_last_stage_latency(
-						pool, &ev[i], w, bufs, sz, cnt);
+					cnt = perf_process_last_stage_latency(pool,
+						prod_crypto_type, &ev[i], w, bufs, sz, cnt);
 				else
-					cnt = perf_process_last_stage(pool,
+					cnt = perf_process_last_stage(pool, prod_crypto_type,
 						&ev[i], w, bufs, sz, cnt);
 
 				ev[i].op = RTE_EVENT_OP_RELEASE;
diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c
index 6aae18fddb..6d04a5265c 100644
--- a/app/test-eventdev/test_perf_common.c
+++ b/app/test-eventdev/test_perf_common.c
@@ -370,16 +370,17 @@  crypto_adapter_enq_op_new(struct prod_data *p)
 	uint64_t alloc_failures = 0;
 	uint32_t flow_counter = 0;
 	struct rte_crypto_op *op;
+	uint16_t len, offset;
 	struct rte_mbuf *m;
 	uint64_t count = 0;
-	uint16_t len;
 
 	if (opt->verbose_level > 1)
 		printf("%s(): lcore %d queue %d cdev_id %u cdev_qp_id %u\n",
 		       __func__, rte_lcore_id(), p->queue_id, p->ca.cdev_id,
 		       p->ca.cdev_qp_id);
 
-	len = opt->mbuf_sz ? opt->mbuf_sz : RTE_ETHER_MIN_LEN;
+	offset = sizeof(struct perf_elt);
+	len = RTE_MAX(RTE_ETHER_MIN_LEN + offset, opt->mbuf_sz);
 
 	while (count < nb_pkts && t->done == false) {
 		if (opt->crypto_op_type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
@@ -402,19 +403,24 @@  crypto_adapter_enq_op_new(struct prod_data *p)
 			rte_pktmbuf_append(m, len);
 			sym_op = op->sym;
 			sym_op->m_src = m;
-			sym_op->cipher.data.offset = 0;
-			sym_op->cipher.data.length = len;
+			sym_op->cipher.data.offset = offset;
+			sym_op->cipher.data.length = len - offset;
 			rte_crypto_op_attach_sym_session(
 				op, p->ca.crypto_sess[flow_counter++ % nb_flows]);
 		} else {
 			struct rte_crypto_asym_op *asym_op;
-			uint8_t *result = rte_zmalloc(NULL,
-					modex_test_case.result_len, 0);
+			uint8_t *result;
+
+			if (rte_mempool_get(pool, (void **)&result)) {
+				alloc_failures++;
+				continue;
+			}
 
 			op = rte_crypto_op_alloc(t->ca_op_pool,
 					 RTE_CRYPTO_OP_TYPE_ASYMMETRIC);
 			if (unlikely(op == NULL)) {
 				alloc_failures++;
+				rte_mempool_put(pool, result);
 				continue;
 			}
 
@@ -451,10 +457,10 @@  crypto_adapter_enq_op_fwd(struct prod_data *p)
 	uint64_t alloc_failures = 0;
 	uint32_t flow_counter = 0;
 	struct rte_crypto_op *op;
+	uint16_t len, offset;
 	struct rte_event ev;
 	struct rte_mbuf *m;
 	uint64_t count = 0;
-	uint16_t len;
 
 	if (opt->verbose_level > 1)
 		printf("%s(): lcore %d port %d queue %d cdev_id %u cdev_qp_id %u\n",
@@ -466,7 +472,9 @@  crypto_adapter_enq_op_fwd(struct prod_data *p)
 	ev.queue_id = p->queue_id;
 	ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
 	ev.event_type = RTE_EVENT_TYPE_CPU;
-	len = opt->mbuf_sz ? opt->mbuf_sz : RTE_ETHER_MIN_LEN;
+
+	offset = sizeof(struct perf_elt);
+	len = RTE_MAX(RTE_ETHER_MIN_LEN + offset, opt->mbuf_sz);
 
 	while (count < nb_pkts && t->done == false) {
 		if (opt->crypto_op_type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
@@ -489,19 +497,24 @@  crypto_adapter_enq_op_fwd(struct prod_data *p)
 			rte_pktmbuf_append(m, len);
 			sym_op = op->sym;
 			sym_op->m_src = m;
-			sym_op->cipher.data.offset = 0;
-			sym_op->cipher.data.length = len;
+			sym_op->cipher.data.offset = offset;
+			sym_op->cipher.data.length = len - offset;
 			rte_crypto_op_attach_sym_session(
 				op, p->ca.crypto_sess[flow_counter++ % nb_flows]);
 		} else {
 			struct rte_crypto_asym_op *asym_op;
-			uint8_t *result = rte_zmalloc(NULL,
-					modex_test_case.result_len, 0);
+			uint8_t *result;
+
+			if (rte_mempool_get(pool, (void **)&result)) {
+				alloc_failures++;
+				continue;
+			}
 
 			op = rte_crypto_op_alloc(t->ca_op_pool,
 					 RTE_CRYPTO_OP_TYPE_ASYMMETRIC);
 			if (unlikely(op == NULL)) {
 				alloc_failures++;
+				rte_mempool_put(pool, result);
 				continue;
 			}
 
@@ -1360,6 +1373,7 @@  perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt)
 		return -ENODEV;
 	}
 
+
 	t->ca_op_pool = rte_crypto_op_pool_create(
 		"crypto_op_pool", opt->crypto_op_type, opt->pool_sz,
 		128, sizeof(union rte_event_crypto_metadata),
@@ -1510,6 +1524,16 @@  perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
 				0, NULL, NULL,
 				perf_elt_init, /* obj constructor */
 				NULL, opt->socket_id, 0); /* flags */
+	} else if (opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR &&
+			opt->crypto_op_type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC)  {
+		t->pool = rte_mempool_create(test->name, /* mempool name */
+				opt->pool_sz, /* number of elements*/
+				sizeof(struct perf_elt) + modex_test_case.result_len,
+				/* element size*/
+				512, /* cache size*/
+				0, NULL, NULL,
+				NULL, /* obj constructor */
+				NULL, opt->socket_id, 0); /* flags */
 	} else {
 		t->pool = rte_pktmbuf_pool_create(test->name, /* mempool name */
 				opt->pool_sz, /* number of elements*/
diff --git a/app/test-eventdev/test_perf_common.h b/app/test-eventdev/test_perf_common.h
index d06d52cdf8..503b6aa1db 100644
--- a/app/test-eventdev/test_perf_common.h
+++ b/app/test-eventdev/test_perf_common.h
@@ -107,11 +107,50 @@  struct perf_elt {
 		printf("%s(): lcore %d dev_id %d port=%d\n", __func__,\
 				rte_lcore_id(), dev, port)
 
+static __rte_always_inline void
+perf_mark_fwd_latency(struct perf_elt *const pe)
+{
+	pe->timestamp = rte_get_timer_cycles();
+}
+
+static __rte_always_inline int
+perf_handle_crypto_ev(struct rte_event *ev, struct perf_elt **pe, int enable_fwd_latency)
+{
+	struct rte_crypto_op *op = ev->event_ptr;
+	struct rte_mbuf *m;
+
+	if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
+		int ret = op->status; /* save status before free: op is invalid after */
+		rte_crypto_op_free(op);
+		return ret;
+	}
+
+	/* Forward latency not enabled - perf data will not be accessed */
+	if (!enable_fwd_latency)
+		return 0;
+
+	/* Get pointer to perf data */
+	if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
+		if (op->sym->m_dst == NULL)
+			m = op->sym->m_src;
+		else
+			m = op->sym->m_dst;
+		*pe = rte_pktmbuf_mtod(m, struct perf_elt *);
+	} else {
+		*pe = RTE_PTR_ADD(op->asym->modex.result.data, op->asym->modex.result.length);
+	}
+
+	return 0;
+}
+
+
 static __rte_always_inline int
-perf_process_last_stage(struct rte_mempool *const pool,
+perf_process_last_stage(struct rte_mempool *const pool, uint8_t prod_crypto_type,
 		struct rte_event *const ev, struct worker_data *const w,
 		void *bufs[], int const buf_sz, uint8_t count)
 {
+	void *to_free_in_bulk;
+
 	/* release fence here ensures event_prt is
 	 * stored before updating the number of
 	 * processed packets for worker lcores
@@ -119,30 +158,42 @@  perf_process_last_stage(struct rte_mempool *const pool,
 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
 	w->processed_pkts++;
 
-	if (ev->event_type == RTE_EVENT_TYPE_CRYPTODEV &&
-			((struct rte_crypto_op *)ev->event_ptr)->type ==
-				RTE_CRYPTO_OP_TYPE_ASYMMETRIC) {
+	if (prod_crypto_type) {
 		struct rte_crypto_op *op = ev->event_ptr;
+		struct rte_mbuf *m;
+
+		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
+			if (op->sym->m_dst == NULL)
+				m = op->sym->m_src;
+			else
+				m = op->sym->m_dst;
 
-		rte_free(op->asym->modex.result.data);
+			to_free_in_bulk = m;
+		} else {
+			to_free_in_bulk = op->asym->modex.result.data;
+		}
 		rte_crypto_op_free(op);
 	} else {
-		bufs[count++] = ev->event_ptr;
-		if (unlikely(count == buf_sz)) {
-			count = 0;
-			rte_mempool_put_bulk(pool, bufs, buf_sz);
-		}
+		to_free_in_bulk = ev->event_ptr;
 	}
+
+	bufs[count++] = to_free_in_bulk;
+	if (unlikely(count == buf_sz)) {
+		count = 0;
+		rte_mempool_put_bulk(pool, bufs, buf_sz);
+	}
+
 	return count;
 }
 
 static __rte_always_inline uint8_t
-perf_process_last_stage_latency(struct rte_mempool *const pool,
+perf_process_last_stage_latency(struct rte_mempool *const pool, uint8_t prod_crypto_type,
 		struct rte_event *const ev, struct worker_data *const w,
 		void *bufs[], int const buf_sz, uint8_t count)
 {
 	uint64_t latency;
-	struct perf_elt *const m = ev->event_ptr;
+	struct perf_elt *pe;
+	void *to_free_in_bulk;
 
 	/* release fence here ensures event_prt is
 	 * stored before updating the number of
@@ -151,23 +202,38 @@  perf_process_last_stage_latency(struct rte_mempool *const pool,
 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
 	w->processed_pkts++;
 
-	if (ev->event_type == RTE_EVENT_TYPE_CRYPTODEV &&
-			((struct rte_crypto_op *)m)->type ==
-				RTE_CRYPTO_OP_TYPE_ASYMMETRIC) {
-		rte_free(((struct rte_crypto_op *)m)->asym->modex.result.data);
-		rte_crypto_op_free((struct rte_crypto_op *)m);
-	} else {
-		bufs[count++] = ev->event_ptr;
-		if (unlikely(count == buf_sz)) {
-			count = 0;
-			latency = rte_get_timer_cycles() - m->timestamp;
-			rte_mempool_put_bulk(pool, bufs, buf_sz);
+	if (prod_crypto_type) {
+		struct rte_crypto_op *op = ev->event_ptr;
+		struct rte_mbuf *m;
+
+		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
+			if (op->sym->m_dst == NULL)
+				m = op->sym->m_src;
+			else
+				m = op->sym->m_dst;
+
+			to_free_in_bulk = m;
+			pe = rte_pktmbuf_mtod(m, struct perf_elt *);
 		} else {
-			latency = rte_get_timer_cycles() - m->timestamp;
+			pe = RTE_PTR_ADD(op->asym->modex.result.data,
+					 op->asym->modex.result.length);
+			to_free_in_bulk = op->asym->modex.result.data;
 		}
+		rte_crypto_op_free(op);
+	} else {
+		pe = ev->event_ptr;
+		to_free_in_bulk = pe;
+	}
 
-		w->latency += latency;
+	latency = rte_get_timer_cycles() - pe->timestamp;
+	w->latency += latency;
+
+	bufs[count++] = to_free_in_bulk;
+	if (unlikely(count == buf_sz)) {
+		count = 0;
+		rte_mempool_put_bulk(pool, bufs, buf_sz);
 	}
+
 	return count;
 }
 
diff --git a/app/test-eventdev/test_perf_queue.c b/app/test-eventdev/test_perf_queue.c
index 814ab9f9bd..69ef0ebbac 100644
--- a/app/test-eventdev/test_perf_queue.c
+++ b/app/test-eventdev/test_perf_queue.c
@@ -15,17 +15,6 @@  perf_queue_nb_event_queues(struct evt_options *opt)
 	return nb_prod * opt->nb_stages;
 }
 
-static __rte_always_inline void
-mark_fwd_latency(struct rte_event *const ev,
-		const uint8_t nb_stages)
-{
-	if (unlikely((ev->queue_id % nb_stages) == 0)) {
-		struct perf_elt *const m = ev->event_ptr;
-
-		m->timestamp = rte_get_timer_cycles();
-	}
-}
-
 static __rte_always_inline void
 fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
 		const uint8_t nb_stages)
@@ -39,9 +28,12 @@  fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
 static int
 perf_queue_worker(void *arg, const int enable_fwd_latency)
 {
+	struct perf_elt *pe = NULL;
 	uint16_t enq = 0, deq = 0;
 	struct rte_event ev;
 	PERF_WORKER_INIT;
+	uint8_t stage;
+
 
 	while (t->done == false) {
 		deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
@@ -51,41 +43,30 @@  perf_queue_worker(void *arg, const int enable_fwd_latency)
 			continue;
 		}
 
-		if (prod_crypto_type &&
-		    (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
-			struct rte_crypto_op *op = ev.event_ptr;
-
-			if (op->status == RTE_CRYPTO_OP_STATUS_SUCCESS) {
-				if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
-					if (op->sym->m_dst == NULL)
-						ev.event_ptr = op->sym->m_src;
-					else
-						ev.event_ptr = op->sym->m_dst;
-					rte_crypto_op_free(op);
-				}
-			} else {
-				rte_crypto_op_free(op);
+		if (prod_crypto_type && (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
+			if (perf_handle_crypto_ev(&ev, &pe, enable_fwd_latency))
 				continue;
-			}
+		} else {
+			pe = ev.event_ptr;
 		}
 
-		if (enable_fwd_latency && !prod_timer_type)
+		stage = ev.queue_id % nb_stages;
+		if (enable_fwd_latency && !prod_timer_type && stage == 0)
 		/* first q in pipeline, mark timestamp to compute fwd latency */
-			mark_fwd_latency(&ev, nb_stages);
+			perf_mark_fwd_latency(pe);
 
 		/* last stage in pipeline */
-		if (unlikely((ev.queue_id % nb_stages) == laststage)) {
+		if (unlikely(stage == laststage)) {
 			if (enable_fwd_latency)
-				cnt = perf_process_last_stage_latency(pool,
+				cnt = perf_process_last_stage_latency(pool, prod_crypto_type,
 					&ev, w, bufs, sz, cnt);
 			else
-				cnt = perf_process_last_stage(pool,
+				cnt = perf_process_last_stage(pool, prod_crypto_type,
 					&ev, w, bufs, sz, cnt);
 		} else {
 			fwd_event(&ev, sched_type_list, nb_stages);
 			do {
-				enq = rte_event_enqueue_burst(dev, port, &ev,
-							      1);
+				enq = rte_event_enqueue_burst(dev, port, &ev, 1);
 			} while (!enq && !t->done);
 		}
 	}
@@ -101,7 +82,9 @@  perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
 	/* +1 to avoid prefetch out of array check */
 	struct rte_event ev[BURST_SIZE + 1];
 	uint16_t enq = 0, nb_rx = 0;
+	struct perf_elt *pe = NULL;
 	PERF_WORKER_INIT;
+	uint8_t stage;
 	uint16_t i;
 
 	while (t->done == false) {
@@ -113,40 +96,26 @@  perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
 		}
 
 		for (i = 0; i < nb_rx; i++) {
-			if (prod_crypto_type &&
-			    (ev[i].event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
-				struct rte_crypto_op *op = ev[i].event_ptr;
-
-				if (op->status ==
-				    RTE_CRYPTO_OP_STATUS_SUCCESS) {
-					if (op->sym->m_dst == NULL)
-						ev[i].event_ptr =
-							op->sym->m_src;
-					else
-						ev[i].event_ptr =
-							op->sym->m_dst;
-					rte_crypto_op_free(op);
-				} else {
-					rte_crypto_op_free(op);
+			if (prod_crypto_type && (ev[i].event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
+				if (perf_handle_crypto_ev(&ev[i], &pe, enable_fwd_latency))
 					continue;
-				}
 			}
 
-			if (enable_fwd_latency && !prod_timer_type) {
+			stage = ev[i].queue_id % nb_stages;
+			if (enable_fwd_latency && !prod_timer_type && stage == 0) {
 				rte_prefetch0(ev[i+1].event_ptr);
 				/* first queue in pipeline.
 				 * mark time stamp to compute fwd latency
 				 */
-				mark_fwd_latency(&ev[i], nb_stages);
+				perf_mark_fwd_latency(ev[i].event_ptr);
 			}
 			/* last stage in pipeline */
-			if (unlikely((ev[i].queue_id % nb_stages) ==
-						 laststage)) {
+			if (unlikely(stage == laststage)) {
 				if (enable_fwd_latency)
-					cnt = perf_process_last_stage_latency(
-						pool, &ev[i], w, bufs, sz, cnt);
+					cnt = perf_process_last_stage_latency(pool,
+						prod_crypto_type, &ev[i], w, bufs, sz, cnt);
 				else
-					cnt = perf_process_last_stage(pool,
+					cnt = perf_process_last_stage(pool, prod_crypto_type,
 						&ev[i], w, bufs, sz, cnt);
 
 				ev[i].op = RTE_EVENT_OP_RELEASE;