@@ -41,6 +41,7 @@ perf_atq_worker(void *arg, const int enable_fwd_latency)
struct rte_event ev;
PERF_WORKER_INIT;
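+ /* "pe" is declared by PERF_WORKER_INIT but is not referenced on this path. */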
+ RTE_SET_USED(pe);
while (t->done == false) {
deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
@@ -51,14 +52,14 @@ perf_atq_worker(void *arg, const int enable_fwd_latency)
if ((prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) &&
(ev.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
- if (perf_handle_crypto_ev(&ev, &pe, enable_fwd_latency))
+ if (perf_handle_crypto_ev(&ev))
continue;
}
stage = ev.sub_event_type % nb_stages;
if (enable_fwd_latency && !prod_timer_type && stage == 0)
- /* first stage in pipeline, mark ts to compute fwd latency */
- perf_mark_fwd_latency(ev.event_ptr);
+ /* first stage in pipeline, mark ts to compute fwd latency */
+ perf_mark_fwd_latency(prod_type, &ev);
/* last stage in pipeline */
if (unlikely(stage == laststage)) {
@@ -91,6 +92,7 @@ perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
PERF_WORKER_INIT;
uint16_t i;
+ RTE_SET_USED(pe);
while (t->done == false) {
nb_rx = rte_event_dequeue_burst(dev, port, ev, BURST_SIZE, 0);
@@ -102,7 +104,7 @@ perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
for (i = 0; i < nb_rx; i++) {
if ((prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) &&
(ev[i].event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
- if (perf_handle_crypto_ev(&ev[i], &pe, enable_fwd_latency))
+ if (perf_handle_crypto_ev(&ev[i]))
continue;
}
@@ -112,7 +114,7 @@ perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
/* first stage in pipeline.
* mark time stamp to compute fwd latency
*/
- perf_mark_fwd_latency(ev[i].event_ptr);
+ perf_mark_fwd_latency(prod_type, &ev[i]);
}
/* last stage in pipeline */
if (unlikely(stage == laststage)) {
@@ -167,7 +169,7 @@ perf_atq_worker_vector(void *arg, const int enable_fwd_latency)
stage = ev.sub_event_type % nb_stages;
/* First q in pipeline, mark timestamp to compute fwd latency */
if (enable_fwd_latency && !prod_timer_type && stage == 0)
- perf_mark_fwd_latency(pe);
+ pe->timestamp = rte_get_timer_cycles();
/* Last stage in pipeline */
if (unlikely(stage == laststage)) {
@@ -562,37 +562,76 @@ crypto_adapter_enq_op_fwd(struct prod_data *p)
static inline void
dma_adapter_enq_op_fwd(struct prod_data *p)
{
+ struct rte_event_dma_adapter_op *ops[BURST_SIZE] = {NULL};
struct test_perf *t = p->t;
const uint32_t nb_flows = t->nb_flows;
const uint64_t nb_pkts = t->nb_pkts;
- struct rte_event_dma_adapter_op *op;
+ struct rte_event_dma_adapter_op op;
+ struct rte_event evts[BURST_SIZE];
const uint8_t dev_id = p->dev_id;
struct evt_options *opt = t->opt;
const uint8_t port = p->port_id;
uint32_t flow_counter = 0;
+ struct rte_mempool *pool;
struct rte_event ev;
+ uint8_t *src, *dst;
uint64_t count = 0;
+ uint32_t flow;
+ int i;
+ pool = t->pool;
if (opt->verbose_level > 1)
printf("%s(): lcore %d port %d queue %d dma_dev_id %u dma_dev_vchan_id %u\n",
__func__, rte_lcore_id(), port, p->queue_id,
p->da.dma_dev_id, p->da.vchan_id);
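+ /* Per-flow scratch buffers, one cache line each, used as the DMA copy source and destination. */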
+ src = rte_zmalloc(NULL, nb_flows * RTE_CACHE_LINE_SIZE, RTE_CACHE_LINE_SIZE);
+ dst = rte_zmalloc(NULL, nb_flows * RTE_CACHE_LINE_SIZE, RTE_CACHE_LINE_SIZE);
+ if (!src || !dst) {
+ rte_free(src);
+ rte_free(dst);
+ evt_err("Failed to alloc memory for src/dst");
+ return;
+ }
+
ev.event = 0;
ev.op = RTE_EVENT_OP_NEW;
ev.queue_id = p->queue_id;
ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
ev.event_type = RTE_EVENT_TYPE_CPU;
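+ /* Template op: fields common to every copy; per-flow addresses are filled in the loop below. */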
+ op.dma_dev_id = p->da.dma_dev_id;
+ op.vchan = p->da.vchan_id;
+ op.op_mp = pool;
+ op.flags = RTE_DMA_OP_FLAG_SUBMIT;
+ op.nb_src = 1;
+ op.nb_dst = 1;
+
while (count < nb_pkts && t->done == false) {
- op = p->da.dma_op[flow_counter++ % nb_flows];
- ev.event_ptr = op;
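+ /* Draw a full burst of ops from the op mempool; retry when it is temporarily empty. */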
+ if (rte_mempool_get_bulk(pool, (void **)ops, BURST_SIZE) < 0)
+ continue;
+ for (i = 0; i < BURST_SIZE; i++) {
+ flow = flow_counter++ % nb_flows;
+ *ops[i] = op;
+ ops[i]->src_dst_seg[0].addr = (rte_iova_t)&src[flow * RTE_CACHE_LINE_SIZE];
+ ops[i]->src_dst_seg[1].addr = (rte_iova_t)&dst[flow * RTE_CACHE_LINE_SIZE];
+ ops[i]->src_dst_seg[0].length = RTE_CACHE_LINE_SIZE;
+ ops[i]->src_dst_seg[1].length = RTE_CACHE_LINE_SIZE;
+
+ evts[i].event = ev.event;
+ evts[i].flow_id = flow;
+ evts[i].event_ptr = ops[i];
+ }
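+ /* Enqueue the burst; retry the remainder until it is accepted or the test stops. */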
- while (rte_event_dma_adapter_enqueue(dev_id, port, &ev, 1) != 1 &&
- t->done == false)
+ i = rte_event_dma_adapter_enqueue(dev_id, port, evts, BURST_SIZE);
+ while (i < BURST_SIZE) {
+ i += rte_event_dma_adapter_enqueue(dev_id, port, evts + i, BURST_SIZE - i);
+ if (t->done)
+ break;
rte_pause();
+ }
- count++;
+ count += BURST_SIZE;
}
+
+ rte_free(src);
+ rte_free(dst);
}
@@ -1489,8 +1528,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
}
} else if (opt->prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
struct rte_event_port_conf conf = *port_conf;
- struct rte_event_dma_adapter_op *op;
- struct rte_mempool *pool = t->pool;
uint8_t dma_dev_id = 0;
uint16_t vchan_id = 0;
@@ -1503,39 +1540,18 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
prod = 0;
for (; port < perf_nb_event_ports(opt); port++) {
struct prod_data *p = &t->prod[port];
- uint32_t flow_id;
p->dev_id = opt->dev_id;
p->port_id = port;
p->queue_id = prod * stride;
p->da.dma_dev_id = dma_dev_id;
p->da.vchan_id = vchan_id;
- p->da.dma_op = rte_zmalloc_socket(NULL, sizeof(void *) * t->nb_flows,
- RTE_CACHE_LINE_SIZE, opt->socket_id);
-
p->t = t;
ret = perf_event_dma_adapter_setup(t, p);
if (ret)
return ret;
- for (flow_id = 0; flow_id < t->nb_flows; flow_id++) {
- rte_mempool_get(t->da_op_pool, (void **)&op);
-
- op->src_dst_seg[0].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
- op->src_dst_seg[1].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
- op->src_dst_seg[0].length = 1024;
- op->src_dst_seg[1].length = 1024;
- op->nb_src = 1;
- op->nb_dst = 1;
- op->flags = RTE_DMA_OP_FLAG_SUBMIT;
- op->op_mp = t->da_op_pool;
- op->dma_dev_id = dma_dev_id;
- op->vchan = vchan_id;
-
- p->da.dma_op[flow_id] = op;
- }
-
conf.event_port_cfg |=
RTE_EVENT_PORT_CFG_HINT_PRODUCER |
RTE_EVENT_PORT_CFG_HINT_CONSUMER;
@@ -2011,12 +2027,11 @@ perf_dmadev_setup(struct evt_test *test, struct evt_options *opt)
.direction = RTE_DMA_DIR_MEM_TO_MEM,
.nb_desc = 1024,
};
- struct test_perf *t = evt_test_priv(test);
uint8_t dma_dev_count, dma_dev_id = 0;
- unsigned int elt_size;
int vchan_id;
int ret;
+ RTE_SET_USED(test);
if (opt->prod_type != EVT_PROD_TYPE_EVENT_DMA_ADPTR)
return 0;
@@ -2026,14 +2041,6 @@ perf_dmadev_setup(struct evt_test *test, struct evt_options *opt)
return -ENODEV;
}
- elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2);
- t->da_op_pool = rte_mempool_create("dma_op_pool", opt->pool_sz, elt_size, 256,
- 0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
- if (t->da_op_pool == NULL) {
- evt_err("Failed to create dma op pool");
- return -ENOMEM;
- }
-
ret = rte_dma_configure(dma_dev_id, &conf);
if (ret) {
evt_err("Failed to configure dma dev (%u)", dma_dev_id);
@@ -2052,7 +2059,6 @@ perf_dmadev_setup(struct evt_test *test, struct evt_options *opt)
return 0;
err:
rte_dma_close(dma_dev_id);
- rte_mempool_free(t->da_op_pool);
return ret;
}
@@ -2069,16 +2075,6 @@ perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt)
for (port = t->nb_workers; port < perf_nb_event_ports(opt); port++) {
struct prod_data *p = &t->prod[port];
- struct rte_event_dma_adapter_op *op;
- uint32_t flow_id;
-
- for (flow_id = 0; flow_id < t->nb_flows; flow_id++) {
- op = p->da.dma_op[flow_id];
-
- rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[0].addr);
- rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[1].addr);
- rte_mempool_put(op->op_mp, op);
- }
rte_event_dma_adapter_vchan_del(TEST_PERF_DA_ID, p->da.dma_dev_id, p->da.vchan_id);
}
@@ -2087,8 +2083,6 @@ perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt)
rte_dma_stop(dma_dev_id);
rte_dma_close(dma_dev_id);
-
- rte_mempool_free(t->da_op_pool);
}
int
@@ -2117,6 +2111,14 @@ perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
0, NULL, NULL,
NULL, /* obj constructor */
NULL, opt->socket_id, 0); /* flags */
+ } else if (opt->prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
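+ /* DMA producers allocate rte_event_dma_adapter_op elements (op plus two SG entries) from this pool. */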
+ t->pool = rte_mempool_create(test->name, /* mempool name */
+ opt->pool_sz, /* number of elements*/
+ sizeof(struct rte_event_dma_adapter_op) +
+ (sizeof(struct rte_dma_sge) * 2),
+ cache_sz, /* cache size*/
+ 0, NULL, NULL, NULL, /* obj constructor */
+ NULL, opt->socket_id, 0); /* flags */
} else {
t->pool = rte_pktmbuf_pool_create(test->name, /* mempool name */
opt->pool_sz, /* number of elements*/
@@ -48,7 +48,6 @@ struct crypto_adptr_data {
struct dma_adptr_data {
uint8_t dma_dev_id;
uint16_t vchan_id;
- void **dma_op;
};
struct __rte_cache_aligned prod_data {
@@ -81,7 +80,6 @@ struct __rte_cache_aligned test_perf {
struct rte_mempool *ca_sess_pool;
struct rte_mempool *ca_asym_sess_pool;
struct rte_mempool *ca_vector_pool;
- struct rte_mempool *da_op_pool;
};
struct __rte_cache_aligned perf_elt {
@@ -120,38 +118,46 @@ struct __rte_cache_aligned perf_elt {
rte_lcore_id(), dev, port)
static __rte_always_inline void
-perf_mark_fwd_latency(struct perf_elt *const pe)
+perf_mark_fwd_latency(enum evt_prod_type prod_type, struct rte_event *const ev)
{
- pe->timestamp = rte_get_timer_cycles();
+ struct perf_elt *pe;
+
+ if (prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
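+ /* For crypto events the perf_elt sits at the start of the mbuf data area
+  * (symmetric ops) or right after the modexp result (asymmetric ops).
+  */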
+ struct rte_crypto_op *op = ev->event_ptr;
+ struct rte_mbuf *m;
+
+ if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
+ if (op->sym->m_dst == NULL)
+ m = op->sym->m_src;
+ else
+ m = op->sym->m_dst;
+
+ pe = rte_pktmbuf_mtod(m, struct perf_elt *);
+ } else {
+ pe = RTE_PTR_ADD(op->asym->modex.result.data,
+ op->asym->modex.result.length);
+ }
+ pe->timestamp = rte_get_timer_cycles();
+ } else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
+ struct rte_event_dma_adapter_op *op = ev->event_ptr;
+
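+ /* DMA ops carry no perf_elt; stash the timestamp in the op's user metadata. */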
+ op->user_meta = rte_get_timer_cycles();
+ } else {
+ pe = ev->event_ptr;
+ pe->timestamp = rte_get_timer_cycles();
+ }
}
static __rte_always_inline int
-perf_handle_crypto_ev(struct rte_event *ev, struct perf_elt **pe, int enable_fwd_latency)
+perf_handle_crypto_ev(struct rte_event *ev)
{
struct rte_crypto_op *op = ev->event_ptr;
- struct rte_mbuf *m;
-
if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
rte_crypto_op_free(op);
return op->status;
}
- /* Forward latency not enabled - perf data will not be accessed */
- if (!enable_fwd_latency)
- return 0;
-
- /* Get pointer to perf data */
- if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
- if (op->sym->m_dst == NULL)
- m = op->sym->m_src;
- else
- m = op->sym->m_dst;
- *pe = rte_pktmbuf_mtod(m, struct perf_elt *);
- } else {
- *pe = RTE_PTR_ADD(op->asym->modex.result.data, op->asym->modex.result.length);
- }
-
return 0;
}
@@ -243,8 +249,6 @@ perf_process_last_stage(struct rte_mempool *const pool, enum evt_prod_type prod_
to_free_in_bulk = op->asym->modex.result.data;
}
rte_crypto_op_free(op);
- } else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
- return count;
} else {
to_free_in_bulk = ev->event_ptr;
}
@@ -263,7 +267,7 @@ perf_process_last_stage_latency(struct rte_mempool *const pool, enum evt_prod_ty
struct rte_event *const ev, struct worker_data *const w,
void *bufs[], int const buf_sz, uint8_t count)
{
- uint64_t latency;
+ uint64_t latency, tstamp;
struct perf_elt *pe;
void *to_free_in_bulk;
@@ -290,15 +294,20 @@ perf_process_last_stage_latency(struct rte_mempool *const pool, enum evt_prod_ty
op->asym->modex.result.length);
to_free_in_bulk = op->asym->modex.result.data;
}
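+ /* Read the timestamp before the op (and the memory holding pe) is freed below. */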
+ tstamp = pe->timestamp;
rte_crypto_op_free(op);
} else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
- return count;
+ struct rte_event_dma_adapter_op *op = ev->event_ptr;
+
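+ /* The op itself is returned to the pool in bulk; user_meta carries the forward-latency timestamp. */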
+ to_free_in_bulk = op;
+ tstamp = op->user_meta;
} else {
pe = ev->event_ptr;
+ tstamp = pe->timestamp;
to_free_in_bulk = pe;
}
- latency = rte_get_timer_cycles() - pe->timestamp;
+ latency = rte_get_timer_cycles() - tstamp;
w->latency += latency;
bufs[count++] = to_free_in_bulk;
@@ -42,6 +42,7 @@ perf_queue_worker(void *arg, const int enable_fwd_latency)
struct rte_event ev;
PERF_WORKER_INIT;
+ RTE_SET_USED(pe);
while (t->done == false) {
deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
@@ -52,7 +53,7 @@ perf_queue_worker(void *arg, const int enable_fwd_latency)
if ((prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) &&
(ev.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
- if (perf_handle_crypto_ev(&ev, &pe, enable_fwd_latency))
+ if (perf_handle_crypto_ev(&ev))
continue;
} else {
pe = ev.event_ptr;
@@ -60,8 +61,8 @@ perf_queue_worker(void *arg, const int enable_fwd_latency)
stage = ev.queue_id % nb_stages;
if (enable_fwd_latency && !prod_timer_type && stage == 0)
- /* first q in pipeline, mark timestamp to compute fwd latency */
- perf_mark_fwd_latency(pe);
+ /* first q in pipeline, mark timestamp to compute fwd latency */
+ perf_mark_fwd_latency(prod_type, &ev);
/* last stage in pipeline */
if (unlikely(stage == laststage)) {
@@ -93,6 +94,7 @@ perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
PERF_WORKER_INIT;
uint16_t i;
+ RTE_SET_USED(pe);
while (t->done == false) {
nb_rx = rte_event_dequeue_burst(dev, port, ev, BURST_SIZE, 0);
@@ -104,7 +106,7 @@ perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
for (i = 0; i < nb_rx; i++) {
if ((prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) &&
(ev[i].event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
- if (perf_handle_crypto_ev(&ev[i], &pe, enable_fwd_latency))
+ if (perf_handle_crypto_ev(&ev[i]))
continue;
}
@@ -114,7 +116,7 @@ perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
/* first queue in pipeline.
* mark time stamp to compute fwd latency
*/
- perf_mark_fwd_latency(ev[i].event_ptr);
+ perf_mark_fwd_latency(prod_type, &ev[i]);
}
/* last stage in pipeline */
if (unlikely(stage == laststage)) {
@@ -169,7 +171,7 @@ perf_queue_worker_vector(void *arg, const int enable_fwd_latency)
stage = ev.queue_id % nb_stages;
/* First q in pipeline, mark timestamp to compute fwd latency */
if (enable_fwd_latency && !prod_timer_type && stage == 0)
- perf_mark_fwd_latency(pe);
+ pe->timestamp = rte_get_timer_cycles();
/* Last stage in pipeline */
if (unlikely(stage == laststage)) {