[1/2] event/dlb2: update rolling mask used for dequeue
Checks
Commit Message
Update the rolling mask used in dequeue operations. Fixes
vector optimized dequeue.
Fixes: 000a7b8e7582 ("event/dlb2: optimize dequeue operation")
Signed-off-by: Timothy McDaniel <timothy.mcdaniel@intel.com>
---
drivers/event/dlb2/dlb2.c | 28 +++++++++++++++++++++-------
1 file changed, 21 insertions(+), 7 deletions(-)
Comments
On Thu, Feb 17, 2022 at 1:09 AM Timothy McDaniel
<timothy.mcdaniel@intel.com> wrote:
>
> Update the rolling mask used in dequeue operations. Fixes
> vector optimized dequeue.
>
> Fixes: 000a7b8e7582 ("event/dlb2: optimize dequeue operation")
>
> Signed-off-by: Timothy McDaniel <timothy.mcdaniel@intel.com>
Series applied to dpdk-next-net-eventdev/for-main. Thanks
> ---
> drivers/event/dlb2/dlb2.c | 28 +++++++++++++++++++++-------
> 1 file changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c
> index d75f12e382..09abdd1660 100644
> --- a/drivers/event/dlb2/dlb2.c
> +++ b/drivers/event/dlb2/dlb2.c
> @@ -3897,31 +3897,45 @@ dlb2_hw_dequeue_sparse(struct dlb2_eventdev *dlb2,
> while (num < max_num) {
> struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
> int num_avail;
> +
> if (use_scalar) {
> + int n_iter = 0;
> + uint64_t m_rshift, m_lshift, m2_rshift, m2_lshift;
> +
> num_avail = dlb2_recv_qe_sparse(qm_port, qes);
> num_avail = RTE_MIN(num_avail, max_num - num);
> dlb2_inc_cq_idx(qm_port, num_avail << 2);
> if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
> - num += dlb2_process_dequeue_four_qes(ev_port,
> - qm_port,
> - &events[num],
> - &qes[0]);
> + n_iter = dlb2_process_dequeue_four_qes(ev_port,
> + qm_port,
> + &events[num],
> + &qes[0]);
> else if (num_avail)
> - num += dlb2_process_dequeue_qes(ev_port,
> + n_iter = dlb2_process_dequeue_qes(ev_port,
> qm_port,
> &events[num],
> &qes[0],
> num_avail);
> + num += n_iter;
> + /* update rolling_mask for vector code support */
> + m_rshift = qm_port->cq_rolling_mask >> n_iter;
> + m_lshift = qm_port->cq_rolling_mask << (64 - n_iter);
> + m2_rshift = qm_port->cq_rolling_mask_2 >> n_iter;
> + m2_lshift = qm_port->cq_rolling_mask_2 <<
> + (64 - n_iter);
> + qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
> + qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
> } else { /* !use_scalar */
> num_avail = dlb2_recv_qe_sparse_vec(qm_port,
> &events[num],
> max_num - num);
> - num += num_avail;
> dlb2_inc_cq_idx(qm_port, num_avail << 2);
> + num += num_avail;
> DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_avail);
> }
> if (!num_avail) {
> - if (num > 0)
> + if ((timeout == 0) || (num > 0))
> + /* Not waiting in any form or 1+ events recd */
> break;
> else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
> timeout, start_ticks))
> --
> 2.23.0
>
@@ -3897,31 +3897,45 @@ dlb2_hw_dequeue_sparse(struct dlb2_eventdev *dlb2,
while (num < max_num) {
struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
int num_avail;
+
if (use_scalar) {
+ int n_iter = 0;
+ uint64_t m_rshift, m_lshift, m2_rshift, m2_lshift;
+
num_avail = dlb2_recv_qe_sparse(qm_port, qes);
num_avail = RTE_MIN(num_avail, max_num - num);
dlb2_inc_cq_idx(qm_port, num_avail << 2);
if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
- num += dlb2_process_dequeue_four_qes(ev_port,
- qm_port,
- &events[num],
- &qes[0]);
+ n_iter = dlb2_process_dequeue_four_qes(ev_port,
+ qm_port,
+ &events[num],
+ &qes[0]);
else if (num_avail)
- num += dlb2_process_dequeue_qes(ev_port,
+ n_iter = dlb2_process_dequeue_qes(ev_port,
qm_port,
&events[num],
&qes[0],
num_avail);
+ num += n_iter;
+ /* update rolling_mask for vector code support */
+ m_rshift = qm_port->cq_rolling_mask >> n_iter;
+ m_lshift = qm_port->cq_rolling_mask << (64 - n_iter);
+ m2_rshift = qm_port->cq_rolling_mask_2 >> n_iter;
+ m2_lshift = qm_port->cq_rolling_mask_2 <<
+ (64 - n_iter);
+ qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
+ qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
} else { /* !use_scalar */
num_avail = dlb2_recv_qe_sparse_vec(qm_port,
&events[num],
max_num - num);
- num += num_avail;
dlb2_inc_cq_idx(qm_port, num_avail << 2);
+ num += num_avail;
DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_avail);
}
if (!num_avail) {
- if (num > 0)
+ if ((timeout == 0) || (num > 0))
+ /* Not waiting in any form or 1+ events recd */
break;
else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
timeout, start_ticks))