[v2] event/octeontx2: fix unconditional Tx flush

Message ID 20201120104147.1473-1-pbhagavatula@marvell.com (mailing list archive)
State Accepted, archived
Delegated to: Jerin Jacob
Headers
Series [v2] event/octeontx2: fix unconditional Tx flush |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/travis-robot success Travis build: passed
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-testing warning Testing issues

Commit Message

Pavan Nikhilesh Bhagavatula Nov. 20, 2020, 10:41 a.m. UTC
  From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Fix unconditional Tx flush, handle packet retransmit cases where
flush has to be deferred.

Fixes: cb7ee83b6365 ("event/octeontx2: improve single flow performance")
Cc: stable@dpdk.org

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/octeontx2/otx2_evdev.h  |  1 +
 drivers/event/octeontx2/otx2_worker.c | 14 +++++++++-----
 drivers/event/octeontx2/otx2_worker.h | 20 +++++++++++++++-----
 3 files changed, 25 insertions(+), 10 deletions(-)
  

Comments

Jerin Jacob Nov. 20, 2020, 12:28 p.m. UTC | #1
On Fri, Nov 20, 2020 at 4:12 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Fix unconditional Tx flush, handle packet retransmit cases where
> flush has to be deferred.
>
> Fixes: cb7ee83b6365 ("event/octeontx2: improve single flow performance")
> Cc: stable@dpdk.org
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>

Reworded the git commit message to:

    event/octeontx2: fix unconditional Tx flush

    Fix unconditional Tx flush. In the Tx-only case, we need to check that
    the work slot is non-empty before issuing a flush.
    Also, in packet retransmit cases, add a check for the reference
    count and flush the work slot only for the last packet.

    Fixes: cb7ee83b6365 ("event/octeontx2: improve single flow performance")
    Cc: stable@dpdk.org

    Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>


Applied to dpdk-next-net-eventdev/for-main. Thanks



> ---
>  drivers/event/octeontx2/otx2_evdev.h  |  1 +
>  drivers/event/octeontx2/otx2_worker.c | 14 +++++++++-----
>  drivers/event/octeontx2/otx2_worker.h | 20 +++++++++++++++-----
>  3 files changed, 25 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h
> index 547e29d4a..49a865e6f 100644
> --- a/drivers/event/octeontx2/otx2_evdev.h
> +++ b/drivers/event/octeontx2/otx2_evdev.h
> @@ -79,6 +79,7 @@
>  #define SSOW_LF_GWS_OP_GWC_INVAL            (0xe00ull)
>
>  #define OTX2_SSOW_GET_BASE_ADDR(_GW)        ((_GW) - SSOW_LF_GWS_OP_GET_WORK)
> +#define OTX2_SSOW_TT_FROM_TAG(x)           (((x) >> 32) & SSO_TT_EMPTY)
>
>  #define NSEC2USEC(__ns)                        ((__ns) / 1E3)
>  #define USEC2NSEC(__us)                 ((__us) * 1E3)
> diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c
> index 1d427e4a3..b098407e0 100644
> --- a/drivers/event/octeontx2/otx2_worker.c
> +++ b/drivers/event/octeontx2/otx2_worker.c
> @@ -274,12 +274,14 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],      \
>  {                                                                      \
>         struct otx2_ssogws *ws = port;                                  \
>         uint64_t cmd[sz];                                               \
> +       int i;                                                          \
>                                                                         \
> -       RTE_SET_USED(nb_events);                                        \
> -       return otx2_ssogws_event_tx(ws, ev, cmd, (const uint64_t        \
> +       for (i = 0; i < nb_events; i++)                                 \
> +               otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t   \
>                                     (*)[RTE_MAX_QUEUES_PER_PORT])       \
>                                     &ws->tx_adptr_data,                 \
>                                     flags);                             \
> +       return nb_events;                                               \
>  }
>  SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
>  #undef T
> @@ -289,14 +291,16 @@ uint16_t __rte_hot                                                        \
>  otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
>                                       uint16_t nb_events)               \
>  {                                                                      \
> -       struct otx2_ssogws *ws = port;                                  \
>         uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];                 \
> +       struct otx2_ssogws *ws = port;                                  \
> +       int i;                                                          \
>                                                                         \
> -       RTE_SET_USED(nb_events);                                        \
> -       return otx2_ssogws_event_tx(ws, ev, cmd, (const uint64_t        \
> +       for (i = 0; i < nb_events; i++)                                 \
> +               otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t   \
>                                     (*)[RTE_MAX_QUEUES_PER_PORT])       \
>                                     &ws->tx_adptr_data,                 \
>                                     (flags) | NIX_TX_MULTI_SEG_F);      \
> +       return nb_events;                                               \
>  }
>  SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
>  #undef T
> diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
> index 3efd3ba97..0a7d6671c 100644
> --- a/drivers/event/octeontx2/otx2_worker.h
> +++ b/drivers/event/octeontx2/otx2_worker.h
> @@ -198,6 +198,10 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws)
>  static __rte_always_inline void
>  otx2_ssogws_swtag_flush(struct otx2_ssogws *ws)
>  {
> +       if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) {
> +               ws->cur_tt = SSO_SYNC_EMPTY;
> +               return;
> +       }
>         otx2_write64(0, ws->swtag_flush_op);
>         ws->cur_tt = SSO_SYNC_EMPTY;
>  }
> @@ -272,13 +276,14 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m,
>  }
>
>  static __rte_always_inline uint16_t
> -otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[],
> -                    uint64_t *cmd, const uint64_t
> -                    txq_data[][RTE_MAX_QUEUES_PER_PORT],
> +otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
> +                    uint64_t *cmd,
> +                    const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
>                      const uint32_t flags)
>  {
> -       struct rte_mbuf *m = ev[0].mbuf;
> +       struct rte_mbuf *m = ev->mbuf;
>         const struct otx2_eth_txq *txq;
> +       uint16_t ref_cnt = m->refcnt;
>
>         if ((flags & NIX_TX_OFFLOAD_SECURITY_F) &&
>             (m->ol_flags & PKT_TX_SEC_OFFLOAD)) {
> @@ -329,7 +334,12 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[],
>                 }
>         }
>
> -       otx2_write64(0, ws->swtag_flush_op);
> +       if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
> +               if (ref_cnt > 1)
> +                       return 1;
> +       }
> +
> +       otx2_ssogws_swtag_flush(ws);
>
>         return 1;
>  }
> --
> 2.17.1
>
  

Patch

diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h
index 547e29d4a..49a865e6f 100644
--- a/drivers/event/octeontx2/otx2_evdev.h
+++ b/drivers/event/octeontx2/otx2_evdev.h
@@ -79,6 +79,7 @@ 
 #define SSOW_LF_GWS_OP_GWC_INVAL            (0xe00ull)
 
 #define OTX2_SSOW_GET_BASE_ADDR(_GW)        ((_GW) - SSOW_LF_GWS_OP_GET_WORK)
+#define OTX2_SSOW_TT_FROM_TAG(x)	    (((x) >> 32) & SSO_TT_EMPTY)
 
 #define NSEC2USEC(__ns)			((__ns) / 1E3)
 #define USEC2NSEC(__us)                 ((__us) * 1E3)
diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c
index 1d427e4a3..b098407e0 100644
--- a/drivers/event/octeontx2/otx2_worker.c
+++ b/drivers/event/octeontx2/otx2_worker.c
@@ -274,12 +274,14 @@  otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],	\
 {									\
 	struct otx2_ssogws *ws = port;					\
 	uint64_t cmd[sz];						\
+	int i;								\
 									\
-	RTE_SET_USED(nb_events);					\
-	return otx2_ssogws_event_tx(ws, ev, cmd, (const uint64_t	\
+	for (i = 0; i < nb_events; i++)					\
+		otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t	\
 				    (*)[RTE_MAX_QUEUES_PER_PORT])	\
 				    &ws->tx_adptr_data,			\
 				    flags);				\
+	return nb_events;						\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
@@ -289,14 +291,16 @@  uint16_t __rte_hot							\
 otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
 				      uint16_t nb_events)		\
 {									\
-	struct otx2_ssogws *ws = port;					\
 	uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];			\
+	struct otx2_ssogws *ws = port;					\
+	int i;								\
 									\
-	RTE_SET_USED(nb_events);					\
-	return otx2_ssogws_event_tx(ws, ev, cmd, (const uint64_t	\
+	for (i = 0; i < nb_events; i++)					\
+		otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t	\
 				    (*)[RTE_MAX_QUEUES_PER_PORT])	\
 				    &ws->tx_adptr_data,			\
 				    (flags) | NIX_TX_MULTI_SEG_F);	\
+	return nb_events;						\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
index 3efd3ba97..0a7d6671c 100644
--- a/drivers/event/octeontx2/otx2_worker.h
+++ b/drivers/event/octeontx2/otx2_worker.h
@@ -198,6 +198,10 @@  otx2_ssogws_swtag_untag(struct otx2_ssogws *ws)
 static __rte_always_inline void
 otx2_ssogws_swtag_flush(struct otx2_ssogws *ws)
 {
+	if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) {
+		ws->cur_tt = SSO_SYNC_EMPTY;
+		return;
+	}
 	otx2_write64(0, ws->swtag_flush_op);
 	ws->cur_tt = SSO_SYNC_EMPTY;
 }
@@ -272,13 +276,14 @@  otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m,
 }
 
 static __rte_always_inline uint16_t
-otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[],
-		     uint64_t *cmd, const uint64_t
-		     txq_data[][RTE_MAX_QUEUES_PER_PORT],
+otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
+		     uint64_t *cmd,
+		     const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
 		     const uint32_t flags)
 {
-	struct rte_mbuf *m = ev[0].mbuf;
+	struct rte_mbuf *m = ev->mbuf;
 	const struct otx2_eth_txq *txq;
+	uint16_t ref_cnt = m->refcnt;
 
 	if ((flags & NIX_TX_OFFLOAD_SECURITY_F) &&
 	    (m->ol_flags & PKT_TX_SEC_OFFLOAD)) {
@@ -329,7 +334,12 @@  otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[],
 		}
 	}
 
-	otx2_write64(0, ws->swtag_flush_op);
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+		if (ref_cnt > 1)
+			return 1;
+	}
+
+	otx2_ssogws_swtag_flush(ws);
 
 	return 1;
 }