@@ -376,25 +376,6 @@ mlx5_rx_mb2mr(struct mlx5_rxq_data *rxq, struct rte_mbuf *mb)
return mlx5_mr_mempool2mr_bh(mr_ctrl, mb->pool, addr);
}
-/**
- * Convert timestamp from HW format to linear counter
- * from Packet Pacing Clock Queue CQE timestamp format.
- *
- * @param sh
- * Pointer to the device shared context. Might be needed
- * to convert according current device configuration.
- * @param ts
- * Timestamp from CQE to convert.
- * @return
- * UTC in nanoseconds
- */
-static __rte_always_inline uint64_t
-mlx5_txpp_convert_rx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t ts)
-{
- RTE_SET_USED(sh);
- return (ts & UINT32_MAX) + (ts >> 32) * NS_PER_S;
-}
-
/**
* Set timestamp in mbuf dynamic field.
*
@@ -43,4 +43,23 @@ int mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,
int mlx5_queue_state_modify(struct rte_eth_dev *dev,
struct mlx5_mp_arg_queue_state_modify *sm);
+/**
+ * Convert a timestamp in the Packet Pacing Clock Queue CQE
+ * format (as reported by HW) to a linear counter.
+ *
+ * @param sh
+ * Pointer to the device shared context. Currently unused; might be
+ * needed to convert according to the current device configuration.
+ * @param ts
+ * Timestamp from CQE to convert (upper 32 bits scaled by NS_PER_S).
+ * @return
+ * UTC in nanoseconds: (ts >> 32) * NS_PER_S + (ts & UINT32_MAX).
+ */
+static __rte_always_inline uint64_t
+mlx5_txpp_convert_rx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t ts)
+{
+ RTE_SET_USED(sh);
+ return (ts & UINT32_MAX) + (ts >> 32) * NS_PER_S;
+}
+
#endif /* RTE_PMD_MLX5_RXTX_H_ */
@@ -232,6 +232,15 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) ==
cqe->wqe_counter);
#endif
+ if (__rte_trace_point_fp_is_enabled()) {
+ uint64_t ts = rte_be_to_cpu_64(cqe->timestamp);
+ uint16_t wqe_id = rte_be_to_cpu_16(cqe->wqe_counter);
+
+ if (txq->rt_timestamp)
+ ts = mlx5_txpp_convert_rx_ts(NULL, ts);
+ rte_pmd_mlx5_trace_tx_complete(txq->port_id, txq->idx,
+ wqe_id, ts);
+ }
ring_doorbell = true;
++txq->cq_ci;
last_cqe = cqe;
@@ -19,6 +19,8 @@
#include "mlx5.h"
#include "mlx5_autoconf.h"
+#include "mlx5_trace.h"
+#include "mlx5_rxtx.h"
/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
@@ -764,6 +766,9 @@ mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq,
cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
MLX5_COMP_MODE_OFFSET);
cs->misc = RTE_BE32(0);
+ if (__rte_trace_point_fp_is_enabled() && !loc->pkts_sent)
+ rte_pmd_mlx5_trace_tx_entry(txq->port_id, txq->idx);
+ rte_pmd_mlx5_trace_tx_wqe((txq->wqe_ci << 8) | opcode);
}
/**
@@ -1692,6 +1697,7 @@ mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq,
if (txq->wait_on_time) {
/* The wait on time capability should be used. */
ts -= sh->txpp.skew;
+ rte_pmd_mlx5_trace_tx_wait(ts);
mlx5_tx_cseg_init(txq, loc, wqe,
1 + sizeof(struct mlx5_wqe_wseg) /
MLX5_WSEG_SIZE,
@@ -1706,6 +1712,7 @@ mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq,
if (unlikely(wci < 0))
return MLX5_TXCMP_CODE_SINGLE;
/* Build the WAIT WQE with specified completion. */
+ rte_pmd_mlx5_trace_tx_wait(ts - sh->txpp.skew);
mlx5_tx_cseg_init(txq, loc, wqe,
1 + sizeof(struct mlx5_wqe_qseg) /
MLX5_WSEG_SIZE,
@@ -1810,6 +1817,7 @@ mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq,
wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
loc->wqe_last = wqe;
mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx);
wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
txq->wqe_ci += (ds + 3) / 4;
@@ -1892,6 +1900,7 @@ mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq,
wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
loc->wqe_last = wqe;
mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
mlx5_tx_eseg_none(txq, loc, wqe, olx);
dseg = &wqe->dseg[0];
do {
@@ -2115,6 +2124,7 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq,
wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
loc->wqe_last = wqe;
mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx);
wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
txq->wqe_ci += (ds + 3) / 4;
@@ -2318,8 +2328,8 @@ mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq,
*/
wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
loc->wqe_last = wqe;
- mlx5_tx_cseg_init(txq, loc, wqe, ds,
- MLX5_OPCODE_TSO, olx);
+ mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_TSO, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx);
dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan;
dlen -= hlen - vlan;
@@ -2688,6 +2698,7 @@ mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq,
/* Update sent data bytes counter. */
slen += dlen;
#endif
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
mlx5_tx_dseg_ptr
(txq, loc, dseg,
rte_pktmbuf_mtod(loc->mbuf, uint8_t *),
@@ -2926,6 +2937,7 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq,
tlen += sizeof(struct rte_vlan_hdr);
if (room < tlen)
break;
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
dseg = mlx5_tx_dseg_vlan(txq, loc, dseg,
dptr, dlen, olx);
#ifdef MLX5_PMD_SOFT_COUNTERS
@@ -2935,6 +2947,7 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq,
} else {
if (room < tlen)
break;
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
dseg = mlx5_tx_dseg_empw(txq, loc, dseg,
dptr, dlen, olx);
}
@@ -2980,6 +2993,7 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq,
if (MLX5_TXOFF_CONFIG(VLAN))
MLX5_ASSERT(!(loc->mbuf->ol_flags &
RTE_MBUF_F_TX_VLAN));
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx);
/* We have to store mbuf in elts.*/
txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
@@ -3194,6 +3208,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq,
loc->wqe_last = wqe;
mlx5_tx_cseg_init(txq, loc, wqe, seg_n,
MLX5_OPCODE_SEND, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
mlx5_tx_eseg_data(txq, loc, wqe,
vlan, inlen, 0, olx);
txq->wqe_ci += wqe_n;
@@ -3256,6 +3271,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq,
loc->wqe_last = wqe;
mlx5_tx_cseg_init(txq, loc, wqe, ds,
MLX5_OPCODE_SEND, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan,
txq->inlen_mode,
0, olx);
@@ -3297,6 +3313,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq,
loc->wqe_last = wqe;
mlx5_tx_cseg_init(txq, loc, wqe, 4,
MLX5_OPCODE_SEND, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx);
dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) +
MLX5_ESEG_MIN_INLINE_SIZE - vlan;
@@ -3338,6 +3355,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq,
loc->wqe_last = wqe;
mlx5_tx_cseg_init(txq, loc, wqe, 3,
MLX5_OPCODE_SEND, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
mlx5_tx_eseg_none(txq, loc, wqe, olx);
mlx5_tx_dseg_ptr
(txq, loc, &wqe->dseg[0],
@@ -3707,6 +3725,9 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
#endif
if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
+ /* Trace productive bursts only. */
+ if (__rte_trace_point_fp_is_enabled() && loc.pkts_sent)
+ rte_pmd_mlx5_trace_tx_exit(loc.pkts_sent, pkts_n);
return loc.pkts_sent;
}