@@ -31,6 +31,7 @@ sources = files(
'mlx5_rxtx.c',
'mlx5_stats.c',
'mlx5_trigger.c',
+ 'mlx5_trace.c',
'mlx5_tx.c',
'mlx5_tx_empw.c',
'mlx5_tx_mpw.c',
@@ -377,25 +377,6 @@ mlx5_rx_mb2mr(struct mlx5_rxq_data *rxq, struct rte_mbuf *mb)
return mlx5_mr_mempool2mr_bh(mr_ctrl, mb->pool, addr);
}
-/**
- * Convert timestamp from HW format to linear counter
- * from Packet Pacing Clock Queue CQE timestamp format.
- *
- * @param sh
- * Pointer to the device shared context. Might be needed
- * to convert according current device configuration.
- * @param ts
- * Timestamp from CQE to convert.
- * @return
- * UTC in nanoseconds
- */
-static __rte_always_inline uint64_t
-mlx5_txpp_convert_rx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t ts)
-{
- RTE_SET_USED(sh);
- return (ts & UINT32_MAX) + (ts >> 32) * NS_PER_S;
-}
-
/**
* Set timestamp in mbuf dynamic field.
*
@@ -43,4 +43,23 @@ int mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,
int mlx5_queue_state_modify(struct rte_eth_dev *dev,
struct mlx5_mp_arg_queue_state_modify *sm);
+/**
+ * Convert timestamp from HW format to linear counter
+ * from Packet Pacing Clock Queue CQE timestamp format.
+ *
+ * @param sh
+ * Pointer to the device shared context. Might be needed
+ * to convert according to the current device configuration.
+ * @param ts
+ * Timestamp from CQE to convert.
+ * @return
+ * UTC in nanoseconds
+ */
+static __rte_always_inline uint64_t
+mlx5_txpp_convert_rx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t ts)
+{
+ RTE_SET_USED(sh);
+ return (ts & UINT32_MAX) + (ts >> 32) * NS_PER_S;
+}
+
#endif /* RTE_PMD_MLX5_RXTX_H_ */
new file mode 100644
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 NVIDIA Corporation & Affiliates
+ */
+
+#include <rte_trace_point_register.h>
+#include <mlx5_trace.h>
+
+/* TX burst subroutines trace points. */
+RTE_TRACE_POINT_REGISTER(rte_pmd_mlx5_trace_tx_entry,
+ pmd.net.mlx5.tx.entry)
+
+RTE_TRACE_POINT_REGISTER(rte_pmd_mlx5_trace_tx_exit,
+ pmd.net.mlx5.tx.exit)
+
+RTE_TRACE_POINT_REGISTER(rte_pmd_mlx5_trace_tx_wqe,
+ pmd.net.mlx5.tx.wqe)
+
+RTE_TRACE_POINT_REGISTER(rte_pmd_mlx5_trace_tx_wait,
+ pmd.net.mlx5.tx.wait)
+
+RTE_TRACE_POINT_REGISTER(rte_pmd_mlx5_trace_tx_push,
+ pmd.net.mlx5.tx.push)
+
+RTE_TRACE_POINT_REGISTER(rte_pmd_mlx5_trace_tx_complete,
+ pmd.net.mlx5.tx.complete)
new file mode 100644
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 NVIDIA Corporation & Affiliates
+ */
+
+#ifndef RTE_PMD_MLX5_TRACE_H_
+#define RTE_PMD_MLX5_TRACE_H_
+
+/**
+ * @file
+ *
+ * API for mlx5 PMD trace support
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <mlx5_prm.h>
+#include <rte_mbuf.h>
+#include <rte_trace_point.h>
+
+/* TX burst subroutines trace points. */
+RTE_TRACE_POINT_FP(
+ rte_pmd_mlx5_trace_tx_entry,
+ RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t queue_id),
+ rte_trace_point_emit_u16(port_id);
+ rte_trace_point_emit_u16(queue_id);
+)
+
+RTE_TRACE_POINT_FP(
+ rte_pmd_mlx5_trace_tx_exit,
+ RTE_TRACE_POINT_ARGS(uint16_t nb_sent, uint16_t nb_req),
+ rte_trace_point_emit_u16(nb_sent);
+ rte_trace_point_emit_u16(nb_req);
+)
+
+RTE_TRACE_POINT_FP(
+ rte_pmd_mlx5_trace_tx_wqe,
+ RTE_TRACE_POINT_ARGS(uint32_t opcode),
+ rte_trace_point_emit_u32(opcode);
+)
+
+RTE_TRACE_POINT_FP(
+ rte_pmd_mlx5_trace_tx_wait,
+ RTE_TRACE_POINT_ARGS(uint64_t ts),
+ rte_trace_point_emit_u64(ts);
+)
+
+
+RTE_TRACE_POINT_FP(
+ rte_pmd_mlx5_trace_tx_push,
+ RTE_TRACE_POINT_ARGS(const struct rte_mbuf *mbuf, uint16_t wqe_id),
+ rte_trace_point_emit_ptr(mbuf);
+ rte_trace_point_emit_u32(mbuf->pkt_len);
+ rte_trace_point_emit_u16(mbuf->nb_segs);
+ rte_trace_point_emit_u16(wqe_id);
+)
+
+RTE_TRACE_POINT_FP(
+ rte_pmd_mlx5_trace_tx_complete,
+ RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t queue_id,
+ uint16_t wqe_id, uint64_t ts),
+ rte_trace_point_emit_u16(port_id);
+ rte_trace_point_emit_u16(queue_id);
+ rte_trace_point_emit_u64(ts);
+ rte_trace_point_emit_u16(wqe_id);
+)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_PMD_MLX5_TRACE_H_ */
@@ -232,6 +232,15 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) ==
cqe->wqe_counter);
#endif
+ if (__rte_trace_point_fp_is_enabled()) {
+ uint64_t ts = rte_be_to_cpu_64(cqe->timestamp);
+ uint16_t wqe_id = rte_be_to_cpu_16(cqe->wqe_counter);
+
+ if (txq->rt_timestamp)
+ ts = mlx5_txpp_convert_rx_ts(NULL, ts);
+ rte_pmd_mlx5_trace_tx_complete(txq->port_id, txq->idx,
+ wqe_id, ts);
+ }
ring_doorbell = true;
++txq->cq_ci;
last_cqe = cqe;
@@ -13,12 +13,15 @@
#include <rte_mempool.h>
#include <rte_common.h>
#include <rte_spinlock.h>
+#include <rte_trace_point.h>
#include <mlx5_common.h>
#include <mlx5_common_mr.h>
#include "mlx5.h"
#include "mlx5_autoconf.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_trace.h"
/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
@@ -764,6 +767,9 @@ mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq,
cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
MLX5_COMP_MODE_OFFSET);
cs->misc = RTE_BE32(0);
+ if (__rte_trace_point_fp_is_enabled() && !loc->pkts_sent)
+ rte_pmd_mlx5_trace_tx_entry(txq->port_id, txq->idx);
+ rte_pmd_mlx5_trace_tx_wqe((txq->wqe_ci << 8) | opcode);
}
/**
@@ -1692,6 +1698,7 @@ mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq,
if (txq->wait_on_time) {
/* The wait on time capability should be used. */
ts -= sh->txpp.skew;
+ rte_pmd_mlx5_trace_tx_wait(ts);
mlx5_tx_cseg_init(txq, loc, wqe,
1 + sizeof(struct mlx5_wqe_wseg) /
MLX5_WSEG_SIZE,
@@ -1706,6 +1713,7 @@ mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq,
if (unlikely(wci < 0))
return MLX5_TXCMP_CODE_SINGLE;
/* Build the WAIT WQE with specified completion. */
+ rte_pmd_mlx5_trace_tx_wait(ts - sh->txpp.skew);
mlx5_tx_cseg_init(txq, loc, wqe,
1 + sizeof(struct mlx5_wqe_qseg) /
MLX5_WSEG_SIZE,
@@ -1810,6 +1818,7 @@ mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq,
wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
loc->wqe_last = wqe;
mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx);
wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
txq->wqe_ci += (ds + 3) / 4;
@@ -1892,6 +1901,7 @@ mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq,
wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
loc->wqe_last = wqe;
mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
mlx5_tx_eseg_none(txq, loc, wqe, olx);
dseg = &wqe->dseg[0];
do {
@@ -2115,6 +2125,7 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq,
wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
loc->wqe_last = wqe;
mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx);
wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
txq->wqe_ci += (ds + 3) / 4;
@@ -2318,8 +2329,8 @@ mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq,
*/
wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
loc->wqe_last = wqe;
- mlx5_tx_cseg_init(txq, loc, wqe, ds,
- MLX5_OPCODE_TSO, olx);
+ mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_TSO, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx);
dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan;
dlen -= hlen - vlan;
@@ -2688,6 +2699,7 @@ mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq,
/* Update sent data bytes counter. */
slen += dlen;
#endif
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
mlx5_tx_dseg_ptr
(txq, loc, dseg,
rte_pktmbuf_mtod(loc->mbuf, uint8_t *),
@@ -2926,6 +2938,7 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq,
tlen += sizeof(struct rte_vlan_hdr);
if (room < tlen)
break;
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
dseg = mlx5_tx_dseg_vlan(txq, loc, dseg,
dptr, dlen, olx);
#ifdef MLX5_PMD_SOFT_COUNTERS
@@ -2935,6 +2948,7 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq,
} else {
if (room < tlen)
break;
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
dseg = mlx5_tx_dseg_empw(txq, loc, dseg,
dptr, dlen, olx);
}
@@ -2980,6 +2994,7 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq,
if (MLX5_TXOFF_CONFIG(VLAN))
MLX5_ASSERT(!(loc->mbuf->ol_flags &
RTE_MBUF_F_TX_VLAN));
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx);
/* We have to store mbuf in elts.*/
txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
@@ -3194,6 +3209,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq,
loc->wqe_last = wqe;
mlx5_tx_cseg_init(txq, loc, wqe, seg_n,
MLX5_OPCODE_SEND, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
mlx5_tx_eseg_data(txq, loc, wqe,
vlan, inlen, 0, olx);
txq->wqe_ci += wqe_n;
@@ -3256,6 +3272,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq,
loc->wqe_last = wqe;
mlx5_tx_cseg_init(txq, loc, wqe, ds,
MLX5_OPCODE_SEND, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan,
txq->inlen_mode,
0, olx);
@@ -3297,6 +3314,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq,
loc->wqe_last = wqe;
mlx5_tx_cseg_init(txq, loc, wqe, 4,
MLX5_OPCODE_SEND, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx);
dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) +
MLX5_ESEG_MIN_INLINE_SIZE - vlan;
@@ -3338,6 +3356,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq,
loc->wqe_last = wqe;
mlx5_tx_cseg_init(txq, loc, wqe, 3,
MLX5_OPCODE_SEND, olx);
+ rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
mlx5_tx_eseg_none(txq, loc, wqe, olx);
mlx5_tx_dseg_ptr
(txq, loc, &wqe->dseg[0],
@@ -3707,6 +3726,9 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
#endif
if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
+ /* Trace productive bursts only. */
+ if (__rte_trace_point_fp_is_enabled() && loc.pkts_sent)
+ rte_pmd_mlx5_trace_tx_exit(loc.pkts_sent, pkts_n);
return loc.pkts_sent;
}