[v2] drivers: ignore non-critical syndromes for Tx queues

Message ID 20241003202815.870524-1-akozyrev@nvidia.com (mailing list archive)
State Rejected
Delegated to: Raslan Darawsheh
Headers
Series [v2] drivers: ignore non-critical syndromes for Tx queues |

Checks

Context Check Description
ci/loongarch-compilation warning apply patch failure
ci/checkpatch success coding style OK
ci/Intel-compilation warning apply issues
ci/iol-testing warning apply patch failure

Commit Message

Alexander Kozyrev Oct. 3, 2024, 8:28 p.m. UTC
Only three syndromes (LOCAL_QP_OP_ERR, LOCAL_PROT_ERR and WR_FLUSH_ERR)
are considered critical and warrant a queue restart. All other syndromes
can be safely ignored, as is already done for Rx queues. Skip non-critical
error CQEs for Tx queues as well.

Fixes: 957e45fb7b ("net/mlx5: handle Tx completion with error")
Cc: stable@dpdk.org

Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
---
 drivers/common/mlx5/mlx5_prm.h | 17 +++++++++++++++++
 drivers/net/mlx5/mlx5_rx.c     |  4 +---
 drivers/net/mlx5/mlx5_tx.c     |  4 ++--
 3 files changed, 20 insertions(+), 5 deletions(-)
  

Patch

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 210158350d..e4034699d8 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -5614,4 +5614,21 @@  mlx5_ts_format_conv(uint32_t ts_format)
 			MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT;
 }
 
+/**
+ * Check if an error CQE syndrome is LOCAL_QP_OP_ERR, LOCAL_PROT_ERR or WR_FLUSH_ERR.
+ *
+ * @param syndrome
+ *   Syndrome value taken from an error CQE; compared against the three critical codes.
+ *
+ * @return
+ *   Positive value (1) if the syndrome is one of the critical ones, 0 otherwise.
+ */
+static inline uint32_t
+mlx5_critical_syndrome(uint8_t syndrome)
+{
+	return (syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
+		syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
+		syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR);
+}
+
 #endif /* RTE_PMD_MLX5_PRM_H_ */
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index 5e58eb8bc9..a562daa7c3 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -479,9 +479,7 @@  mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
 		for (i = 0; i < (int)err_n; i++) {
 			u.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec - i) & cqe_mask];
 			if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR) {
-				if (u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
-				    u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
-				    u.err_cqe->syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR)
+				if (mlx5_critical_syndrome(u.err_cqe->syndrome))
 					critical_syndrome = true;
 				break;
 			}
diff --git a/drivers/net/mlx5/mlx5_tx.c b/drivers/net/mlx5/mlx5_tx.c
index 2f48bbc82e..2c53feeb9c 100644
--- a/drivers/net/mlx5/mlx5_tx.c
+++ b/drivers/net/mlx5/mlx5_tx.c
@@ -85,7 +85,7 @@  static int
 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
 			 volatile struct mlx5_error_cqe *err_cqe)
 {
-	if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
+	if (mlx5_critical_syndrome(err_cqe->syndrome)) {
 		const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
 		struct mlx5_txq_ctrl *txq_ctrl =
 				container_of(txq, struct mlx5_txq_ctrl, txq);
@@ -217,7 +217,7 @@  mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
 			}
 			/*
 			 * We are going to fetch all entries with
-			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
+			 * non-critical error syndromes.
 			 * The send queue is supposed to be empty.
 			 */
 			ring_doorbell = true;