[05/13] net/txgbe: enable Tx descriptor error interrupt

Message ID 20241023064836.2017879-6-jiawenwu@trustnetic.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series Wangxun fixes |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Jiawen Wu Oct. 23, 2024, 6:48 a.m. UTC
Enable Tdm_desc_chk then handle the interrupt of TDM desc error. If it's a
non-fatal error, clear the error to reset the queue. If it's a fatal error,
require users to manually restart the port. This flow prevents the hardware
from PCIe pending due to Tx hang, resulting in a reboot to recover.

But remarkably, when packet layer length does not match the packet type in
TX descriptor, it will cause non-fatal error if Tdm_desc_chk is enabled.
But it can be transmitted normally if Tdm_desc_chk is disabled. So in order
to prevent hardware over check, fix the layer length on the basis of packet
type.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 drivers/net/txgbe/base/txgbe_regs.h |   6 ++
 drivers/net/txgbe/txgbe_ethdev.c    |  64 ++++++++++++++++++
 drivers/net/txgbe/txgbe_ethdev.h    |   3 +
 drivers/net/txgbe/txgbe_rxtx.c      | 100 ++++++++++++++++++++++++++++
 drivers/net/txgbe/txgbe_rxtx.h      |   1 +
 5 files changed, 174 insertions(+)
  

Patch

diff --git a/drivers/net/txgbe/base/txgbe_regs.h b/drivers/net/txgbe/base/txgbe_regs.h
index b46d65331e..7a9ba6976f 100644
--- a/drivers/net/txgbe/base/txgbe_regs.h
+++ b/drivers/net/txgbe/base/txgbe_regs.h
@@ -1197,6 +1197,7 @@  enum txgbe_5tuple_protocol {
 #define   TXGBE_ICRMISC_ANDONE MS(19, 0x1) /* link auto-nego done */
 #define   TXGBE_ICRMISC_ERRIG  MS(20, 0x1) /* integrity error */
 #define   TXGBE_ICRMISC_SPI    MS(21, 0x1) /* SPI interface */
+#define   TXGBE_ICRMISC_TXDESC MS(22, 0x1) /* TDM desc error */
 #define   TXGBE_ICRMISC_VFMBX  MS(23, 0x1) /* VF-PF message box */
 #define   TXGBE_ICRMISC_GPIO   MS(26, 0x1) /* GPIO interrupt */
 #define   TXGBE_ICRMISC_ERRPCI MS(27, 0x1) /* pcie request error */
@@ -1382,6 +1383,11 @@  enum txgbe_5tuple_protocol {
 #define   TXGBE_TXCFG_WTHRESH(v)        LS(v, 16, 0x7F)
 #define   TXGBE_TXCFG_FLUSH             MS(26, 0x1)
 
+#define TXGBE_TDM_DESC_CHK(i)		(0x0180B0 + (i) * 4) /*0-3*/
+#define TXGBE_TDM_DESC_NONFATAL(i)	(0x0180C0 + (i) * 4) /*0-3*/
+#define TXGBE_TDM_DESC_FATAL(i)		(0x0180D0 + (i) * 4) /*0-3*/
+#define TXGBE_TDM_DESC_MASK(v)		MS(v, 0x1)
+
 /* interrupt registers */
 #define TXGBE_ITRI                      0x000180
 #define TXGBE_ITR(i)                    (0x000200 + 4 * (i))
diff --git a/drivers/net/txgbe/txgbe_ethdev.c b/drivers/net/txgbe/txgbe_ethdev.c
index 4aa3bfd0bc..bafa9cf829 100644
--- a/drivers/net/txgbe/txgbe_ethdev.c
+++ b/drivers/net/txgbe/txgbe_ethdev.c
@@ -1936,6 +1936,7 @@  txgbe_dev_stop(struct rte_eth_dev *dev)
 	PMD_INIT_FUNC_TRACE();
 
 	rte_eal_alarm_cancel(txgbe_dev_detect_sfp, dev);
+	rte_eal_alarm_cancel(txgbe_tx_queue_clear_error, dev);
 	txgbe_dev_wait_setup_link_complete(dev, 0);
 
 	/* disable interrupts */
@@ -2838,6 +2839,60 @@  txgbe_dev_setup_link_alarm_handler(void *param)
 	intr->flags &= ~TXGBE_FLAG_NEED_LINK_CONFIG;
 }
 
+static void
+txgbe_do_reset(struct rte_eth_dev *dev)
+{
+	struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
+	struct txgbe_tx_queue *txq;
+	u32 i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		txq = dev->data->tx_queues[i];
+		txq->resetting = true;
+	}
+
+	rte_delay_ms(1);
+	wr32(hw, TXGBE_RST, TXGBE_RST_LAN(hw->bus.lan_id));
+	txgbe_flush(hw);
+
+	PMD_DRV_LOG(ERR, "Please manually restart the port %d",
+		dev->data->port_id);
+}
+
+static void
+txgbe_tx_ring_recovery(struct rte_eth_dev *dev)
+{
+	struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
+	u32 desc_error[4] = {0, 0, 0, 0};
+	struct txgbe_tx_queue *txq;
+	u32 i;
+
+	/* check tdm fatal error */
+	for (i = 0; i < 4; i++) {
+		desc_error[i] = rd32(hw, TXGBE_TDM_DESC_FATAL(i));
+		if (desc_error[i] != 0) {
+			PMD_DRV_LOG(ERR, "TDM fatal error reg[%d]: 0x%x", i, desc_error[i]);
+			txgbe_do_reset(dev);
+			return;
+		}
+	}
+
+	/* check tdm non-fatal error */
+	for (i = 0; i < 4; i++)
+		desc_error[i] = rd32(hw, TXGBE_TDM_DESC_NONFATAL(i));
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		if (desc_error[i / 32] & (1 << i % 32)) {
+			PMD_DRV_LOG(ERR, "TDM non-fatal error, reset port[%d] queue[%d]",
+				dev->data->port_id, i);
+			dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
+			txq = dev->data->tx_queues[i];
+			txq->resetting = true;
+			rte_eal_alarm_set(1000, txgbe_tx_queue_clear_error, (void *)dev);
+		}
+	}
+}
+
 /*
  * If @timeout_ms was 0, it means that it will not return until link complete.
  * It returns 1 on complete, return 0 on timeout.
@@ -3096,6 +3151,7 @@  txgbe_dev_misc_interrupt_setup(struct rte_eth_dev *dev)
 	intr->mask |= mask;
 	intr->mask_misc |= TXGBE_ICRMISC_GPIO;
 	intr->mask_misc |= TXGBE_ICRMISC_ANDONE;
+	intr->mask_misc |= TXGBE_ICRMISC_TXDESC;
 	return 0;
 }
 
@@ -3191,6 +3247,9 @@  txgbe_dev_interrupt_get_status(struct rte_eth_dev *dev,
 	if (eicr & TXGBE_ICRMISC_HEAT)
 		intr->flags |= TXGBE_FLAG_OVERHEAT;
 
+	if (eicr & TXGBE_ICRMISC_TXDESC)
+		intr->flags |= TXGBE_FLAG_TX_DESC_ERR;
+
 	((u32 *)hw->isb_mem)[TXGBE_ISB_MISC] = 0;
 
 	return 0;
@@ -3310,6 +3369,11 @@  txgbe_dev_interrupt_action(struct rte_eth_dev *dev,
 		intr->flags &= ~TXGBE_FLAG_OVERHEAT;
 	}
 
+	if (intr->flags & TXGBE_FLAG_TX_DESC_ERR) {
+		txgbe_tx_ring_recovery(dev);
+		intr->flags &= ~TXGBE_FLAG_TX_DESC_ERR;
+	}
+
 	PMD_DRV_LOG(DEBUG, "enable intr immediately");
 	txgbe_enable_intr(dev);
 	rte_intr_enable(intr_handle);
diff --git a/drivers/net/txgbe/txgbe_ethdev.h b/drivers/net/txgbe/txgbe_ethdev.h
index f0f4ced5b0..302ea9f037 100644
--- a/drivers/net/txgbe/txgbe_ethdev.h
+++ b/drivers/net/txgbe/txgbe_ethdev.h
@@ -31,6 +31,7 @@ 
 #define TXGBE_FLAG_NEED_LINK_CONFIG (uint32_t)(1 << 4)
 #define TXGBE_FLAG_NEED_AN_CONFIG   (uint32_t)(1 << 5)
 #define TXGBE_FLAG_OVERHEAT         (uint32_t)(1 << 6)
+#define TXGBE_FLAG_TX_DESC_ERR      (uint32_t)(1 << 7)
 
 /*
  * Defines that were not part of txgbe_type.h as they are not used by the
@@ -474,6 +475,8 @@  int txgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id);
 
 int txgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id);
 
+void txgbe_tx_queue_clear_error(void *param);
+
 void txgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo);
 
diff --git a/drivers/net/txgbe/txgbe_rxtx.c b/drivers/net/txgbe/txgbe_rxtx.c
index c12726553c..2d2b437643 100644
--- a/drivers/net/txgbe/txgbe_rxtx.c
+++ b/drivers/net/txgbe/txgbe_rxtx.c
@@ -728,6 +728,66 @@  txgbe_get_tun_len(struct rte_mbuf *mbuf)
 	return tun_len;
 }
 
+static inline void
+txgbe_fix_offload_len(union txgbe_tx_offload *ol)
+{
+	uint8_t ptid = ol->ptid;
+
+	if (ptid & TXGBE_PTID_PKT_TUN) {
+		if (ol->outer_l2_len == 0)
+			ol->outer_l2_len = sizeof(struct rte_ether_hdr);
+		if (ol->outer_l3_len == 0) {
+			if (ptid & TXGBE_PTID_TUN_IPV6)
+				ol->outer_l3_len = sizeof(struct rte_ipv6_hdr);
+			else
+				ol->outer_l3_len = sizeof(struct rte_ipv4_hdr);
+		}
+		if ((ptid & 0xF) == 0) {
+			ol->l3_len = 0;
+			ol->l4_len = 0;
+		} else {
+			goto inner;
+		}
+	}
+
+	if ((ptid & 0xF0) == TXGBE_PTID_PKT_MAC) {
+		if (ol->l2_len == 0)
+			ol->l2_len = sizeof(struct rte_ether_hdr);
+		ol->l3_len = 0;
+		ol->l4_len = 0;
+	} else if ((ptid & 0xF0) == TXGBE_PTID_PKT_IP) {
+		if (ol->l2_len == 0)
+			ol->l2_len = sizeof(struct rte_ether_hdr);
+inner:
+		if (ol->l3_len == 0) {
+			if (ptid & TXGBE_PTID_PKT_IPV6)
+				ol->l3_len = sizeof(struct rte_ipv6_hdr);
+			else
+				ol->l3_len = sizeof(struct rte_ipv4_hdr);
+		}
+		switch (ptid & 0x7) {
+		case 0x1:
+		case 0x2:
+			ol->l4_len = 0;
+			break;
+		case 0x3:
+			if (ol->l4_len == 0)
+				ol->l4_len =  sizeof(struct rte_udp_hdr);
+			break;
+		case 0x4:
+			if (ol->l4_len == 0)
+				ol->l4_len =  sizeof(struct rte_tcp_hdr);
+			break;
+		case 0x5:
+			if (ol->l4_len == 0)
+				ol->l4_len =  sizeof(struct rte_sctp_hdr);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
 static inline uint8_t
 txgbe_parse_tun_ptid(struct rte_mbuf *tx_pkt, uint8_t tun_len)
 {
@@ -782,6 +842,10 @@  txgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint8_t use_ipsec;
 #endif
 
+	txq = tx_queue;
+	if (txq->resetting)
+		return 0;
+
 	tx_offload.data[0] = 0;
 	tx_offload.data[1] = 0;
 	txq = tx_queue;
@@ -826,6 +890,7 @@  txgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			if (tx_offload.ptid & TXGBE_PTID_PKT_TUN)
 				tx_offload.ptid |= txgbe_parse_tun_ptid(tx_pkt,
 							tx_offload.outer_tun_len);
+			txgbe_fix_offload_len(&tx_offload);
 
 #ifdef RTE_LIB_SECURITY
 			if (use_ipsec) {
@@ -4570,6 +4635,11 @@  txgbe_dev_tx_init(struct rte_eth_dev *dev)
 		wr32(hw, TXGBE_TXWP(txq->reg_idx), 0);
 	}
 
+#ifndef RTE_LIB_SECURITY
+	for (i = 0; i < 4; i++)
+		wr32(hw, TXGBE_TDM_DESC_CHK(i), 0xFFFFFFFF);
+#endif
+
 	/* Device configured with multiple TX queues. */
 	txgbe_dev_mq_tx_configure(dev);
 }
@@ -4806,6 +4876,7 @@  txgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 	rte_wmb();
 	wr32(hw, TXGBE_TXWP(txq->reg_idx), txq->tx_tail);
 	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
+	txq->resetting = false;
 
 	return 0;
 }
@@ -4863,6 +4934,35 @@  txgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 	return 0;
 }
 
+void
+txgbe_tx_queue_clear_error(void *param)
+{
+	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+	struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
+	struct txgbe_tx_queue *txq;
+	u32 i;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		txq = dev->data->tx_queues[i];
+		if (!txq->resetting)
+			continue;
+
+		txgbe_dev_save_tx_queue(hw, i);
+
+		/* tx ring reset */
+		wr32(hw, TXGBE_TDM_DESC_NONFATAL(i / 32),
+			TXGBE_TDM_DESC_MASK(i % 32));
+
+		if (txq->ops != NULL) {
+			txq->ops->release_mbufs(txq);
+			txq->ops->reset(txq);
+		}
+
+		txgbe_dev_store_tx_queue(hw, i);
+		txgbe_dev_tx_queue_start(dev, i);
+	}
+}
+
 void
 txgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo)
diff --git a/drivers/net/txgbe/txgbe_rxtx.h b/drivers/net/txgbe/txgbe_rxtx.h
index 9155eb1f70..e668b60b1e 100644
--- a/drivers/net/txgbe/txgbe_rxtx.h
+++ b/drivers/net/txgbe/txgbe_rxtx.h
@@ -412,6 +412,7 @@  struct txgbe_tx_queue {
 	/**< indicates that IPsec TX feature is in use */
 #endif
 	const struct rte_memzone *mz;
+	bool		    resetting;
 };
 
 struct txgbe_txq_ops {