diff mbox series

[v3,6/7] net/ena: add check for missing Tx completions

Message ID 20211019105629.11731-7-mk@semihalf.com (mailing list archive)
State Accepted, archived
Delegated to: Ferruh Yigit
Headers show
Series net/ena: update ENA PMD to v2.5.0 | expand

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Michal Krawczyk Oct. 19, 2021, 10:56 a.m. UTC
In some cases Tx descriptors may be uncompleted by the HW and as a
result they will never be released.

This patch adds checking for the missing Tx completions to the ENA timer
service, so in order to use this feature, the application must call the
function rte_timer_manage().

Missing Tx completion reset threshold is determined dynamically, by
taking into consideration ring size and the default value.

Tx cleanup is associated with the Tx burst function. As DPDK
applications can call Tx burst function dynamically, time when last
cleanup was called must be traced to avoid false detection of the
missing Tx completion.

Signed-off-by: Michal Krawczyk <mk@semihalf.com>
Reviewed-by: Igor Chauskin <igorch@amazon.com>
Reviewed-by: Shai Brandes <shaibran@amazon.com>
---
 doc/guides/rel_notes/release_21_11.rst |   1 +
 drivers/net/ena/ena_ethdev.c           | 118 +++++++++++++++++++++++++
 drivers/net/ena/ena_ethdev.h           |  15 ++++
 3 files changed, 134 insertions(+)

Comments

Ferruh Yigit Oct. 19, 2021, 12:40 p.m. UTC | #1
On 10/19/2021 11:56 AM, Michal Krawczyk wrote:
> +static int check_for_tx_completion_in_queue(struct ena_adapter *adapter,
> +					    struct ena_ring *tx_ring)
> +{
> +	struct ena_tx_buffer *tx_buf;
> +	uint64_t timestamp;
> +	uint64_t completion_delay;
> +	uint32_t missed_tx = 0;
> +	unsigned int i;
> +	int rc = 0;
> +
> +	for (i = 0; i < tx_ring->ring_size; ++i) {
> +		tx_buf = &tx_ring->tx_buffer_info[i];
> +		timestamp = tx_buf->timestamp;
> +
> +		if (timestamp == 0)
> +			continue;
> +
> +		completion_delay = rte_get_timer_cycles() - timestamp;
> +		if (completion_delay > adapter->missing_tx_completion_to) {
> +			if (unlikely(!tx_buf->print_once)) {
> +				PMD_TX_LOG(WARNING,
> +					"Found a Tx that wasn't completed on time, qid %d, index %d. Missing Tx outstanding for %" PRIu64 " msecs.\n",

This line is too long, normally we allow long line for logs, but the
intention there is to enable user to search a log message in the code;
when line is broken search fails.
But when there is a format specifier in the log, it already break the
search and there is no point to keep the string in single line, which
reduces code readability.

I will break the line while merging.
diff mbox series

Patch

diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst
index 6ac867321b..c5f76081e5 100644
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -109,6 +109,7 @@  New Features
 
   * Support for the tx_free_thresh and rx_free_thresh configuration parameters.
   * NUMA aware allocations for the queue helper structures.
+  * Watchdog's feature which is checking for missing Tx completions.
 
 * **Updated Broadcom bnxt PMD.**
 
diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 4e9925b6be..1a70cd781c 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -99,6 +99,7 @@  static const struct ena_stats ena_stats_tx_strings[] = {
 	ENA_STAT_TX_ENTRY(doorbells),
 	ENA_STAT_TX_ENTRY(bad_req_id),
 	ENA_STAT_TX_ENTRY(available_desc),
+	ENA_STAT_TX_ENTRY(missed_tx),
 };
 
 static const struct ena_stats ena_stats_rx_strings[] = {
@@ -1164,6 +1165,7 @@  static int ena_tx_queue_setup(struct rte_eth_dev *dev,
 	txq->size_mask = nb_desc - 1;
 	txq->numa_socket_id = socket_id;
 	txq->pkts_without_db = false;
+	txq->last_cleanup_ticks = 0;
 
 	txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info",
 		sizeof(struct ena_tx_buffer) * txq->ring_size,
@@ -1213,6 +1215,9 @@  static int ena_tx_queue_setup(struct rte_eth_dev *dev,
 			txq->ring_size - ENA_REFILL_THRESH_PACKET);
 	}
 
+	txq->missing_tx_completion_threshold =
+		RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP);
+
 	/* Store pointer to this queue in upper layer */
 	txq->configured = 1;
 	dev->data->tx_queues[queue_idx] = txq;
@@ -1539,6 +1544,85 @@  static void check_for_admin_com_state(struct ena_adapter *adapter)
 	}
 }
 
+static int check_for_tx_completion_in_queue(struct ena_adapter *adapter,
+					    struct ena_ring *tx_ring)
+{
+	struct ena_tx_buffer *tx_buf;
+	uint64_t timestamp;
+	uint64_t completion_delay;
+	uint32_t missed_tx = 0;
+	unsigned int i;
+	int rc = 0;
+
+	for (i = 0; i < tx_ring->ring_size; ++i) {
+		tx_buf = &tx_ring->tx_buffer_info[i];
+		timestamp = tx_buf->timestamp;
+
+		if (timestamp == 0)
+			continue;
+
+		completion_delay = rte_get_timer_cycles() - timestamp;
+		if (completion_delay > adapter->missing_tx_completion_to) {
+			if (unlikely(!tx_buf->print_once)) {
+				PMD_TX_LOG(WARNING,
+					"Found a Tx that wasn't completed on time, qid %d, index %d. Missing Tx outstanding for %" PRIu64 " msecs.\n",
+					tx_ring->id, i,	completion_delay /
+					rte_get_timer_hz() * 1000);
+				tx_buf->print_once = true;
+			}
+			++missed_tx;
+		}
+	}
+
+	if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) {
+		PMD_DRV_LOG(ERR,
+			"The number of lost Tx completions is above the threshold (%d > %d). Trigger the device reset.\n",
+			missed_tx,
+			tx_ring->missing_tx_completion_threshold);
+		adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL;
+		adapter->trigger_reset = true;
+		rc = -EIO;
+	}
+
+	tx_ring->tx_stats.missed_tx += missed_tx;
+
+	return rc;
+}
+
+static void check_for_tx_completions(struct ena_adapter *adapter)
+{
+	struct ena_ring *tx_ring;
+	uint64_t tx_cleanup_delay;
+	size_t qid;
+	int budget;
+	uint16_t nb_tx_queues = adapter->edev_data->nb_tx_queues;
+
+	if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
+		return;
+
+	nb_tx_queues = adapter->edev_data->nb_tx_queues;
+	budget = adapter->missing_tx_completion_budget;
+
+	qid = adapter->last_tx_comp_qid;
+	while (budget-- > 0) {
+		tx_ring = &adapter->tx_ring[qid];
+
+		/* Tx cleanup is called only by the burst function and can be
+		 * called dynamically by the application. Also cleanup is
+		 * limited by the threshold. To avoid false detection of the
+		 * missing HW Tx completion, get the delay since last cleanup
+		 * function was called.
+		 */
+		tx_cleanup_delay = rte_get_timer_cycles() -
+			tx_ring->last_cleanup_ticks;
+		if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay)
+			check_for_tx_completion_in_queue(adapter, tx_ring);
+		qid = (qid + 1) % nb_tx_queues;
+	}
+
+	adapter->last_tx_comp_qid = qid;
+}
+
 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer,
 				  void *arg)
 {
@@ -1547,6 +1631,7 @@  static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer,
 
 	check_for_missing_keep_alive(adapter);
 	check_for_admin_com_state(adapter);
+	check_for_tx_completions(adapter);
 
 	if (unlikely(adapter->trigger_reset)) {
 		PMD_DRV_LOG(ERR, "Trigger reset is on\n");
@@ -1926,6 +2011,20 @@  static int ena_dev_configure(struct rte_eth_dev *dev)
 	 */
 	dev->data->scattered_rx = 1;
 
+	adapter->last_tx_comp_qid = 0;
+
+	adapter->missing_tx_completion_budget =
+		RTE_MIN(ENA_MONITORED_TX_QUEUES, dev->data->nb_tx_queues);
+
+	adapter->missing_tx_completion_to = ENA_TX_TIMEOUT;
+	/* To avoid detection of the spurious Tx completion timeout due to
+	 * application not calling the Tx cleanup function, set timeout for the
+	 * Tx queue which should be half of the missing completion timeout for a
+	 * safety. If there will be a lot of missing Tx completions in the
+	 * queue, they will be detected sooner or later.
+	 */
+	adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2;
+
 	adapter->tx_selected_offloads = dev->data->dev_conf.txmode.offloads;
 	adapter->rx_selected_offloads = dev->data->dev_conf.rxmode.offloads;
 
@@ -2433,6 +2532,20 @@  static void ena_update_hints(struct ena_adapter *adapter,
 		adapter->ena_dev.mmio_read.reg_read_to =
 			hints->mmio_read_timeout * 1000;
 
+	if (hints->missing_tx_completion_timeout) {
+		if (hints->missing_tx_completion_timeout ==
+		    ENA_HW_HINTS_NO_TIMEOUT) {
+			adapter->missing_tx_completion_to =
+				ENA_HW_HINTS_NO_TIMEOUT;
+		} else {
+			/* Convert from msecs to ticks */
+			adapter->missing_tx_completion_to = rte_get_timer_hz() *
+				hints->missing_tx_completion_timeout / 1000;
+			adapter->tx_cleanup_stall_delay =
+				adapter->missing_tx_completion_to / 2;
+		}
+	}
+
 	if (hints->driver_watchdog_timeout) {
 		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
 			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
@@ -2623,6 +2736,7 @@  static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf)
 	}
 
 	tx_info->tx_descs = nb_hw_desc;
+	tx_info->timestamp = rte_get_timer_cycles();
 
 	tx_ring->tx_stats.cnt++;
 	tx_ring->tx_stats.bytes += mbuf->pkt_len;
@@ -2655,6 +2769,7 @@  static void ena_tx_cleanup(struct ena_ring *tx_ring)
 
 		/* Get Tx info & store how many descs were processed  */
 		tx_info = &tx_ring->tx_buffer_info[req_id];
+		tx_info->timestamp = 0;
 
 		mbuf = tx_info->mbuf;
 		rte_pktmbuf_free(mbuf);
@@ -2675,6 +2790,9 @@  static void ena_tx_cleanup(struct ena_ring *tx_ring)
 		ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs);
 		ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
 	}
+
+	/* Notify completion handler that the cleanup was just called */
+	tx_ring->last_cleanup_ticks = rte_get_timer_cycles();
 }
 
 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
diff --git a/drivers/net/ena/ena_ethdev.h b/drivers/net/ena/ena_ethdev.h
index 176d713dff..4f4142ed12 100644
--- a/drivers/net/ena/ena_ethdev.h
+++ b/drivers/net/ena/ena_ethdev.h
@@ -36,6 +36,10 @@ 
 #define ENA_WD_TIMEOUT_SEC	3
 #define ENA_DEVICE_KALIVE_TIMEOUT (ENA_WD_TIMEOUT_SEC * rte_get_timer_hz())
 
+#define ENA_TX_TIMEOUT			(5 * rte_get_timer_hz())
+#define ENA_MONITORED_TX_QUEUES		3
+#define ENA_DEFAULT_MISSING_COMP	256U
+
 /* While processing submitted and completed descriptors (rx and tx path
  * respectively) in a loop it is desired to:
  *  - perform batch submissions while populating sumbissmion queue
@@ -75,6 +79,8 @@  struct ena_tx_buffer {
 	struct rte_mbuf *mbuf;
 	unsigned int tx_descs;
 	unsigned int num_of_bufs;
+	uint64_t timestamp;
+	bool print_once;
 	struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
 };
 
@@ -103,6 +109,7 @@  struct ena_stats_tx {
 	u64 doorbells;
 	u64 bad_req_id;
 	u64 available_desc;
+	u64 missed_tx;
 };
 
 struct ena_stats_rx {
@@ -118,6 +125,7 @@  struct ena_stats_rx {
 struct ena_ring {
 	u16 next_to_use;
 	u16 next_to_clean;
+	uint64_t last_cleanup_ticks;
 
 	enum ena_ring_type type;
 	enum ena_admin_placement_policy_type tx_mem_queue_type;
@@ -171,6 +179,8 @@  struct ena_ring {
 	};
 
 	unsigned int numa_socket_id;
+
+	uint32_t missing_tx_completion_threshold;
 } __rte_cache_aligned;
 
 enum ena_adapter_state {
@@ -291,6 +301,11 @@  struct ena_adapter {
 	bool wd_state;
 
 	bool use_large_llq_hdr;
+
+	uint32_t last_tx_comp_qid;
+	uint64_t missing_tx_completion_to;
+	uint64_t missing_tx_completion_budget;
+	uint64_t tx_cleanup_stall_delay;
 };
 
 int ena_rss_reta_update(struct rte_eth_dev *dev,