[1/2] event/sw: add selftest for ordered history list

Message ID 20230831164736.2472671-1-harry.van.haaren@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Jerin Jacob
Headers
Series [1/2] event/sw: add selftest for ordered history list |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Van Haaren, Harry Aug. 31, 2023, 4:47 p.m. UTC
This commit adds a unit test for an issue identified
where ordered history-list entries are not correctly
cleared when the returned event is of op RELEASE type.

The result of the history-list bug is that a future event
which re-uses that history-list slot, but has an op type
of FORWARD will incorrectly be reordered.

The existing unit-tests did not cover the RELEASE of an
ORDERED queue, and then stress-test the history-list by
iterating HIST_LIST times afterwards.

Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
---
 drivers/event/sw/sw_evdev_selftest.c | 132 +++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)
  

Comments

Bruce Richardson Aug. 31, 2023, 5:10 p.m. UTC | #1
On Thu, Aug 31, 2023 at 05:47:35PM +0100, Harry van Haaren wrote:
> This commit adds a unit test for an issue identified
> where ordered history-list entries are not correctly
> cleared when the returned event is of op RELEASE type.
> 
> The result of the history-list bug is that a future event
> which re-uses that history-list slot, but has an op type
> of FORWARD will incorrectly be reordered.
> 
> The existing unit-tests did not cover the RELEASE of an
> ORDERED queue, and then stress-test the history-list by
> iterating HIST_LIST times afterwards.
> 
> Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
> ---
Hi Harry,

having the test first is good for showing up the bug, but won't that mean
that our unit tests are broken until after the second patch is applied? If
so, I suggest reversing the order of the series so the tests always return
clean. [If no V2 necessary, the order can maybe be switched on apply]

/Bruce
  

Patch

diff --git a/drivers/event/sw/sw_evdev_selftest.c b/drivers/event/sw/sw_evdev_selftest.c
index 3aa8d76ca8..59afa260c6 100644
--- a/drivers/event/sw/sw_evdev_selftest.c
+++ b/drivers/event/sw/sw_evdev_selftest.c
@@ -2959,6 +2959,132 @@  dev_stop_flush(struct test *t) /* test to check we can properly flush events */
 	return -1;
 }
 
+static int
+ordered_atomic_hist_completion(struct test *t)
+{
+	const int rx_enq = 0;
+	int err;
+
+	/* Create instance with 1 atomic QID going to 3 ports + 1 prod port */
+	if (init(t, 2, 2) < 0 ||
+			create_ports(t, 2) < 0 ||
+			create_ordered_qids(t, 1) < 0 ||
+			create_atomic_qids(t, 1) < 0)
+		return -1;
+
+	/* Helpers to identify queues */
+	const uint8_t qid_ordered = t->qid[0];
+	const uint8_t qid_atomic = t->qid[1];
+
+	/* CQ mapping to QID */
+	if (rte_event_port_link(evdev, t->port[1], &t->qid[0], NULL, 1) != 1) {
+		printf("%d: error mapping port 1 qid\n", __LINE__);
+		return -1;
+	}
+	if (rte_event_port_link(evdev, t->port[1], &t->qid[1], NULL, 1) != 1) {
+		printf("%d: error mapping port 1 qid\n", __LINE__);
+		return -1;
+	}
+	if (rte_event_dev_start(evdev) < 0) {
+		printf("%d: Error with start call\n", __LINE__);
+		return -1;
+	}
+
+	/* Enqueue 1x ordered event, to be RELEASE-ed by the worker
+	 * CPU, which may cause hist-list corruption (by not comleting)
+	 */
+	struct rte_event ord_ev = {
+		.op = RTE_EVENT_OP_NEW,
+		.queue_id = qid_ordered,
+		.event_type = RTE_EVENT_TYPE_CPU,
+		.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
+	};
+	err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ord_ev, 1);
+	if (err != 1) {
+		printf("%d: Failed to enqueue\n", __LINE__);
+		return -1;
+	}
+
+	/* call the scheduler. This schedules the above event as a single
+	 * event in an ORDERED queue, to the worker.
+	 */
+	rte_service_run_iter_on_app_lcore(t->service_id, 1);
+
+	/* Dequeue ORDERED event 0 from port 1, so that we can then drop */
+	struct rte_event ev;
+	if (!rte_event_dequeue_burst(evdev, t->port[1], &ev, 1, 0)) {
+		printf("%d: failed to dequeue\n", __LINE__);
+		return -1;
+	}
+
+	/* drop the ORDERED event. Here the history list should be completed,
+	 * but might not be if the hist-list bug exists. Call scheduler to make
+	 * it act on the RELEASE that was enqueued.
+	 */
+	rte_event_enqueue_burst(evdev, t->port[1], &release_ev, 1);
+	rte_service_run_iter_on_app_lcore(t->service_id, 1);
+
+	/* Enqueue 1x atomic event, to then FORWARD to trigger atomic hist-list
+	 * completion. If the bug exists, the ORDERED entry may be completed in
+	 * error (aka, using the ORDERED-ROB for the ATOMIC event). This is the
+	 * main focus of this unit test.
+	 */
+	{
+		struct rte_event ev = {
+			.op = RTE_EVENT_OP_NEW,
+			.queue_id = qid_atomic,
+			.event_type = RTE_EVENT_TYPE_CPU,
+			.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
+			.flow_id = 123,
+		};
+
+		err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ev, 1);
+		if (err != 1) {
+			printf("%d: Failed to enqueue\n", __LINE__);
+			return -1;
+		}
+	}
+	rte_service_run_iter_on_app_lcore(t->service_id, 1);
+
+	/* Deq ATM event, then forward it for more than HIST_LIST_SIZE times,
+	 * to re-use the history list entry that may be corrupted previously.
+	 */
+	for (int i = 0; i < SW_PORT_HIST_LIST + 2; i++) {
+		if (!rte_event_dequeue_burst(evdev, t->port[1], &ev, 1, 0)) {
+			printf("%d: failed to dequeue, did corrupt ORD hist "
+				"list steal this ATM event?\n", __LINE__);
+			return -1;
+		}
+
+		/* Re-enqueue the ATM event as FWD, trigger hist-list. */
+		ev.op = RTE_EVENT_OP_FORWARD;
+		err = rte_event_enqueue_burst(evdev, t->port[1], &ev, 1);
+		if (err != 1) {
+			printf("%d: Failed to enqueue\n", __LINE__);
+			return -1;
+		}
+
+		rte_service_run_iter_on_app_lcore(t->service_id, 1);
+	}
+
+	/* If HIST-LIST + N count of dequeues succeed above, the hist list
+	 * has not been corrupted. If it is corrupted, the ATM event is pushed
+	 * into the ORDERED-ROB and will not dequeue.
+	 */
+
+	/* release the ATM event that's been forwarded HIST_LIST times */
+	err = rte_event_enqueue_burst(evdev, t->port[1], &release_ev, 1);
+	if (err != 1) {
+		printf("%d: Failed to enqueue\n", __LINE__);
+		return -1;
+	}
+
+	rte_service_run_iter_on_app_lcore(t->service_id, 1);
+
+	cleanup(t);
+	return 0;
+}
+
 static int
 worker_loopback_worker_fn(void *arg)
 {
@@ -3388,6 +3514,12 @@  test_sw_eventdev(void)
 		printf("ERROR - Stop Flush test FAILED.\n");
 		goto test_fail;
 	}
+	printf("*** Running Ordered & Atomic hist-list completion test...\n");
+	ret = ordered_atomic_hist_completion(t);
+	if (ret != 0) {
+		printf("ERROR - Ordered & Atomic hist-list test FAILED.\n");
+		goto test_fail;
+	}
 	if (rte_lcore_count() >= 3) {
 		printf("*** Running Worker loopback test...\n");
 		ret = worker_loopback(t, 0);