[v1] event/dlb2: add port probing enhancements

Message ID 20221012143247.3239356-1-abdullah.sevincer@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Jerin Jacob
Headers
Series [v1] event/dlb2: add port probing enhancements |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/github-robot: build success github build: passed
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS

Commit Message

Abdullah Sevincer Oct. 12, 2022, 2:32 p.m. UTC
This commit is an enhancement for previously
implemented port probing optimizations. Changes
are below:

1-Since cos is now per port, remove the device and
domain specific cos from dev_args struct.

2-Changes for using best cos as default cos during
LDB port selection.

3-Changed LDB_COS_DEFAULT to 255 (UINT8_MAX) from -1.

4-Add check for valid producer coremask during probing dlb2
resources.

Signed-off-by: Abdullah Sevincer <abdullah.sevincer@intel.com>
---
 drivers/event/dlb2/dlb2.c                  |   9 +-
 drivers/event/dlb2/dlb2_priv.h             |   4 +-
 drivers/event/dlb2/dlb2_user.h             |   2 +-
 drivers/event/dlb2/pf/base/dlb2_hw_types.h |   2 +-
 drivers/event/dlb2/pf/base/dlb2_resource.c | 106 +++++++++++++--------
 5 files changed, 72 insertions(+), 51 deletions(-)
  

Comments

Jerin Jacob Oct. 12, 2022, 3:11 p.m. UTC | #1
On Wed, Oct 12, 2022 at 8:02 PM Abdullah Sevincer
<abdullah.sevincer@intel.com> wrote:
>
> This commit is an enhancement for previously
> implemented port probing optimizations. Changes
> are below:
>
> 1-Since cos is now per port, remove the device and
> domain specific cos from dev_args struct.
>
> 2-Changes for using best cos as default cos during
> LDB port selection.
>
> 3-Changed LDB_COS_DEFAULT to 255 (UINT8_MAX) from -1.
>
> 4-Add check for valid producer coremask during probing dbl2
> resources.

This commit bundles 4 separate items. Split it into separate patches, each with enough information on why the change is made.
  

Patch

diff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c
index 7fd89e940b..02f0e57208 100644
--- a/drivers/event/dlb2/dlb2.c
+++ b/drivers/event/dlb2/dlb2.c
@@ -180,11 +180,12 @@  dlb2_init_port_cos(struct dlb2_eventdev *dlb2, int *port_cos)
 {
 	int q;
 
-	for (q = 0; q < DLB2_MAX_NUM_PORTS_ALL; q++)
+	for (q = 0; q < DLB2_MAX_NUM_PORTS_ALL; q++) {
+		dlb2->ev_ports[q].cos_id = port_cos[q];
 		if (port_cos[q] != DLB2_COS_DEFAULT) {
-			dlb2->ev_ports[q].cos_id = port_cos[q];
 			dlb2->cos_ports[port_cos[q]]++;
 		}
+	}
 }
 
 static void
@@ -847,10 +848,11 @@  dlb2_hw_create_sched_domain(struct dlb2_eventdev *dlb2,
 	}
 
 	cfg->cos_strict = 0; /* Best effort */
-	cfg->num_cos_ldb_ports[0] = resources_asked->num_ldb_ports - cos_ports;
+	cfg->num_cos_ldb_ports[0] = dlb2->cos_ports[0];
 	cfg->num_cos_ldb_ports[1] = dlb2->cos_ports[1];
 	cfg->num_cos_ldb_ports[2] = dlb2->cos_ports[2];
 	cfg->num_cos_ldb_ports[3] = dlb2->cos_ports[3];
+	cfg->num_ldb_ports = resources_asked->num_ldb_ports - cos_ports;
 
 	if (device_version == DLB2_HW_V2)
 		cfg->num_ldb_credits = resources_asked->num_ldb_credits;
@@ -4762,7 +4764,6 @@  dlb2_parse_params(const char *params,
 					     DLB2_NUM_DIR_CREDITS,
 					     DEV_ID_ARG,
 					     DLB2_QID_DEPTH_THRESH_ARG,
-					     DLB2_COS_ARG,
 					     DLB2_POLL_INTERVAL_ARG,
 					     DLB2_SW_CREDIT_QUANTA_ARG,
 					     DLB2_HW_CREDIT_QUANTA_ARG,
diff --git a/drivers/event/dlb2/dlb2_priv.h b/drivers/event/dlb2/dlb2_priv.h
index 9ef5bcb901..085dcf9bdc 100644
--- a/drivers/event/dlb2/dlb2_priv.h
+++ b/drivers/event/dlb2/dlb2_priv.h
@@ -40,7 +40,6 @@ 
 #define DLB2_NUM_DIR_CREDITS "num_dir_credits"
 #define DEV_ID_ARG "dev_id"
 #define DLB2_QID_DEPTH_THRESH_ARG "qid_depth_thresh"
-#define DLB2_COS_ARG "cos"
 #define DLB2_POLL_INTERVAL_ARG "poll_interval"
 #define DLB2_SW_CREDIT_QUANTA_ARG "sw_credit_quanta"
 #define DLB2_HW_CREDIT_QUANTA_ARG "hw_credit_quanta"
@@ -421,7 +420,7 @@  struct dlb2_config {
 };
 
 enum dlb2_cos {
-	DLB2_COS_DEFAULT = -1,
+	DLB2_COS_DEFAULT = 255,
 	DLB2_COS_0 = 0,
 	DLB2_COS_1,
 	DLB2_COS_2,
@@ -661,7 +660,6 @@  struct dlb2_devargs {
 	int num_dir_credits_override;
 	int dev_id;
 	struct dlb2_qid_depth_thresholds qid_depth_thresholds;
-	enum dlb2_cos cos_id;
 	int poll_interval;
 	int sw_credit_quanta;
 	int hw_credit_quanta;
diff --git a/drivers/event/dlb2/dlb2_user.h b/drivers/event/dlb2/dlb2_user.h
index 28c6aaaf43..8739e2a5ac 100644
--- a/drivers/event/dlb2/dlb2_user.h
+++ b/drivers/event/dlb2/dlb2_user.h
@@ -450,7 +450,7 @@  struct dlb2_create_dir_queue_args {
  * - num_hist_list_entries: Number of history list entries. This must be
  *	greater than or equal cq_depth.
  * - cos_id: class-of-service to allocate this port from. Must be between 0 and
- *	3, inclusive.
+ *	3, inclusive. Should be 255 if default.
  * - cos_strict: If set, return an error if there are no available ports in the
  *	requested class-of-service. Else, allocate the port from a different
  *	class-of-service if the requested class has no available ports.
diff --git a/drivers/event/dlb2/pf/base/dlb2_hw_types.h b/drivers/event/dlb2/pf/base/dlb2_hw_types.h
index 87996ef621..be09363893 100644
--- a/drivers/event/dlb2/pf/base/dlb2_hw_types.h
+++ b/drivers/event/dlb2/pf/base/dlb2_hw_types.h
@@ -351,7 +351,7 @@  struct dlb2_hw {
 	int prod_core_list[RTE_MAX_LCORE];
 	u8 num_prod_cores;
 	int dir_pp_allocations[DLB2_MAX_NUM_DIR_PORTS_V2_5];
-	int ldb_pp_allocations[DLB2_MAX_NUM_LDB_PORTS];
+	int ldb_pp_allocations[DLB2_MAX_NUM_LDB_PORTS + DLB2_NUM_COS_DOMAINS];
 
 	/* Virtualization */
 	int virt_mode;
diff --git a/drivers/event/dlb2/pf/base/dlb2_resource.c b/drivers/event/dlb2/pf/base/dlb2_resource.c
index 280a8e51b1..005447f471 100644
--- a/drivers/event/dlb2/pf/base/dlb2_resource.c
+++ b/drivers/event/dlb2/pf/base/dlb2_resource.c
@@ -577,11 +577,14 @@  static int dlb2_attach_ldb_ports(struct dlb2_hw *hw,
 	/* Allocate num_ldb_ports from any class-of-service */
 	for (i = 0; i < args->num_ldb_ports; i++) {
 		for (j = 0; j < DLB2_NUM_COS_DOMAINS; j++) {
+			/* Allocate from best performing cos */
+			u32 cos_idx = j + DLB2_MAX_NUM_LDB_PORTS;
+			u32 cos_id = hw->ldb_pp_allocations[cos_idx];
 			ret = __dlb2_attach_ldb_ports(hw,
 						      rsrcs,
 						      domain,
 						      1,
-						      j,
+						      cos_id,
 						      resp);
 			if (ret == 0)
 				break;
@@ -819,30 +822,38 @@  static int dlb2_pp_cycle_comp(const void *a, const void *b)
 
 /* Probe producer ports from different CPU cores */
 static void
-dlb2_get_pp_allocation(struct dlb2_hw *hw, int cpu, int port_type, int cos_id)
+dlb2_get_pp_allocation(struct dlb2_hw *hw, int cpu, int port_type)
 {
+	struct dlb2_pp_thread_data dlb2_thread_data[DLB2_MAX_NUM_DIR_PORTS_V2_5];
 	struct dlb2_dev *dlb2_dev = container_of(hw, struct dlb2_dev, hw);
-	int i, err, ver = DLB2_HW_DEVICE_FROM_PCI_ID(dlb2_dev->pdev);
+	struct dlb2_pp_thread_data cos_cycles[DLB2_NUM_COS_DOMAINS];
+	int ver = DLB2_HW_DEVICE_FROM_PCI_ID(dlb2_dev->pdev);
+	int num_ports_per_sort, num_ports, num_sort, i, err;
 	bool is_ldb = (port_type == DLB2_LDB_PORT);
-	int num_ports = is_ldb ? DLB2_MAX_NUM_LDB_PORTS :
-	DLB2_MAX_NUM_DIR_PORTS(ver);
-	struct dlb2_pp_thread_data dlb2_thread_data[num_ports];
-	int *port_allocations = is_ldb ? hw->ldb_pp_allocations :
-					 hw->dir_pp_allocations;
-	int num_sort = is_ldb ? DLB2_NUM_COS_DOMAINS : 1;
-	struct dlb2_pp_thread_data cos_cycles[num_sort];
-	int num_ports_per_sort = num_ports / num_sort;
+	int *port_allocations;
 	pthread_t pthread;
 
+	if (is_ldb) {
+		port_allocations = hw->ldb_pp_allocations;
+		num_ports = DLB2_MAX_NUM_LDB_PORTS;
+		num_sort = DLB2_NUM_COS_DOMAINS;
+	} else {
+		port_allocations = hw->dir_pp_allocations;
+		num_ports = DLB2_MAX_NUM_DIR_PORTS(ver);
+		num_sort = 1;
+	}
+
+	num_ports_per_sort = num_ports / num_sort;
+
 	dlb2_dev->enqueue_four = dlb2_movdir64b;
 
 	DLB2_LOG_INFO(" for %s: cpu core used in pp profiling: %d\n",
 		      is_ldb ? "LDB" : "DIR", cpu);
 
 	memset(cos_cycles, 0, num_sort * sizeof(struct dlb2_pp_thread_data));
-	for (i = 0; i < num_ports; i++) {
-		int cos = is_ldb ? (i >> DLB2_NUM_COS_DOMAINS) : 0;
 
+	for (i = 0; i < num_ports; i++) {
+		int cos = (i >> DLB2_NUM_COS_DOMAINS) % DLB2_NUM_COS_DOMAINS;
 		dlb2_thread_data[i].is_ldb = is_ldb;
 		dlb2_thread_data[i].pp = i;
 		dlb2_thread_data[i].cycles = 0;
@@ -861,12 +872,17 @@  dlb2_get_pp_allocation(struct dlb2_hw *hw, int cpu, int port_type, int cos_id)
 			DLB2_LOG_ERR(": thread join failed! err=%d", err);
 			return;
 		}
-		cos_cycles[cos].cycles += dlb2_thread_data[i].cycles;
+
+		if (is_ldb)
+			cos_cycles[cos].cycles += dlb2_thread_data[i].cycles;
 
 		if ((i + 1) % num_ports_per_sort == 0) {
-			int index = cos * num_ports_per_sort;
+			int index = 0;
 
-			cos_cycles[cos].pp = index;
+			if (is_ldb) {
+				cos_cycles[cos].pp = cos;
+				index = cos * num_ports_per_sort;
+			}
 			/*
 			 * For LDB ports first sort with in a cos. Later sort
 			 * the best cos based on total cycles for the cos.
@@ -880,35 +896,31 @@  dlb2_get_pp_allocation(struct dlb2_hw *hw, int cpu, int port_type, int cos_id)
 	}
 
 	/*
-	 * Re-arrange best ports by cos if default cos is used.
+	 * Sort by best cos aggregated over all ports per cos
+	 * Note: The sorted cos ids are stored after DLB2_MAX_NUM_LDB_PORTS, so
+	 * 'pp' there is a cos_id and not a port id.
 	 */
-	if (is_ldb && cos_id == DLB2_COS_DEFAULT)
-		qsort(cos_cycles, num_sort,
-		      sizeof(struct dlb2_pp_thread_data),
+	if (is_ldb) {
+		qsort(cos_cycles, num_sort, sizeof(struct dlb2_pp_thread_data),
 		      dlb2_pp_cycle_comp);
+		for (i = 0; i < DLB2_NUM_COS_DOMAINS; i++)
+			port_allocations[i + DLB2_MAX_NUM_LDB_PORTS] = cos_cycles[i].pp;
+	}
 
 	for (i = 0; i < num_ports; i++) {
-		int start = is_ldb ? cos_cycles[i / num_ports_per_sort].pp : 0;
-		int index = i % num_ports_per_sort;
-
-		port_allocations[i] = dlb2_thread_data[start + index].pp;
+		port_allocations[i] = dlb2_thread_data[i].pp;
 		DLB2_LOG_INFO(": pp %d cycles %d", port_allocations[i],
-			     dlb2_thread_data[start + index].cycles);
+			      dlb2_thread_data[i].cycles);
 	}
+
 }
 
 int
 dlb2_resource_probe(struct dlb2_hw *hw, const void *probe_args)
 {
 	const struct dlb2_devargs *args = (const struct dlb2_devargs *)probe_args;
-	const char *mask = NULL;
-	int cpu = 0, cnt = 0, cores[RTE_MAX_LCORE];
-	int i, cos_id = DLB2_COS_DEFAULT;
-
-	if (args) {
-		mask = (const char *)args->producer_coremask;
-		cos_id = args->cos_id;
-	}
+	const char *mask = args ? args->producer_coremask : NULL;
+	int cpu = 0, cnt = 0, cores[RTE_MAX_LCORE], i;
 
 	if (mask && rte_eal_parse_coremask(mask, cores)) {
 		DLB2_LOG_ERR(": Invalid producer coremask=%s", mask);
@@ -917,16 +929,15 @@  dlb2_resource_probe(struct dlb2_hw *hw, const void *probe_args)
 
 	hw->num_prod_cores = 0;
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		bool is_pcore = (mask && cores[i] != -1);
 		if (rte_lcore_is_enabled(i)) {
-			if (mask) {
+			if (is_pcore) {
 				/*
 				 * Populate the producer cores from parsed
 				 * coremask
 				 */
-				if (cores[i] != -1) {
-					hw->prod_core_list[cores[i]] = i;
-					hw->num_prod_cores++;
-				}
+				hw->prod_core_list[cores[i]] = i;
+				hw->num_prod_cores++;
 			} else if ((++cnt == DLB2_EAL_PROBE_CORE ||
 			   rte_lcore_count() < DLB2_EAL_PROBE_CORE)) {
 				/*
@@ -936,14 +947,18 @@  dlb2_resource_probe(struct dlb2_hw *hw, const void *probe_args)
 				cpu = i;
 				break;
 			}
+		} else if (is_pcore) {
+			DLB2_LOG_ERR("Producer coremask(%s) must be a subset of EAL coremask",
+				     mask);
+			return -1;
 		}
 	}
 	/* Use the first core in producer coremask to probe */
 	if (hw->num_prod_cores)
 		cpu = hw->prod_core_list[0];
 
-	dlb2_get_pp_allocation(hw, cpu, DLB2_LDB_PORT, cos_id);
-	dlb2_get_pp_allocation(hw, cpu, DLB2_DIR_PORT, DLB2_COS_DEFAULT);
+	dlb2_get_pp_allocation(hw, cpu, DLB2_LDB_PORT);
+	dlb2_get_pp_allocation(hw, cpu, DLB2_DIR_PORT);
 
 	return 0;
 }
@@ -4543,7 +4558,8 @@  dlb2_verify_create_ldb_port_args(struct dlb2_hw *hw,
 		return -EINVAL;
 	}
 
-	if (args->cos_id >= DLB2_NUM_COS_DOMAINS) {
+	if (args->cos_id >= DLB2_NUM_COS_DOMAINS &&
+	    (args->cos_id != DLB2_COS_DEFAULT || args->cos_strict)) {
 		resp->status = DLB2_ST_INVALID_COS_ID;
 		return -EINVAL;
 	}
@@ -4554,7 +4570,13 @@  dlb2_verify_create_ldb_port_args(struct dlb2_hw *hw,
 					  typeof(*port));
 	} else {
 		for (i = 0; i < DLB2_NUM_COS_DOMAINS; i++) {
-			id = (args->cos_id + i) % DLB2_NUM_COS_DOMAINS;
+			if (args->cos_id == DLB2_COS_DEFAULT) {
+				/* Allocate from best performing cos */
+				u32 cos_idx = i + DLB2_MAX_NUM_LDB_PORTS;
+				id = hw->ldb_pp_allocations[cos_idx];
+			} else {
+				id = (args->cos_id + i) % DLB2_NUM_COS_DOMAINS;
+			}
 
 			port = DLB2_DOM_LIST_HEAD(domain->avail_ldb_ports[id],
 						  typeof(*port));