[v2] event/dlb2: add CQ weight support

Message ID 20220410224755.1524117-1-timothy.mcdaniel@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Jerin Jacob
Headers
Series [v2] event/dlb2: add CQ weight support |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation warning apply issues
ci/iol-testing warning apply patch failure

Commit Message

Timothy McDaniel April 10, 2022, 10:47 p.m. UTC
  Enabling the weight limit on a CQ allows the enqueued QEs' 2-bit weight
value (representing weights of 1, 2, 4, and 8) to factor into whether a
CQ is full. If the sum of the weights of the QEs in the CQ meet or exceed
its weight limit, DLB will stop scheduling QEs to it (until software pops
enough QEs from the CQ to reverse that).

CQ weight support is enabled via the command line, and applies to
DLB 2.5 (and above) load balanced ports. The DLB2 documentation will
be updated with further details.

Signed-off-by: Timothy McDaniel <timothy.mcdaniel@intel.com>
---
Depends-on: patch-109544("event/dlb2: add support for single 512B write of 4 QEs")

V2:
* Added patch dependency line in commit message
---
 drivers/event/dlb2/dlb2.c                  | 109 +++++++++-
 drivers/event/dlb2/dlb2_iface.c            |   4 +
 drivers/event/dlb2/dlb2_iface.h            |   4 +
 drivers/event/dlb2/dlb2_priv.h             |  10 +-
 drivers/event/dlb2/dlb2_user.h             |  30 +++
 drivers/event/dlb2/pf/base/dlb2_resource.c | 221 +++++++++++++++++++++
 drivers/event/dlb2/pf/base/dlb2_resource.h |  33 +++
 drivers/event/dlb2/pf/dlb2_pf.c            |  68 +++++++
 8 files changed, 475 insertions(+), 4 deletions(-)
  

Patch

diff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c
index e2a5303310..9bac92c7b5 100644
--- a/drivers/event/dlb2/dlb2.c
+++ b/drivers/event/dlb2/dlb2.c
@@ -106,6 +106,16 @@  dlb2_init_queue_depth_thresholds(struct dlb2_eventdev *dlb2,
 	}
 }
 
+/* Override the per-port CQ weight defaults with value(s) provided on the
+ * command line.
+ */
+static void
+dlb2_init_cq_weight(struct dlb2_eventdev *dlb2, int *cq_weight)
+{
+	int q;
+
+	/* cq_weight[] is sized DLB2_MAX_NUM_LDB_PORTS (see struct
+	 * dlb2_cq_weight), so iterating up to DLB2_MAX_NUM_PORTS_ALL would
+	 * read past the end of the array. CQ weights apply to load-balanced
+	 * ports only.
+	 */
+	for (q = 0; q < DLB2_MAX_NUM_LDB_PORTS; q++)
+		dlb2->ev_ports[q].cq_weight = cq_weight[q];
+}
+
 static int
 dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
 {
@@ -546,6 +556,53 @@  set_qid_depth_thresh_v2_5(const char *key __rte_unused,
 	return 0;
 }
 
+/* rte_kvargs_process() callback: parse the cq_weight devarg and record the
+ * requested weight limit for each selected load-balanced port.
+ *
+ * Returns 0 on success, -EINVAL on a malformed value or out-of-range port.
+ */
+static int
+set_cq_weight(const char *key __rte_unused,
+	      const char *value,
+	      void *opaque)
+{
+	struct dlb2_cq_weight *cq_weight = opaque;
+	int first, last, weight, i;
+
+	if (value == NULL || opaque == NULL) {
+		DLB2_LOG_ERR("NULL pointer\n");
+		return -EINVAL;
+	}
+
+	/* command line override may take one of the following 3 forms:
+	 * cq_weight=all:<weight_value> ... all ldb ports
+	 * cq_weight=portA-portB:<weight_value> ... a range of ldb ports
+	 * cq_weight=port:<weight_value> ... just one ldb port
+	 */
+	if (sscanf(value, "all:%d", &weight) == 1) {
+		first = 0;
+		last = DLB2_MAX_NUM_LDB_PORTS - 1;
+	} else if (sscanf(value, "%d-%d:%d", &first, &last, &weight) == 3) {
+		/* we have everything we need */
+	} else if (sscanf(value, "%d:%d", &first, &weight) == 2) {
+		last = first;
+	} else {
+		DLB2_LOG_ERR("Error parsing ldb port qe weight devarg. Should be all:val, port-port:val, or port:val\n");
+		return -EINVAL;
+	}
+
+	if (first > last || first < 0 ||
+		last >= DLB2_MAX_NUM_LDB_PORTS) {
+		DLB2_LOG_ERR("Error parsing ldb port qe weight arg, invalid port value\n");
+		return -EINVAL;
+	}
+
+	/* A weight of zero leaves the port's CQ weight feature disabled. */
+	if (weight < 0 || weight > DLB2_MAX_CQ_DEPTH_OVERRIDE) {
+		DLB2_LOG_ERR("Error parsing ldb port qe weight devarg, must be between 0 and the max cq depth\n");
+		return -EINVAL;
+	}
+
+	for (i = first; i <= last; i++)
+		cq_weight->limit[i] = weight; /* indexed by port ID */
+
+	return 0;
+}
+
 static void
 dlb2_eventdev_info_get(struct rte_eventdev *dev,
 		       struct rte_event_dev_info *dev_info)
@@ -1366,7 +1423,14 @@  dlb2_hw_create_ldb_port(struct dlb2_eventdev *dlb2,
 		return -EINVAL;
 
 	if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
-		DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
+		DLB2_LOG_ERR("dlb2: invalid cq depth, must be at least %d\n",
+			     DLB2_MIN_CQ_DEPTH);
+		return -EINVAL;
+	}
+
+	if (dlb2->version == DLB2_HW_V2 && ev_port->cq_weight != 0 &&
+	    ev_port->cq_weight > dequeue_depth) {
+		DLB2_LOG_ERR("dlb2: invalid cq depth, must be >= cq weight%d\n",
 			     DLB2_MIN_CQ_DEPTH);
 		return -EINVAL;
 	}
@@ -1444,8 +1508,24 @@  dlb2_hw_create_ldb_port(struct dlb2_eventdev *dlb2,
 	if (dlb2->version == DLB2_HW_V2) {
 		qm_port->cached_ldb_credits = 0;
 		qm_port->cached_dir_credits = 0;
-	} else
+		if (ev_port->cq_weight) {
+			struct dlb2_enable_cq_weight_args cq_weight_args = {0};
+
+			cq_weight_args.port_id = qm_port->id;
+			cq_weight_args.limit = ev_port->cq_weight;
+			ret = dlb2_iface_enable_cq_weight(handle, &cq_weight_args);
+			if (ret < 0) {
+				DLB2_LOG_ERR("dlb2: dlb2_dir_port_create error, ret=%d (driver status: %s)\n",
+					ret,
+					dlb2_error_strings[cfg.response.  status]);
+				goto error_exit;
+			}
+		}
+		qm_port->cq_weight = ev_port->cq_weight;
+	} else {
 		qm_port->cached_credits = 0;
+		qm_port->cq_weight = 0;
+	}
 
 	/* CQs with depth < 8 use an 8-entry queue, but withhold credits so
 	 * the effective depth is smaller.
@@ -2843,6 +2923,14 @@  dlb2_event_build_hcws(struct dlb2_port *qm_port,
 			qe[2].data = ev[2].u64;
 			qe[3].data = ev[3].u64;
 #endif
+			/* will only be set for DLB 2.5 + */
+			if (qm_port->cq_weight) {
+				qe[0].weight = ev[0].impl_opaque & 3;
+				qe[1].weight = ev[1].impl_opaque & 3;
+				qe[2].weight = ev[2].impl_opaque & 3;
+				qe[3].weight = ev[3].impl_opaque & 3;
+			}
+
 
 		break;
 	case 3:
@@ -4639,6 +4727,9 @@  dlb2_primary_eventdev_probe(struct rte_eventdev *dev,
 	dlb2_init_queue_depth_thresholds(dlb2,
 					 dlb2_args->qid_depth_thresholds.val);
 
+	dlb2_init_cq_weight(dlb2,
+			    dlb2_args->cq_weight.limit);
+
 	return 0;
 }
 
@@ -4693,6 +4784,7 @@  dlb2_parse_params(const char *params,
 					     DLB2_DEPTH_THRESH_ARG,
 					     DLB2_VECTOR_OPTS_ENAB_ARG,
 					     DLB2_MAX_CQ_DEPTH,
+					     DLB2_CQ_WEIGHT,
 					     NULL };
 
 	if (params != NULL && params[0] != '\0') {
@@ -4833,7 +4925,18 @@  dlb2_parse_params(const char *params,
 					set_max_cq_depth,
 					&dlb2_args->max_cq_depth);
 			if (ret != 0) {
-				DLB2_LOG_ERR("%s: Error parsing vector opts enabled",
+				DLB2_LOG_ERR("%s: Error parsing max cq depth",
+					     name);
+				rte_kvargs_free(kvlist);
+				return ret;
+			}
+
+			ret = rte_kvargs_process(kvlist,
+					DLB2_CQ_WEIGHT,
+					set_cq_weight,
+					&dlb2_args->cq_weight);
+			if (ret != 0) {
+				DLB2_LOG_ERR("%s: Error parsing cq weight on",
 					     name);
 				rte_kvargs_free(kvlist);
 				return ret;
diff --git a/drivers/event/dlb2/dlb2_iface.c b/drivers/event/dlb2/dlb2_iface.c
index 5471dd8da7..7913525e0f 100644
--- a/drivers/event/dlb2/dlb2_iface.c
+++ b/drivers/event/dlb2/dlb2_iface.c
@@ -72,3 +72,7 @@  int (*dlb2_iface_get_ldb_queue_depth)(struct dlb2_hw_dev *handle,
 
 int (*dlb2_iface_get_dir_queue_depth)(struct dlb2_hw_dev *handle,
 				struct dlb2_get_dir_queue_depth_args *args);
+
+int (*dlb2_iface_enable_cq_weight)(struct dlb2_hw_dev *handle,
+				   struct dlb2_enable_cq_weight_args *args);
+
diff --git a/drivers/event/dlb2/dlb2_iface.h b/drivers/event/dlb2/dlb2_iface.h
index b508eb0936..3aace0ba19 100644
--- a/drivers/event/dlb2/dlb2_iface.h
+++ b/drivers/event/dlb2/dlb2_iface.h
@@ -71,4 +71,8 @@  extern int (*dlb2_iface_get_ldb_queue_depth)(struct dlb2_hw_dev *handle,
 
 extern int (*dlb2_iface_get_dir_queue_depth)(struct dlb2_hw_dev *handle,
 				struct dlb2_get_dir_queue_depth_args *args);
+
+extern int (*dlb2_iface_enable_cq_weight)(struct dlb2_hw_dev *handle,
+					  struct dlb2_enable_cq_weight_args *args);
+
 #endif /* _DLB2_IFACE_H_ */
diff --git a/drivers/event/dlb2/dlb2_priv.h b/drivers/event/dlb2/dlb2_priv.h
index 3e47e4776b..6084fc4183 100644
--- a/drivers/event/dlb2/dlb2_priv.h
+++ b/drivers/event/dlb2/dlb2_priv.h
@@ -44,6 +44,7 @@ 
 #define DLB2_DEPTH_THRESH_ARG "default_depth_thresh"
 #define DLB2_VECTOR_OPTS_ENAB_ARG "vector_opts_enable"
 #define DLB2_MAX_CQ_DEPTH "max_cq_depth"
+#define DLB2_CQ_WEIGHT "cq_weight"
 
 /* Begin HW related defines and structs */
 
@@ -249,7 +250,7 @@  struct dlb2_enqueue_qe {
 	/* Word 4 */
 	uint16_t lock_id;
 	uint8_t meas_lat:1;
-	uint8_t rsvd1:2;
+	uint8_t weight:2; /* DLB 2.5 and above */
 	uint8_t no_dec:1;
 	uint8_t cmp_id:4;
 	union {
@@ -377,6 +378,7 @@  struct dlb2_port {
 	struct dlb2_eventdev_port *ev_port; /* back ptr */
 	bool use_scalar; /* force usage of scalar code */
 	uint16_t hw_credit_quanta;
+	uint32_t cq_weight;
 };
 
 /* Per-process per-port mmio and memory pointers */
@@ -524,6 +526,7 @@  struct dlb2_eventdev_port {
 	/* enq_configured is set when the qm port is created */
 	bool enq_configured;
 	uint8_t implicit_release; /* release events before dequeuing */
+	uint32_t cq_weight; /* DLB2.5 and above ldb ports only */
 }  __rte_cache_aligned;
 
 struct dlb2_queue {
@@ -625,6 +628,10 @@  struct dlb2_qid_depth_thresholds {
 	int val[DLB2_MAX_NUM_QUEUES_ALL];
 };
 
+struct dlb2_cq_weight {
+	int limit[DLB2_MAX_NUM_LDB_PORTS];
+};
+
 struct dlb2_devargs {
 	int socket_id;
 	int max_num_events;
@@ -638,6 +645,7 @@  struct dlb2_devargs {
 	int default_depth_thresh;
 	bool vector_opts_enabled;
 	int max_cq_depth;
+	struct dlb2_cq_weight cq_weight;
 };
 
 /* End Eventdev related defines and structs */
diff --git a/drivers/event/dlb2/dlb2_user.h b/drivers/event/dlb2/dlb2_user.h
index 9760e9bda6..c67e64bdd5 100644
--- a/drivers/event/dlb2/dlb2_user.h
+++ b/drivers/event/dlb2/dlb2_user.h
@@ -47,6 +47,8 @@  enum dlb2_error {
 	DLB2_ST_NO_MEMORY,
 	DLB2_ST_INVALID_LOCK_ID_COMP_LEVEL,
 	DLB2_ST_INVALID_COS_ID,
+	DLB2_ST_INVALID_CQ_WEIGHT_LIMIT,
+	DLB2_ST_FEATURE_UNAVAILABLE,
 };
 
 static const char dlb2_error_strings[][128] = {
@@ -87,6 +89,9 @@  static const char dlb2_error_strings[][128] = {
 	"DLB2_ST_NO_MEMORY",
 	"DLB2_ST_INVALID_LOCK_ID_COMP_LEVEL",
 	"DLB2_ST_INVALID_COS_ID",
+	"DLB2_ST_INVALID_CQ_WEIGHT_LIMIT",
+	"DLB2_ST_FEATURE_UNAVAILABLE",
+
 };
 
 struct dlb2_cmd_response {
@@ -687,6 +692,31 @@  struct dlb2_pending_port_unmaps_args {
 	__u32 padding0;
 };
 
+/*
+ * DLB2_DOMAIN_CMD_ENABLE_CQ_WEIGHT: Enable QE-weight based scheduling on a
+ *      load-balanced port's CQ and configures the CQ's weight limit.
+ *
+ *      This must be called after creating the port but before starting the
+ *      domain. The QE weight limit must be non-zero and cannot exceed the
+ *      CQ's depth.
+ *
+ * Input parameters:
+ * - port_id: Load-balanced port ID.
+ * - limit: QE weight limit.
+ *
+ * Output parameters:
+ * - response.status: Detailed error code. In certain cases, such as if the
+ *      ioctl request arg is invalid, the driver won't set status.
+ * - response.id: not used by this command (only response.status is set).
+ */
+struct dlb2_enable_cq_weight_args {
+	/* Output parameters */
+	struct dlb2_cmd_response response;
+	/* Input parameters */
+	__u32 port_id;
+	__u32 limit;
+};
+
 /*
  * Mapping sizes for memory mapping the consumer queue (CQ) memory space, and
  * producer port (PP) MMIO space.
diff --git a/drivers/event/dlb2/pf/base/dlb2_resource.c b/drivers/event/dlb2/pf/base/dlb2_resource.c
index d4c49c2992..54222d732c 100644
--- a/drivers/event/dlb2/pf/base/dlb2_resource.c
+++ b/drivers/event/dlb2/pf/base/dlb2_resource.c
@@ -6246,3 +6246,224 @@  int dlb2_set_group_sequence_numbers(struct dlb2_hw *hw,
 	return 0;
 }
 
+/**
+ * dlb2_hw_set_qe_arbiter_weights() - program QE arbiter weights
+ * @hw: dlb2_hw handle for a particular device.
+ * @weight: 8-entry array of arbiter weights.
+ *
+ * weight[N] programs priority N's weight. In cases where the 8 priorities are
+ * reduced to 4 bins, the mapping is:
+ * - weight[1] programs bin 0
+ * - weight[3] programs bin 1
+ * - weight[5] programs bin 2
+ * - weight[7] programs bin 3
+ *
+ * The same four bin weights are written to each QE arbitration point in the
+ * device; the register name in each DLB2_CSR_WR() identifies the arbitration
+ * point. Registers taking an (hw->ver) argument have version-dependent
+ * addresses.
+ */
+void dlb2_hw_set_qe_arbiter_weights(struct dlb2_hw *hw, u8 weight[8])
+{
+	u32 reg = 0;
+
+	DLB2_BITS_SET(reg, weight[1], DLB2_ATM_CFG_ARB_WEIGHTS_RDY_BIN_BIN0);
+	DLB2_BITS_SET(reg, weight[3], DLB2_ATM_CFG_ARB_WEIGHTS_RDY_BIN_BIN1);
+	DLB2_BITS_SET(reg, weight[5], DLB2_ATM_CFG_ARB_WEIGHTS_RDY_BIN_BIN2);
+	DLB2_BITS_SET(reg, weight[7], DLB2_ATM_CFG_ARB_WEIGHTS_RDY_BIN_BIN3);
+	DLB2_CSR_WR(hw, DLB2_ATM_CFG_ARB_WEIGHTS_RDY_BIN, reg);
+
+	reg = 0;
+	DLB2_BITS_SET(reg, weight[1], DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_NALB_0_PRI0);
+	DLB2_BITS_SET(reg, weight[3], DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_NALB_0_PRI1);
+	DLB2_BITS_SET(reg, weight[5], DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_NALB_0_PRI2);
+	DLB2_BITS_SET(reg, weight[7], DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_NALB_0_PRI3);
+	DLB2_CSR_WR(hw, DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_NALB_0(hw->ver), reg);
+
+	reg = 0;
+	DLB2_BITS_SET(reg, weight[1], DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_REPLAY_0_PRI0);
+	DLB2_BITS_SET(reg, weight[3], DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_REPLAY_0_PRI1);
+	DLB2_BITS_SET(reg, weight[5], DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_REPLAY_0_PRI2);
+	DLB2_BITS_SET(reg, weight[7], DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_REPLAY_0_PRI3);
+	DLB2_CSR_WR(hw, DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_REPLAY_0(hw->ver), reg);
+
+	reg = 0;
+	DLB2_BITS_SET(reg, weight[1], DLB2_DP_CFG_ARB_WEIGHTS_TQPRI_REPLAY_0_PRI0);
+	DLB2_BITS_SET(reg, weight[3], DLB2_DP_CFG_ARB_WEIGHTS_TQPRI_REPLAY_0_PRI1);
+	DLB2_BITS_SET(reg, weight[5], DLB2_DP_CFG_ARB_WEIGHTS_TQPRI_REPLAY_0_PRI2);
+	DLB2_BITS_SET(reg, weight[7], DLB2_DP_CFG_ARB_WEIGHTS_TQPRI_REPLAY_0_PRI3);
+	DLB2_CSR_WR(hw, DLB2_DP_CFG_ARB_WEIGHTS_TQPRI_REPLAY_0, reg);
+
+	reg = 0;
+	DLB2_BITS_SET(reg, weight[1], DLB2_DP_CFG_ARB_WEIGHTS_TQPRI_DIR_0_PRI0);
+	DLB2_BITS_SET(reg, weight[3], DLB2_DP_CFG_ARB_WEIGHTS_TQPRI_DIR_0_PRI1);
+	DLB2_BITS_SET(reg, weight[5], DLB2_DP_CFG_ARB_WEIGHTS_TQPRI_DIR_0_PRI2);
+	DLB2_BITS_SET(reg, weight[7], DLB2_DP_CFG_ARB_WEIGHTS_TQPRI_DIR_0_PRI3);
+	DLB2_CSR_WR(hw, DLB2_DP_CFG_ARB_WEIGHTS_TQPRI_DIR_0, reg);
+
+	reg = 0;
+	DLB2_BITS_SET(reg, weight[1], DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_ATQ_0_PRI0);
+	DLB2_BITS_SET(reg, weight[3], DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_ATQ_0_PRI1);
+	DLB2_BITS_SET(reg, weight[5], DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_ATQ_0_PRI2);
+	DLB2_BITS_SET(reg, weight[7], DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_ATQ_0_PRI3);
+	DLB2_CSR_WR(hw, DLB2_NALB_CFG_ARB_WEIGHTS_TQPRI_ATQ_0(hw->ver), reg);
+
+	reg = 0;
+	DLB2_BITS_SET(reg, weight[1], DLB2_ATM_CFG_ARB_WEIGHTS_SCHED_BIN_BIN0);
+	DLB2_BITS_SET(reg, weight[3], DLB2_ATM_CFG_ARB_WEIGHTS_SCHED_BIN_BIN1);
+	DLB2_BITS_SET(reg, weight[5], DLB2_ATM_CFG_ARB_WEIGHTS_SCHED_BIN_BIN2);
+	DLB2_BITS_SET(reg, weight[7], DLB2_ATM_CFG_ARB_WEIGHTS_SCHED_BIN_BIN3);
+	DLB2_CSR_WR(hw, DLB2_ATM_CFG_ARB_WEIGHTS_SCHED_BIN, reg);
+
+	reg = 0;
+	DLB2_BITS_SET(reg, weight[1], DLB2_AQED_CFG_ARB_WEIGHTS_TQPRI_ATM_0_PRI0);
+	DLB2_BITS_SET(reg, weight[3], DLB2_AQED_CFG_ARB_WEIGHTS_TQPRI_ATM_0_PRI1);
+	DLB2_BITS_SET(reg, weight[5], DLB2_AQED_CFG_ARB_WEIGHTS_TQPRI_ATM_0_PRI2);
+	DLB2_BITS_SET(reg, weight[7], DLB2_AQED_CFG_ARB_WEIGHTS_TQPRI_ATM_0_PRI3);
+	DLB2_CSR_WR(hw, DLB2_AQED_CFG_ARB_WEIGHTS_TQPRI_ATM_0, reg);
+}
+
+/**
+ * dlb2_hw_set_qid_arbiter_weights() - program QID arbiter weights
+ * @hw: dlb2_hw handle for a particular device.
+ * @weight: 8-entry array of arbiter weights.
+ *
+ * weight[N] programs priority N's weight. In cases where the 8 priorities are
+ * reduced to 4 bins, the mapping is:
+ * - weight[1] programs bin 0
+ * - weight[3] programs bin 1
+ * - weight[5] programs bin 2
+ * - weight[7] programs bin 3
+ *
+ * The same four bin weights are written to both QID arbitration points (LDB
+ * QID and ATM/NALB QID); both registers have version-dependent addresses.
+ */
+void dlb2_hw_set_qid_arbiter_weights(struct dlb2_hw *hw, u8 weight[8])
+{
+	u32 reg = 0;
+
+	DLB2_BITS_SET(reg, weight[1], DLB2_LSP_CFG_ARB_WEIGHT_LDB_QID_0_PRI0_WEIGHT);
+	DLB2_BITS_SET(reg, weight[3], DLB2_LSP_CFG_ARB_WEIGHT_LDB_QID_0_PRI1_WEIGHT);
+	DLB2_BITS_SET(reg, weight[5], DLB2_LSP_CFG_ARB_WEIGHT_LDB_QID_0_PRI2_WEIGHT);
+	DLB2_BITS_SET(reg, weight[7], DLB2_LSP_CFG_ARB_WEIGHT_LDB_QID_0_PRI3_WEIGHT);
+	DLB2_CSR_WR(hw, DLB2_LSP_CFG_ARB_WEIGHT_LDB_QID_0(hw->ver), reg);
+
+	reg = 0;
+	DLB2_BITS_SET(reg, weight[1], DLB2_LSP_CFG_ARB_WEIGHT_ATM_NALB_QID_0_PRI0_WEIGHT);
+	DLB2_BITS_SET(reg, weight[3], DLB2_LSP_CFG_ARB_WEIGHT_ATM_NALB_QID_0_PRI1_WEIGHT);
+	DLB2_BITS_SET(reg, weight[5], DLB2_LSP_CFG_ARB_WEIGHT_ATM_NALB_QID_0_PRI2_WEIGHT);
+	DLB2_BITS_SET(reg, weight[7], DLB2_LSP_CFG_ARB_WEIGHT_ATM_NALB_QID_0_PRI3_WEIGHT);
+	DLB2_CSR_WR(hw, DLB2_LSP_CFG_ARB_WEIGHT_ATM_NALB_QID_0(hw->ver), reg);
+}
+
+/* Log the enable-CQ-weight request arguments. Only the "Request from vdev"
+ * line is conditional on vdev_req; the remaining lines are always logged
+ * (the original indentation misleadingly placed them under the if).
+ */
+static void dlb2_log_enable_cq_weight(struct dlb2_hw *hw,
+				      u32 domain_id,
+				      struct dlb2_enable_cq_weight_args *args,
+				      bool vdev_req,
+				      unsigned int vdev_id)
+{
+	DLB2_HW_DBG(hw, "DLB2 enable CQ weight arguments:\n");
+	if (vdev_req)
+		DLB2_HW_DBG(hw, "(Request from vdev %d)\n", vdev_id);
+	DLB2_HW_DBG(hw, "\tDomain ID: %d\n",
+		    domain_id);
+	DLB2_HW_DBG(hw, "\tPort ID:   %d\n",
+		    args->port_id);
+	DLB2_HW_DBG(hw, "\tLimit:   %d\n",
+		    args->limit);
+}
+
+/* Validate an enable-CQ-weight request. On failure, returns -EINVAL and sets
+ * resp->status to the first failing check's detailed error code, so the
+ * order of the checks below determines which status the caller sees.
+ */
+static int
+dlb2_verify_enable_cq_weight_args(struct dlb2_hw *hw,
+				  u32 domain_id,
+				  struct dlb2_enable_cq_weight_args *args,
+				  struct dlb2_cmd_response *resp,
+				  bool vdev_req,
+				  unsigned int vdev_id)
+{
+	struct dlb2_hw_domain *domain;
+	struct dlb2_ldb_port *port;
+
+	/* CQ weight limits are a DLB 2.5+ feature; reject plain V2 devices */
+	if (hw->ver == DLB2_HW_V2) {
+		resp->status = DLB2_ST_FEATURE_UNAVAILABLE;
+		return -EINVAL;
+	}
+
+	domain = dlb2_get_domain_from_id(hw, domain_id, vdev_req, vdev_id);
+
+	if (!domain) {
+		resp->status = DLB2_ST_INVALID_DOMAIN_ID;
+		return -EINVAL;
+	}
+
+	if (!domain->configured) {
+		resp->status = DLB2_ST_DOMAIN_NOT_CONFIGURED;
+		return -EINVAL;
+	}
+
+	/* The limit must be programmed before the domain is started */
+	if (domain->started) {
+		resp->status = DLB2_ST_DOMAIN_STARTED;
+		return -EINVAL;
+	}
+
+	port = dlb2_get_domain_used_ldb_port(args->port_id, vdev_req, domain);
+	if (!port || !port->configured) {
+		resp->status = DLB2_ST_INVALID_PORT_ID;
+		return -EINVAL;
+	}
+
+	/* A zero limit is meaningless and a limit above the CQ depth could
+	 * never be reached.
+	 */
+	if (args->limit == 0 || args->limit > port->cq_depth) {
+		resp->status = DLB2_ST_INVALID_CQ_WEIGHT_LIMIT;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Enable QE-weight based scheduling on a load-balanced port's CQ and program
+ * its weight limit. Validates the request first, then writes the CQ's
+ * weight-limit CSR. Returns 0 on success, -EINVAL on a validation failure
+ * (resp->status holds the detailed code), or -EFAULT on an internal
+ * inconsistency (domain/port vanished after validation).
+ */
+int dlb2_hw_enable_cq_weight(struct dlb2_hw *hw,
+			     u32 domain_id,
+			     struct dlb2_enable_cq_weight_args *args,
+			     struct dlb2_cmd_response *resp,
+			     bool vdev_req,
+			     unsigned int vdev_id)
+{
+	struct dlb2_hw_domain *domain;
+	struct dlb2_ldb_port *port;
+	int ret, id;
+	u32 reg = 0;
+
+	dlb2_log_enable_cq_weight(hw, domain_id, args, vdev_req, vdev_id);
+
+	/*
+	 * Verify that hardware resources are available before attempting to
+	 * satisfy the request. This simplifies the error unwinding code.
+	 */
+	ret = dlb2_verify_enable_cq_weight_args(hw,
+						domain_id,
+						args,
+						resp,
+						vdev_req,
+						vdev_id);
+	if (ret)
+		return ret;
+
+	domain = dlb2_get_domain_from_id(hw, domain_id, vdev_req, vdev_id);
+	if (!domain) {
+		DLB2_HW_ERR(hw,
+			    "[%s():%d] Internal error: domain not found\n",
+			    __func__, __LINE__);
+		return -EFAULT;
+	}
+
+	id = args->port_id;
+
+	port = dlb2_get_domain_used_ldb_port(id, vdev_req, domain);
+	if (!port) {
+		DLB2_HW_ERR(hw,
+			    "[%s():%d] Internal error: port not found\n",
+			    __func__, __LINE__);
+		return -EFAULT;
+	}
+
+	/* Set the valid bit and the weight limit for this port's CQ */
+	DLB2_BIT_SET(reg, DLB2_LSP_CFG_CQ_LDB_WU_LIMIT_V);
+	DLB2_BITS_SET(reg, args->limit, DLB2_LSP_CFG_CQ_LDB_WU_LIMIT_LIMIT);
+
+	DLB2_CSR_WR(hw, DLB2_LSP_CFG_CQ_LDB_WU_LIMIT(port->id.phys_id), reg);
+
+	resp->status = 0;
+
+	return 0;
+}
+
diff --git a/drivers/event/dlb2/pf/base/dlb2_resource.h b/drivers/event/dlb2/pf/base/dlb2_resource.h
index 684049cd6e..a7e6c90888 100644
--- a/drivers/event/dlb2/pf/base/dlb2_resource.h
+++ b/drivers/event/dlb2/pf/base/dlb2_resource.h
@@ -1910,4 +1910,37 @@  void dlb2_hw_dir_cq_interrupt_set_mode(struct dlb2_hw *hw,
 				       int port_id,
 				       int mode);
 
+/**
+ * dlb2_hw_enable_cq_weight() - Enable QE-weight based scheduling on an LDB port.
+ * @hw: dlb2_hw handle for a particular device.
+ * @domain_id: domain ID.
+ * @args: CQ weight enablement arguments.
+ * @resp: response structure.
+ * @vdev_request: indicates whether this request came from a vdev.
+ * @vdev_id: If vdev_request is true, this contains the vdev's ID.
+ *
+ * This function enables QE-weight based scheduling on a load-balanced port's
+ * CQ and configures the CQ's weight limit.
+ *
+ * This must be called after creating the port but before starting the
+ * domain.
+ *
+ * Return:
+ * Returns 0 upon success, < 0 otherwise. If an error occurs, resp->status is
+ * assigned a detailed error code from enum dlb2_error. resp->id is not used
+ * by this command.
+ *
+ * Errors:
+ * EINVAL - The domain or port is not configured, the domain has already been
+ *          started, the requested limit exceeds the port's CQ depth, or this
+ *          feature is unavailable on the device.
+ * EFAULT - Internal error (resp->status not set).
+ */
+int dlb2_hw_enable_cq_weight(struct dlb2_hw *hw,
+			     u32 domain_id,
+			     struct dlb2_enable_cq_weight_args *args,
+			     struct dlb2_cmd_response *resp,
+			     bool vdev_request,
+			     unsigned int vdev_id);
+
 #endif /* __DLB2_RESOURCE_H */
diff --git a/drivers/event/dlb2/pf/dlb2_pf.c b/drivers/event/dlb2/pf/dlb2_pf.c
index 5c80c724f1..10918ff281 100644
--- a/drivers/event/dlb2/pf/dlb2_pf.c
+++ b/drivers/event/dlb2/pf/dlb2_pf.c
@@ -41,6 +41,8 @@ 
 #include "base/dlb2_resource.h"
 
 static const char *event_dlb2_pf_name = RTE_STR(EVDEV_DLB2_NAME_PMD);
+static unsigned int dlb2_qe_sa_pct = 1;
+static unsigned int dlb2_qid_sa_pct;
 
 static void
 dlb2_pf_low_level_io_init(void)
@@ -80,6 +82,26 @@  dlb2_pf_get_device_version(struct dlb2_hw_dev *handle,
 	return 0;
 }
 
+/* Fill the 8-entry arbiter weight table for the given starvation-avoidance
+ * percentage: the highest priority always gets DLB2_MAX_WEIGHT, and each
+ * lower priority sits one step below its higher-priority neighbor.
+ */
+static void dlb2_pf_calc_arbiter_weights(u8 *weight,
+					 unsigned int pct)
+{
+	int step, i;
+
+	/* Largest possible step (100% SA case): 32 */
+	step = (DLB2_MAX_WEIGHT + 1) / DLB2_NUM_ARB_WEIGHTS;
+
+	/* Scale the step by the SA percentage, flooring at 1 for any
+	 * non-zero percentage.
+	 */
+	step = (step * pct) / 100;
+	if (step == 0 && pct != 0)
+		step = 1;
+
+	/* weight[7] = 0xff, weight[6] = 0xff - step, ... */
+	for (i = 0; i < DLB2_NUM_ARB_WEIGHTS; i++)
+		weight[DLB2_NUM_ARB_WEIGHTS - 1 - i] =
+			DLB2_MAX_WEIGHT - i * step;
+}
+
 static void
 dlb2_pf_hardware_init(struct dlb2_hw_dev *handle)
 {
@@ -87,6 +109,27 @@  dlb2_pf_hardware_init(struct dlb2_hw_dev *handle)
 
 	dlb2_hw_enable_sparse_ldb_cq_mode(&dlb2_dev->hw);
 	dlb2_hw_enable_sparse_dir_cq_mode(&dlb2_dev->hw);
+
+	/* Configure arbitration weights for QE selection */
+	if (dlb2_qe_sa_pct <= 100) {
+		u8 weight[DLB2_NUM_ARB_WEIGHTS];
+
+		dlb2_pf_calc_arbiter_weights(weight,
+					     dlb2_qe_sa_pct);
+
+		dlb2_hw_set_qe_arbiter_weights(&dlb2_dev->hw, weight);
+	}
+
+	/* Configure arbitration weights for QID selection */
+	if (dlb2_qid_sa_pct <= 100) {
+		u8 weight[DLB2_NUM_ARB_WEIGHTS];
+
+		dlb2_pf_calc_arbiter_weights(weight,
+					     dlb2_qid_sa_pct);
+
+		dlb2_hw_set_qid_arbiter_weights(&dlb2_dev->hw, weight);
+	}
+
 }
 
 static int
@@ -578,6 +621,30 @@  dlb2_pf_get_dir_queue_depth(struct dlb2_hw_dev *handle,
 	return ret;
 }
 
+/* PF-backend implementation of the enable-CQ-weight iface op: forwards the
+ * request to the resource layer and copies the detailed status back into
+ * the caller's args.
+ */
+static int
+dlb2_pf_enable_cq_weight(struct dlb2_hw_dev *handle,
+			 struct dlb2_enable_cq_weight_args *args)
+{
+	struct dlb2_dev *dlb2_dev = (struct dlb2_dev *)handle->pf_dev;
+	struct dlb2_cmd_response response = {0};
+	int ret = 0;
+
+	/* NOTE(review): "dev" is not declared in this function; this matches
+	 * the file-wide DLB2_INFO usage, which presumably discards its first
+	 * argument at macro expansion — confirm against the osdep macros.
+	 */
+	DLB2_INFO(dev->dlb2_device, "Entering %s()\n", __func__);
+
+	ret = dlb2_hw_enable_cq_weight(&dlb2_dev->hw,
+				       handle->domain_id,
+				       args,
+				       &response,
+				       false,
+				       0);
+	args->response = response;
+
+	DLB2_INFO(dev->dlb2_device, "Exiting %s() with ret=%d\n",
+		  __func__, ret);
+
+	return ret;
+}
+
 static void
 dlb2_pf_iface_fn_ptrs_init(void)
 {
@@ -602,6 +669,7 @@  dlb2_pf_iface_fn_ptrs_init(void)
 	dlb2_iface_get_sn_allocation = dlb2_pf_get_sn_allocation;
 	dlb2_iface_set_sn_allocation = dlb2_pf_set_sn_allocation;
 	dlb2_iface_get_sn_occupancy = dlb2_pf_get_sn_occupancy;
+	dlb2_iface_enable_cq_weight = dlb2_pf_enable_cq_weight;
 }
 
 /* PCI DEV HOOKS */