[v2] event/cnxk: add WQE stashing support

Message ID 20230222053816.2247-1-pbhagavatula@marvell.com (mailing list archive)
State Accepted, archived
Delegated to: Jerin Jacob
Headers
Series [v2] event/cnxk: add WQE stashing support |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation fail ninja build failure
ci/iol-testing fail build patch failure
ci/Intel-compilation fail Compilation issues
ci/github-robot: build success github build: passed

Commit Message

Pavan Nikhilesh Bhagavatula Feb. 22, 2023, 5:38 a.m. UTC
  From: Pavan Nikhilesh <pbhagavatula@marvell.com>

CN10K supports stashing the scheduled WQE carried by `rte_event` to the
core's L2 Dcache. The number of cache lines to be stashed and the offset
are configurable per HWGRP, i.e. event queue.

By default, stashing is enabled on queues which have been connected to
Rx adapter. Stashing improves performance by up to 6% based on the
workload. Both MBUF and NIX_RX_WQE_HDR + NIX_RX_PARSE_S are stashed.

Add device arguments to support configuring WQE stashing.
The dictionary format for device arguments is
`[Qx|stash_offset|stash_length]`, where the stash offset can be a
negative integer.

  For stashing mbuf on queue 0 and mbuf + headroom on queue 1
  -a 0002:1e:00.0,stash="[0|-1|1][1|-1|2]"

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
Depends-on: series-27133 ("common/cnxk: add cnf10ka A1 platform")

 v2 Changes:
 - Spellcheck and add dependency.

 doc/guides/eventdevs/cnxk.rst       | 14 ++++++
 drivers/common/cnxk/roc_api.h       |  3 ++
 drivers/common/cnxk/roc_features.h  | 13 ++++++
 drivers/common/cnxk/roc_mbox.h      | 10 +++++
 drivers/common/cnxk/roc_model.h     |  6 +++
 drivers/common/cnxk/roc_sso.c       | 49 ++++++++++++++++++++-
 drivers/common/cnxk/roc_sso.h       | 11 ++++-
 drivers/common/cnxk/version.map     |  1 +
 drivers/event/cnxk/cn10k_eventdev.c | 30 +++++++++++++
 drivers/event/cnxk/cn10k_eventdev.h |  3 ++
 drivers/event/cnxk/cnxk_eventdev.c  | 66 ++++++++++++++++++++++++++---
 drivers/event/cnxk/cnxk_eventdev.h  | 11 ++++-
 12 files changed, 207 insertions(+), 10 deletions(-)
 create mode 100644 drivers/common/cnxk/roc_features.h

--
2.25.1
  

Patch

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index a91c947c10..c3c0bfd7b5 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -102,6 +102,20 @@  Runtime Config Options

     -a 0002:0e:00.0,qos=[1-50-50]

+- ``CN10K WQE stashing support``
+
+  CN10K supports stashing the scheduled WQE carried by `rte_event` to the
+  core's L2 Dcache. The number of cache lines to be stashed and the offset
+  are configurable per HWGRP, i.e. event queue. The dictionary format is
+  `[Qx|stash_offset|stash_length]`, where the stash offset can be
+  a negative integer.
+  By default, stashing is enabled on queues connected to the Rx adapter.
+
+  For example::
+
+    For stashing mbuf on queue 0 and mbuf + headroom on queue 1
+    -a 0002:0e:00.0,stash="[0|-1|1][1|-1|2]"
+
 - ``Force Rx Back pressure``

    Force Rx back pressure when same mempool is used across ethernet device
diff --git a/drivers/common/cnxk/roc_api.h b/drivers/common/cnxk/roc_api.h
index 9d7f5417c2..993a2f7a68 100644
--- a/drivers/common/cnxk/roc_api.h
+++ b/drivers/common/cnxk/roc_api.h
@@ -47,6 +47,9 @@ 
 /* HW Errata */
 #include "roc_errata.h"

+/* HW Features */
+#include "roc_features.h"
+
 /* Mbox */
 #include "roc_mbox.h"

diff --git a/drivers/common/cnxk/roc_features.h b/drivers/common/cnxk/roc_features.h
new file mode 100644
index 0000000000..f3948af45e
--- /dev/null
+++ b/drivers/common/cnxk/roc_features.h
@@ -0,0 +1,13 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+#ifndef _ROC_FEATURES_H_
+#define _ROC_FEATURES_H_
+
+static inline bool
+roc_feature_sso_has_stash(void)
+{
+	return (roc_model_is_cn103xx() | roc_model_is_cn10ka_b0()) ? true : false;
+}
+
+#endif
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index a6091a98c1..af3c10b0b0 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -127,6 +127,8 @@  struct mbox_msghdr {
 	M(SSO_CONFIG_LSW, 0x612, ssow_config_lsw, ssow_config_lsw, msg_rsp)    \
 	M(SSO_HWS_CHNG_MSHIP, 0x613, ssow_chng_mship, ssow_chng_mship,         \
 	  msg_rsp)                                                             \
+	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
+	  sso_grp_stash_cfg, msg_rsp)                                          \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
 	  tim_lf_alloc_rsp)                                                    \
@@ -1374,6 +1376,14 @@  struct sso_grp_qos_cfg {
 	uint16_t __io iaq_thr;
 };

+struct sso_grp_stash_cfg {
+	struct mbox_msghdr hdr;
+	uint16_t __io grp;
+	uint8_t __io ena;
+	uint8_t __io offset : 4;
+	uint8_t __io num_linesm1 : 4;
+};
+
 struct sso_grp_stats {
 	struct mbox_msghdr hdr;
 	uint16_t __io grp;
diff --git a/drivers/common/cnxk/roc_model.h b/drivers/common/cnxk/roc_model.h
index 081c03915c..ba0fe577c0 100644
--- a/drivers/common/cnxk/roc_model.h
+++ b/drivers/common/cnxk/roc_model.h
@@ -244,6 +244,12 @@  roc_model_is_cn10kb(void)
 	return roc_model->flag & ROC_MODEL_CN103xx;
 }

+static inline uint64_t
+roc_model_is_cn103xx(void)
+{
+	return roc_model->flag & ROC_MODEL_CN103xx;
+}
+
 static inline bool
 roc_env_is_hw(void)
 {
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 9e466f09ef..4a6a5080f7 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -414,8 +414,7 @@  roc_sso_hwgrp_hws_link_status(struct roc_sso *roc_sso, uint8_t hws,
 }

 int
-roc_sso_hwgrp_qos_config(struct roc_sso *roc_sso, struct roc_sso_hwgrp_qos *qos,
-			 uint8_t nb_qos)
+roc_sso_hwgrp_qos_config(struct roc_sso *roc_sso, struct roc_sso_hwgrp_qos *qos, uint16_t nb_qos)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
 	struct dev *dev = &sso->dev;
@@ -423,6 +422,9 @@  roc_sso_hwgrp_qos_config(struct roc_sso *roc_sso, struct roc_sso_hwgrp_qos *qos,
 	struct mbox *mbox;
 	int i, rc;

+	if (!nb_qos)
+		return 0;
+
 	mbox = mbox_get(dev->mbox);
 	for (i = 0; i < nb_qos; i++) {
 		uint8_t iaq_prcnt = qos[i].iaq_prcnt;
@@ -748,6 +750,49 @@  sso_update_msix_vec_count(struct roc_sso *roc_sso, uint16_t sso_vec_cnt)
 	return rc;
 }

+int
+roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso, struct roc_sso_hwgrp_stash *stash,
+			   uint16_t nb_stash)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_grp_stash_cfg *req;
+	struct dev *dev = &sso->dev;
+	struct mbox *mbox;
+	int i, rc;
+
+	if (!nb_stash)
+		return 0;
+
+	mbox = mbox_get(dev->mbox);
+	for (i = 0; i < nb_stash; i++) {
+		req = mbox_alloc_msg_sso_grp_stash_config(mbox);
+		if (req == NULL) {
+			rc = mbox_process(mbox);
+			if (rc) {
+				rc = -EIO;
+				goto fail;
+			}
+
+			req = mbox_alloc_msg_sso_grp_stash_config(mbox);
+			if (req == NULL) {
+				rc = -ENOSPC;
+				goto fail;
+			}
+		}
+		req->ena = true;
+		req->grp = stash[i].hwgrp;
+		req->offset = stash[i].stash_offset;
+		req->num_linesm1 = stash[i].stash_count - 1;
+	}
+
+	rc = mbox_process(mbox);
+	if (rc)
+		rc = -EIO;
+fail:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, uint16_t nb_tim_lfs)
 {
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 4699908c25..e67797b046 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -14,6 +14,12 @@  struct roc_sso_hwgrp_qos {
 	uint8_t taq_prcnt;
 };

+struct roc_sso_hwgrp_stash {
+	uint16_t hwgrp;
+	uint8_t stash_offset;
+	uint8_t stash_count;
+};
+
 struct roc_sso_hws_stats {
 	uint64_t arbitration;
 };
@@ -66,7 +72,7 @@  int __roc_api roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_
 void __roc_api roc_sso_rsrc_fini(struct roc_sso *roc_sso);
 int __roc_api roc_sso_hwgrp_qos_config(struct roc_sso *roc_sso,
 				       struct roc_sso_hwgrp_qos *qos,
-				       uint8_t nb_qos);
+				       uint16_t nb_qos);
 int __roc_api roc_sso_hwgrp_alloc_xaq(struct roc_sso *roc_sso,
 				      uint32_t npa_aura_id, uint16_t hwgrps);
 int __roc_api roc_sso_hwgrp_release_xaq(struct roc_sso *roc_sso,
@@ -88,6 +94,9 @@  int __roc_api roc_sso_hwgrp_init_xaq_aura(struct roc_sso *roc_sso,
 					  uint32_t nb_xae);
 int __roc_api roc_sso_hwgrp_free_xaq_aura(struct roc_sso *roc_sso,
 					  uint16_t nb_hwgrp);
+int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
+					 struct roc_sso_hwgrp_stash *stash,
+					 uint16_t nb_stash);

 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 5d2b75fb5a..381bbb0b6c 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -379,6 +379,7 @@  INTERNAL {
 	roc_sso_hwgrp_qos_config;
 	roc_sso_hwgrp_release_xaq;
 	roc_sso_hwgrp_set_priority;
+	roc_sso_hwgrp_stash_config;
 	roc_sso_hwgrp_stats_get;
 	roc_sso_hws_base_get;
 	roc_sso_hws_link;
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 8e74edff55..a9d2e5463f 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -786,6 +786,24 @@  cn10k_sso_port_unlink(struct rte_eventdev *event_dev, void *port,
 	return (int)nb_unlinks;
 }

+static void
+cn10k_sso_configure_queue_stash(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
+	int i, rc;
+
+	plt_sso_dbg();
+	for (i = 0; i < dev->stash_cnt; i++) {
+		stash[i].hwgrp = dev->stash_parse_data[i].queue;
+		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
+		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
+	}
+	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
+	if (rc < 0)
+		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+}
+
 static int
 cn10k_sso_start(struct rte_eventdev *event_dev)
 {
@@ -795,6 +813,7 @@  cn10k_sso_start(struct rte_eventdev *event_dev)
 	if (rc < 0)
 		return rc;

+	cn10k_sso_configure_queue_stash(event_dev);
 	rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
 			    cn10k_sso_hws_flush_events);
 	if (rc < 0)
@@ -866,6 +885,8 @@  cn10k_sso_rx_adapter_queue_add(
 	int32_t rx_queue_id,
 	const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
 {
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash;
 	struct cn10k_eth_rxq *rxq;
 	uint64_t meta_aura;
 	void *lookup_mem;
@@ -884,6 +905,14 @@  cn10k_sso_rx_adapter_queue_add(
 	meta_aura = rxq->meta_aura;
 	cn10k_sso_set_priv_mem(event_dev, lookup_mem, meta_aura);
 	cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	if (roc_feature_sso_has_stash()) {
+		stash.hwgrp = queue_conf->ev.queue_id;
+		stash.stash_offset = CN10K_SSO_DEFAULT_STASH_OFFSET;
+		stash.stash_count = CN10K_SSO_DEFAULT_STASH_LENGTH;
+		rc = roc_sso_hwgrp_stash_config(&dev->sso, &stash, 1);
+		if (rc < 0)
+			plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+	}

 	return 0;
 }
@@ -1226,6 +1255,7 @@  RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_FORCE_BP "=1"
 			      CN10K_SSO_GW_MODE "=<int>"
+			      CN10K_SSO_STASH "=<string>"
 			      CNXK_TIM_DISABLE_NPA "=1"
 			      CNXK_TIM_CHNK_SLOTS "=<int>"
 			      CNXK_TIM_RINGS_LMT "=<int>"
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index 5fb6f0a6f2..aaa01d1ec1 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -5,6 +5,9 @@ 
 #ifndef __CN10K_EVENTDEV_H__
 #define __CN10K_EVENTDEV_H__

+#define CN10K_SSO_DEFAULT_STASH_OFFSET -1
+#define CN10K_SSO_DEFAULT_STASH_LENGTH 2
+
 struct cn10k_sso_hws {
 	uint64_t base;
 	uint64_t gw_rdata;
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 11bcd792ba..5f1fa1e96d 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -459,6 +459,8 @@  cnxk_sso_close(struct rte_eventdev *event_dev, cnxk_sso_unlink_t unlink_fn)
 	return 0;
 }

+typedef void (*param_parse_t)(char *value, void *opaque);
+
 static void
 parse_queue_param(char *value, void *opaque)
 {
@@ -496,7 +498,44 @@  parse_queue_param(char *value, void *opaque)
 }

 static void
-parse_qos_list(const char *value, void *opaque)
+parse_stash_param(char *value, void *opaque)
+{
+	struct cnxk_sso_stash queue_stash = {0};
+	struct cnxk_sso_evdev *dev = opaque;
+	struct cnxk_sso_stash *old_ptr;
+	char *tok = strtok(value, "|");
+	uint16_t *val;
+
+	if (!strlen(value))
+		return;
+
+	val = (uint16_t *)&queue_stash;
+	while (tok != NULL) {
+		*val = atoi(tok);
+		tok = strtok(NULL, "|");
+		val++;
+	}
+
+	if (val != (&queue_stash.stash_length + 1)) {
+		plt_err("Invalid QoS parameter expected [Qx|stash_offset|stash_length]");
+		return;
+	}
+
+	dev->stash_cnt++;
+	old_ptr = dev->stash_parse_data;
+	dev->stash_parse_data =
+		rte_realloc(dev->stash_parse_data,
+			    sizeof(struct cnxk_sso_stash) * dev->stash_cnt, 0);
+	if (dev->stash_parse_data == NULL) {
+		dev->stash_parse_data = old_ptr;
+		dev->stash_cnt--;
+		return;
+	}
+	dev->stash_parse_data[dev->stash_cnt - 1] = queue_stash;
+}
+
+static void
+parse_list(const char *value, void *opaque, param_parse_t fn)
 {
 	char *s = strdup(value);
 	char *start = NULL;
@@ -511,7 +550,7 @@  parse_qos_list(const char *value, void *opaque)

 		if (start && start < end) {
 			*end = 0;
-			parse_queue_param(start + 1, opaque);
+			fn(start + 1, opaque);
 			s = end;
 			start = end;
 		}
@@ -522,14 +561,27 @@  parse_qos_list(const char *value, void *opaque)
 }

 static int
-parse_sso_kvargs_dict(const char *key, const char *value, void *opaque)
+parse_sso_kvargs_qos_dict(const char *key, const char *value, void *opaque)
 {
 	RTE_SET_USED(key);

 	/* Dict format [Qx-TAQ-IAQ][Qz-TAQ-IAQ] use '-' cause ',' isn't allowed.
 	 * Everything is expressed in percentages, 0 represents default.
 	 */
-	parse_qos_list(value, opaque);
+	parse_list(value, opaque, parse_queue_param);
+
+	return 0;
+}
+
+static int
+parse_sso_kvargs_stash_dict(const char *key, const char *value, void *opaque)
+{
+	RTE_SET_USED(key);
+
+	/* Dict format [Qx|<stash_offset>|<stash_length>] use '|' cause ','
+	 * isn't allowed.
+	 */
+	parse_list(value, opaque, parse_stash_param);

 	return 0;
 }
@@ -548,14 +600,16 @@  cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)

 	rte_kvargs_process(kvlist, CNXK_SSO_XAE_CNT, &parse_kvargs_value,
 			   &dev->xae_cnt);
-	rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict,
-			   dev);
+	rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS,
+			   &parse_sso_kvargs_qos_dict, dev);
 	rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_flag,
 			   &dev->force_ena_bp);
 	rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_flag,
 			   &single_ws);
 	rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_flag,
 			   &dev->gw_mode);
+	rte_kvargs_process(kvlist, CN10K_SSO_STASH,
+			   &parse_sso_kvargs_stash_dict, dev);
 	dev->dual_ws = !single_ws;
 	rte_kvargs_free(kvlist);
 }
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index d93fdcda25..c7cbd722ab 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -31,6 +31,7 @@ 
 #define CNXK_SSO_FORCE_BP  "force_rx_bp"
 #define CN9K_SSO_SINGLE_WS "single_ws"
 #define CN10K_SSO_GW_MODE  "gw_mode"
+#define CN10K_SSO_STASH	   "stash"

 #define NSEC2USEC(__ns)		((__ns) / 1E3)
 #define USEC2NSEC(__us)		((__us)*1E3)
@@ -70,6 +71,12 @@  struct cnxk_sso_qos {
 	uint16_t iaq_prcnt;
 };

+struct cnxk_sso_stash {
+	uint16_t queue;
+	uint16_t stash_offset;
+	uint16_t stash_length;
+};
+
 struct cnxk_sso_evdev {
 	struct roc_sso sso;
 	uint8_t max_event_queues;
@@ -103,13 +110,15 @@  struct cnxk_sso_evdev {
 	struct cnxk_timesync_info *tstamp[RTE_MAX_ETHPORTS];
 	/* Dev args */
 	uint32_t xae_cnt;
-	uint8_t qos_queue_cnt;
+	uint16_t qos_queue_cnt;
 	struct cnxk_sso_qos *qos_parse_data;
 	uint8_t force_ena_bp;
 	/* CN9K */
 	uint8_t dual_ws;
 	/* CN10K */
 	uint8_t gw_mode;
+	uint16_t stash_cnt;
+	struct cnxk_sso_stash *stash_parse_data;
 	/* Crypto adapter */
 	uint8_t is_ca_internal_port;
 } __rte_cache_aligned;