[v4,6/7] crypto/dpaa2_sec: ordered queue support

Message ID 20220210105852.1268506-7-g.singh@nxp.com (mailing list archive)
State Accepted, archived
Delegated to: akhil goyal
Series: NXP crypto drivers changes

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Gagandeep Singh Feb. 10, 2022, 10:58 a.m. UTC
  From: Nipun Gupta <nipun.gupta@nxp.com>

This patch adds ordered queue support for the DPAA2 platform.
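
With this change, an application that binds a dpaa2_sec queue pair to an event
queue with RTE_SCHED_TYPE_ORDERED gets order-preserving crypto completions. A
minimal, hypothetical application-side sketch (not part of this patch; it
assumes the event crypto adapter API of this release, which takes a
struct rte_event template):

#include <rte_eventdev.h>
#include <rte_event_crypto_adapter.h>

/* Hypothetical helper, not part of this patch: bind one dpaa2_sec queue
 * pair to an event queue with ordered scheduling.
 */
static int
bind_ordered_qp(uint8_t adapter_id, uint8_t cdev_id, int32_t qp_id,
		uint8_t ev_queue_id)
{
	struct rte_event ev = {
		.queue_id = ev_queue_id,
		.sched_type = RTE_SCHED_TYPE_ORDERED,
		.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
	};

	/* The template reaches dpaa2_sec_eventq_attach(), which installs
	 * dpaa2_sec_process_ordered_event as the Rx callback and programs
	 * the order point record (OPR) for the queue pair.
	 */
	return rte_event_crypto_adapter_queue_pair_add(adapter_id, cdev_id,
						       qp_id, &ev);
}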

Signed-off-by: Nipun Gupta <nipun.gupta@nxp.com>
Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 doc/guides/cryptodevs/dpaa2_sec.rst         |   7 +
 drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c | 288 ++++++++++++++++++--
 drivers/crypto/dpaa2_sec/dpaa2_sec_priv.h   |   2 +
 drivers/crypto/dpaa2_sec/mc/fsl_dpseci.h    |  14 +-
 4 files changed, 290 insertions(+), 21 deletions(-)
  

Patch

diff --git a/doc/guides/cryptodevs/dpaa2_sec.rst b/doc/guides/cryptodevs/dpaa2_sec.rst
index 875d918068..1a590309a0 100644
--- a/doc/guides/cryptodevs/dpaa2_sec.rst
+++ b/doc/guides/cryptodevs/dpaa2_sec.rst
@@ -185,3 +185,10 @@  on error, mode 1 means dump HW error code and mode 2 means dump HW error code
 along with other useful debugging information like session, queue, descriptor
 data.
 e.g. ``fslmc:dpseci.1,drv_dump_mode=1``
+
+Enable strict ordering
+----------------------
+
+Use the dev arg option ``drv_strict_order=1`` to enable strict ordering.
+By default, loose ordering is used for events of the ordered schedule type.
+e.g. ``fslmc:dpseci.1,drv_strict_order=1``
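
For background, the driver changes below stamp order-restoration metadata into
the mbuf's DPAA2 sequence word when the event is dequeued, and turn it back
into an ORP-aware enqueue descriptor on transmit; strict ordering additionally
attaches an enqueue-response entry so stale frames can be reclaimed. A minimal
sketch of that encoding, using illustrative placeholder values (the real
DPAA2_ENQUEUE_FLAG_ORP / DPAA2_EQCR_* definitions live in the fslmc bus
headers):

#include <stdint.h>

/* Illustrative placeholders only; not the real fslmc bus header values. */
#define ORP_FLAG	(1u << 30)
#define OPRID_SHIFT	16
#define SEQNUM_SHIFT	0

/* Dequeue side (cf. dpaa2_sec_process_ordered_event): remember which
 * order point record (ODPID) and sequence number the frame carries.
 */
static inline uint32_t
stamp_order_info(uint16_t odpid, uint16_t seqnum)
{
	return ORP_FLAG | ((uint32_t)odpid << OPRID_SHIFT) |
		((uint32_t)seqnum << SEQNUM_SHIFT);
}

/* Enqueue side (cf. dpaa2_sec_set_enqueue_descriptor): recover the pair
 * so qbman_eq_desc_set_orp() can restore the original frame order.
 */
static inline void
recover_order_info(uint32_t seqn, uint16_t *odpid, uint16_t *seqnum)
{
	*odpid = (seqn >> OPRID_SHIFT) & 0x3fff;	/* illustrative width */
	*seqnum = (seqn >> SEQNUM_SHIFT) & 0x3fff;	/* illustrative width */
}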
diff --git a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c
index cb8aaf6446..e62d04852b 100644
--- a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c
+++ b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c
@@ -52,6 +52,7 @@ 
 #define NO_PREFETCH 0
 
 #define DRIVER_DUMP_MODE "drv_dump_mode"
+#define DRIVER_STRICT_ORDER "drv_strict_order"
 
 /* DPAA2_SEC_DP_DUMP levels */
 enum dpaa2_sec_dump_levels {
@@ -1477,14 +1478,14 @@  dpaa2_sec_enqueue_burst(void *qp, struct rte_crypto_op **ops,
 
 		for (loop = 0; loop < frames_to_send; loop++) {
 			if (*dpaa2_seqn((*ops)->sym->m_src)) {
-				uint8_t dqrr_index =
-					*dpaa2_seqn((*ops)->sym->m_src) - 1;
-
-				flags[loop] = QBMAN_ENQUEUE_FLAG_DCA | dqrr_index;
-				DPAA2_PER_LCORE_DQRR_SIZE--;
-				DPAA2_PER_LCORE_DQRR_HELD &= ~(1 << dqrr_index);
-				*dpaa2_seqn((*ops)->sym->m_src) =
-					DPAA2_INVALID_MBUF_SEQN;
+				if (*dpaa2_seqn((*ops)->sym->m_src) & QBMAN_ENQUEUE_FLAG_DCA) {
+					DPAA2_PER_LCORE_DQRR_SIZE--;
+					DPAA2_PER_LCORE_DQRR_HELD &= ~(1 <<
+					*dpaa2_seqn((*ops)->sym->m_src) &
+					QBMAN_EQCR_DCA_IDXMASK);
+				}
+				flags[loop] = *dpaa2_seqn((*ops)->sym->m_src);
+				*dpaa2_seqn((*ops)->sym->m_src) = DPAA2_INVALID_MBUF_SEQN;
 			}
 
 			/*Clear the unused FD fields before sending*/
@@ -1709,6 +1710,168 @@  dpaa2_sec_dump(struct rte_crypto_op *op)
 
 }
 
+static void
+dpaa2_sec_free_eqresp_buf(uint16_t eqresp_ci)
+{
+	struct dpaa2_dpio_dev *dpio_dev = DPAA2_PER_LCORE_DPIO;
+	struct rte_crypto_op *op;
+	struct qbman_fd *fd;
+
+	fd = qbman_result_eqresp_fd(&dpio_dev->eqresp[eqresp_ci]);
+	op = sec_fd_to_mbuf(fd);
+	/* Instead of freeing, enqueue it to the sec tx queue (sec->core)
+	 * after setting an error in FD. But this will have performance impact.
+	 */
+	rte_pktmbuf_free(op->sym->m_src);
+}
+
+static void
+dpaa2_sec_set_enqueue_descriptor(struct dpaa2_queue *dpaa2_q,
+			     struct rte_mbuf *m,
+			     struct qbman_eq_desc *eqdesc)
+{
+	struct dpaa2_dpio_dev *dpio_dev = DPAA2_PER_LCORE_DPIO;
+	struct eqresp_metadata *eqresp_meta;
+	struct dpaa2_sec_dev_private *priv = dpaa2_q->crypto_data->dev_private;
+	uint16_t orpid, seqnum;
+	uint8_t dq_idx;
+
+	if (*dpaa2_seqn(m) & DPAA2_ENQUEUE_FLAG_ORP) {
+		orpid = (*dpaa2_seqn(m) & DPAA2_EQCR_OPRID_MASK) >>
+			DPAA2_EQCR_OPRID_SHIFT;
+		seqnum = (*dpaa2_seqn(m) & DPAA2_EQCR_SEQNUM_MASK) >>
+			DPAA2_EQCR_SEQNUM_SHIFT;
+
+
+		if (!priv->en_loose_ordered) {
+			qbman_eq_desc_set_orp(eqdesc, 1, orpid, seqnum, 0);
+			qbman_eq_desc_set_response(eqdesc, (uint64_t)
+				DPAA2_VADDR_TO_IOVA(&dpio_dev->eqresp[
+				dpio_dev->eqresp_pi]), 1);
+			qbman_eq_desc_set_token(eqdesc, 1);
+
+			eqresp_meta = &dpio_dev->eqresp_meta[dpio_dev->eqresp_pi];
+			eqresp_meta->dpaa2_q = dpaa2_q;
+			eqresp_meta->mp = m->pool;
+
+			dpio_dev->eqresp_pi + 1 < MAX_EQ_RESP_ENTRIES ?
+				dpio_dev->eqresp_pi++ : (dpio_dev->eqresp_pi = 0);
+		} else {
+			qbman_eq_desc_set_orp(eqdesc, 0, orpid, seqnum, 0);
+		}
+	} else {
+		dq_idx = *dpaa2_seqn(m) - 1;
+		qbman_eq_desc_set_dca(eqdesc, 1, dq_idx, 0);
+		DPAA2_PER_LCORE_DQRR_SIZE--;
+		DPAA2_PER_LCORE_DQRR_HELD &= ~(1 << dq_idx);
+	}
+	*dpaa2_seqn(m) = DPAA2_INVALID_MBUF_SEQN;
+}
+
+
+static uint16_t
+dpaa2_sec_enqueue_burst_ordered(void *qp, struct rte_crypto_op **ops,
+			uint16_t nb_ops)
+{
+	/* Function to transmit the frames to given device and VQ*/
+	uint32_t loop;
+	int32_t ret;
+	struct qbman_fd fd_arr[MAX_TX_RING_SLOTS];
+	uint32_t frames_to_send, num_free_eq_desc, retry_count;
+	struct qbman_eq_desc eqdesc[MAX_TX_RING_SLOTS];
+	struct dpaa2_sec_qp *dpaa2_qp = (struct dpaa2_sec_qp *)qp;
+	struct qbman_swp *swp;
+	uint16_t num_tx = 0;
+	uint16_t bpid;
+	struct rte_mempool *mb_pool;
+	struct dpaa2_sec_dev_private *priv =
+				dpaa2_qp->tx_vq.crypto_data->dev_private;
+
+	if (unlikely(nb_ops == 0))
+		return 0;
+
+	if (ops[0]->sess_type == RTE_CRYPTO_OP_SESSIONLESS) {
+		DPAA2_SEC_ERR("sessionless crypto op not supported");
+		return 0;
+	}
+
+	if (!DPAA2_PER_LCORE_DPIO) {
+		ret = dpaa2_affine_qbman_swp();
+		if (ret) {
+			DPAA2_SEC_ERR("Failure in affining portal");
+			return 0;
+		}
+	}
+	swp = DPAA2_PER_LCORE_PORTAL;
+
+	while (nb_ops) {
+		frames_to_send = (nb_ops > dpaa2_eqcr_size) ?
+			dpaa2_eqcr_size : nb_ops;
+
+		if (!priv->en_loose_ordered) {
+			if (*dpaa2_seqn((*ops)->sym->m_src)) {
+				num_free_eq_desc = dpaa2_free_eq_descriptors();
+				if (num_free_eq_desc < frames_to_send)
+					frames_to_send = num_free_eq_desc;
+			}
+		}
+
+		for (loop = 0; loop < frames_to_send; loop++) {
+			/*Prepare enqueue descriptor*/
+			qbman_eq_desc_clear(&eqdesc[loop]);
+			qbman_eq_desc_set_fq(&eqdesc[loop], dpaa2_qp->tx_vq.fqid);
+
+			if (*dpaa2_seqn((*ops)->sym->m_src))
+				dpaa2_sec_set_enqueue_descriptor(
+						&dpaa2_qp->tx_vq,
+						(*ops)->sym->m_src,
+						&eqdesc[loop]);
+			else
+				qbman_eq_desc_set_no_orp(&eqdesc[loop],
+							 DPAA2_EQ_RESP_ERR_FQ);
+
+			/*Clear the unused FD fields before sending*/
+			memset(&fd_arr[loop], 0, sizeof(struct qbman_fd));
+			mb_pool = (*ops)->sym->m_src->pool;
+			bpid = mempool_to_bpid(mb_pool);
+			ret = build_sec_fd(*ops, &fd_arr[loop], bpid);
+			if (ret) {
+				DPAA2_SEC_ERR("error: Improper packet contents"
+					      " for crypto operation");
+				goto skip_tx;
+			}
+			ops++;
+		}
+
+		loop = 0;
+		retry_count = 0;
+		while (loop < frames_to_send) {
+			ret = qbman_swp_enqueue_multiple_desc(swp,
+					&eqdesc[loop], &fd_arr[loop],
+					frames_to_send - loop);
+			if (unlikely(ret < 0)) {
+				retry_count++;
+				if (retry_count > DPAA2_MAX_TX_RETRY_COUNT) {
+					num_tx += loop;
+					nb_ops -= loop;
+					goto skip_tx;
+				}
+			} else {
+				loop += ret;
+				retry_count = 0;
+			}
+		}
+
+		num_tx += loop;
+		nb_ops -= loop;
+	}
+
+skip_tx:
+	dpaa2_qp->tx_vq.tx_pkts += num_tx;
+	dpaa2_qp->tx_vq.err_pkts += nb_ops;
+	return num_tx;
+}
+
 static uint16_t
 dpaa2_sec_dequeue_burst(void *qp, struct rte_crypto_op **ops,
 			uint16_t nb_ops)
@@ -3622,6 +3785,10 @@  dpaa2_sec_dev_start(struct rte_cryptodev *dev)
 
 	PMD_INIT_FUNC_TRACE();
 
+	/* Change the tx burst function if ordered queues are used */
+	if (priv->en_ordered)
+		dev->enqueue_burst = dpaa2_sec_enqueue_burst_ordered;
+
 	memset(&attr, 0, sizeof(struct dpseci_attr));
 
 	ret = dpseci_enable(dpseci, CMD_PRI_LOW, priv->token);
@@ -3834,12 +4001,46 @@  dpaa2_sec_process_atomic_event(struct qbman_swp *swp __rte_unused,
 
 	ev->event_ptr = sec_fd_to_mbuf(fd);
 	dqrr_index = qbman_get_dqrr_idx(dq);
-	*dpaa2_seqn(crypto_op->sym->m_src) = dqrr_index + 1;
+	*dpaa2_seqn(crypto_op->sym->m_src) = QBMAN_ENQUEUE_FLAG_DCA | dqrr_index;
 	DPAA2_PER_LCORE_DQRR_SIZE++;
 	DPAA2_PER_LCORE_DQRR_HELD |= 1 << dqrr_index;
 	DPAA2_PER_LCORE_DQRR_MBUF(dqrr_index) = crypto_op->sym->m_src;
 }
 
+static void __rte_hot
+dpaa2_sec_process_ordered_event(struct qbman_swp *swp,
+				const struct qbman_fd *fd,
+				const struct qbman_result *dq,
+				struct dpaa2_queue *rxq,
+				struct rte_event *ev)
+{
+	struct rte_crypto_op *crypto_op = (struct rte_crypto_op *)ev->event_ptr;
+
+	/* Prefetching mbuf */
+	rte_prefetch0((void *)(size_t)(DPAA2_GET_FD_ADDR(fd)-
+		rte_dpaa2_bpid_info[DPAA2_GET_FD_BPID(fd)].meta_data_size));
+
+	/* Prefetching ipsec crypto_op stored in priv data of mbuf */
+	rte_prefetch0((void *)(size_t)(DPAA2_GET_FD_ADDR(fd)-64));
+
+	ev->flow_id = rxq->ev.flow_id;
+	ev->sub_event_type = rxq->ev.sub_event_type;
+	ev->event_type = RTE_EVENT_TYPE_CRYPTODEV;
+	ev->op = RTE_EVENT_OP_NEW;
+	ev->sched_type = rxq->ev.sched_type;
+	ev->queue_id = rxq->ev.queue_id;
+	ev->priority = rxq->ev.priority;
+	ev->event_ptr = sec_fd_to_mbuf(fd);
+
+	*dpaa2_seqn(crypto_op->sym->m_src) = DPAA2_ENQUEUE_FLAG_ORP;
+	*dpaa2_seqn(crypto_op->sym->m_src) |= qbman_result_DQ_odpid(dq) <<
+		DPAA2_EQCR_OPRID_SHIFT;
+	*dpaa2_seqn(crypto_op->sym->m_src) |= qbman_result_DQ_seqnum(dq) <<
+		DPAA2_EQCR_SEQNUM_SHIFT;
+
+	qbman_swp_dqrr_consume(swp, dq);
+}
+
 int
 dpaa2_sec_eventq_attach(const struct rte_cryptodev *dev,
 		int qp_id,
@@ -3857,6 +4058,8 @@  dpaa2_sec_eventq_attach(const struct rte_cryptodev *dev,
 		qp->rx_vq.cb = dpaa2_sec_process_parallel_event;
 	else if (event->sched_type == RTE_SCHED_TYPE_ATOMIC)
 		qp->rx_vq.cb = dpaa2_sec_process_atomic_event;
+	else if (event->sched_type == RTE_SCHED_TYPE_ORDERED)
+		qp->rx_vq.cb = dpaa2_sec_process_ordered_event;
 	else
 		return -EINVAL;
 
@@ -3875,6 +4078,37 @@  dpaa2_sec_eventq_attach(const struct rte_cryptodev *dev,
 		cfg.options |= DPSECI_QUEUE_OPT_ORDER_PRESERVATION;
 		cfg.order_preservation_en = 1;
 	}
+
+	if (event->sched_type == RTE_SCHED_TYPE_ORDERED) {
+		struct opr_cfg ocfg;
+
+		/* Restoration window size = 256 frames */
+		ocfg.oprrws = 3;
+		/* Restoration window size = 512 frames for LX2 */
+		if (dpaa2_svr_family == SVR_LX2160A)
+			ocfg.oprrws = 4;
+		/* Auto advance NESN window enabled */
+		ocfg.oa = 1;
+		/* Late arrival window size disabled */
+		ocfg.olws = 0;
+		/* ORL resource exhaustion advance NESN disabled */
+		ocfg.oeane = 0;
+
+		if (priv->en_loose_ordered)
+			ocfg.oloe = 1;
+		else
+			ocfg.oloe = 0;
+
+		ret = dpseci_set_opr(dpseci, CMD_PRI_LOW, priv->token,
+				   qp_id, OPR_OPT_CREATE, &ocfg);
+		if (ret) {
+			RTE_LOG(ERR, PMD, "Error setting opr: ret: %d\n", ret);
+			return ret;
+		}
+		qp->tx_vq.cb_eqresp_free = dpaa2_sec_free_eqresp_buf;
+		priv->en_ordered = 1;
+	}
+
 	ret = dpseci_set_rx_queue(dpseci, CMD_PRI_LOW, priv->token,
 				  qp_id, &cfg);
 	if (ret) {
@@ -3979,24 +4213,35 @@  dpaa2_sec_uninit(const struct rte_cryptodev *dev)
 }
 
 static int
-check_devargs_handler(__rte_unused const char *key, const char *value,
-		      __rte_unused void *opaque)
+check_devargs_handler(const char *key, const char *value,
+		      void *opaque)
 {
-	dpaa2_sec_dp_dump = atoi(value);
-	if (dpaa2_sec_dp_dump > DPAA2_SEC_DP_FULL_DUMP) {
-		DPAA2_SEC_WARN("WARN: DPAA2_SEC_DP_DUMP_LEVEL is not "
-			      "supported, changing to FULL error prints\n");
-		dpaa2_sec_dp_dump = DPAA2_SEC_DP_FULL_DUMP;
-	}
+	struct rte_cryptodev *dev = (struct rte_cryptodev *)opaque;
+	struct dpaa2_sec_dev_private *priv = dev->data->dev_private;
+
+	if (!strcmp(key, "drv_strict_order")) {
+		priv->en_loose_ordered = false;
+	} else if (!strcmp(key, "drv_dump_mode")) {
+		dpaa2_sec_dp_dump = atoi(value);
+		if (dpaa2_sec_dp_dump > DPAA2_SEC_DP_FULL_DUMP) {
+			DPAA2_SEC_WARN("WARN: DPAA2_SEC_DP_DUMP_LEVEL is not "
+				      "supported, changing to FULL error"
+				      " prints\n");
+			dpaa2_sec_dp_dump = DPAA2_SEC_DP_FULL_DUMP;
+		}
+	} else
+		return -1;
 
 	return 0;
 }
 
 static void
-dpaa2_sec_get_devargs(struct rte_devargs *devargs, const char *key)
+dpaa2_sec_get_devargs(struct rte_cryptodev *cryptodev, const char *key)
 {
 	struct rte_kvargs *kvlist;
+	struct rte_devargs *devargs;
 
+	devargs = cryptodev->device->devargs;
 	if (!devargs)
 		return;
 
@@ -4010,7 +4255,7 @@  dpaa2_sec_get_devargs(struct rte_devargs *devargs, const char *key)
 	}
 
 	rte_kvargs_process(kvlist, key,
-			check_devargs_handler, NULL);
+			check_devargs_handler, (void *)cryptodev);
 	rte_kvargs_free(kvlist);
 }
 
@@ -4101,6 +4346,7 @@  dpaa2_sec_dev_init(struct rte_cryptodev *cryptodev)
 	cryptodev->data->nb_queue_pairs = internals->max_nb_queue_pairs;
 	internals->hw = dpseci;
 	internals->token = token;
+	internals->en_loose_ordered = true;
 
 	snprintf(str, sizeof(str), "sec_fle_pool_p%d_%d",
 			getpid(), cryptodev->data->dev_id);
@@ -4115,7 +4361,8 @@  dpaa2_sec_dev_init(struct rte_cryptodev *cryptodev)
 		goto init_error;
 	}
 
-	dpaa2_sec_get_devargs(cryptodev->device->devargs, DRIVER_DUMP_MODE);
+	dpaa2_sec_get_devargs(cryptodev, DRIVER_DUMP_MODE);
+	dpaa2_sec_get_devargs(cryptodev, DRIVER_STRICT_ORDER);
 	DPAA2_SEC_INFO("driver %s: created", cryptodev->data->name);
 	return 0;
 
@@ -4215,5 +4462,6 @@  RTE_PMD_REGISTER_DPAA2(CRYPTODEV_NAME_DPAA2_SEC_PMD, rte_dpaa2_sec_driver);
 RTE_PMD_REGISTER_CRYPTO_DRIVER(dpaa2_sec_crypto_drv,
 		rte_dpaa2_sec_driver.driver, cryptodev_driver_id);
 RTE_PMD_REGISTER_PARAM_STRING(CRYPTODEV_NAME_DPAA2_SEC_PMD,
+		DRIVER_STRICT_ORDER "=<int>"
 		DRIVER_DUMP_MODE "=<int>");
 RTE_LOG_REGISTER(dpaa2_logtype_sec, pmd.crypto.dpaa2, NOTICE);
diff --git a/drivers/crypto/dpaa2_sec/dpaa2_sec_priv.h b/drivers/crypto/dpaa2_sec/dpaa2_sec_priv.h
index a8f9440632..3094778a7a 100644
--- a/drivers/crypto/dpaa2_sec/dpaa2_sec_priv.h
+++ b/drivers/crypto/dpaa2_sec/dpaa2_sec_priv.h
@@ -37,6 +37,8 @@  struct dpaa2_sec_dev_private {
 	uint16_t token; /**< Token required by DPxxx objects */
 	unsigned int max_nb_queue_pairs;
 	/**< Max number of queue pairs supported by device */
+	uint8_t en_ordered;
+	uint8_t en_loose_ordered;
 };
 
 struct dpaa2_sec_qp {
diff --git a/drivers/crypto/dpaa2_sec/mc/fsl_dpseci.h b/drivers/crypto/dpaa2_sec/mc/fsl_dpseci.h
index 279e8f4d4a..c295c04f24 100644
--- a/drivers/crypto/dpaa2_sec/mc/fsl_dpseci.h
+++ b/drivers/crypto/dpaa2_sec/mc/fsl_dpseci.h
@@ -1,7 +1,7 @@ 
 /* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
  *
  * Copyright 2013-2016 Freescale Semiconductor Inc.
- * Copyright 2016-2017 NXP
+ * Copyright 2016-2020 NXP
  *
  */
 #ifndef __FSL_DPSECI_H
@@ -11,6 +11,8 @@ 
  * Contains initialization APIs and runtime control APIs for DPSECI
  */
 
+#include <fsl_dpopr.h>
+
 struct fsl_mc_io;
 
 /**
@@ -41,6 +43,16 @@  int dpseci_close(struct fsl_mc_io *mc_io,
  */
 #define DPSECI_OPT_HAS_CG				0x000020
 
+/**
+ * Enable the Order Restoration support
+ */
+#define DPSECI_OPT_HAS_OPR				0x000040
+
+/**
+ * Order Point Records are shared for the entire DPSECI
+ */
+#define DPSECI_OPT_OPR_SHARED				0x000080
+
 /**
  * struct dpseci_cfg - Structure representing DPSECI configuration
  * @options: Any combination of the following options: