[1/2] crypto/mlx5: optimize AES-GCM IPsec operation

Message ID 20240530072413.1602343-2-suanmingm@nvidia.com (mailing list archive)
State Superseded
Delegated to: akhil goyal
Headers
Series crypto/mlx5: optimize AES-GCM IPsec operation |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation warning apply patch failure
ci/iol-testing warning apply patch failure

Commit Message

Suanming Mou May 30, 2024, 7:24 a.m. UTC
  In crypto/mlx5, the DPDK API typically supplies the
AES-GCM AAD/Payload/Digest in separate locations,
potentially disrupting their contiguous layout. In cases
where the memory layout fails to meet hardware (HW)
requirements, a UMR WQE is initiated ahead of the GCM's
GGA WQE to establish a contiguous AAD/Payload/Digest
virtual memory space for the HW MMU.

For IPsec scenarios, where the memory layout consistently
adheres to the fixed order of AAD/IV/Payload/Digest,
directly shrinking memory for AAD proves more efficient
than preparing a UMR WQE. To address this, a new devarg
"crypto_mode" with mode "ipsec_opt" is introduced in the
commit, offering an optimization hint specifically for
IPsec cases. When enabled, the PMD copies AAD directly
before Payload in the enqueue_burst function instead of
employing the UMR WQE. Subsequently, in the dequeue_burst
function, the overridden IV before Payload is restored
from the GGA WQE. It's crucial for users to avoid utilizing
the input mbuf data during processing.

Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 doc/guides/cryptodevs/mlx5.rst         |  20 +++
 doc/guides/rel_notes/release_24_07.rst |   4 +
 drivers/crypto/mlx5/mlx5_crypto.c      |  24 ++-
 drivers/crypto/mlx5/mlx5_crypto.h      |  19 +++
 drivers/crypto/mlx5/mlx5_crypto_gcm.c  | 220 +++++++++++++++++++++++--
 5 files changed, 265 insertions(+), 22 deletions(-)
  

Patch

diff --git a/doc/guides/cryptodevs/mlx5.rst b/doc/guides/cryptodevs/mlx5.rst
index 8c05759ae7..320f57bb02 100644
--- a/doc/guides/cryptodevs/mlx5.rst
+++ b/doc/guides/cryptodevs/mlx5.rst
@@ -185,6 +185,25 @@  for an additional list of options shared with other mlx5 drivers.
 
   Maximum number of mbuf chain segments(src or dest), default value is 8.
 
+- ``crypto_mode`` parameter [string]
+
+  Only valid in AES-GCM mode. Will be ignored in AES-XTS mode.
+
+  - ``full_capable``
+       Use a UMR WQE for inputs that are not laid out as contiguous AAD/Payload/Digest.
+
+  - ``ipsec_opt``
+       Shrink the AAD in software for inputs laid out as contiguous AAD/IV/Payload/Digest.
+       The PMD relies on the IPsec layout, expecting the memory to align with
+       AAD/IV/Payload/Digest in a contiguous manner, all within a single mbuf
+       for any given OP.
+       The PMD extracts the ESP.IV bytes from the input memory and binds the
+       AAD (ESP SPI and SN) to the payload during enqueue OP. It then restores
+       the original memory layout in the decrypt OP.
+       The supported ESP.IV size range is [0, 16] bytes.
+
+  Set to ``full_capable`` by default.
+
 
 Supported NICs
 --------------
@@ -205,6 +224,7 @@  Limitations
   values.
 - AES-GCM is supported only on BlueField-3.
 - AES-GCM supports only key import plaintext mode.
+- AES-GCM ``ipsec_opt`` mode does not support multi-segment mode.
 
 
 Prerequisites
diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst
index ffbe9ce051..9a9e471058 100644
--- a/doc/guides/rel_notes/release_24_07.rst
+++ b/doc/guides/rel_notes/release_24_07.rst
@@ -81,6 +81,10 @@  New Features
 
   * Added SSE/NEON vector datapath.
 
+* **Updated NVIDIA mlx5 driver.**
+
+  * Added AES-GCM IPsec operation optimization.
+
 
 Removed Items
 -------------
diff --git a/drivers/crypto/mlx5/mlx5_crypto.c b/drivers/crypto/mlx5/mlx5_crypto.c
index 26bd4087da..d49a375dcb 100644
--- a/drivers/crypto/mlx5/mlx5_crypto.c
+++ b/drivers/crypto/mlx5/mlx5_crypto.c
@@ -25,10 +25,6 @@ 
 
 #define MLX5_CRYPTO_FEATURE_FLAGS(wrapped_mode) \
 	(RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO | RTE_CRYPTODEV_FF_HW_ACCELERATED | \
-	 RTE_CRYPTODEV_FF_IN_PLACE_SGL | RTE_CRYPTODEV_FF_OOP_SGL_IN_SGL_OUT | \
-	 RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT | \
-	 RTE_CRYPTODEV_FF_OOP_LB_IN_SGL_OUT | \
-	 RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT | \
 	 (wrapped_mode ? RTE_CRYPTODEV_FF_CIPHER_WRAPPED_KEY : 0) | \
 	 RTE_CRYPTODEV_FF_CIPHER_MULTIPLE_DATA_UNITS)
 
@@ -60,6 +56,14 @@  mlx5_crypto_dev_infos_get(struct rte_cryptodev *dev,
 		dev_info->driver_id = mlx5_crypto_driver_id;
 		dev_info->feature_flags =
 			MLX5_CRYPTO_FEATURE_FLAGS(priv->is_wrapped_mode);
+		if (!mlx5_crypto_is_ipsec_opt(priv))
+			dev_info->feature_flags |=
+				RTE_CRYPTODEV_FF_IN_PLACE_SGL |
+				RTE_CRYPTODEV_FF_OOP_SGL_IN_SGL_OUT |
+				RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT |
+				RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT |
+				RTE_CRYPTODEV_FF_OOP_LB_IN_SGL_OUT;
+
 		dev_info->capabilities = priv->caps;
 		dev_info->max_nb_queue_pairs = MLX5_CRYPTO_MAX_QPS;
 		if (priv->caps->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD) {
@@ -249,6 +253,16 @@  mlx5_crypto_args_check_handler(const char *key, const char *val, void *opaque)
 		fclose(file);
 		devarg_prms->login_devarg = true;
 		return 0;
+	} else if (strcmp(key, "crypto_mode") == 0) {
+		if (strcmp(val, "full_capable") == 0) {
+			devarg_prms->crypto_mode = MLX5_CRYPTO_FULL_CAPABLE;
+		} else if (strcmp(val, "ipsec_opt") == 0) {
+			devarg_prms->crypto_mode = MLX5_CRYPTO_IPSEC_OPT;
+		} else {
+			DRV_LOG(ERR, "Invalid crypto mode: %s", val);
+			rte_errno = EINVAL;
+			return -rte_errno;
+		}
 	}
 	errno = 0;
 	tmp = strtoul(val, NULL, 0);
@@ -294,6 +308,7 @@  mlx5_crypto_parse_devargs(struct mlx5_kvargs_ctrl *mkvlist,
 		"max_segs_num",
 		"wcs_file",
 		"algo",
+		"crypto_mode",
 		NULL,
 	};
 
@@ -379,6 +394,7 @@  mlx5_crypto_dev_probe(struct mlx5_common_device *cdev,
 	priv->crypto_dev = crypto_dev;
 	priv->is_wrapped_mode = wrapped_mode;
 	priv->max_segs_num = devarg_prms.max_segs_num;
+	priv->crypto_mode = devarg_prms.crypto_mode;
 	/* Init and override AES-GCM configuration. */
 	if (devarg_prms.is_aes_gcm) {
 		ret = mlx5_crypto_gcm_init(priv);
diff --git a/drivers/crypto/mlx5/mlx5_crypto.h b/drivers/crypto/mlx5/mlx5_crypto.h
index 5432484f80..547bb490e2 100644
--- a/drivers/crypto/mlx5/mlx5_crypto.h
+++ b/drivers/crypto/mlx5/mlx5_crypto.h
@@ -25,6 +25,16 @@ 
 					MLX5_WSEG_SIZE)
 #define MLX5_CRYPTO_GCM_MAX_AAD 64
 #define MLX5_CRYPTO_GCM_MAX_DIGEST 16
+#define MLX5_CRYPTO_GCM_IPSEC_IV_SIZE 16
+
+enum mlx5_crypto_mode {
+	MLX5_CRYPTO_FULL_CAPABLE,
+	MLX5_CRYPTO_IPSEC_OPT,
+};
+
+struct mlx5_crypto_ipsec_mem {
+	uint8_t mem[MLX5_CRYPTO_GCM_IPSEC_IV_SIZE];
+} __rte_packed;
 
 struct mlx5_crypto_priv {
 	TAILQ_ENTRY(mlx5_crypto_priv) next;
@@ -45,6 +55,7 @@  struct mlx5_crypto_priv {
 	uint16_t umr_wqe_stride;
 	uint16_t max_rdmar_ds;
 	uint32_t is_wrapped_mode:1;
+	enum mlx5_crypto_mode crypto_mode;
 };
 
 struct mlx5_crypto_qp {
@@ -57,6 +68,7 @@  struct mlx5_crypto_qp {
 	struct mlx5_devx_obj **mkey; /* WQE's indirect mekys. */
 	struct mlx5_klm *klm_array;
 	union mlx5_gga_crypto_opaque *opaque_addr;
+	struct mlx5_crypto_ipsec_mem *ipsec_mem;
 	struct mlx5_mr_ctrl mr_ctrl;
 	struct mlx5_pmd_mr mr;
 	/* Crypto QP. */
@@ -93,6 +105,7 @@  struct mlx5_crypto_devarg_params {
 	uint64_t keytag;
 	uint32_t max_segs_num;
 	uint32_t is_aes_gcm:1;
+	enum mlx5_crypto_mode crypto_mode;
 };
 
 struct mlx5_crypto_session {
@@ -139,6 +152,12 @@  struct mlx5_crypto_dek_ctx {
 	struct mlx5_crypto_priv *priv;
 };
 
+static __rte_always_inline bool
+mlx5_crypto_is_ipsec_opt(struct mlx5_crypto_priv *priv)
+{
+	return priv->crypto_mode == MLX5_CRYPTO_IPSEC_OPT;
+}
+
 typedef void *(*mlx5_crypto_mkey_update_t)(struct mlx5_crypto_priv *priv,
 					   struct mlx5_crypto_qp *qp,
 					   uint32_t idx);
diff --git a/drivers/crypto/mlx5/mlx5_crypto_gcm.c b/drivers/crypto/mlx5/mlx5_crypto_gcm.c
index fc6ade6711..189e798d1d 100644
--- a/drivers/crypto/mlx5/mlx5_crypto_gcm.c
+++ b/drivers/crypto/mlx5/mlx5_crypto_gcm.c
@@ -181,6 +181,7 @@  mlx5_crypto_sym_gcm_session_configure(struct rte_cryptodev *dev,
 		DRV_LOG(ERR, "Only AES-GCM algorithm is supported.");
 		return -ENOTSUP;
 	}
+
 	if (aead->op == RTE_CRYPTO_AEAD_OP_ENCRYPT)
 		op_type = MLX5_CRYPTO_OP_TYPE_ENCRYPTION;
 	else
@@ -235,6 +236,7 @@  mlx5_crypto_gcm_qp_release(struct rte_cryptodev *dev, uint16_t qp_id)
 	}
 	mlx5_crypto_indirect_mkeys_release(qp, qp->entries_n);
 	mlx5_mr_btree_free(&qp->mr_ctrl.cache_bh);
+	rte_free(qp->ipsec_mem);
 	rte_free(qp);
 	dev->data->queue_pairs[qp_id] = NULL;
 	return 0;
@@ -321,13 +323,16 @@  mlx5_crypto_gcm_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,
 	uint32_t log_ops_n = rte_log2_u32(qp_conf->nb_descriptors);
 	uint32_t entries = RTE_BIT32(log_ops_n);
 	uint32_t alloc_size = sizeof(*qp);
+	uint32_t extra_obj_size = 0;
 	size_t mr_size, opaq_size;
 	void *mr_buf;
 	int ret;
 
+	if (!mlx5_crypto_is_ipsec_opt(priv))
+		extra_obj_size = sizeof(struct mlx5_devx_obj *);
 	alloc_size = RTE_ALIGN(alloc_size, RTE_CACHE_LINE_SIZE);
 	alloc_size += (sizeof(struct rte_crypto_op *) +
-		       sizeof(struct mlx5_devx_obj *)) * entries;
+		       extra_obj_size) * entries;
 	qp = rte_zmalloc_socket(__func__, alloc_size, RTE_CACHE_LINE_SIZE,
 				socket_id);
 	if (qp == NULL) {
@@ -370,7 +375,7 @@  mlx5_crypto_gcm_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,
 	 * Triple the CQ size as UMR QP which contains UMR and SEND_EN WQE
 	 * will share this CQ .
 	 */
-	qp->cq_entries_n = rte_align32pow2(entries * 3);
+	qp->cq_entries_n = rte_align32pow2(entries * (mlx5_crypto_is_ipsec_opt(priv) ? 1 : 3));
 	ret = mlx5_devx_cq_create(priv->cdev->ctx, &qp->cq_obj,
 				  rte_log2_u32(qp->cq_entries_n),
 				  &cq_attr, socket_id);
@@ -384,7 +389,7 @@  mlx5_crypto_gcm_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,
 	qp_attr.num_of_send_wqbbs = entries;
 	qp_attr.mmo = attr->crypto_mmo.crypto_mmo_qp;
 	/* Set MMO QP as follower as the input data may depend on UMR. */
-	qp_attr.cd_slave_send = 1;
+	qp_attr.cd_slave_send = !mlx5_crypto_is_ipsec_opt(priv);
 	ret = mlx5_devx_qp_create(priv->cdev->ctx, &qp->qp_obj,
 				  qp_attr.num_of_send_wqbbs * MLX5_WQE_SIZE,
 				  &qp_attr, socket_id);
@@ -397,18 +402,28 @@  mlx5_crypto_gcm_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,
 	if (ret)
 		goto err;
 	qp->ops = (struct rte_crypto_op **)(qp + 1);
-	qp->mkey = (struct mlx5_devx_obj **)(qp->ops + entries);
-	if (mlx5_crypto_gcm_umr_qp_setup(dev, qp, socket_id)) {
-		DRV_LOG(ERR, "Failed to setup UMR QP.");
-		goto err;
-	}
-	DRV_LOG(INFO, "QP %u: SQN=0x%X CQN=0x%X entries num = %u",
-		(uint32_t)qp_id, qp->qp_obj.qp->id, qp->cq_obj.cq->id, entries);
-	if (mlx5_crypto_indirect_mkeys_prepare(priv, qp, &mkey_attr,
-					       mlx5_crypto_gcm_mkey_klm_update)) {
-		DRV_LOG(ERR, "Cannot allocate indirect memory regions.");
-		rte_errno = ENOMEM;
-		goto err;
+	if (!mlx5_crypto_is_ipsec_opt(priv)) {
+		qp->mkey = (struct mlx5_devx_obj **)(qp->ops + entries);
+		if (mlx5_crypto_gcm_umr_qp_setup(dev, qp, socket_id)) {
+			DRV_LOG(ERR, "Failed to setup UMR QP.");
+			goto err;
+		}
+		DRV_LOG(INFO, "QP %u: SQN=0x%X CQN=0x%X entries num = %u",
+			(uint32_t)qp_id, qp->qp_obj.qp->id, qp->cq_obj.cq->id, entries);
+		if (mlx5_crypto_indirect_mkeys_prepare(priv, qp, &mkey_attr,
+						       mlx5_crypto_gcm_mkey_klm_update)) {
+			DRV_LOG(ERR, "Cannot allocate indirect memory regions.");
+			rte_errno = ENOMEM;
+			goto err;
+		}
+	} else {
+		extra_obj_size = sizeof(struct mlx5_crypto_ipsec_mem) * entries;
+		qp->ipsec_mem = rte_calloc(__func__, (size_t)1, extra_obj_size,
+					   RTE_CACHE_LINE_SIZE);
+		if (!qp->ipsec_mem) {
+			DRV_LOG(ERR, "Failed to allocate ipsec_mem.");
+			goto err;
+		}
 	}
 	dev->data->queue_pairs[qp_id] = qp;
 	return 0;
@@ -974,6 +989,168 @@  mlx5_crypto_gcm_dequeue_burst(void *queue_pair,
 	return op_num;
 }
 
+static uint16_t
+mlx5_crypto_gcm_ipsec_enqueue_burst(void *queue_pair,
+				    struct rte_crypto_op **ops,
+				    uint16_t nb_ops)
+{
+	struct mlx5_crypto_qp *qp = queue_pair;
+	struct mlx5_crypto_session *sess;
+	struct mlx5_crypto_priv *priv = qp->priv;
+	struct mlx5_crypto_gcm_data gcm_data;
+	struct rte_crypto_op *op;
+	struct rte_mbuf *m_src;
+	uint16_t mask = qp->entries_n - 1;
+	uint16_t remain = qp->entries_n - (qp->pi - qp->qp_ci);
+	uint32_t idx;
+	uint32_t pkt_iv_len;
+	uint8_t *payload;
+
+	if (remain < nb_ops)
+		nb_ops = remain;
+	else
+		remain = nb_ops;
+	if (unlikely(remain == 0))
+		return 0;
+	do {
+		op = *ops++;
+		sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
+		idx = qp->pi & mask;
+		m_src = op->sym->m_src;
+		MLX5_ASSERT(m_src->nb_segs == 1);
+		payload = rte_pktmbuf_mtod_offset(m_src, void *, op->sym->aead.data.offset);
+		gcm_data.src_addr = RTE_PTR_SUB(payload, sess->aad_len);
+		/*
+		 * IPsec IV between payload and AAD should be equal or less than
+		 * MLX5_CRYPTO_GCM_IPSEC_IV_SIZE.
+		 */
+		pkt_iv_len = RTE_PTR_DIFF(payload,
+				RTE_PTR_ADD(op->sym->aead.aad.data, sess->aad_len));
+		MLX5_ASSERT(pkt_iv_len <= MLX5_CRYPTO_GCM_IPSEC_IV_SIZE);
+		gcm_data.src_bytes = op->sym->aead.data.length + sess->aad_len;
+		gcm_data.src_mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, op->sym->m_src);
+		/* OOP mode is not supported. */
+		MLX5_ASSERT(!op->sym->m_dst || op->sym->m_dst == m_src);
+		gcm_data.dst_addr = gcm_data.src_addr;
+		gcm_data.dst_mkey = gcm_data.src_mkey;
+		gcm_data.dst_bytes = gcm_data.src_bytes;
+		/* Digest should follow payload. */
+		MLX5_ASSERT(RTE_PTR_ADD
+			(gcm_data.src_addr, sess->aad_len + op->sym->aead.data.length) ==
+			op->sym->aead.digest.data);
+		if (sess->op_type == MLX5_CRYPTO_OP_TYPE_ENCRYPTION)
+			gcm_data.dst_bytes += sess->tag_len;
+		else
+			gcm_data.src_bytes += sess->tag_len;
+		mlx5_crypto_gcm_wqe_set(qp, op, idx, &gcm_data);
+		/*
+		 * All the data such as IV have been copied above,
+		 * shrink AAD before payload. First backup the mem,
+		 * then do shrink.
+		 */
+		rte_memcpy(&qp->ipsec_mem[idx],
+			   RTE_PTR_SUB(payload, MLX5_CRYPTO_GCM_IPSEC_IV_SIZE),
+			   MLX5_CRYPTO_GCM_IPSEC_IV_SIZE);
+		/* If no memory overlap, do copy directly, otherwise memmove. */
+		if (likely(pkt_iv_len >= sess->aad_len))
+			rte_memcpy(gcm_data.src_addr, op->sym->aead.aad.data, sess->aad_len);
+		else
+			memmove(gcm_data.src_addr, op->sym->aead.aad.data, sess->aad_len);
+		op->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
+		qp->ops[idx] = op;
+		qp->pi++;
+	} while (--remain);
+	qp->stats.enqueued_count += nb_ops;
+	/* Update the last GGA cseg with COMP. */
+	((struct mlx5_wqe_cseg *)qp->wqe)->flags =
+		RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET);
+	mlx5_doorbell_ring(&priv->uar.bf_db, *(volatile uint64_t *)qp->wqe,
+			   qp->pi, &qp->qp_obj.db_rec[MLX5_SND_DBR],
+			   !priv->uar.dbnc);
+	return nb_ops;
+}
+
+static __rte_always_inline void
+mlx5_crypto_gcm_restore_ipsec_mem(struct mlx5_crypto_qp *qp,
+				  uint16_t orci,
+				  uint16_t rci,
+				  uint16_t op_mask)
+{
+	uint32_t idx;
+	struct mlx5_crypto_session *sess;
+	struct rte_crypto_op *op;
+	struct rte_mbuf *m_src;
+	uint8_t *payload;
+
+	while (orci != rci) {
+		idx = orci & op_mask;
+		op = qp->ops[idx];
+		sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
+		m_src = op->sym->m_src;
+		payload = rte_pktmbuf_mtod_offset(m_src, void *,
+						  op->sym->aead.data.offset);
+		/* Restore the IPsec memory. */
+		if (unlikely(sess->aad_len > MLX5_CRYPTO_GCM_IPSEC_IV_SIZE))
+			memmove(op->sym->aead.aad.data,
+				RTE_PTR_SUB(payload, sess->aad_len), sess->aad_len);
+		rte_memcpy(RTE_PTR_SUB(payload, MLX5_CRYPTO_GCM_IPSEC_IV_SIZE),
+			   &qp->ipsec_mem[idx], MLX5_CRYPTO_GCM_IPSEC_IV_SIZE);
+		orci++;
+	}
+}
+
+static uint16_t
+mlx5_crypto_gcm_ipsec_dequeue_burst(void *queue_pair,
+				    struct rte_crypto_op **ops,
+				    uint16_t nb_ops)
+{
+	struct mlx5_crypto_qp *qp = queue_pair;
+	volatile struct mlx5_cqe *restrict cqe;
+	const unsigned int cq_size = qp->cq_entries_n;
+	const unsigned int mask = cq_size - 1;
+	const unsigned int op_mask = qp->entries_n - 1;
+	uint32_t idx;
+	uint32_t next_idx = qp->cq_ci & mask;
+	uint16_t reported_ci = qp->reported_ci;
+	uint16_t qp_ci = qp->qp_ci;
+	const uint16_t max = RTE_MIN((uint16_t)(qp->pi - reported_ci), nb_ops);
+	uint16_t op_num = 0;
+	int ret;
+
+	if (unlikely(max == 0))
+		return 0;
+	while (qp_ci - reported_ci < max) {
+		idx = next_idx;
+		next_idx = (qp->cq_ci + 1) & mask;
+		cqe = &qp->cq_obj.cqes[idx];
+		ret = check_cqe(cqe, cq_size, qp->cq_ci);
+		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
+			if (unlikely(ret != MLX5_CQE_STATUS_HW_OWN))
+				mlx5_crypto_gcm_cqe_err_handle(qp,
+						qp->ops[reported_ci & op_mask]);
+			break;
+		}
+		qp_ci = rte_be_to_cpu_16(cqe->wqe_counter) + 1;
+		qp->cq_ci++;
+	}
+	/* If wqe_counter changed, means CQE handled. */
+	if (likely(qp->qp_ci != qp_ci)) {
+		qp->qp_ci = qp_ci;
+		rte_io_wmb();
+		qp->cq_obj.db_rec[0] = rte_cpu_to_be_32(qp->cq_ci);
+	}
+	/* If reported_ci is not same with qp_ci, means op retrieved. */
+	if (qp_ci != reported_ci) {
+		op_num = RTE_MIN((uint16_t)(qp_ci - reported_ci), max);
+		reported_ci += op_num;
+		mlx5_crypto_gcm_restore_ipsec_mem(qp, qp->reported_ci, reported_ci, op_mask);
+		mlx5_crypto_gcm_fill_op(qp, ops, qp->reported_ci, reported_ci, op_mask);
+		qp->stats.dequeued_count += op_num;
+		qp->reported_ci = reported_ci;
+	}
+	return op_num;
+}
+
 int
 mlx5_crypto_gcm_init(struct mlx5_crypto_priv *priv)
 {
@@ -987,9 +1164,16 @@  mlx5_crypto_gcm_init(struct mlx5_crypto_priv *priv)
 	mlx5_os_set_reg_mr_cb(&priv->reg_mr_cb, &priv->dereg_mr_cb);
 	dev_ops->queue_pair_setup = mlx5_crypto_gcm_qp_setup;
 	dev_ops->queue_pair_release = mlx5_crypto_gcm_qp_release;
-	crypto_dev->dequeue_burst = mlx5_crypto_gcm_dequeue_burst;
-	crypto_dev->enqueue_burst = mlx5_crypto_gcm_enqueue_burst;
-	priv->max_klm_num = RTE_ALIGN((priv->max_segs_num + 1) * 2 + 1, MLX5_UMR_KLM_NUM_ALIGN);
+	if (mlx5_crypto_is_ipsec_opt(priv)) {
+		crypto_dev->dequeue_burst = mlx5_crypto_gcm_ipsec_dequeue_burst;
+		crypto_dev->enqueue_burst = mlx5_crypto_gcm_ipsec_enqueue_burst;
+		priv->max_klm_num = 0;
+	} else {
+		crypto_dev->dequeue_burst = mlx5_crypto_gcm_dequeue_burst;
+		crypto_dev->enqueue_burst = mlx5_crypto_gcm_enqueue_burst;
+		priv->max_klm_num = RTE_ALIGN((priv->max_segs_num + 1) * 2 + 1,
+					MLX5_UMR_KLM_NUM_ALIGN);
+	}
 	/* Generate GCM capability. */
 	ret = mlx5_crypto_generate_gcm_cap(&cdev->config.hca_attr.crypto_mmo,
 					   mlx5_crypto_gcm_caps);