[2/3] crypto/cnxk: improvements to fastpath handling

Message ID 20220620071807.951128-3-ktejasree@marvell.com (mailing list archive)
State Accepted, archived
Delegated to: akhil goyal
Headers
Series: support new full context firmware

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Tejasree Kondoj June 20, 2022, 7:18 a.m. UTC
  From: Anoob Joseph <anoobj@marvell.com>

Remove SA & packet accesses in dequeue path by adjusting the headers in
the enqueue path for outbound packets. For inbound packets, add extra
esn_en flag in the SA to minimize cache line accesses in the datapath.

Also, use seq_lo for the IPID. The IPID just needs to be unique. Instead of
incrementing it per packet, use the ESN low bits.

Signed-off-by: Anoob Joseph <anoobj@marvell.com>
---
 drivers/crypto/cnxk/cn9k_cryptodev_ops.c | 69 ++++++++++++++----------
 drivers/crypto/cnxk/cn9k_ipsec.c         | 11 ++--
 drivers/crypto/cnxk/cn9k_ipsec.h         |  7 ++-
 drivers/crypto/cnxk/cn9k_ipsec_la_ops.h  | 55 +++++++++++--------
 drivers/crypto/cnxk/cnxk_cryptodev_ops.h | 12 ++---
 5 files changed, 87 insertions(+), 67 deletions(-)
  

Patch

diff --git a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
index 7720730120..8aab9c9f60 100644
--- a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
@@ -43,10 +43,12 @@  cn9k_cpt_sec_inst_fill(struct rte_crypto_op *op,
 		       struct cpt_inst_s *inst)
 {
 	struct rte_crypto_sym_op *sym_op = op->sym;
-	struct roc_ie_on_common_sa *common_sa;
 	struct cn9k_sec_session *priv;
-	struct roc_ie_on_sa_ctl *ctl;
 	struct cn9k_ipsec_sa *sa;
+	int ret;
+
+	priv = get_sec_session_private_data(op->sym->sec_session);
+	sa = &priv->sa;
 
 	if (unlikely(sym_op->m_dst && sym_op->m_dst != sym_op->m_src)) {
 		plt_dp_err("Out of place is not supported");
@@ -58,21 +60,17 @@  cn9k_cpt_sec_inst_fill(struct rte_crypto_op *op,
 		return -ENOTSUP;
 	}
 
-	priv = get_sec_session_private_data(op->sym->sec_session);
-	sa = &priv->sa;
-
 	if (sa->dir == RTE_SECURITY_IPSEC_SA_DIR_EGRESS)
-		return process_outb_sa(op, sa, inst);
-
-	infl_req->op_flags |= CPT_OP_FLAGS_IPSEC_DIR_INBOUND;
-
-	common_sa = &sa->in_sa.common_sa;
-	ctl = &common_sa->ctl;
-
-	if (ctl->esn_en)
-		infl_req->op_flags |= CPT_OP_FLAGS_IPSEC_INB_ESN;
+		ret = process_outb_sa(op, sa, inst);
+	else {
+		infl_req->op_flags |= CPT_OP_FLAGS_IPSEC_DIR_INBOUND;
+		process_inb_sa(op, sa, inst);
+		if (unlikely(sa->esn_en))
+			infl_req->op_flags |= CPT_OP_FLAGS_IPSEC_INB_ESN;
+		ret = 0;
+	}
 
-	return process_inb_sa(op, sa, inst);
+	return ret;
 }
 
 static inline struct cnxk_se_sess *
@@ -234,19 +232,29 @@  cn9k_cpt_enqueue_burst(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops)
 	};
 
 	pend_q = &qp->pend_q;
-
-	const uint64_t lmt_base = qp->lf.lmt_base;
-	const uint64_t io_addr = qp->lf.io_addr;
-	const uint64_t pq_mask = pend_q->pq_mask;
+	rte_prefetch2(pend_q);
 
 	/* Clear w0, w2, w3 of both inst */
 
+#if defined(RTE_ARCH_ARM64)
+	uint64x2_t zero = vdupq_n_u64(0);
+
+	vst1q_u64(&inst[0].w0.u64, zero);
+	vst1q_u64(&inst[1].w0.u64, zero);
+	vst1q_u64(&inst[0].w2.u64, zero);
+	vst1q_u64(&inst[1].w2.u64, zero);
+#else
 	inst[0].w0.u64 = 0;
 	inst[0].w2.u64 = 0;
 	inst[0].w3.u64 = 0;
 	inst[1].w0.u64 = 0;
 	inst[1].w2.u64 = 0;
 	inst[1].w3.u64 = 0;
+#endif
+
+	const uint64_t lmt_base = qp->lf.lmt_base;
+	const uint64_t io_addr = qp->lf.io_addr;
+	const uint64_t pq_mask = pend_q->pq_mask;
 
 	head = pend_q->head;
 	nb_allowed = pending_queue_free_cnt(head, pend_q->tail, pq_mask);
@@ -506,21 +514,26 @@  cn9k_cpt_sec_post_process(struct rte_crypto_op *cop,
 	uint16_t m_len = 0;
 	char *data;
 
-	priv = get_sec_session_private_data(cop->sym->sec_session);
-	sa = &priv->sa;
-
 	if (infl_req->op_flags & CPT_OP_FLAGS_IPSEC_DIR_INBOUND) {
-		struct roc_ie_on_common_sa *common_sa = &sa->in_sa.common_sa;
+		struct roc_ie_on_common_sa *common_sa;
 
 		data = rte_pktmbuf_mtod(m, char *);
-		if (infl_req->op_flags == CPT_OP_FLAGS_IPSEC_INB_ESN) {
-			struct roc_ie_on_inb_hdr *inb_hdr =
-				(struct roc_ie_on_inb_hdr *)data;
-			uint64_t seq = rte_be_to_cpu_64(inb_hdr->seq);
+		if (unlikely(infl_req->op_flags & CPT_OP_FLAGS_IPSEC_INB_ESN)) {
+			struct roc_ie_on_inb_hdr *inb_hdr;
+			uint64_t seq;
+
+			priv = get_sec_session_private_data(
+				sym_op->sec_session);
+			sa = &priv->sa;
+			common_sa = &sa->in_sa.common_sa;
+
+			inb_hdr = (struct roc_ie_on_inb_hdr *)data;
+			seq = rte_be_to_cpu_64(inb_hdr->seq);
 
 			if (seq > common_sa->seq_t.u64)
 				common_sa->seq_t.u64 = seq;
 		}
+
 		ip = (struct rte_ipv4_hdr *)(data + ROC_IE_ON_INB_RPTR_HDR);
 
 		if (((ip->version_ihl & 0xf0) >> RTE_IPV4_IHL_MULTIPLIER) ==
@@ -537,8 +550,6 @@  cn9k_cpt_sec_post_process(struct rte_crypto_op *cop,
 		m->data_len = m_len;
 		m->pkt_len = m_len;
 		m->data_off += ROC_IE_ON_INB_RPTR_HDR;
-	} else {
-		rte_pktmbuf_adj(m, sa->custom_hdr_len);
 	}
 }
 
diff --git a/drivers/crypto/cnxk/cn9k_ipsec.c b/drivers/crypto/cnxk/cn9k_ipsec.c
index 85f3f26c32..49a775eb7f 100644
--- a/drivers/crypto/cnxk/cn9k_ipsec.c
+++ b/drivers/crypto/cnxk/cn9k_ipsec.c
@@ -40,13 +40,8 @@  cn9k_ipsec_outb_sa_create(struct cnxk_cpt_qp *qp,
 
 	/* Initialize lookaside IPsec private data */
 	sa->dir = RTE_SECURITY_IPSEC_SA_DIR_EGRESS;
-	/* Start ip id from 1 */
-	sa->ip_id = 1;
-	sa->seq_lo = 1;
-	sa->seq_hi = 0;
 
-	if (ipsec->esn.value)
-		sa->esn = ipsec->esn.value;
+	sa->esn = ipsec->esn.value;
 
 	ret = cnxk_ipsec_outb_rlens_get(&sa->rlens, ipsec, crypto_xform);
 	if (ret)
@@ -166,10 +161,12 @@  cn9k_ipsec_inb_sa_create(struct cnxk_cpt_qp *qp,
 	}
 
 	ret = cnxk_on_ipsec_inb_sa_create(ipsec, crypto_xform, &sa->in_sa);
-
 	if (ret < 0)
 		return ret;
 
+	if (sa->in_sa.common_sa.ctl.esn_en)
+		sa->esn_en = 1;
+
 	ctx_len = ret;
 	opcode = ROC_IE_ON_MAJOR_OP_WRITE_IPSEC_INBOUND;
 	egrp = roc_cpt->eng_grp[CPT_ENG_TYPE_IE];
diff --git a/drivers/crypto/cnxk/cn9k_ipsec.h b/drivers/crypto/cnxk/cn9k_ipsec.h
index 499dbc2782..bed5976096 100644
--- a/drivers/crypto/cnxk/cn9k_ipsec.h
+++ b/drivers/crypto/cnxk/cn9k_ipsec.h
@@ -28,8 +28,6 @@  struct cn9k_ipsec_sa {
 	uint8_t custom_hdr_len;
 	/** Response length calculation data */
 	struct cnxk_ipsec_outb_rlens rlens;
-	/** Outbound IP-ID */
-	uint16_t ip_id;
 	/** ESN */
 	union {
 		uint64_t esn;
@@ -42,6 +40,11 @@  struct cn9k_ipsec_sa {
 	struct cnxk_on_ipsec_ar ar;
 	/** Anti replay window size */
 	uint32_t replay_win_sz;
+	/*
+	 * ESN enable flag. Copy of in_sa ctl.esn_en to have single cache line
+	 * access in the non-esn fastpath.
+	 */
+	uint8_t esn_en;
 	/** Queue pair */
 	struct cnxk_cpt_qp *qp;
 };
diff --git a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
index bbb4404a89..65dbb629b1 100644
--- a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
+++ b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h
@@ -77,29 +77,36 @@  process_outb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa,
 	const unsigned int hdr_len = sa->custom_hdr_len;
 	struct rte_crypto_sym_op *sym_op = cop->sym;
 	struct rte_mbuf *m_src = sym_op->m_src;
+	uint32_t dlen, rlen, pkt_len, seq_lo;
+	uint16_t data_off = m_src->data_off;
 	struct roc_ie_on_outb_hdr *hdr;
-	uint32_t dlen, rlen;
 	int32_t extend_tail;
+	uint64_t esn;
 
-	dlen = rte_pktmbuf_pkt_len(m_src) + hdr_len;
-	rlen = ipsec_po_out_rlen_get(sa, dlen - hdr_len);
+	pkt_len = rte_pktmbuf_pkt_len(m_src);
+	dlen = pkt_len + hdr_len;
+	rlen = ipsec_po_out_rlen_get(sa, pkt_len);
 
 	extend_tail = rlen - dlen;
 	if (unlikely(extend_tail > rte_pktmbuf_tailroom(m_src))) {
-		plt_dp_err("Not enough tail room (required: %d, available: %d",
+		plt_dp_err("Not enough tail room (required: %d, available: %d)",
 			   extend_tail, rte_pktmbuf_tailroom(m_src));
 		return -ENOMEM;
 	}
 
-	m_src->data_len += extend_tail;
-	m_src->pkt_len += extend_tail;
-
-	hdr = (struct roc_ie_on_outb_hdr *)rte_pktmbuf_prepend(m_src, hdr_len);
-	if (unlikely(hdr == NULL)) {
-		plt_dp_err("Not enough head room");
+	if (unlikely(hdr_len > data_off)) {
+		plt_dp_err("Not enough head room (required: %d, available: %d)",
+			   hdr_len, rte_pktmbuf_headroom(m_src));
 		return -ENOMEM;
 	}
 
+	pkt_len += extend_tail;
+
+	m_src->data_len = pkt_len;
+	m_src->pkt_len = pkt_len;
+
+	hdr = PLT_PTR_ADD(m_src->buf_addr, data_off - hdr_len);
+
 #ifdef LA_IPSEC_DEBUG
 	if (sa->inst.w4 & ROC_IE_ON_PER_PKT_IV) {
 		memcpy(&hdr->iv[0],
@@ -109,23 +116,28 @@  process_outb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa,
 	}
 #endif
 
-	hdr->seq = rte_cpu_to_be_32(sa->seq_lo);
-	hdr->ip_id = rte_cpu_to_be_32(sa->ip_id);
-	hdr->esn = rte_cpu_to_be_32(sa->seq_hi);
+	esn = ++sa->esn;
+
+	/* Set ESN seq hi */
+	hdr->esn = rte_cpu_to_be_32(esn >> 32);
 
-	sa->ip_id++;
-	sa->esn++;
+	/* Set ESN seq lo */
+	seq_lo = rte_cpu_to_be_32(esn & (BIT_ULL(32) - 1));
+	hdr->seq = seq_lo;
+
+	/* Set IPID same as seq_lo */
+	hdr->ip_id = seq_lo;
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = sa->inst.w4 | dlen;
-	inst->dptr = rte_pktmbuf_iova(m_src);
-	inst->rptr = inst->dptr;
+	inst->dptr = PLT_U64_CAST(hdr);
+	inst->rptr = PLT_U64_CAST(hdr);
 	inst->w7.u64 = sa->inst.w7;
 
 	return 0;
 }
 
-static __rte_always_inline int
+static __rte_always_inline void
 process_inb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa,
 	       struct cpt_inst_s *inst)
 {
@@ -149,16 +161,13 @@  process_inb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa,
 			inst->dptr = rte_pktmbuf_iova(m_src);
 			inst->rptr = inst->dptr;
 			inst->w7.u64 = sa->inst.w7;
-			return 0;
+			return;
 		}
 	}
 
 	/* Prepare CPT instruction */
 	inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src);
-	inst->dptr = rte_pktmbuf_iova(m_src);
-	inst->rptr = inst->dptr;
+	inst->dptr = inst->rptr = rte_pktmbuf_iova(m_src);
 	inst->w7.u64 = sa->inst.w7;
-
-	return 0;
 }
 #endif /* __CN9K_IPSEC_LA_OPS_H__ */
diff --git a/drivers/crypto/cnxk/cnxk_cryptodev_ops.h b/drivers/crypto/cnxk/cnxk_cryptodev_ops.h
index ec99e6d660..0b41d47de9 100644
--- a/drivers/crypto/cnxk/cnxk_cryptodev_ops.h
+++ b/drivers/crypto/cnxk/cnxk_cryptodev_ops.h
@@ -70,16 +70,16 @@  struct cnxk_cpt_qp {
 	/**< Crypto LF */
 	struct pending_queue pend_q;
 	/**< Pending queue */
-	struct rte_mempool *sess_mp;
-	/**< Session mempool */
-	struct rte_mempool *sess_mp_priv;
-	/**< Session private data mempool */
-	struct cpt_qp_meta_info meta_info;
-	/**< Metabuf info required to support operations on the queue pair */
 	struct roc_cpt_lmtline lmtline;
 	/**< Lmtline information */
+	struct cpt_qp_meta_info meta_info;
+	/**< Metabuf info required to support operations on the queue pair */
 	struct crypto_adpter_info ca;
 	/**< Crypto adapter related info */
+	struct rte_mempool *sess_mp;
+	/**< Session mempool */
+	struct rte_mempool *sess_mp_priv;
+	/**< Session private data mempool */
 };
 
 int cnxk_cpt_dev_config(struct rte_cryptodev *dev,