From patchwork Mon Jun 24 06:24:01 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Aakash Sasidharan X-Patchwork-Id: 141523 X-Patchwork-Delegate: gakhil@marvell.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id D5B46454A7; Mon, 24 Jun 2024 08:33:05 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id EB5EA40E37; Mon, 24 Jun 2024 08:31:47 +0200 (CEST) Received: from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com [67.231.156.173]) by mails.dpdk.org (Postfix) with ESMTP id E5BB24027B for ; Mon, 24 Jun 2024 08:24:48 +0200 (CEST) Received: from pps.filterd (m0045851.ppops.net [127.0.0.1]) by mx0b-0016f401.pphosted.com (8.18.1.2/8.18.1.2) with ESMTP id 45NLvS3X026147 for ; Sun, 23 Jun 2024 23:24:48 -0700 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com; h= cc:content-transfer-encoding:content-type:date:from:in-reply-to :message-id:mime-version:references:subject:to; s=pfpt0220; bh=9 yfN+hLBJYnaaTIgl7UXakJLvU//mtLgEt0nI0xotn8=; b=gVo1ovjWBqEFvUgS2 /w/b8hRu6pqolj+4EiSgB+s757eR0CZDmoaYaavuoAFNHUF5JAm0LC+oBQCs3Hxj Cp2qXzJNNZBhftLDs4MHpxr+6s0lYMizLU8dcxdPczpMOHXoSUIJSKWFKduynmUH zWI1PqVCfSYzeUIcfylg08Akd2q8/7RMQ9vTkBkb/xMucrvvn215keZu0LGP7hs3 s08wUTIM3QloDKEr4uiLfZSXIb3pjrhIpRNwpPJYY1Q0wT0ilFRISfY/ci1ECizv QWTaRVWrKCTAKnMe0HebigF7XF20h9lP+e0BCQXXTiTm7N2EUj8uGBNorfkNwEtf MQwTw== Received: from dc5-exch05.marvell.com ([199.233.59.128]) by mx0b-0016f401.pphosted.com (PPS) with ESMTPS id 3ywx4gbp7x-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=NOT) for ; Sun, 23 Jun 2024 23:24:47 -0700 (PDT) Received: from DC5-EXCH05.marvell.com (10.69.176.209) by DC5-EXCH05.marvell.com (10.69.176.209) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1544.4; Sun, 23 Jun 2024 23:24:46 -0700 Received: from maili.marvell.com (10.69.176.80) by DC5-EXCH05.marvell.com (10.69.176.209) with Microsoft SMTP Server id 15.2.1544.4 via Frontend Transport; Sun, 23 Jun 2024 23:24:46 -0700 Received: from localhost.localdomain (unknown [10.28.36.177]) by maili.marvell.com (Postfix) with ESMTP id 329DA3F7040; Sun, 23 Jun 2024 23:24:41 -0700 (PDT) From: Aakash Sasidharan To: Nithin Dabilpuram , Kiran Kumar K , Sunil Kumar Kori , Satha Rao , Harman Kalra , Ankur Dwivedi , Anoob Joseph , Tejasree Kondoj , Pavan Nikhilesh , "Shijith Thotton" CC: , , , , Subject: [PATCH v2 12/12] crypto/cnxk: enable dual submission to CPT Date: Mon, 24 Jun 2024 11:54:01 +0530 Message-ID: <20240624062401.4143606-13-asasidharan@marvell.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20240624062401.4143606-1-asasidharan@marvell.com> References: <20240620145848.3461844-1-asasidharan@marvell.com> <20240624062401.4143606-1-asasidharan@marvell.com> MIME-Version: 1.0 X-Proofpoint-GUID: Gd4O3OCFwKr_C8yBMQfxwdFImPi-sgCj X-Proofpoint-ORIG-GUID: Gd4O3OCFwKr_C8yBMQfxwdFImPi-sgCj X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.293,Aquarius:18.0.1039,Hydra:6.0.680,FMLib:17.12.28.16 definitions=2024-06-24_05,2024-06-21_01,2024-05-17_01 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org From: Anoob Joseph Submit two instructions in one LMTLINE. Signed-off-by: Anoob Joseph --- drivers/common/cnxk/roc_cpt.c | 17 +- drivers/common/cnxk/roc_cpt.h | 8 +- drivers/crypto/cnxk/cn10k_cryptodev_ops.c | 182 +++++----------------- drivers/crypto/cnxk/cn10k_cryptodev_ops.h | 60 ++++++- drivers/crypto/cnxk/cnxk_cryptodev_ops.c | 47 ++---- drivers/crypto/cnxk/cnxk_cryptodev_ops.h | 2 + drivers/event/cnxk/cnxk_eventdev_adptr.c | 4 +- 7 files changed, 124 insertions(+), 196 deletions(-) diff --git a/drivers/common/cnxk/roc_cpt.c b/drivers/common/cnxk/roc_cpt.c index 9f283ceb2e..aba2a49d19 100644 --- a/drivers/common/cnxk/roc_cpt.c +++ b/drivers/common/cnxk/roc_cpt.c @@ -1135,8 +1135,8 @@ roc_cpt_iq_enable(struct roc_cpt_lf *lf) } int -roc_cpt_lmtline_init(struct roc_cpt *roc_cpt, struct roc_cpt_lmtline *lmtline, - int lf_id) +roc_cpt_lmtline_init(struct roc_cpt *roc_cpt, struct roc_cpt_lmtline *lmtline, int lf_id, + bool is_dual) { struct roc_cpt_lf *lf; @@ -1145,12 +1145,19 @@ roc_cpt_lmtline_init(struct roc_cpt *roc_cpt, struct roc_cpt_lmtline *lmtline, return -ENOTSUP; lmtline->io_addr = lf->io_addr; - if (roc_model_is_cn10k()) - lmtline->io_addr |= ROC_CN10K_CPT_INST_DW_M1 << 4; + lmtline->fc_thresh = lf->nb_desc - CPT_LF_FC_MIN_THRESHOLD; + + if (roc_model_is_cn10k()) { + if (is_dual) { + lmtline->io_addr |= ROC_CN10K_TWO_CPT_INST_DW_M1 << 4; + lmtline->fc_thresh = lf->nb_desc - 2 * CPT_LF_FC_MIN_THRESHOLD; + } else { + lmtline->io_addr |= ROC_CN10K_CPT_INST_DW_M1 << 4; + } + } lmtline->fc_addr = lf->fc_addr; lmtline->lmt_base = lf->lmt_base; - lmtline->fc_thresh = lf->nb_desc - CPT_LF_FC_MIN_THRESHOLD; return 0; } diff --git a/drivers/common/cnxk/roc_cpt.h b/drivers/common/cnxk/roc_cpt.h index 8ef9062ae0..e2e919f80f 100644 --- a/drivers/common/cnxk/roc_cpt.h +++ b/drivers/common/cnxk/roc_cpt.h @@ -200,12 +200,12 @@ int __roc_api roc_cpt_afs_print(struct roc_cpt *roc_cpt); int __roc_api roc_cpt_lfs_print(struct roc_cpt *roc_cpt); void __roc_api roc_cpt_iq_disable(struct roc_cpt_lf *lf); void __roc_api roc_cpt_iq_enable(struct roc_cpt_lf *lf); -int __roc_api roc_cpt_lmtline_init(struct roc_cpt *roc_cpt, - struct roc_cpt_lmtline *lmtline, int lf_id); +int __roc_api roc_cpt_lmtline_init(struct roc_cpt *roc_cpt, struct roc_cpt_lmtline *lmtline, + int lf_id, bool is_dual); void __roc_api roc_cpt_parse_hdr_dump(FILE *file, const struct cpt_parse_hdr_s *cpth); -int __roc_api roc_cpt_ctx_write(struct roc_cpt_lf *lf, void *sa_dptr, - void *sa_cptr, uint16_t sa_len); +int __roc_api roc_cpt_ctx_write(struct roc_cpt_lf *lf, void *sa_dptr, void *sa_cptr, + uint16_t sa_len); void __roc_api roc_cpt_int_misc_cb_register(roc_cpt_int_misc_cb_t cb, void *args); int __roc_api roc_cpt_int_misc_cb_unregister(roc_cpt_int_misc_cb_t cb, void *args); diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c index 0d5a9ab5ef..9d6ac06bd2 100644 --- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c +++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c @@ -12,11 +12,6 @@ #include #include "roc_cpt.h" -#if defined(__aarch64__) -#include "roc_io.h" -#else -#include "roc_io_generic.h" -#endif #include "roc_idev.h" #include "roc_sso.h" #include "roc_sso_dp.h" @@ -40,8 +35,8 @@ /* Holds information required to send crypto operations in one burst */ struct ops_burst { - struct rte_crypto_op *op[CN10K_PKTS_PER_LOOP]; - uint64_t w2[CN10K_PKTS_PER_LOOP]; + struct rte_crypto_op *op[CN10K_CPT_PKTS_PER_LOOP]; + uint64_t w2[CN10K_CPT_PKTS_PER_LOOP]; struct cn10k_sso_hws *ws; struct cnxk_cpt_qp *qp; uint16_t nb_ops; @@ -55,54 +50,6 @@ struct vec_request { uint64_t w2; }; -static __rte_always_inline void __rte_hot -cn10k_cpt_lmtst_dual_submit(uint64_t *io_addr, const uint16_t lmt_id, int *i) -{ - uint64_t lmt_arg; - - /* Check if the total number of instructions is odd or even. */ - const int flag_odd = *i & 0x1; - - /* Reduce i by 1 when odd number of instructions.*/ - *i -= flag_odd; - - if (*i > 2 * CN10K_PKTS_PER_STEORL) { - lmt_arg = ROC_CN10K_DUAL_CPT_LMT_ARG | (CN10K_PKTS_PER_STEORL - 1) << 12 | - (uint64_t)lmt_id; - roc_lmt_submit_steorl(lmt_arg, *io_addr); - lmt_arg = ROC_CN10K_DUAL_CPT_LMT_ARG | (*i / 2 - CN10K_PKTS_PER_STEORL - 1) << 12 | - (uint64_t)(lmt_id + CN10K_PKTS_PER_STEORL); - roc_lmt_submit_steorl(lmt_arg, *io_addr); - if (flag_odd) { - *io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) | - (ROC_CN10K_CPT_INST_DW_M1 << 4); - lmt_arg = (uint64_t)(lmt_id + *i / 2); - roc_lmt_submit_steorl(lmt_arg, *io_addr); - *io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) | - (ROC_CN10K_TWO_CPT_INST_DW_M1 << 4); - *i += 1; - } - } else { - if (*i != 0) { - lmt_arg = - ROC_CN10K_DUAL_CPT_LMT_ARG | (*i / 2 - 1) << 12 | (uint64_t)lmt_id; - roc_lmt_submit_steorl(lmt_arg, *io_addr); - } - - if (flag_odd) { - *io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) | - (ROC_CN10K_CPT_INST_DW_M1 << 4); - lmt_arg = (uint64_t)(lmt_id + *i / 2); - roc_lmt_submit_steorl(lmt_arg, *io_addr); - *io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) | - (ROC_CN10K_TWO_CPT_INST_DW_M1 << 4); - *i += 1; - } - } - - rte_io_wmb(); -} - static inline struct cnxk_se_sess * cn10k_cpt_sym_temp_sess_create(struct cnxk_cpt_qp *qp, struct rte_crypto_op *op) { @@ -385,8 +332,8 @@ static uint16_t cn10k_cpt_enqueue_burst(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops, const bool is_sg_ver2) { - uint64_t lmt_base, lmt_arg, io_addr; struct cpt_inflight_req *infl_req; + uint64_t head, lmt_base, io_addr; uint16_t nb_allowed, count = 0; struct cnxk_cpt_qp *qp = qptr; struct pending_queue *pend_q; @@ -394,7 +341,6 @@ cn10k_cpt_enqueue_burst(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops, union cpt_fc_write_s fc; uint64_t *fc_addr; uint16_t lmt_id; - uint64_t head; int ret, i; pend_q = &qp->pend_q; @@ -424,11 +370,11 @@ cn10k_cpt_enqueue_burst(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops, goto pend_q_commit; } - for (i = 0; i < RTE_MIN(CN10K_PKTS_PER_LOOP, nb_ops); i++) { + for (i = 0; i < RTE_MIN(CN10K_CPT_PKTS_PER_LOOP, nb_ops); i++) { infl_req = &pend_q->req_queue[head]; infl_req->op_flags = 0; - ret = cn10k_cpt_fill_inst(qp, ops + i, &inst[2 * i], infl_req, is_sg_ver2); + ret = cn10k_cpt_fill_inst(qp, ops + i, &inst[i], infl_req, is_sg_ver2); if (unlikely(ret != 1)) { plt_dp_err("Could not process op: %p", ops + i); if (i == 0) @@ -439,24 +385,12 @@ cn10k_cpt_enqueue_burst(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops, pending_queue_advance(&head, pq_mask); } - if (i > CN10K_PKTS_PER_STEORL) { - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (CN10K_PKTS_PER_STEORL - 1) << 12 | - (uint64_t)lmt_id; - roc_lmt_submit_steorl(lmt_arg, io_addr); - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - CN10K_PKTS_PER_STEORL - 1) << 12 | - (uint64_t)(lmt_id + CN10K_PKTS_PER_STEORL); - roc_lmt_submit_steorl(lmt_arg, io_addr); - } else { - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - 1) << 12 | (uint64_t)lmt_id; - roc_lmt_submit_steorl(lmt_arg, io_addr); - } - - rte_io_wmb(); + cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i); - if (nb_ops - i > 0 && i == CN10K_PKTS_PER_LOOP) { - nb_ops -= i; - ops += i; - count += i; + if (nb_ops - i > 0 && i == CN10K_CPT_PKTS_PER_LOOP) { + nb_ops -= CN10K_CPT_PKTS_PER_LOOP; + ops += CN10K_CPT_PKTS_PER_LOOP; + count += CN10K_CPT_PKTS_PER_LOOP; goto again; } @@ -631,7 +565,7 @@ cn10k_cpt_vec_pkt_submission_timeout_handle(void) static inline void cn10k_cpt_vec_submit(struct vec_request vec_tbl[], uint16_t vec_tbl_len, struct cnxk_cpt_qp *qp) { - uint64_t lmt_base, lmt_arg, lmt_id, io_addr; + uint64_t lmt_base, lmt_id, io_addr; union cpt_fc_write_s fc; struct cpt_inst_s *inst; uint16_t burst_size; @@ -659,7 +593,7 @@ cn10k_cpt_vec_submit(struct vec_request vec_tbl[], uint16_t vec_tbl_len, struct again: burst_size = RTE_MIN(CN10K_PKTS_PER_STEORL, vec_tbl_len); for (i = 0; i < burst_size; i++) - cn10k_cpt_vec_inst_fill(&vec_tbl[i], &inst[i * 2], qp, vec_tbl[0].w7); + cn10k_cpt_vec_inst_fill(&vec_tbl[i], &inst[i], qp, vec_tbl[0].w7); do { fc.u64[0] = __atomic_load_n(fc_addr, __ATOMIC_RELAXED); @@ -669,10 +603,7 @@ cn10k_cpt_vec_submit(struct vec_request vec_tbl[], uint16_t vec_tbl_len, struct cn10k_cpt_vec_pkt_submission_timeout_handle(); } while (true); - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - 1) << 12 | lmt_id; - roc_lmt_submit_steorl(lmt_arg, io_addr); - - rte_io_wmb(); + cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i); vec_tbl_len -= i; @@ -686,12 +617,12 @@ static inline int ca_lmtst_vec_submit(struct ops_burst *burst, struct vec_request vec_tbl[], uint16_t *vec_tbl_len, const bool is_sg_ver2) { - struct cpt_inflight_req *infl_reqs[CN10K_PKTS_PER_LOOP]; - uint64_t lmt_base, lmt_arg, io_addr; + struct cpt_inflight_req *infl_reqs[CN10K_CPT_PKTS_PER_LOOP]; uint16_t lmt_id, len = *vec_tbl_len; struct cpt_inst_s *inst, *inst_base; struct cpt_inflight_req *infl_req; struct rte_event_vector *vec; + uint64_t lmt_base, io_addr; union cpt_fc_write_s fc; struct cnxk_cpt_qp *qp; uint64_t *fc_addr; @@ -728,7 +659,7 @@ ca_lmtst_vec_submit(struct ops_burst *burst, struct vec_request vec_tbl[], uint1 } for (i = 0; i < burst->nb_ops; i++) { - inst = &inst_base[2 * i]; + inst = &inst_base[i]; infl_req = infl_reqs[i]; infl_req->op_flags = 0; @@ -788,24 +719,12 @@ next_op:; if (CNXK_TT_FROM_TAG(burst->ws->gw_rdata) == SSO_TT_ORDERED) roc_sso_hws_head_wait(burst->ws->base); - if (i > CN10K_PKTS_PER_STEORL) { - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (CN10K_PKTS_PER_STEORL - 1) << 12 | - (uint64_t)lmt_id; - roc_lmt_submit_steorl(lmt_arg, io_addr); - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - CN10K_PKTS_PER_STEORL - 1) << 12 | - (uint64_t)(lmt_id + CN10K_PKTS_PER_STEORL); - roc_lmt_submit_steorl(lmt_arg, io_addr); - } else { - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - 1) << 12 | (uint64_t)lmt_id; - roc_lmt_submit_steorl(lmt_arg, io_addr); - } + cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i); /* Store w7 of last successfully filled instruction */ inst = &inst_base[2 * (i - 1)]; vec_tbl[0].w7 = inst->w7; - rte_io_wmb(); - put: if (i != burst->nb_ops) rte_mempool_put_bulk(qp->ca.req_mp, (void *)&infl_reqs[i], burst->nb_ops - i); @@ -818,10 +737,10 @@ next_op:; static inline uint16_t ca_lmtst_burst_submit(struct ops_burst *burst, const bool is_sg_ver2) { - struct cpt_inflight_req *infl_reqs[CN10K_PKTS_PER_LOOP]; - uint64_t lmt_base, lmt_arg, io_addr; + struct cpt_inflight_req *infl_reqs[CN10K_CPT_PKTS_PER_LOOP]; struct cpt_inst_s *inst, *inst_base; struct cpt_inflight_req *infl_req; + uint64_t lmt_base, io_addr; union cpt_fc_write_s fc; struct cnxk_cpt_qp *qp; uint64_t *fc_addr; @@ -852,7 +771,7 @@ ca_lmtst_burst_submit(struct ops_burst *burst, const bool is_sg_ver2) } for (i = 0; i < burst->nb_ops; i++) { - inst = &inst_base[2 * i]; + inst = &inst_base[i]; infl_req = infl_reqs[i]; infl_req->op_flags = 0; @@ -889,19 +808,7 @@ ca_lmtst_burst_submit(struct ops_burst *burst, const bool is_sg_ver2) if (CNXK_TT_FROM_TAG(burst->ws->gw_rdata) == SSO_TT_ORDERED) roc_sso_hws_head_wait(burst->ws->base); - if (i > CN10K_PKTS_PER_STEORL) { - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (CN10K_PKTS_PER_STEORL - 1) << 12 | - (uint64_t)lmt_id; - roc_lmt_submit_steorl(lmt_arg, io_addr); - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - CN10K_PKTS_PER_STEORL - 1) << 12 | - (uint64_t)(lmt_id + CN10K_PKTS_PER_STEORL); - roc_lmt_submit_steorl(lmt_arg, io_addr); - } else { - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - 1) << 12 | (uint64_t)lmt_id; - roc_lmt_submit_steorl(lmt_arg, io_addr); - } - - rte_io_wmb(); + cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i); put: if (unlikely(i != burst->nb_ops)) @@ -963,7 +870,7 @@ cn10k_cpt_crypto_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_ev burst.op[burst.nb_ops] = op; /* Max nb_ops per burst check */ - if (++burst.nb_ops == CN10K_PKTS_PER_LOOP) { + if (++burst.nb_ops == CN10K_CPT_PKTS_PER_LOOP) { if (is_vector) submitted = ca_lmtst_vec_submit(&burst, vec_tbl, &vec_tbl_len, is_sg_ver2); @@ -1460,8 +1367,6 @@ cn10k_cryptodev_sec_inb_rx_inject(void *dev, struct rte_mbuf **pkts, vf = cdev->data->dev_private; - const int nb_pkts_per_loop = 2 * CN10K_PKTS_PER_LOOP; - lmt_base = vf->rx_inj_lmtline.lmt_base; io_addr = vf->rx_inj_lmtline.io_addr; fc_addr = vf->rx_inj_lmtline.fc_addr; @@ -1481,7 +1386,7 @@ cn10k_cryptodev_sec_inb_rx_inject(void *dev, struct rte_mbuf **pkts, if (unlikely(fc.s.qsize > fc_thresh)) goto exit; - for (; i < RTE_MIN(nb_pkts_per_loop, nb_pkts); i++) { + for (; i < RTE_MIN(CN10K_CPT_PKTS_PER_LOOP, nb_pkts); i++) { m = pkts[i]; sec_sess = (struct cn10k_sec_session *)sess[i]; @@ -1542,11 +1447,11 @@ cn10k_cryptodev_sec_inb_rx_inject(void *dev, struct rte_mbuf **pkts, cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i); - if (nb_pkts - i > 0 && i == nb_pkts_per_loop) { - nb_pkts -= nb_pkts_per_loop; - pkts += nb_pkts_per_loop; - count += nb_pkts_per_loop; - sess += nb_pkts_per_loop; + if (nb_pkts - i > 0 && i == CN10K_CPT_PKTS_PER_LOOP) { + nb_pkts -= CN10K_CPT_PKTS_PER_LOOP; + pkts += CN10K_CPT_PKTS_PER_LOOP; + count += CN10K_CPT_PKTS_PER_LOOP; + sess += CN10K_CPT_PKTS_PER_LOOP; goto again; } @@ -1645,8 +1550,8 @@ cn10k_cpt_raw_enqueue_burst(void *qpair, uint8_t *drv_ctx, struct rte_crypto_sym const bool is_sgv2) { uint16_t lmt_id, nb_allowed, nb_ops = vec->num; - uint64_t lmt_base, lmt_arg, io_addr, head; struct cpt_inflight_req *infl_req; + uint64_t lmt_base, io_addr, head; struct cnxk_cpt_qp *qp = qpair; struct cnxk_sym_dp_ctx *dp_ctx; struct pending_queue *pend_q; @@ -1683,7 +1588,7 @@ cn10k_cpt_raw_enqueue_burst(void *qpair, uint8_t *drv_ctx, struct rte_crypto_sym goto pend_q_commit; } - for (i = 0; i < RTE_MIN(CN10K_PKTS_PER_LOOP, nb_ops); i++) { + for (i = 0; i < RTE_MIN(CN10K_CPT_PKTS_PER_LOOP, nb_ops); i++) { struct cnxk_iov iov; index = count + i; @@ -1691,7 +1596,7 @@ cn10k_cpt_raw_enqueue_burst(void *qpair, uint8_t *drv_ctx, struct rte_crypto_sym infl_req->op_flags = 0; cnxk_raw_burst_to_iov(vec, &ofs, index, &iov); - ret = cn10k_cpt_raw_fill_inst(&iov, qp, dp_ctx, &inst[2 * i], infl_req, + ret = cn10k_cpt_raw_fill_inst(&iov, qp, dp_ctx, &inst[i], infl_req, user_data[index], is_sgv2); if (unlikely(ret != 1)) { plt_dp_err("Could not process vec: %d", index); @@ -1705,21 +1610,9 @@ cn10k_cpt_raw_enqueue_burst(void *qpair, uint8_t *drv_ctx, struct rte_crypto_sym pending_queue_advance(&head, pq_mask); } - if (i > CN10K_PKTS_PER_STEORL) { - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (CN10K_PKTS_PER_STEORL - 1) << 12 | - (uint64_t)lmt_id; - roc_lmt_submit_steorl(lmt_arg, io_addr); - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - CN10K_PKTS_PER_STEORL - 1) << 12 | - (uint64_t)(lmt_id + CN10K_PKTS_PER_STEORL); - roc_lmt_submit_steorl(lmt_arg, io_addr); - } else { - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - 1) << 12 | (uint64_t)lmt_id; - roc_lmt_submit_steorl(lmt_arg, io_addr); - } - - rte_io_wmb(); + cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i); - if (nb_ops - i > 0 && i == CN10K_PKTS_PER_LOOP) { + if (nb_ops - i > 0 && i == CN10K_CPT_PKTS_PER_LOOP) { nb_ops -= i; count += i; goto again; @@ -1760,8 +1653,8 @@ cn10k_cpt_raw_enqueue(void *qpair, uint8_t *drv_ctx, struct rte_crypto_vec *data struct rte_crypto_va_iova_ptr *aad_or_auth_iv, void *user_data, const bool is_sgv2) { - uint64_t lmt_base, lmt_arg, io_addr, head; struct cpt_inflight_req *infl_req; + uint64_t lmt_base, io_addr, head; struct cnxk_cpt_qp *qp = qpair; struct cnxk_sym_dp_ctx *dp_ctx; uint16_t lmt_id, nb_allowed; @@ -1769,7 +1662,7 @@ cn10k_cpt_raw_enqueue(void *qpair, uint8_t *drv_ctx, struct rte_crypto_vec *data union cpt_fc_write_s fc; struct cnxk_iov iov; uint64_t *fc_addr; - int ret; + int ret, i = 1; struct pending_queue *pend_q = &qp->pend_q; const uint64_t pq_mask = pend_q->pq_mask; @@ -1806,10 +1699,7 @@ cn10k_cpt_raw_enqueue(void *qpair, uint8_t *drv_ctx, struct rte_crypto_vec *data pending_queue_advance(&head, pq_mask); - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (uint64_t)lmt_id; - roc_lmt_submit_steorl(lmt_arg, io_addr); - - rte_io_wmb(); + cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i); pend_q->head = head; pend_q->time_out = rte_get_timer_cycles() + DEFAULT_COMMAND_TIMEOUT * rte_get_timer_hz(); diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.h b/drivers/crypto/cnxk/cn10k_cryptodev_ops.h index 406c4abc7f..be76c49a65 100644 --- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.h +++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.h @@ -5,15 +5,21 @@ #ifndef _CN10K_CRYPTODEV_OPS_H_ #define _CN10K_CRYPTODEV_OPS_H_ -#include #include +#include #include #include +#if defined(__aarch64__) +#include "roc_io.h" +#else +#include "roc_io_generic.h" +#endif + #include "cnxk_cryptodev.h" -#define CN10K_PKTS_PER_LOOP 32 -#define CN10K_PKTS_PER_STEORL 16 +#define CN10K_PKTS_PER_STEORL 32 +#define CN10K_LMTLINES_PER_STEORL 16 extern struct rte_cryptodev_ops cn10k_cpt_ops; @@ -34,4 +40,52 @@ __rte_internal uint16_t __rte_hot cn10k_cpt_sg_ver2_crypto_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events); +static __rte_always_inline void __rte_hot +cn10k_cpt_lmtst_dual_submit(uint64_t *io_addr, const uint16_t lmt_id, int *i) +{ + uint64_t lmt_arg; + + /* Check if the total number of instructions is odd or even. */ + const int flag_odd = *i & 0x1; + + /* Reduce i by 1 when odd number of instructions.*/ + *i -= flag_odd; + + if (*i > CN10K_PKTS_PER_STEORL) { + lmt_arg = ROC_CN10K_DUAL_CPT_LMT_ARG | (CN10K_LMTLINES_PER_STEORL - 1) << 12 | + (uint64_t)lmt_id; + roc_lmt_submit_steorl(lmt_arg, *io_addr); + lmt_arg = ROC_CN10K_DUAL_CPT_LMT_ARG | + (*i / 2 - CN10K_LMTLINES_PER_STEORL - 1) << 12 | + (uint64_t)(lmt_id + CN10K_LMTLINES_PER_STEORL); + roc_lmt_submit_steorl(lmt_arg, *io_addr); + if (flag_odd) { + *io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) | + (ROC_CN10K_CPT_INST_DW_M1 << 4); + lmt_arg = (uint64_t)(lmt_id + *i / 2); + roc_lmt_submit_steorl(lmt_arg, *io_addr); + *io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) | + (ROC_CN10K_TWO_CPT_INST_DW_M1 << 4); + *i += 1; + } + } else { + if (*i != 0) { + lmt_arg = + ROC_CN10K_DUAL_CPT_LMT_ARG | (*i / 2 - 1) << 12 | (uint64_t)lmt_id; + roc_lmt_submit_steorl(lmt_arg, *io_addr); + } + + if (flag_odd) { + *io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) | + (ROC_CN10K_CPT_INST_DW_M1 << 4); + lmt_arg = (uint64_t)(lmt_id + *i / 2); + roc_lmt_submit_steorl(lmt_arg, *io_addr); + *io_addr = (*io_addr & ~(uint64_t)(0x7 << 4)) | + (ROC_CN10K_TWO_CPT_INST_DW_M1 << 4); + *i += 1; + } + } + + rte_io_wmb(); +} #endif /* _CN10K_CRYPTODEV_OPS_H_ */ diff --git a/drivers/crypto/cnxk/cnxk_cryptodev_ops.c b/drivers/crypto/cnxk/cnxk_cryptodev_ops.c index 6acaa4413b..cfcfa79fdf 100644 --- a/drivers/crypto/cnxk/cnxk_cryptodev_ops.c +++ b/drivers/crypto/cnxk/cnxk_cryptodev_ops.c @@ -431,7 +431,6 @@ cnxk_cpt_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id, struct rte_pci_device *pci_dev; struct cnxk_cpt_qp *qp; uint32_t nb_desc; - uint64_t io_addr; int ret; if (dev->data->queue_pairs[qp_id] != NULL) @@ -467,7 +466,7 @@ cnxk_cpt_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id, roc_cpt->lf[qp_id] = &qp->lf; - ret = roc_cpt_lmtline_init(roc_cpt, &qp->lmtline, qp_id); + ret = roc_cpt_lmtline_init(roc_cpt, &qp->lmtline, qp_id, true); if (ret < 0) { roc_cpt->lf[qp_id] = NULL; plt_err("Could not init lmtline for queue pair %d", qp_id); @@ -478,7 +477,7 @@ cnxk_cpt_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id, dev->data->queue_pairs[qp_id] = qp; if (qp_id == vf->rx_inject_qp) { - ret = roc_cpt_lmtline_init(roc_cpt, &vf->rx_inj_lmtline, vf->rx_inject_qp); + ret = roc_cpt_lmtline_init(roc_cpt, &vf->rx_inj_lmtline, vf->rx_inject_qp, true); if (ret) { plt_err("Could not init lmtline Rx inject"); goto exit; @@ -486,14 +485,6 @@ cnxk_cpt_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id, vf->rx_inj_sso_pf_func = roc_idev_nix_inl_dev_pffunc_get(); - /* Update IO addr to enable dual submission */ - io_addr = vf->rx_inj_lmtline.io_addr; - io_addr = (io_addr & ~(uint64_t)(0x7 << 4)) | ROC_CN10K_TWO_CPT_INST_DW_M1 << 4; - vf->rx_inj_lmtline.io_addr = io_addr; - - /* Update FC threshold to reflect dual submission */ - vf->rx_inj_lmtline.fc_thresh -= 32; - /* Block the queue for other submissions */ qp->pend_q.pq_mask = 0; } @@ -969,44 +960,28 @@ rte_pmd_cnxk_crypto_qptr_get(uint8_t dev_id, uint16_t qp_id) static inline void cnxk_crypto_cn10k_submit(void *qptr, void *inst, uint16_t nb_inst) { - uint64_t lmt_base, lmt_arg, io_addr; struct cnxk_cpt_qp *qp = qptr; - uint16_t i, j, lmt_id; + uint64_t lmt_base, io_addr; + uint16_t lmt_id; void *lmt_dst; + int i; lmt_base = qp->lmtline.lmt_base; io_addr = qp->lmtline.io_addr; ROC_LMT_BASE_ID_GET(lmt_base, lmt_id); -again: - i = RTE_MIN(nb_inst, CN10K_PKTS_PER_LOOP); lmt_dst = PLT_PTR_CAST(lmt_base); +again: + i = RTE_MIN(nb_inst, CN10K_CPT_PKTS_PER_LOOP); - for (j = 0; j < i; j++) { - rte_memcpy(lmt_dst, inst, sizeof(struct cpt_inst_s)); - inst = RTE_PTR_ADD(inst, sizeof(struct cpt_inst_s)); - lmt_dst = RTE_PTR_ADD(lmt_dst, 2 * sizeof(struct cpt_inst_s)); - } - - rte_io_wmb(); - - if (i > CN10K_PKTS_PER_STEORL) { - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (CN10K_PKTS_PER_STEORL - 1) << 12 | - (uint64_t)lmt_id; - roc_lmt_submit_steorl(lmt_arg, io_addr); - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - CN10K_PKTS_PER_STEORL - 1) << 12 | - (uint64_t)(lmt_id + CN10K_PKTS_PER_STEORL); - roc_lmt_submit_steorl(lmt_arg, io_addr); - } else { - lmt_arg = ROC_CN10K_CPT_LMT_ARG | (i - 1) << 12 | (uint64_t)lmt_id; - roc_lmt_submit_steorl(lmt_arg, io_addr); - } + memcpy(lmt_dst, inst, i * sizeof(struct cpt_inst_s)); - rte_io_wmb(); + cn10k_cpt_lmtst_dual_submit(&io_addr, lmt_id, &i); if (nb_inst - i > 0) { - nb_inst -= i; + nb_inst -= CN10K_CPT_PKTS_PER_LOOP; + inst = RTE_PTR_ADD(inst, CN10K_CPT_PKTS_PER_LOOP * sizeof(struct cpt_inst_s)); goto again; } } diff --git a/drivers/crypto/cnxk/cnxk_cryptodev_ops.h b/drivers/crypto/cnxk/cnxk_cryptodev_ops.h index 9de7e432e4..caf6ac35e5 100644 --- a/drivers/crypto/cnxk/cnxk_cryptodev_ops.h +++ b/drivers/crypto/cnxk/cnxk_cryptodev_ops.h @@ -25,6 +25,8 @@ #define MOD_INC(i, l) ((i) == (l - 1) ? (i) = 0 : (i)++) +#define CN10K_CPT_PKTS_PER_LOOP 64 + /* Macros to form words in CPT instruction */ #define CNXK_CPT_INST_W2(tag, tt, grp, rvu_pf_func) \ ((tag) | ((uint64_t)(tt) << 32) | ((uint64_t)(grp) << 34) | \ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 98db11ad61..2c049e7041 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -632,7 +632,7 @@ crypto_adapter_qp_setup(const struct rte_cryptodev *cdev, struct cnxk_cpt_qp *qp * simultaneous enqueue from all available cores. */ if (roc_model_is_cn10k()) - nb_desc_min = rte_lcore_count() * 32; + nb_desc_min = rte_lcore_count() * CN10K_CPT_PKTS_PER_LOOP; else nb_desc_min = rte_lcore_count() * 2; @@ -707,7 +707,7 @@ crypto_adapter_qp_free(struct cnxk_cpt_qp *qp) rte_mempool_free(qp->ca.req_mp); qp->ca.enabled = false; - ret = roc_cpt_lmtline_init(qp->lf.roc_cpt, &qp->lmtline, qp->lf.lf_id); + ret = roc_cpt_lmtline_init(qp->lf.roc_cpt, &qp->lmtline, qp->lf.lf_id, true); if (ret < 0) { plt_err("Could not reset lmtline for queue pair %d", qp->lf.lf_id); return ret;