From patchwork Wed Aug 30 16:54:41 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pavan Nikhilesh Bhagavatula X-Patchwork-Id: 130929 X-Patchwork-Delegate: jerinj@marvell.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 7F2B841FD1; Wed, 30 Aug 2023 18:55:05 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 7A4A240293; Wed, 30 Aug 2023 18:55:05 +0200 (CEST) Received: from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com [67.231.156.173]) by mails.dpdk.org (Postfix) with ESMTP id 19AC640292 for ; Wed, 30 Aug 2023 18:55:04 +0200 (CEST) Received: from pps.filterd (m0045851.ppops.net [127.0.0.1]) by mx0b-0016f401.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id 37U85vjB011785 for ; Wed, 30 Aug 2023 09:55:03 -0700 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-transfer-encoding : content-type; s=pfpt0220; bh=TmLSXzBSGDt3iFNvgmusNe2qAq550mePq2qtN0UNB4g=; b=j2nIYVjOhS69xEz29+VDaimLUsLS3Is5VROxg3tPMQdiE3IMsRMhkq5lEhHsiSXJFr71 apuQewd13Whlriax9v6II5o2m3aqlmceIO6rfiALG1A1B0MBoE8Tj+9uNVLcqnh1KzdM oJhUC1pYFnUiwaCVD19A/7nPaQ28aepyk+nSeUWsW9LS6mjYun4FJj5vmp7Vfa0MtpmZ Z0ilyONFA9GbtCMe9EwmVp8FzOL5+v9PBRQV5dflKOi2XG+69Z+rdyb6XBdWQG83ubsj uiaUyHHycGOw9jvN8gnkGXyAfIUiQQ3vJyGn+Awsplekd2qv0528FHbTrHVCI1+m2yup 2w== Received: from dc5-exch02.marvell.com ([199.233.59.182]) by mx0b-0016f401.pphosted.com (PPS) with ESMTPS id 3st1y61r4k-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT) for ; Wed, 30 Aug 2023 09:55:03 -0700 Received: from DC5-EXCH01.marvell.com (10.69.176.38) by DC5-EXCH02.marvell.com (10.69.176.39) with Microsoft SMTP Server (TLS) id 15.0.1497.48; Wed, 30 Aug 2023 09:54:50 -0700 Received: from maili.marvell.com (10.69.176.80) by DC5-EXCH01.marvell.com (10.69.176.38) with Microsoft SMTP Server id 15.0.1497.48 via Frontend Transport; Wed, 30 Aug 2023 09:54:50 -0700 Received: from MININT-80QBFE8.corp.innovium.com (MININT-80QBFE8.marvell.com [10.28.164.106]) by maili.marvell.com (Postfix) with ESMTP id 663DF3F707E; Wed, 30 Aug 2023 09:54:48 -0700 (PDT) From: To: , Vamsi Attunuru CC: , Pavan Nikhilesh Subject: [PATCH v3 2/2] dma/cnxk: rewrite DMA fastpath Date: Wed, 30 Aug 2023 22:24:41 +0530 Message-ID: <20230830165441.9188-2-pbhagavatula@marvell.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20230830165441.9188-1-pbhagavatula@marvell.com> References: <20230830143057.8445-1-pbhagavatula@marvell.com> <20230830165441.9188-1-pbhagavatula@marvell.com> MIME-Version: 1.0 X-Proofpoint-ORIG-GUID: 11MrsIoloiIzKNZFNIt-72iEUSH2fiaQ X-Proofpoint-GUID: 11MrsIoloiIzKNZFNIt-72iEUSH2fiaQ X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.267,Aquarius:18.0.957,Hydra:6.0.601,FMLib:17.11.176.26 definitions=2023-08-30_13,2023-08-29_01,2023-05-22_02 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org From: Pavan Nikhilesh Rewrite DMA fastpath to use NEON instructions and reduce number of words read from config. Signed-off-by: Pavan Nikhilesh --- drivers/dma/cnxk/cnxk_dmadev.c | 454 +++-------------------------- drivers/dma/cnxk/cnxk_dmadev.h | 89 +++++- drivers/dma/cnxk/cnxk_dmadev_fp.c | 455 ++++++++++++++++++++++++++++++ drivers/dma/cnxk/meson.build | 9 +- 4 files changed, 577 insertions(+), 430 deletions(-) create mode 100644 drivers/dma/cnxk/cnxk_dmadev_fp.c diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c index 35c2b79156..465290ce7a 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.c +++ b/drivers/dma/cnxk/cnxk_dmadev.c @@ -2,19 +2,6 @@ * Copyright (C) 2021 Marvell International Ltd. */ -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - #include static int cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan); @@ -166,22 +153,9 @@ cnxk_dmadev_configure(struct rte_dma_dev *dev, const struct rte_dma_conf *conf, return rc; } -static int -cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, - const struct rte_dma_vchan_conf *conf, uint32_t conf_sz) +static void +cn9k_dmadev_setup_hdr(union cnxk_dpi_instr_cmd *header, const struct rte_dma_vchan_conf *conf) { - struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private; - struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - union dpi_instr_hdr_s *header = &dpi_conf->hdr; - uint16_t max_desc; - uint32_t size; - int i; - - RTE_SET_USED(conf_sz); - - if (dpivf->flag & CNXK_DPI_DEV_START) - return 0; - header->cn9k.pt = DPI_HDR_PT_ZBW_CA; switch (conf->direction) { @@ -217,57 +191,11 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, header->cn9k.fport = conf->dst_port.pcie.coreid; header->cn9k.pvfe = 0; }; - - /* Free up descriptor memory before allocating. */ - cnxk_dmadev_vchan_free(dpivf, vchan); - - max_desc = conf->nb_desc; - if (!rte_is_power_of_2(max_desc)) - max_desc = rte_align32pow2(max_desc); - - if (max_desc > DPI_MAX_DESC) - max_desc = DPI_MAX_DESC; - - size = (max_desc * sizeof(struct cnxk_dpi_compl_s *)); - dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0); - - if (dpi_conf->c_desc.compl_ptr == NULL) { - plt_err("Failed to allocate for comp_data"); - return -ENOMEM; - } - - for (i = 0; i < max_desc; i++) { - dpi_conf->c_desc.compl_ptr[i] = - rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0); - if (!dpi_conf->c_desc.compl_ptr[i]) { - plt_err("Failed to allocate for descriptor memory"); - return -ENOMEM; - } - - dpi_conf->c_desc.compl_ptr[i]->cdata = DPI_REQ_CDATA; - } - - dpi_conf->c_desc.max_cnt = (max_desc - 1); - - return 0; } -static int -cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, - const struct rte_dma_vchan_conf *conf, uint32_t conf_sz) +static void +cn10k_dmadev_setup_hdr(union cnxk_dpi_instr_cmd *header, const struct rte_dma_vchan_conf *conf) { - struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private; - struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - union dpi_instr_hdr_s *header = &dpi_conf->hdr; - uint16_t max_desc; - uint32_t size; - int i; - - RTE_SET_USED(conf_sz); - - if (dpivf->flag & CNXK_DPI_DEV_START) - return 0; - header->cn10k.pt = DPI_HDR_PT_ZBW_CA; switch (conf->direction) { @@ -303,6 +231,29 @@ cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, header->cn10k.fport = conf->dst_port.pcie.coreid; header->cn10k.pvfe = 0; }; +} + +static int +cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, + const struct rte_dma_vchan_conf *conf, uint32_t conf_sz) +{ + struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private; + struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; + union cnxk_dpi_instr_cmd *header; + uint16_t max_desc; + uint32_t size; + int i; + + RTE_SET_USED(conf_sz); + + header = (union cnxk_dpi_instr_cmd *)&dpi_conf->cmd.u; + if (dpivf->flag & CNXK_DPI_DEV_START) + return 0; + + if (dpivf->is_cn10k) + cn10k_dmadev_setup_hdr(header, conf); + else + cn9k_dmadev_setup_hdr(header, conf); /* Free up descriptor memory before allocating. */ cnxk_dmadev_vchan_free(dpivf, vchan); @@ -329,6 +280,7 @@ cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, plt_err("Failed to allocate for descriptor memory"); return -ENOMEM; } + dpi_conf->c_desc.compl_ptr[i]->cdata = DPI_REQ_CDATA; } @@ -374,6 +326,11 @@ static int cnxk_dmadev_stop(struct rte_dma_dev *dev) { struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private; + uint64_t reg; + + reg = plt_read64(dpivf->rdpi.rbase + DPI_VDMA_SADDR); + while (!(reg & BIT_ULL(63))) + reg = plt_read64(dpivf->rdpi.rbase + DPI_VDMA_SADDR); roc_dpi_disable(&dpivf->rdpi); dpivf->flag &= ~CNXK_DPI_DEV_START; @@ -396,332 +353,6 @@ cnxk_dmadev_close(struct rte_dma_dev *dev) return 0; } -static inline int -__dpi_queue_write(struct cnxk_dpi_vf_s *dpi, uint64_t *cmds, int cmd_count) -{ - uint64_t *ptr = dpi->chunk_base; - - if ((cmd_count < DPI_MIN_CMD_SIZE) || (cmd_count > DPI_MAX_CMD_SIZE) || cmds == NULL) - return -EINVAL; - - /* - * Normally there is plenty of room in the current buffer for the - * command - */ - if (dpi->chunk_head + cmd_count < dpi->chunk_size_m1) { - ptr += dpi->chunk_head; - dpi->chunk_head += cmd_count; - while (cmd_count--) - *ptr++ = *cmds++; - } else { - uint64_t *new_buff = NULL; - int count; - - if (rte_mempool_get(dpi->chunk_pool, (void **)&new_buff) < 0) { - plt_dpi_dbg("Failed to alloc next buffer from NPA"); - return -ENOMEM; - } - - /* - * Figure out how many cmd words will fit in this buffer. - * One location will be needed for the next buffer pointer. - */ - count = dpi->chunk_size_m1 - dpi->chunk_head; - ptr += dpi->chunk_head; - cmd_count -= count; - while (count--) - *ptr++ = *cmds++; - - /* - * chunk next ptr is 2 DWORDS - * second DWORD is reserved. - */ - *ptr++ = (uint64_t)new_buff; - *ptr = 0; - - /* - * The current buffer is full and has a link to the next - * buffers. Time to write the rest of the commands into the new - * buffer. - */ - dpi->chunk_base = new_buff; - dpi->chunk_head = cmd_count; - ptr = new_buff; - while (cmd_count--) - *ptr++ = *cmds++; - - /* queue index may be greater than pool size */ - if (dpi->chunk_head == dpi->chunk_size_m1) { - if (rte_mempool_get(dpi->chunk_pool, (void **)&new_buff) < 0) { - plt_dpi_dbg("Failed to alloc next buffer from NPA"); - return -ENOMEM; - } - /* Write next buffer address */ - *ptr = (uint64_t)new_buff; - dpi->chunk_base = new_buff; - dpi->chunk_head = 0; - } - } - - return 0; -} - -static int -cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, uint32_t length, - uint64_t flags) -{ - struct cnxk_dpi_vf_s *dpivf = dev_private; - struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - union dpi_instr_hdr_s *header = &dpi_conf->hdr; - struct cnxk_dpi_compl_s *comp_ptr; - uint64_t cmd[DPI_MAX_CMD_SIZE]; - rte_iova_t fptr, lptr; - int num_words = 0; - int rc; - - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; - header->cn9k.ptr = (uint64_t)comp_ptr; - STRM_INC(dpi_conf->c_desc, tail); - - if (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) { - STRM_DEC(dpi_conf->c_desc, tail); - return -ENOSPC; - } - - header->cn9k.nfst = 1; - header->cn9k.nlst = 1; - - /* - * For inbound case, src pointers are last pointers. - * For all other cases, src pointers are first pointers. - */ - if (header->cn9k.xtype == DPI_XTYPE_INBOUND) { - fptr = dst; - lptr = src; - } else { - fptr = src; - lptr = dst; - } - - cmd[0] = header->u[0]; - cmd[1] = header->u[1]; - cmd[2] = header->u[2]; - /* word3 is always 0 */ - num_words += 4; - cmd[num_words++] = length; - cmd[num_words++] = fptr; - cmd[num_words++] = length; - cmd[num_words++] = lptr; - - rc = __dpi_queue_write(dpivf, cmd, num_words); - if (unlikely(rc)) { - STRM_DEC(dpi_conf->c_desc, tail); - return rc; - } - - if (flags & RTE_DMA_OP_FLAG_SUBMIT) { - rte_wmb(); - plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL); - dpi_conf->stats.submitted++; - } else { - dpi_conf->pnum_words += num_words; - dpi_conf->pending++; - } - - return dpi_conf->desc_idx++; -} - -static int -cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src, - const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, uint64_t flags) -{ - struct cnxk_dpi_vf_s *dpivf = dev_private; - struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - union dpi_instr_hdr_s *header = &dpi_conf->hdr; - const struct rte_dma_sge *fptr, *lptr; - struct cnxk_dpi_compl_s *comp_ptr; - uint64_t cmd[DPI_MAX_CMD_SIZE]; - int num_words = 0; - int i, rc; - - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; - header->cn9k.ptr = (uint64_t)comp_ptr; - STRM_INC(dpi_conf->c_desc, tail); - - if (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) { - STRM_DEC(dpi_conf->c_desc, tail); - return -ENOSPC; - } - - /* - * For inbound case, src pointers are last pointers. - * For all other cases, src pointers are first pointers. - */ - if (header->cn9k.xtype == DPI_XTYPE_INBOUND) { - header->cn9k.nfst = nb_dst & DPI_MAX_POINTER; - header->cn9k.nlst = nb_src & DPI_MAX_POINTER; - fptr = &dst[0]; - lptr = &src[0]; - } else { - header->cn9k.nfst = nb_src & DPI_MAX_POINTER; - header->cn9k.nlst = nb_dst & DPI_MAX_POINTER; - fptr = &src[0]; - lptr = &dst[0]; - } - - cmd[0] = header->u[0]; - cmd[1] = header->u[1]; - cmd[2] = header->u[2]; - num_words += 4; - for (i = 0; i < header->cn9k.nfst; i++) { - cmd[num_words++] = (uint64_t)fptr->length; - cmd[num_words++] = fptr->addr; - fptr++; - } - - for (i = 0; i < header->cn9k.nlst; i++) { - cmd[num_words++] = (uint64_t)lptr->length; - cmd[num_words++] = lptr->addr; - lptr++; - } - - rc = __dpi_queue_write(dpivf, cmd, num_words); - if (unlikely(rc)) { - STRM_DEC(dpi_conf->c_desc, tail); - return rc; - } - - if (flags & RTE_DMA_OP_FLAG_SUBMIT) { - rte_wmb(); - plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL); - dpi_conf->stats.submitted++; - } else { - dpi_conf->pnum_words += num_words; - dpi_conf->pending++; - } - - return dpi_conf->desc_idx++; -} - -static int -cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, - uint32_t length, uint64_t flags) -{ - struct cnxk_dpi_vf_s *dpivf = dev_private; - struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - union dpi_instr_hdr_s *header = &dpi_conf->hdr; - struct cnxk_dpi_compl_s *comp_ptr; - uint64_t cmd[DPI_MAX_CMD_SIZE]; - rte_iova_t fptr, lptr; - int num_words = 0; - int rc; - - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; - header->cn10k.ptr = (uint64_t)comp_ptr; - STRM_INC(dpi_conf->c_desc, tail); - - if (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) { - STRM_DEC(dpi_conf->c_desc, tail); - return -ENOSPC; - } - - header->cn10k.nfst = 1; - header->cn10k.nlst = 1; - - fptr = src; - lptr = dst; - - cmd[0] = header->u[0]; - cmd[1] = header->u[1]; - cmd[2] = header->u[2]; - /* word3 is always 0 */ - num_words += 4; - cmd[num_words++] = length; - cmd[num_words++] = fptr; - cmd[num_words++] = length; - cmd[num_words++] = lptr; - - rc = __dpi_queue_write(dpivf, cmd, num_words); - if (unlikely(rc)) { - STRM_DEC(dpi_conf->c_desc, tail); - return rc; - } - - if (flags & RTE_DMA_OP_FLAG_SUBMIT) { - rte_wmb(); - plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL); - dpi_conf->stats.submitted++; - } else { - dpi_conf->pnum_words += num_words; - dpi_conf->pending++; - } - - return dpi_conf->desc_idx++; -} - -static int -cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src, - const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, - uint64_t flags) -{ - struct cnxk_dpi_vf_s *dpivf = dev_private; - struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - union dpi_instr_hdr_s *header = &dpi_conf->hdr; - const struct rte_dma_sge *fptr, *lptr; - struct cnxk_dpi_compl_s *comp_ptr; - uint64_t cmd[DPI_MAX_CMD_SIZE]; - int num_words = 0; - int i, rc; - - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; - header->cn10k.ptr = (uint64_t)comp_ptr; - STRM_INC(dpi_conf->c_desc, tail); - - if (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) { - STRM_DEC(dpi_conf->c_desc, tail); - return -ENOSPC; - } - - header->cn10k.nfst = nb_src & DPI_MAX_POINTER; - header->cn10k.nlst = nb_dst & DPI_MAX_POINTER; - fptr = &src[0]; - lptr = &dst[0]; - - cmd[0] = header->u[0]; - cmd[1] = header->u[1]; - cmd[2] = header->u[2]; - num_words += 4; - - for (i = 0; i < header->cn10k.nfst; i++) { - cmd[num_words++] = (uint64_t)fptr->length; - cmd[num_words++] = fptr->addr; - fptr++; - } - - for (i = 0; i < header->cn10k.nlst; i++) { - cmd[num_words++] = (uint64_t)lptr->length; - cmd[num_words++] = lptr->addr; - lptr++; - } - - rc = __dpi_queue_write(dpivf, cmd, num_words); - if (unlikely(rc)) { - STRM_DEC(dpi_conf->c_desc, tail); - return rc; - } - - if (flags & RTE_DMA_OP_FLAG_SUBMIT) { - rte_wmb(); - plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL); - dpi_conf->stats.submitted++; - } else { - dpi_conf->pnum_words += num_words; - dpi_conf->pending++; - } - - return dpi_conf->desc_idx++; -} - static uint16_t cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls, uint16_t *last_idx, bool *has_error) @@ -880,17 +511,6 @@ cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan) return 0; } -static const struct rte_dma_dev_ops cn10k_dmadev_ops = { - .dev_close = cnxk_dmadev_close, - .dev_configure = cnxk_dmadev_configure, - .dev_info_get = cnxk_dmadev_info_get, - .dev_start = cnxk_dmadev_start, - .dev_stop = cnxk_dmadev_stop, - .stats_get = cnxk_stats_get, - .stats_reset = cnxk_stats_reset, - .vchan_setup = cn10k_dmadev_vchan_setup, -}; - static const struct rte_dma_dev_ops cnxk_dmadev_ops = { .dev_close = cnxk_dmadev_close, .dev_configure = cnxk_dmadev_configure, @@ -941,12 +561,8 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de dmadev->fp_obj->completed_status = cnxk_dmadev_completed_status; dmadev->fp_obj->burst_capacity = cnxk_damdev_burst_capacity; - if (pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CN10KA || - pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CN10KAS || - pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CNF10KA || - pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CNF10KB || - pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CN10KB) { - dmadev->dev_ops = &cn10k_dmadev_ops; + if (roc_model_is_cn10k()) { + dpivf->is_cn10k = true; dmadev->fp_obj->copy = cn10k_dmadev_copy; dmadev->fp_obj->copy_sg = cn10k_dmadev_copy_sg; } diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h index 65f12d844d..c9032de779 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.h +++ b/drivers/dma/cnxk/cnxk_dmadev.h @@ -4,14 +4,27 @@ #ifndef CNXK_DMADEV_H #define CNXK_DMADEV_H +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include -#define DPI_MAX_POINTER 15 -#define STRM_INC(s, var) ((s).var = ((s).var + 1) & (s).max_cnt) -#define STRM_DEC(s, var) ((s).var = ((s).var - 1) == -1 ? (s).max_cnt : ((s).var - 1)) -#define DPI_MAX_DESC 2048 -#define DPI_MIN_DESC 2 -#define MAX_VCHANS_PER_QUEUE 4 +#define DPI_MAX_POINTER 15 +#define STRM_INC(s, var) ((s).var = ((s).var + 1) & (s).max_cnt) +#define STRM_DEC(s, var) ((s).var = ((s).var - 1) == -1 ? (s).max_cnt : ((s).var - 1)) +#define DPI_MAX_DESC 2048 +#define DPI_MIN_DESC 2 +#define MAX_VCHANS_PER_QUEUE 4 #define DPI_CMD_QUEUE_BUF_SIZE 4096 #define DPI_CMD_QUEUE_BUFS 1024 @@ -21,8 +34,51 @@ #define DPI_REQ_CDATA 0xFF #define CNXK_DMA_POOL_MAX_CACHE_SZ (16) -#define CNXK_DPI_DEV_CONFIG (1ULL << 0) -#define CNXK_DPI_DEV_START (1ULL << 1) +#define CNXK_DPI_DEV_CONFIG (1ULL << 0) +#define CNXK_DPI_DEV_START (1ULL << 1) + +union cnxk_dpi_instr_cmd { + uint64_t u; + struct cn9k_dpi_instr_cmd { + uint64_t aura : 20; + uint64_t func : 16; + uint64_t pt : 2; + uint64_t reserved_102 : 1; + uint64_t pvfe : 1; + uint64_t fl : 1; + uint64_t ii : 1; + uint64_t fi : 1; + uint64_t ca : 1; + uint64_t csel : 1; + uint64_t reserved_109_111 : 3; + uint64_t xtype : 2; + uint64_t reserved_114_119 : 6; + uint64_t fport : 2; + uint64_t reserved_122_123 : 2; + uint64_t lport : 2; + uint64_t reserved_126_127 : 2; + /* Word 1 - End */ + } cn9k; + + struct cn10k_dpi_instr_cmd { + uint64_t nfst : 4; + uint64_t reserved_4_5 : 2; + uint64_t nlst : 4; + uint64_t reserved_10_11 : 2; + uint64_t pvfe : 1; + uint64_t reserved_13 : 1; + uint64_t func : 16; + uint64_t aura : 20; + uint64_t xtype : 2; + uint64_t reserved_52_53 : 2; + uint64_t pt : 2; + uint64_t fport : 2; + uint64_t reserved_58_59 : 2; + uint64_t lport : 2; + uint64_t reserved_62_63 : 2; + /* Word 0 - End */ + } cn10k; +}; struct cnxk_dpi_compl_s { uint64_t cdata; @@ -37,26 +93,39 @@ struct cnxk_dpi_cdesc_data_s { }; struct cnxk_dpi_conf { - union dpi_instr_hdr_s hdr; + union cnxk_dpi_instr_cmd cmd; struct cnxk_dpi_cdesc_data_s c_desc; uint16_t pnum_words; uint16_t pending; uint16_t desc_idx; - uint16_t pad0; struct rte_dma_stats stats; uint64_t completed_offset; }; struct cnxk_dpi_vf_s { + /* Fast path*/ uint64_t *chunk_base; uint16_t chunk_head; uint16_t chunk_size_m1; struct rte_mempool *chunk_pool; struct cnxk_dpi_conf conf[MAX_VCHANS_PER_QUEUE]; + /* Slow path */ struct roc_dpi rdpi; uint32_t aura; uint16_t num_vchans; uint16_t flag; + uint8_t is_cn10k; } __plt_cache_aligned; +int cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, + uint32_t length, uint64_t flags); +int cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src, + const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, + uint64_t flags); +int cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, + uint32_t length, uint64_t flags); +int cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src, + const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, + uint64_t flags); + #endif diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c new file mode 100644 index 0000000000..db1e57bf51 --- /dev/null +++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c @@ -0,0 +1,455 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 2021 Marvell International Ltd. + */ + +#include + +#include "cnxk_dmadev.h" + +#define DMA_DW_PER_SINGLE_CMD 8 +#define DMA_HDR_LEN 4 +#define DMA_CMD_LEN(src, dst) (DMA_HDR_LEN + (src << 1) + (dst << 1)) + +static __plt_always_inline void +__dpi_cpy_scalar(uint64_t *src, uint64_t *dst, uint8_t n) +{ + uint8_t i; + + for (i = 0; i < n; i++) + dst[i] = src[i]; +} + +static __plt_always_inline void +__dpi_cpy_scalar_sg(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n) +{ + uint8_t i; + + for (i = 0; i < n; i++) { + *dst++ = src[i].length; + *dst++ = src[i].addr; + } +} + +static __plt_always_inline uint8_t +__dpi_cpy_scalar_sg_lmt(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n, uint16_t lmt) +{ + uint8_t i; + + for (i = 0; i < n && lmt; i++) { + *dst++ = src[i].length; + *dst++ = src[i].addr; + lmt -= 2; + } + + return i; +} + +#if defined(RTE_ARCH_ARM64) +static __plt_always_inline void +__dpi_cpy_vector(uint64_t *src, uint64_t *dst, uint8_t n) +{ + uint64x2_t vec; + uint8_t i; + + for (i = 0; i < n; i += 2) { + vec = vld1q_u64((const uint64_t *)&src[i]); + vst1q_u64(&dst[i], vec); + } +} + +static __plt_always_inline void +__dpi_cpy_vector_sg(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n) +{ + uint64x2_t mask = {0xFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL}; + uint64x2_t vec; + uint8_t i; + + for (i = 0; i < n; i++) { + vec = vld1q_u64((const uint64_t *)&src[i]); + vec = vextq_u64(vec, vec, 1); + vec = vandq_u64(vec, mask); + vst1q_u64(dst, vec); + dst += 2; + } +} + +static __plt_always_inline uint8_t +__dpi_cpy_vector_sg_lmt(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n, uint16_t lmt) +{ + uint64x2_t mask = {0xFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL}; + uint64x2_t vec; + uint8_t i; + + for (i = 0; i < n && lmt; i++) { + vec = vld1q_u64((const uint64_t *)&src[i]); + vec = vextq_u64(vec, vec, 1); + vec = vandq_u64(vec, mask); + vst1q_u64(dst, vec); + dst += 2; + lmt -= 2; + } + + return i; +} +#endif + +static __plt_always_inline void +__dpi_cpy(uint64_t *src, uint64_t *dst, uint8_t n) +{ +#if defined(RTE_ARCH_ARM64) + __dpi_cpy_vector(src, dst, n); +#else + __dpi_cpy_scalar(src, dst, n); +#endif +} + +static __plt_always_inline void +__dpi_cpy_sg(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n) +{ +#if defined(RTE_ARCH_ARM64) + __dpi_cpy_vector_sg(src, dst, n); +#else + __dpi_cpy_scalar_sg(src, dst, n); +#endif +} + +static __plt_always_inline uint8_t +__dpi_cpy_sg_lmt(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n, uint16_t lmt) +{ +#if defined(RTE_ARCH_ARM64) + return __dpi_cpy_vector_sg_lmt(src, dst, n, lmt); +#else + return __dpi_cpy_scalar_sg_lmt(src, dst, n, lmt); +#endif +} + +static __plt_always_inline int +__dpi_queue_write_single(struct cnxk_dpi_vf_s *dpi, uint64_t *cmd) +{ + uint64_t *ptr = dpi->chunk_base; + + /* + * Normally there is plenty of room in the current buffer for the + * command + */ + if (dpi->chunk_head + DMA_DW_PER_SINGLE_CMD < dpi->chunk_size_m1) { + ptr += dpi->chunk_head; + + __dpi_cpy_scalar(cmd, ptr, DMA_DW_PER_SINGLE_CMD); + dpi->chunk_head += DMA_DW_PER_SINGLE_CMD; + } else { + uint64_t *new_buff = NULL; + int count; + + if (rte_mempool_get(dpi->chunk_pool, (void **)&new_buff) < 0) { + plt_dpi_dbg("Failed to alloc next buffer from NPA"); + return -ENOSPC; + } + + /* + * Figure out how many cmd words will fit in this buffer. + * One location will be needed for the next buffer pointer. + */ + count = dpi->chunk_size_m1 - dpi->chunk_head; + ptr += dpi->chunk_head; + + __dpi_cpy_scalar(cmd, ptr, count); + + ptr += count; + *ptr = (uint64_t)new_buff; + ptr = new_buff; + + __dpi_cpy_scalar(cmd + count, ptr, DMA_DW_PER_SINGLE_CMD - count); + + /* + * The current buffer is full and has a link to the next + * buffers. Time to write the rest of the commands into + * the new buffer. + */ + dpi->chunk_base = new_buff; + dpi->chunk_head = DMA_DW_PER_SINGLE_CMD - count; + } + + return 0; +} + +static __plt_always_inline int +__dpi_queue_write_sg(struct cnxk_dpi_vf_s *dpi, uint64_t *hdr, const struct rte_dma_sge *src, + const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst) +{ + uint8_t cmd_len = DMA_CMD_LEN(nb_src, nb_dst); + uint64_t *ptr = dpi->chunk_base; + + /* + * Normally there is plenty of room in the current buffer for the + * command + */ + if (dpi->chunk_head + cmd_len < dpi->chunk_size_m1) { + ptr += dpi->chunk_head; + + __dpi_cpy(hdr, ptr, DMA_HDR_LEN); + ptr += DMA_HDR_LEN; + __dpi_cpy_sg(src, ptr, nb_src); + ptr += (nb_src << 1); + __dpi_cpy_sg(dst, ptr, nb_dst); + + dpi->chunk_head += cmd_len; + } else { + uint64_t *new_buff = NULL, *buf; + uint16_t count; + + if (rte_mempool_get(dpi->chunk_pool, (void **)&new_buff) < 0) { + plt_dpi_dbg("Failed to alloc next buffer from NPA"); + return -ENOSPC; + } + + /* + * Figure out how many cmd words will fit in this buffer. + * One location will be needed for the next buffer pointer. + */ + count = dpi->chunk_size_m1 - dpi->chunk_head; + ptr += dpi->chunk_head; + buf = new_buff; + if (count <= 4) { + __dpi_cpy(hdr, ptr, count); + ptr += count; + __dpi_cpy(&hdr[count], buf, 4); + buf += (4 - count); + } else { + uint8_t i; + + __dpi_cpy(hdr, ptr, 4); + ptr += 4; + count -= 4; + + i = __dpi_cpy_sg_lmt(src, ptr, nb_src, count); + src += i; + nb_src -= i; + count -= (i << 1); + ptr += (i << 1); + + i = __dpi_cpy_sg_lmt(dst, ptr, nb_dst, count); + dst += i; + nb_dst -= i; + ptr += (i << 1); + } + *ptr = (uint64_t)new_buff; + + __dpi_cpy_sg(src, buf, nb_src); + buf += (nb_src << 1); + + __dpi_cpy_sg(dst, buf, nb_dst); + buf += (nb_dst << 1); + + /* + * The current buffer is full and has a link to the next + * buffers. Time to write the rest of the commands into + * the new buffer. + */ + dpi->chunk_base = new_buff; + dpi->chunk_head = buf - new_buff; + } + + return 0; +} + +int +cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, uint32_t length, + uint64_t flags) +{ + struct cnxk_dpi_vf_s *dpivf = dev_private; + struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; + uint64_t cmd[DMA_DW_PER_SINGLE_CMD]; + struct cnxk_dpi_compl_s *comp_ptr; + int rc; + + if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == + dpi_conf->c_desc.head)) + return -ENOSPC; + + comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + STRM_INC(dpi_conf->c_desc, tail); + + cmd[0] = (1UL << 54) | (1UL << 48); + cmd[1] = dpi_conf->cmd.u; + cmd[2] = (uint64_t)comp_ptr; + cmd[4] = length; + cmd[6] = length; + + /* + * For inbound case, src pointers are last pointers. + * For all other cases, src pointers are first pointers. + */ + if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) { + cmd[5] = dst; + cmd[7] = src; + } else { + cmd[5] = src; + cmd[7] = dst; + } + + rc = __dpi_queue_write_single(dpivf, cmd); + if (unlikely(rc)) { + STRM_DEC(dpi_conf->c_desc, tail); + return rc; + } + + if (flags & RTE_DMA_OP_FLAG_SUBMIT) { + rte_wmb(); + plt_write64(dpi_conf->pnum_words + DMA_DW_PER_SINGLE_CMD, + dpivf->rdpi.rbase + DPI_VDMA_DBELL); + dpi_conf->stats.submitted += dpi_conf->pending + 1; + dpi_conf->pnum_words = 0; + dpi_conf->pending = 0; + } else { + dpi_conf->pnum_words += DMA_DW_PER_SINGLE_CMD; + dpi_conf->pending++; + } + + return dpi_conf->desc_idx++; +} + +int +cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src, + const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, uint64_t flags) +{ + struct cnxk_dpi_vf_s *dpivf = dev_private; + struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; + const struct rte_dma_sge *fptr, *lptr; + struct cnxk_dpi_compl_s *comp_ptr; + uint64_t hdr[4]; + int rc; + + if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == + dpi_conf->c_desc.head)) + return -ENOSPC; + + comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + STRM_INC(dpi_conf->c_desc, tail); + + hdr[1] = dpi_conf->cmd.u; + hdr[2] = (uint64_t)comp_ptr; + + /* + * For inbound case, src pointers are last pointers. + * For all other cases, src pointers are first pointers. + */ + if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) { + fptr = dst; + lptr = src; + RTE_SWAP(nb_src, nb_dst); + } else { + fptr = src; + lptr = dst; + } + hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48; + + rc = __dpi_queue_write_sg(dpivf, hdr, fptr, lptr, nb_src, nb_dst); + if (unlikely(rc)) { + STRM_DEC(dpi_conf->c_desc, tail); + return rc; + } + + if (flags & RTE_DMA_OP_FLAG_SUBMIT) { + rte_wmb(); + plt_write64(dpi_conf->pnum_words + DMA_CMD_LEN(nb_src, nb_dst), + dpivf->rdpi.rbase + DPI_VDMA_DBELL); + dpi_conf->stats.submitted += dpi_conf->pending + 1; + dpi_conf->pnum_words = 0; + dpi_conf->pending = 0; + } else { + dpi_conf->pnum_words += DMA_CMD_LEN(nb_src, nb_dst); + dpi_conf->pending++; + } + + return dpi_conf->desc_idx++; +} + +int +cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, + uint32_t length, uint64_t flags) +{ + struct cnxk_dpi_vf_s *dpivf = dev_private; + struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; + uint64_t cmd[DMA_DW_PER_SINGLE_CMD]; + struct cnxk_dpi_compl_s *comp_ptr; + int rc; + + if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == + dpi_conf->c_desc.head)) + return -ENOSPC; + + comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + STRM_INC(dpi_conf->c_desc, tail); + + cmd[0] = dpi_conf->cmd.u | (1U << 6) | 1U; + cmd[1] = (uint64_t)comp_ptr; + cmd[2] = 0; + cmd[4] = length; + cmd[5] = src; + cmd[6] = length; + cmd[7] = dst; + + rc = __dpi_queue_write_single(dpivf, cmd); + if (unlikely(rc)) { + STRM_DEC(dpi_conf->c_desc, tail); + return rc; + } + + if (flags & RTE_DMA_OP_FLAG_SUBMIT) { + rte_wmb(); + plt_write64(dpi_conf->pnum_words + DMA_DW_PER_SINGLE_CMD, + dpivf->rdpi.rbase + DPI_VDMA_DBELL); + dpi_conf->stats.submitted += dpi_conf->pending + 1; + dpi_conf->pnum_words = 0; + dpi_conf->pending = 0; + } else { + dpi_conf->pnum_words += 8; + dpi_conf->pending++; + } + + return dpi_conf->desc_idx++; +} + +int +cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src, + const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, + uint64_t flags) +{ + struct cnxk_dpi_vf_s *dpivf = dev_private; + struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; + struct cnxk_dpi_compl_s *comp_ptr; + uint64_t hdr[4]; + int rc; + + if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == + dpi_conf->c_desc.head)) + return -ENOSPC; + + comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + STRM_INC(dpi_conf->c_desc, tail); + + hdr[0] = dpi_conf->cmd.u | (nb_dst << 6) | nb_src; + hdr[1] = (uint64_t)comp_ptr; + hdr[2] = 0; + + rc = __dpi_queue_write_sg(dpivf, hdr, src, dst, nb_src, nb_dst); + if (unlikely(rc)) { + STRM_DEC(dpi_conf->c_desc, tail); + return rc; + } + + if (flags & RTE_DMA_OP_FLAG_SUBMIT) { + rte_wmb(); + plt_write64(dpi_conf->pnum_words + DMA_CMD_LEN(nb_src, nb_dst), + dpivf->rdpi.rbase + DPI_VDMA_DBELL); + dpi_conf->stats.submitted += dpi_conf->pending + 1; + dpi_conf->pnum_words = 0; + dpi_conf->pending = 0; + } else { + dpi_conf->pnum_words += DMA_CMD_LEN(nb_src, nb_dst); + dpi_conf->pending++; + } + + return dpi_conf->desc_idx++; +} diff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build index b868fb14cb..e557349368 100644 --- a/drivers/dma/cnxk/meson.build +++ b/drivers/dma/cnxk/meson.build @@ -1,6 +1,13 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(C) 2021 Marvell International Ltd. +error_cflags = ['-Wno-uninitialized'] +foreach flag: error_cflags + if cc.has_argument(flag) + cflags += flag + endif +endforeach + deps += ['bus_pci', 'common_cnxk', 'dmadev'] -sources = files('cnxk_dmadev.c') +sources = files('cnxk_dmadev.c', 'cnxk_dmadev_fp.c') require_iova_in_mbuf = false