From patchwork Sat Sep 9 16:54:33 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pavan Nikhilesh Bhagavatula X-Patchwork-Id: 131322 X-Patchwork-Delegate: jerinj@marvell.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id E2AE242556; Sat, 9 Sep 2023 18:54:51 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 15CE2402E6; Sat, 9 Sep 2023 18:54:46 +0200 (CEST) Received: from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com [67.231.156.173]) by mails.dpdk.org (Postfix) with ESMTP id 86053402D8 for ; Sat, 9 Sep 2023 18:54:44 +0200 (CEST) Received: from pps.filterd (m0045851.ppops.net [127.0.0.1]) by mx0b-0016f401.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id 389EMSCa018927 for ; Sat, 9 Sep 2023 09:54:43 -0700 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-transfer-encoding : content-type; s=pfpt0220; bh=xAzB+u5c4ynyLHtNZI91oWSSy2326OKlMKC7EspyKpQ=; b=abBPoDX4QMBwtXcQQv6ggszY0yUvvSZnM3riRMvOFogH2PazLnSYHLo6uSwrhcZzlBho ygQKpQWUD1vZDbt796HeDXeaWI2xcNVHG33At2ZM3dufYQNteAYHjgClrdcpKn1Y2U3U aEOmZqiGL3JQd2G0x+HnldoPQ0DdF51lF+9+MXIdeTasaM2yWc+5aOQ1w4t46yWwIUJS hwJDr+zyhndpull+rCRiOESbzwe73Rd7Dm8Q2Oj6nO3sMvKvaNhzsrDpJCpX9yrw4w3u 8dI7upLzRBWB6ldLXa312JOHkJ1sKi/dxtyi18hcW14ekHigRJnzcD05c1gO6KPiUh/r +g== Received: from dc5-exch02.marvell.com ([199.233.59.182]) by mx0b-0016f401.pphosted.com (PPS) with ESMTPS id 3t0r7kgjqa-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT) for ; Sat, 09 Sep 2023 09:54:43 -0700 Received: from DC5-EXCH01.marvell.com (10.69.176.38) by DC5-EXCH02.marvell.com (10.69.176.39) with Microsoft SMTP Server (TLS) id 15.0.1497.48; Sat, 9 Sep 2023 09:54:41 -0700 Received: from maili.marvell.com (10.69.176.80) by DC5-EXCH01.marvell.com (10.69.176.38) with Microsoft SMTP Server id 15.0.1497.48 via Frontend Transport; Sat, 9 Sep 2023 09:54:41 -0700 Received: from MININT-80QBFE8.corp.innovium.com (MININT-80QBFE8.marvell.com [10.28.164.106]) by maili.marvell.com (Postfix) with ESMTP id 986103F7083; Sat, 9 Sep 2023 09:54:39 -0700 (PDT) From: To: , Vamsi Attunuru CC: , Pavan Nikhilesh Subject: [PATCH v8 2/2] dma/cnxk: rewrite DMA fastpath Date: Sat, 9 Sep 2023 22:24:33 +0530 Message-ID: <20230909165433.6692-2-pbhagavatula@marvell.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20230909165433.6692-1-pbhagavatula@marvell.com> References: <20230909163709.4718-1-pbhagavatula@marvell.com> <20230909165433.6692-1-pbhagavatula@marvell.com> MIME-Version: 1.0 X-Proofpoint-GUID: 2s4WRrp_FedJLbilZZyJD5O_wzwPGPcX X-Proofpoint-ORIG-GUID: 2s4WRrp_FedJLbilZZyJD5O_wzwPGPcX X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.267,Aquarius:18.0.957,Hydra:6.0.601,FMLib:17.11.176.26 definitions=2023-09-09_15,2023-09-05_01,2023-05-22_02 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org From: Pavan Nikhilesh Rewrite DMA fastpath to use NEON instructions and reduce number of words read from config. Signed-off-by: Pavan Nikhilesh --- drivers/dma/cnxk/cnxk_dmadev.c | 428 ++--------------------------- drivers/dma/cnxk/cnxk_dmadev.h | 59 +++- drivers/dma/cnxk/cnxk_dmadev_fp.c | 436 ++++++++++++++++++++++++++++++ drivers/dma/cnxk/meson.build | 9 +- 4 files changed, 528 insertions(+), 404 deletions(-) create mode 100644 drivers/dma/cnxk/cnxk_dmadev_fp.c diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c index f58bb92dbc..26680edfde 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.c +++ b/drivers/dma/cnxk/cnxk_dmadev.c @@ -115,19 +115,9 @@ cnxk_dmadev_configure(struct rte_dma_dev *dev, const struct rte_dma_conf *conf, return 0; } -static int -cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, - const struct rte_dma_vchan_conf *conf, uint32_t conf_sz) +static void +cn9k_dmadev_setup_hdr(union cnxk_dpi_instr_cmd *header, const struct rte_dma_vchan_conf *conf) { - struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private; - struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - union dpi_instr_hdr_s *header = &dpi_conf->hdr; - uint16_t max_desc; - uint32_t size; - int i; - - RTE_SET_USED(conf_sz); - header->cn9k.pt = DPI_HDR_PT_ZBW_CA; switch (conf->direction) { @@ -163,54 +153,11 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, header->cn9k.fport = conf->dst_port.pcie.coreid; header->cn9k.pvfe = 0; }; - - /* Free up descriptor memory before allocating. */ - cnxk_dmadev_vchan_free(dpivf, vchan); - - max_desc = conf->nb_desc; - if (!rte_is_power_of_2(max_desc)) - max_desc = rte_align32pow2(max_desc); - - if (max_desc > CNXK_DPI_MAX_DESC) - max_desc = CNXK_DPI_MAX_DESC; - - size = (max_desc * sizeof(struct cnxk_dpi_compl_s *)); - dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0); - - if (dpi_conf->c_desc.compl_ptr == NULL) { - plt_err("Failed to allocate for comp_data"); - return -ENOMEM; - } - - for (i = 0; i < max_desc; i++) { - dpi_conf->c_desc.compl_ptr[i] = - rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0); - if (!dpi_conf->c_desc.compl_ptr[i]) { - plt_err("Failed to allocate for descriptor memory"); - return -ENOMEM; - } - - dpi_conf->c_desc.compl_ptr[i]->cdata = CNXK_DPI_REQ_CDATA; - } - - dpi_conf->c_desc.max_cnt = (max_desc - 1); - - return 0; } -static int -cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, - const struct rte_dma_vchan_conf *conf, uint32_t conf_sz) +static void +cn10k_dmadev_setup_hdr(union cnxk_dpi_instr_cmd *header, const struct rte_dma_vchan_conf *conf) { - struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private; - struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - union dpi_instr_hdr_s *header = &dpi_conf->hdr; - uint16_t max_desc; - uint32_t size; - int i; - - RTE_SET_USED(conf_sz); - header->cn10k.pt = DPI_HDR_PT_ZBW_CA; switch (conf->direction) { @@ -246,6 +193,27 @@ cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, header->cn10k.fport = conf->dst_port.pcie.coreid; header->cn10k.pvfe = 0; }; +} + +static int +cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, + const struct rte_dma_vchan_conf *conf, uint32_t conf_sz) +{ + struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private; + struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; + union cnxk_dpi_instr_cmd *header; + uint16_t max_desc; + uint32_t size; + int i; + + RTE_SET_USED(conf_sz); + + header = (union cnxk_dpi_instr_cmd *)&dpi_conf->cmd.u; + + if (dpivf->is_cn10k) + cn10k_dmadev_setup_hdr(header, conf); + else + cn9k_dmadev_setup_hdr(header, conf); /* Free up descriptor memory before allocating. */ cnxk_dmadev_vchan_free(dpivf, vchan); @@ -371,333 +339,6 @@ cnxk_dmadev_close(struct rte_dma_dev *dev) return 0; } -static inline int -__dpi_queue_write(struct cnxk_dpi_vf_s *dpi, uint64_t *cmds, int cmd_count) -{ - uint64_t *ptr = dpi->chunk_base; - - if ((cmd_count < DPI_MIN_CMD_SIZE) || (cmd_count > DPI_MAX_CMD_SIZE) || cmds == NULL) - return -EINVAL; - - /* - * Normally there is plenty of room in the current buffer for the - * command - */ - if (dpi->chunk_head + cmd_count < dpi->chunk_size_m1) { - ptr += dpi->chunk_head; - dpi->chunk_head += cmd_count; - while (cmd_count--) - *ptr++ = *cmds++; - } else { - uint64_t *new_buff = NULL; - int count; - - if (rte_mempool_get(dpi->chunk_pool, (void **)&new_buff) < 0) { - plt_dpi_dbg("Failed to alloc next buffer from NPA"); - return -ENOMEM; - } - - /* - * Figure out how many cmd words will fit in this buffer. - * One location will be needed for the next buffer pointer. - */ - count = dpi->chunk_size_m1 - dpi->chunk_head; - ptr += dpi->chunk_head; - cmd_count -= count; - while (count--) - *ptr++ = *cmds++; - - /* - * chunk next ptr is 2 DWORDS - * second DWORD is reserved. - */ - *ptr++ = (uint64_t)new_buff; - *ptr = 0; - - /* - * The current buffer is full and has a link to the next - * buffers. Time to write the rest of the commands into the new - * buffer. - */ - dpi->chunk_base = new_buff; - dpi->chunk_head = cmd_count; - ptr = new_buff; - while (cmd_count--) - *ptr++ = *cmds++; - - /* queue index may be greater than pool size */ - if (dpi->chunk_head == dpi->chunk_size_m1) { - if (rte_mempool_get(dpi->chunk_pool, (void **)&new_buff) < 0) { - plt_dpi_dbg("Failed to alloc next buffer from NPA"); - return -ENOMEM; - } - - /* Write next buffer address */ - *ptr = (uint64_t)new_buff; - dpi->chunk_base = new_buff; - dpi->chunk_head = 0; - } - } - - return 0; -} - -static int -cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, uint32_t length, - uint64_t flags) -{ - struct cnxk_dpi_vf_s *dpivf = dev_private; - struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - union dpi_instr_hdr_s *header = &dpi_conf->hdr; - struct cnxk_dpi_compl_s *comp_ptr; - uint64_t cmd[DPI_MAX_CMD_SIZE]; - rte_iova_t fptr, lptr; - int num_words = 0; - int rc; - - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; - header->cn9k.ptr = (uint64_t)comp_ptr; - CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); - - if (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) { - CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail); - return -ENOSPC; - } - - header->cn9k.nfst = 1; - header->cn9k.nlst = 1; - - /* - * For inbound case, src pointers are last pointers. - * For all other cases, src pointers are first pointers. - */ - if (header->cn9k.xtype == DPI_XTYPE_INBOUND) { - fptr = dst; - lptr = src; - } else { - fptr = src; - lptr = dst; - } - - cmd[0] = header->u[0]; - cmd[1] = header->u[1]; - cmd[2] = header->u[2]; - /* word3 is always 0 */ - num_words += 4; - cmd[num_words++] = length; - cmd[num_words++] = fptr; - cmd[num_words++] = length; - cmd[num_words++] = lptr; - - rc = __dpi_queue_write(dpivf, cmd, num_words); - if (unlikely(rc)) { - CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail); - return rc; - } - - if (flags & RTE_DMA_OP_FLAG_SUBMIT) { - rte_wmb(); - plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL); - dpi_conf->stats.submitted++; - } else { - dpi_conf->pnum_words += num_words; - dpi_conf->pending++; - } - - return dpi_conf->desc_idx++; -} - -static int -cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src, - const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, uint64_t flags) -{ - struct cnxk_dpi_vf_s *dpivf = dev_private; - struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - union dpi_instr_hdr_s *header = &dpi_conf->hdr; - const struct rte_dma_sge *fptr, *lptr; - struct cnxk_dpi_compl_s *comp_ptr; - uint64_t cmd[DPI_MAX_CMD_SIZE]; - int num_words = 0; - int i, rc; - - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; - header->cn9k.ptr = (uint64_t)comp_ptr; - CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); - - if (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) { - CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail); - return -ENOSPC; - } - - /* - * For inbound case, src pointers are last pointers. - * For all other cases, src pointers are first pointers. - */ - if (header->cn9k.xtype == DPI_XTYPE_INBOUND) { - header->cn9k.nfst = nb_dst & CNXK_DPI_MAX_POINTER; - header->cn9k.nlst = nb_src & CNXK_DPI_MAX_POINTER; - fptr = &dst[0]; - lptr = &src[0]; - } else { - header->cn9k.nfst = nb_src & CNXK_DPI_MAX_POINTER; - header->cn9k.nlst = nb_dst & CNXK_DPI_MAX_POINTER; - fptr = &src[0]; - lptr = &dst[0]; - } - - cmd[0] = header->u[0]; - cmd[1] = header->u[1]; - cmd[2] = header->u[2]; - num_words += 4; - for (i = 0; i < header->cn9k.nfst; i++) { - cmd[num_words++] = (uint64_t)fptr->length; - cmd[num_words++] = fptr->addr; - fptr++; - } - - for (i = 0; i < header->cn9k.nlst; i++) { - cmd[num_words++] = (uint64_t)lptr->length; - cmd[num_words++] = lptr->addr; - lptr++; - } - - rc = __dpi_queue_write(dpivf, cmd, num_words); - if (unlikely(rc)) { - CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail); - return rc; - } - - if (flags & RTE_DMA_OP_FLAG_SUBMIT) { - rte_wmb(); - plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL); - dpi_conf->stats.submitted++; - } else { - dpi_conf->pnum_words += num_words; - dpi_conf->pending++; - } - - return dpi_conf->desc_idx++; -} - -static int -cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, - uint32_t length, uint64_t flags) -{ - struct cnxk_dpi_vf_s *dpivf = dev_private; - struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - union dpi_instr_hdr_s *header = &dpi_conf->hdr; - struct cnxk_dpi_compl_s *comp_ptr; - uint64_t cmd[DPI_MAX_CMD_SIZE]; - rte_iova_t fptr, lptr; - int num_words = 0; - int rc; - - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; - header->cn10k.ptr = (uint64_t)comp_ptr; - CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); - - if (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) { - CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail); - return -ENOSPC; - } - - header->cn10k.nfst = 1; - header->cn10k.nlst = 1; - - fptr = src; - lptr = dst; - - cmd[0] = header->u[0]; - cmd[1] = header->u[1]; - cmd[2] = header->u[2]; - /* word3 is always 0 */ - num_words += 4; - cmd[num_words++] = length; - cmd[num_words++] = fptr; - cmd[num_words++] = length; - cmd[num_words++] = lptr; - - rc = __dpi_queue_write(dpivf, cmd, num_words); - if (unlikely(rc)) { - CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail); - return rc; - } - - if (flags & RTE_DMA_OP_FLAG_SUBMIT) { - rte_wmb(); - plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL); - dpi_conf->stats.submitted++; - } else { - dpi_conf->pnum_words += num_words; - dpi_conf->pending++; - } - - return dpi_conf->desc_idx++; -} - -static int -cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src, - const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, - uint64_t flags) -{ - struct cnxk_dpi_vf_s *dpivf = dev_private; - struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - union dpi_instr_hdr_s *header = &dpi_conf->hdr; - const struct rte_dma_sge *fptr, *lptr; - struct cnxk_dpi_compl_s *comp_ptr; - uint64_t cmd[DPI_MAX_CMD_SIZE]; - int num_words = 0; - int i, rc; - - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; - header->cn10k.ptr = (uint64_t)comp_ptr; - CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); - - if (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) { - CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail); - return -ENOSPC; - } - - header->cn10k.nfst = nb_src & CNXK_DPI_MAX_POINTER; - header->cn10k.nlst = nb_dst & CNXK_DPI_MAX_POINTER; - fptr = &src[0]; - lptr = &dst[0]; - - cmd[0] = header->u[0]; - cmd[1] = header->u[1]; - cmd[2] = header->u[2]; - num_words += 4; - - for (i = 0; i < header->cn10k.nfst; i++) { - cmd[num_words++] = (uint64_t)fptr->length; - cmd[num_words++] = fptr->addr; - fptr++; - } - - for (i = 0; i < header->cn10k.nlst; i++) { - cmd[num_words++] = (uint64_t)lptr->length; - cmd[num_words++] = lptr->addr; - lptr++; - } - - rc = __dpi_queue_write(dpivf, cmd, num_words); - if (unlikely(rc)) { - CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail); - return rc; - } - - if (flags & RTE_DMA_OP_FLAG_SUBMIT) { - rte_wmb(); - plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL); - dpi_conf->stats.submitted++; - } else { - dpi_conf->pnum_words += num_words; - dpi_conf->pending++; - } - - return dpi_conf->desc_idx++; -} - static uint16_t cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls, uint16_t *last_idx, bool *has_error) @@ -856,17 +497,6 @@ cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan) return 0; } -static const struct rte_dma_dev_ops cn10k_dmadev_ops = { - .dev_close = cnxk_dmadev_close, - .dev_configure = cnxk_dmadev_configure, - .dev_info_get = cnxk_dmadev_info_get, - .dev_start = cnxk_dmadev_start, - .dev_stop = cnxk_dmadev_stop, - .stats_get = cnxk_stats_get, - .stats_reset = cnxk_stats_reset, - .vchan_setup = cn10k_dmadev_vchan_setup, -}; - static const struct rte_dma_dev_ops cnxk_dmadev_ops = { .dev_close = cnxk_dmadev_close, .dev_configure = cnxk_dmadev_configure, @@ -917,12 +547,8 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de dmadev->fp_obj->completed_status = cnxk_dmadev_completed_status; dmadev->fp_obj->burst_capacity = cnxk_damdev_burst_capacity; - if (pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CN10KA || - pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CN10KAS || - pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CNF10KA || - pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CNF10KB || - pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CN10KB) { - dmadev->dev_ops = &cn10k_dmadev_ops; + if (roc_model_is_cn10k()) { + dpivf->is_cn10k = true; dmadev->fp_obj->copy = cn10k_dmadev_copy; dmadev->fp_obj->copy_sg = cn10k_dmadev_copy_sg; } diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h index 72663e44af..350ae73b5c 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.h +++ b/drivers/dma/cnxk/cnxk_dmadev.h @@ -28,6 +28,7 @@ #define CNXK_DPI_MAX_VCHANS_PER_QUEUE 4 #define CNXK_DPI_QUEUE_BUF_SIZE 16256 #define CNXK_DPI_POOL_MAX_CACHE_SZ (16) +#define CNXK_DPI_DW_PER_SINGLE_CMD 8 #define CNXK_DPI_HDR_LEN 4 #define CNXK_DPI_CMD_LEN(src, dst) (CNXK_DPI_HDR_LEN + ((src) << 1) + ((dst) << 1)) #define CNXK_DPI_MAX_CMD_SZ CNXK_DPI_CMD_LEN(CNXK_DPI_MAX_POINTER, \ @@ -39,6 +40,49 @@ */ #define CNXK_DPI_REQ_CDATA 0xFF +union cnxk_dpi_instr_cmd { + uint64_t u; + struct cn9k_dpi_instr_cmd { + uint64_t aura : 20; + uint64_t func : 16; + uint64_t pt : 2; + uint64_t reserved_102 : 1; + uint64_t pvfe : 1; + uint64_t fl : 1; + uint64_t ii : 1; + uint64_t fi : 1; + uint64_t ca : 1; + uint64_t csel : 1; + uint64_t reserved_109_111 : 3; + uint64_t xtype : 2; + uint64_t reserved_114_119 : 6; + uint64_t fport : 2; + uint64_t reserved_122_123 : 2; + uint64_t lport : 2; + uint64_t reserved_126_127 : 2; + /* Word 1 - End */ + } cn9k; + + struct cn10k_dpi_instr_cmd { + uint64_t nfst : 4; + uint64_t reserved_4_5 : 2; + uint64_t nlst : 4; + uint64_t reserved_10_11 : 2; + uint64_t pvfe : 1; + uint64_t reserved_13 : 1; + uint64_t func : 16; + uint64_t aura : 20; + uint64_t xtype : 2; + uint64_t reserved_52_53 : 2; + uint64_t pt : 2; + uint64_t fport : 2; + uint64_t reserved_58_59 : 2; + uint64_t lport : 2; + uint64_t reserved_62_63 : 2; + /* Word 0 - End */ + } cn10k; +}; + struct cnxk_dpi_compl_s { uint64_t cdata; void *cb_data; @@ -52,12 +96,11 @@ struct cnxk_dpi_cdesc_data_s { }; struct cnxk_dpi_conf { - union dpi_instr_hdr_s hdr; + union cnxk_dpi_instr_cmd cmd; struct cnxk_dpi_cdesc_data_s c_desc; uint16_t pnum_words; uint16_t pending; uint16_t desc_idx; - uint16_t pad0; struct rte_dma_stats stats; uint64_t completed_offset; }; @@ -74,6 +117,18 @@ struct cnxk_dpi_vf_s { uint32_t aura; uint16_t num_vchans; uint16_t flag; + uint8_t is_cn10k; } __plt_cache_aligned; +int cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, + uint32_t length, uint64_t flags); +int cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src, + const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, + uint64_t flags); +int cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, + uint32_t length, uint64_t flags); +int cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src, + const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, + uint64_t flags); + #endif diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c new file mode 100644 index 0000000000..16d7b5426b --- /dev/null +++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c @@ -0,0 +1,436 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 2021 Marvell International Ltd. + */ + +#include + +#include "cnxk_dmadev.h" + +static __plt_always_inline void +__dpi_cpy_scalar(uint64_t *src, uint64_t *dst, uint8_t n) +{ + uint8_t i; + + for (i = 0; i < n; i++) + dst[i] = src[i]; +} + +#if defined(RTE_ARCH_ARM64) +static __plt_always_inline void +__dpi_cpy_vector(uint64_t *src, uint64_t *dst, uint8_t n) +{ + uint64x2_t vec; + uint8_t i; + + for (i = 0; i < n; i += 2) { + vec = vld1q_u64((const uint64_t *)&src[i]); + vst1q_u64(&dst[i], vec); + } +} + +static __plt_always_inline void +__dpi_cpy_vector_sg(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n) +{ + uint64x2_t mask = {0xFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL}; + uint64x2_t vec; + uint8_t i; + + for (i = 0; i < n; i++) { + vec = vld1q_u64((const uint64_t *)&src[i]); + vec = vextq_u64(vec, vec, 1); + vec = vandq_u64(vec, mask); + vst1q_u64(dst, vec); + dst += 2; + } +} + +static __plt_always_inline uint8_t +__dpi_cpy_vector_sg_lmt(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n, uint16_t lmt) +{ + uint64x2_t mask = {0xFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL}; + uint64x2_t vec; + uint8_t i; + + for (i = 0; i < n && lmt; i++) { + vec = vld1q_u64((const uint64_t *)&src[i]); + vec = vextq_u64(vec, vec, 1); + vec = vandq_u64(vec, mask); + vst1q_u64(dst, vec); + dst += 2; + lmt -= 2; + } + + return i; +} +#else +static __plt_always_inline void +__dpi_cpy_scalar_sg(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n) +{ + uint8_t i; + + for (i = 0; i < n; i++) { + *dst++ = src[i].length; + *dst++ = src[i].addr; + } +} + +static __plt_always_inline uint8_t +__dpi_cpy_scalar_sg_lmt(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n, uint16_t lmt) +{ + uint8_t i; + + for (i = 0; i < n && lmt; i++) { + *dst++ = src[i].length; + *dst++ = src[i].addr; + lmt -= 2; + } + + return i; +} +#endif + +static __plt_always_inline void +__dpi_cpy(uint64_t *src, uint64_t *dst, uint8_t n) +{ +#if defined(RTE_ARCH_ARM64) + __dpi_cpy_vector(src, dst, n); +#else + __dpi_cpy_scalar(src, dst, n); +#endif +} + +static __plt_always_inline void +__dpi_cpy_sg(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n) +{ +#if defined(RTE_ARCH_ARM64) + __dpi_cpy_vector_sg(src, dst, n); +#else + __dpi_cpy_scalar_sg(src, dst, n); +#endif +} + +static __plt_always_inline uint8_t +__dpi_cpy_sg_lmt(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n, uint16_t lmt) +{ +#if defined(RTE_ARCH_ARM64) + return __dpi_cpy_vector_sg_lmt(src, dst, n, lmt); +#else + return __dpi_cpy_scalar_sg_lmt(src, dst, n, lmt); +#endif +} + +static __plt_always_inline int +__dpi_queue_write_single(struct cnxk_dpi_vf_s *dpi, uint64_t *cmd) +{ + uint64_t *ptr = dpi->chunk_base; + + /* Check if command fits in the current chunk. */ + if (dpi->chunk_head + CNXK_DPI_DW_PER_SINGLE_CMD < dpi->chunk_size_m1) { + ptr += dpi->chunk_head; + + __dpi_cpy_scalar(cmd, ptr, CNXK_DPI_DW_PER_SINGLE_CMD); + dpi->chunk_head += CNXK_DPI_DW_PER_SINGLE_CMD; + } else { + uint64_t *new_buff = NULL; + int count; + + if (rte_mempool_get(dpi->chunk_pool, (void **)&new_buff) < 0) { + plt_dpi_dbg("Failed to alloc next buffer from NPA"); + return -ENOSPC; + } + + /* + * Figure out how many cmd words will fit in the current chunk + * and copy them. + */ + count = dpi->chunk_size_m1 - dpi->chunk_head; + ptr += dpi->chunk_head; + + __dpi_cpy_scalar(cmd, ptr, count); + + ptr += count; + *ptr = (uint64_t)new_buff; + ptr = new_buff; + + /* Copy the remaining cmd words to new chunk. */ + __dpi_cpy_scalar(cmd + count, ptr, CNXK_DPI_DW_PER_SINGLE_CMD - count); + + dpi->chunk_base = new_buff; + dpi->chunk_head = CNXK_DPI_DW_PER_SINGLE_CMD - count; + } + + return 0; +} + +static __plt_always_inline int +__dpi_queue_write_sg(struct cnxk_dpi_vf_s *dpi, uint64_t *hdr, const struct rte_dma_sge *src, + const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst) +{ + uint8_t cmd_len = CNXK_DPI_CMD_LEN(nb_src, nb_dst); + uint64_t *ptr = dpi->chunk_base; + + /* Check if command fits in the current chunk. */ + if (dpi->chunk_head + cmd_len < dpi->chunk_size_m1) { + ptr += dpi->chunk_head; + + __dpi_cpy(hdr, ptr, CNXK_DPI_HDR_LEN); + ptr += CNXK_DPI_HDR_LEN; + __dpi_cpy_sg(src, ptr, nb_src); + ptr += (nb_src << 1); + __dpi_cpy_sg(dst, ptr, nb_dst); + + dpi->chunk_head += cmd_len; + } else { + uint64_t *new_buff = NULL, *buf; + uint16_t count; + + if (rte_mempool_get(dpi->chunk_pool, (void **)&new_buff) < 0) { + plt_dpi_dbg("Failed to alloc next buffer from NPA"); + return -ENOSPC; + } + + /* + * Figure out how many cmd words will fit in the current chunk + * and copy them, copy the rest to the new buffer. + */ + count = dpi->chunk_size_m1 - dpi->chunk_head; + ptr += dpi->chunk_head; + buf = new_buff; + if (count <= 4) { + __dpi_cpy(hdr, ptr, count); + ptr += count; + __dpi_cpy(&hdr[count], buf, 4); + buf += (4 - count); + } else { + uint8_t i; + + __dpi_cpy(hdr, ptr, 4); + ptr += 4; + count -= 4; + + i = __dpi_cpy_sg_lmt(src, ptr, nb_src, count); + src += i; + nb_src -= i; + count -= (i << 1); + ptr += (i << 1); + + i = __dpi_cpy_sg_lmt(dst, ptr, nb_dst, count); + dst += i; + nb_dst -= i; + ptr += (i << 1); + } + *ptr = (uint64_t)new_buff; + + __dpi_cpy_sg(src, buf, nb_src); + buf += (nb_src << 1); + + __dpi_cpy_sg(dst, buf, nb_dst); + buf += (nb_dst << 1); + + dpi->chunk_base = new_buff; + dpi->chunk_head = buf - new_buff; + } + + return 0; +} + +int +cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, uint32_t length, + uint64_t flags) +{ + struct cnxk_dpi_vf_s *dpivf = dev_private; + struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; + uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD]; + struct cnxk_dpi_compl_s *comp_ptr; + int rc; + + if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == + dpi_conf->c_desc.head)) + return -ENOSPC; + + comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); + + cmd[0] = (1UL << 54) | (1UL << 48); + cmd[1] = dpi_conf->cmd.u; + cmd[2] = (uint64_t)comp_ptr; + cmd[4] = length; + cmd[6] = length; + + /* + * For inbound case, src pointers are last pointers. + * For all other cases, src pointers are first pointers. + */ + if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) { + cmd[5] = dst; + cmd[7] = src; + } else { + cmd[5] = src; + cmd[7] = dst; + } + + rc = __dpi_queue_write_single(dpivf, cmd); + if (unlikely(rc)) { + CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail); + return rc; + } + + if (flags & RTE_DMA_OP_FLAG_SUBMIT) { + rte_wmb(); + plt_write64(dpi_conf->pnum_words + CNXK_DPI_DW_PER_SINGLE_CMD, + dpivf->rdpi.rbase + DPI_VDMA_DBELL); + dpi_conf->stats.submitted += dpi_conf->pending + 1; + dpi_conf->pnum_words = 0; + dpi_conf->pending = 0; + } else { + dpi_conf->pnum_words += CNXK_DPI_DW_PER_SINGLE_CMD; + dpi_conf->pending++; + } + + return dpi_conf->desc_idx++; +} + +int +cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src, + const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, uint64_t flags) +{ + struct cnxk_dpi_vf_s *dpivf = dev_private; + struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; + const struct rte_dma_sge *fptr, *lptr; + struct cnxk_dpi_compl_s *comp_ptr; + uint64_t hdr[4]; + int rc; + + if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == + dpi_conf->c_desc.head)) + return -ENOSPC; + + comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); + + hdr[1] = dpi_conf->cmd.u; + hdr[2] = (uint64_t)comp_ptr; + + /* + * For inbound case, src pointers are last pointers. + * For all other cases, src pointers are first pointers. + */ + if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) { + fptr = dst; + lptr = src; + RTE_SWAP(nb_src, nb_dst); + } else { + fptr = src; + lptr = dst; + } + hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48; + + rc = __dpi_queue_write_sg(dpivf, hdr, fptr, lptr, nb_src, nb_dst); + if (unlikely(rc)) { + CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail); + return rc; + } + + if (flags & RTE_DMA_OP_FLAG_SUBMIT) { + rte_wmb(); + plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst), + dpivf->rdpi.rbase + DPI_VDMA_DBELL); + dpi_conf->stats.submitted += dpi_conf->pending + 1; + dpi_conf->pnum_words = 0; + dpi_conf->pending = 0; + } else { + dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst); + dpi_conf->pending++; + } + + return dpi_conf->desc_idx++; +} + +int +cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, + uint32_t length, uint64_t flags) +{ + struct cnxk_dpi_vf_s *dpivf = dev_private; + struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; + uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD]; + struct cnxk_dpi_compl_s *comp_ptr; + int rc; + + if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == + dpi_conf->c_desc.head)) + return -ENOSPC; + + comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); + + cmd[0] = dpi_conf->cmd.u | (1U << 6) | 1U; + cmd[1] = (uint64_t)comp_ptr; + cmd[2] = 0; + cmd[4] = length; + cmd[5] = src; + cmd[6] = length; + cmd[7] = dst; + + rc = __dpi_queue_write_single(dpivf, cmd); + if (unlikely(rc)) { + CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail); + return rc; + } + + if (flags & RTE_DMA_OP_FLAG_SUBMIT) { + rte_wmb(); + plt_write64(dpi_conf->pnum_words + CNXK_DPI_DW_PER_SINGLE_CMD, + dpivf->rdpi.rbase + DPI_VDMA_DBELL); + dpi_conf->stats.submitted += dpi_conf->pending + 1; + dpi_conf->pnum_words = 0; + dpi_conf->pending = 0; + } else { + dpi_conf->pnum_words += 8; + dpi_conf->pending++; + } + + return dpi_conf->desc_idx++; +} + +int +cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src, + const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, + uint64_t flags) +{ + struct cnxk_dpi_vf_s *dpivf = dev_private; + struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; + struct cnxk_dpi_compl_s *comp_ptr; + uint64_t hdr[4]; + int rc; + + if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == + dpi_conf->c_desc.head)) + return -ENOSPC; + + comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); + + hdr[0] = dpi_conf->cmd.u | (nb_dst << 6) | nb_src; + hdr[1] = (uint64_t)comp_ptr; + hdr[2] = 0; + + rc = __dpi_queue_write_sg(dpivf, hdr, src, dst, nb_src, nb_dst); + if (unlikely(rc)) { + CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail); + return rc; + } + + if (flags & RTE_DMA_OP_FLAG_SUBMIT) { + rte_wmb(); + plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst), + dpivf->rdpi.rbase + DPI_VDMA_DBELL); + dpi_conf->stats.submitted += dpi_conf->pending + 1; + dpi_conf->pnum_words = 0; + dpi_conf->pending = 0; + } else { + dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst); + dpi_conf->pending++; + } + + return dpi_conf->desc_idx++; +} diff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build index b868fb14cb..e557349368 100644 --- a/drivers/dma/cnxk/meson.build +++ b/drivers/dma/cnxk/meson.build @@ -1,6 +1,13 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(C) 2021 Marvell International Ltd. +error_cflags = ['-Wno-uninitialized'] +foreach flag: error_cflags + if cc.has_argument(flag) + cflags += flag + endif +endforeach + deps += ['bus_pci', 'common_cnxk', 'dmadev'] -sources = files('cnxk_dmadev.c') +sources = files('cnxk_dmadev.c', 'cnxk_dmadev_fp.c') require_iova_in_mbuf = false