[3/4] dma/cnxk: add dma channel operations

Message ID: 20211026041300.28924-3-radhac@marvell.com (mailing list archive)
State: Changes Requested, archived
Delegated to: Thomas Monjalon
Series: [1/4] common/cnxk: add DPI DMA support

Checks

Context        Check    Description
ci/checkpatch  success  coding style OK

Commit Message

Radha Chintakuntla Oct. 26, 2021, 4:12 a.m. UTC
  Add functions for the dmadev vchan setup and DMA operations.

Signed-off-by: Radha Mohan Chintakuntla <radhac@marvell.com>
---
 drivers/dma/cnxk/cnxk_dmadev.c | 321 +++++++++++++++++++++++++++++++++
 drivers/dma/cnxk/cnxk_dmadev.h |  53 ++++++
 drivers/dma/cnxk/version.map   |   3 +
 3 files changed, 377 insertions(+)
 create mode 100644 drivers/dma/cnxk/version.map
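
For context, a minimal sketch of how an application would drive these
operations through the generic dmadev API (the rte_dma_* calls are the
standard DPDK dmadev interface; the device ID, vchan number, and
nb_desc values are assumptions for the example):

    #include <rte_dmadev.h>

    static int
    run_one_copy(int16_t dev_id, rte_iova_t src, rte_iova_t dst, uint32_t len)
    {
            struct rte_dma_conf dev_conf = { .nb_vchans = 1 };
            struct rte_dma_vchan_conf vconf = {
                    .direction = RTE_DMA_DIR_MEM_TO_MEM,
                    .nb_desc = 15,          /* DPI_MAX_DESC in this driver */
            };
            uint16_t last_idx;
            bool has_error = false;

            if (rte_dma_configure(dev_id, &dev_conf) < 0)   /* cnxk_dmadev_configure() */
                    return -1;
            if (rte_dma_vchan_setup(dev_id, 0, &vconf) < 0) /* cnxk_dmadev_vchan_setup() */
                    return -1;
            rte_dma_start(dev_id);                          /* cnxk_dmadev_start() */

            /* Enqueue one copy and ring the doorbell in the same call. */
            if (rte_dma_copy(dev_id, 0, src, dst, len, RTE_DMA_OP_FLAG_SUBMIT) < 0)
                    return -1;

            /* Poll until the transfer completes. */
            while (rte_dma_completed(dev_id, 0, 1, &last_idx, &has_error) == 0)
                    ;

            return has_error ? -1 : 0;
    }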
  

Comments

Jerin Jacob Oct. 26, 2021, 8:41 a.m. UTC | #1
On Tue, Oct 26, 2021 at 9:43 AM Radha Mohan Chintakuntla
<radhac@marvell.com> wrote:
>
> Add functions for the dmadev vchan setup and DMA operations.
>
> Signed-off-by: Radha Mohan Chintakuntla <radhac@marvell.com>
> ---
>  drivers/dma/cnxk/cnxk_dmadev.c | 321 +++++++++++++++++++++++++++++++++
>  drivers/dma/cnxk/cnxk_dmadev.h |  53 ++++++
>  drivers/dma/cnxk/version.map   |   3 +
>  3 files changed, 377 insertions(+)
>  create mode 100644 drivers/dma/cnxk/version.map
>
> diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
> index 620766743d..8434579aa2 100644
> --- a/drivers/dma/cnxk/cnxk_dmadev.c
> +++ b/drivers/dma/cnxk/cnxk_dmadev.c
> @@ -18,6 +18,321 @@
>  #include <roc_api.h>
>  #include <cnxk_dmadev.h>
>
> +static int
> +cnxk_dmadev_info_get(const struct rte_dma_dev *dev,
> +                    struct rte_dma_info *dev_info, uint32_t size)
> +{
> +       RTE_SET_USED(dev);
> +       RTE_SET_USED(size);
> +
> +       dev_info->max_vchans = 1;
> +       dev_info->nb_vchans = 1;
> +       dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
> +               RTE_DMA_CAPA_MEM_TO_DEV | RTE_DMA_CAPA_DEV_TO_MEM |
> +               RTE_DMA_CAPA_OPS_COPY;
> +       dev_info->max_desc = DPI_MAX_DESC;
> +       dev_info->min_desc = 1;
> +       dev_info->max_sges = DPI_MAX_POINTER;
> +
> +       return 0;
> +}
> +
> +static int
> +cnxk_dmadev_configure(struct rte_dma_dev *dev,
> +                     const struct rte_dma_conf *conf, uint32_t conf_sz)
> +{
> +       struct cnxk_dpi_vf_s *dpivf = NULL;
> +       int rc = 0;
> +
> +       RTE_SET_USED(conf);
> +       RTE_SET_USED(conf_sz);
> +       dpivf = dev->fp_obj->dev_private;
> +       rc = roc_dpi_queue_configure(&dpivf->rdpi);
> +       if (rc < 0)
> +               plt_err("DMA queue configure failed err = %d", rc);
> +
> +       return rc;
> +}
> +
> +static int
> +cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
> +                       const struct rte_dma_vchan_conf *conf,
> +                       uint32_t conf_sz)
> +{
> +       struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
> +       struct cnxk_dpi_compl_s *comp_data;
> +       int i;
> +
> +       RTE_SET_USED(vchan);
> +       RTE_SET_USED(conf_sz);
> +
> +       switch (conf->direction) {
> +       case RTE_DMA_DIR_DEV_TO_MEM:
> +               dpivf->conf.direction = DPI_XTYPE_INBOUND;
> +               dpivf->conf.src_port = conf->src_port.pcie.coreid;
> +               dpivf->conf.dst_port = 0;
> +               break;
> +       case RTE_DMA_DIR_MEM_TO_DEV:
> +               dpivf->conf.direction = DPI_XTYPE_OUTBOUND;
> +               dpivf->conf.src_port = 0;
> +               dpivf->conf.dst_port = conf->dst_port.pcie.coreid;
> +               break;
> +       case RTE_DMA_DIR_MEM_TO_MEM:
> +               dpivf->conf.direction = DPI_XTYPE_INTERNAL_ONLY;
> +               dpivf->conf.src_port = 0;
> +               dpivf->conf.dst_port = 0;
> +               break;
> +       case RTE_DMA_DIR_DEV_TO_DEV:
> +               dpivf->conf.direction = DPI_XTYPE_EXTERNAL_ONLY;
> +               dpivf->conf.src_port = conf->src_port.pcie.coreid;
> +               dpivf->conf.dst_port = conf->dst_port.pcie.coreid;
> +               break;
> +       }
> +
> +       for (i = 0; i < conf->nb_desc; i++) {
> +               comp_data = rte_zmalloc(NULL, sizeof(*comp_data), 0);
> +               dpivf->conf.c_desc.compl_ptr[i] = comp_data;
> +       }
> +       dpivf->conf.c_desc.max_cnt = DPI_MAX_DESC;
> +       dpivf->conf.c_desc.head = 0;
> +       dpivf->conf.c_desc.tail = 0;
> +
> +       return 0;
> +}
> +
> +static int
> +cnxk_dmadev_start(struct rte_dma_dev *dev)
> +{
> +       struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
> +
> +       roc_dpi_queue_start(&dpivf->rdpi);
> +
> +       return 0;
> +}
> +
> +static int
> +cnxk_dmadev_stop(struct rte_dma_dev *dev)
> +{
> +       struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
> +
> +       roc_dpi_queue_stop(&dpivf->rdpi);
> +
> +       return 0;
> +}
> +
> +static int
> +cnxk_dmadev_close(struct rte_dma_dev *dev)
> +{
> +       struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
> +
> +       roc_dpi_queue_stop(&dpivf->rdpi);
> +       roc_dpi_dev_fini(&dpivf->rdpi);
> +
> +       return 0;
> +}
> +
> +static inline int
> +__dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, int cmd_count)
> +{
> +       uint64_t *ptr = dpi->chunk_base;
> +
> +       if ((cmd_count < DPI_MIN_CMD_SIZE) || (cmd_count > DPI_MAX_CMD_SIZE) ||
> +           cmds == NULL)
> +               return -EINVAL;
> +
> +       /*
> +        * Normally there is plenty of room in the current buffer for the
> +        * command
> +        */
> +       if (dpi->chunk_head + cmd_count < dpi->pool_size_m1) {
> +               ptr += dpi->chunk_head;
> +               dpi->chunk_head += cmd_count;
> +               while (cmd_count--)
> +                       *ptr++ = *cmds++;
> +       } else {
> +               int count;
> +               uint64_t *new_buff = dpi->chunk_next;
> +
> +               dpi->chunk_next =
> +                       (void *)roc_npa_aura_op_alloc(dpi->aura_handle, 0);
> +               if (!dpi->chunk_next) {
> +                       plt_err("Failed to alloc next buffer from NPA");
> +                       return -ENOMEM;
> +               }
> +
> +               /*
> +                * Figure out how many cmd words will fit in this buffer.
> +                * One location will be needed for the next buffer pointer.
> +                */
> +               count = dpi->pool_size_m1 - dpi->chunk_head;
> +               ptr += dpi->chunk_head;
> +               cmd_count -= count;
> +               while (count--)
> +                       *ptr++ = *cmds++;
> +
> +               /*
> +                * chunk next ptr is 2 DWORDS
> +                * second DWORD is reserved.
> +                */
> +               *ptr++ = (uint64_t)new_buff;
> +               *ptr = 0;
> +
> +               /*
> +                * The current buffer is full and has a link to the next
> +                * buffers. Time to write the rest of the commands into the new
> +                * buffer.
> +                */
> +               dpi->chunk_base = new_buff;
> +               dpi->chunk_head = cmd_count;
> +               ptr = new_buff;
> +               while (cmd_count--)
> +                       *ptr++ = *cmds++;
> +
> +               /* queue index may be greater than pool size */
> +               if (dpi->chunk_head >= dpi->pool_size_m1) {
> +                       new_buff = dpi->chunk_next;
> +                       dpi->chunk_next =
> +                               (void *)roc_npa_aura_op_alloc(dpi->aura_handle,
> +                                                             0);
> +                       if (!dpi->chunk_next) {
> +                               plt_err("Failed to alloc next buffer from NPA");
> +                               return -ENOMEM;
> +                       }
> +                       /* Write next buffer address */
> +                       *ptr = (uint64_t)new_buff;
> +                       dpi->chunk_base = new_buff;
> +                       dpi->chunk_head = 0;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
> +static int
> +cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
> +                rte_iova_t dst, uint32_t length, uint64_t flags)
> +{
> +       uint64_t cmd[DPI_MAX_CMD_SIZE] = {0};
> +       union dpi_instr_hdr_s *header = (union dpi_instr_hdr_s *)&cmd[0];
> +       rte_iova_t fptr, lptr;
> +       struct cnxk_dpi_vf_s *dpivf = dev_private;
> +       struct cnxk_dpi_compl_s *comp_ptr;
> +       int num_words = 0;
> +       int rc;
> +
> +       RTE_SET_USED(vchan);
> +
> +       header->s.xtype = dpivf->conf.direction;
> +       header->s.pt = DPI_HDR_PT_ZBW_CA;
> +       comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
> +       comp_ptr->cdata = DPI_REQ_CDATA;
> +       header->s.ptr = (uint64_t)comp_ptr;
> +       STRM_INC(dpivf->conf.c_desc);
> +
> +       /* pvfe should be set for inbound and outbound only */
> +       if (header->s.xtype <= 1)
> +               header->s.pvfe = 1;
> +       num_words += 4;
> +
> +       header->s.nfst = 1;
> +       header->s.nlst = 1;

The zero-filling of cmd and the rest of this header filling can be
moved to the slow path.

Please change the logic to populate the static items in the slow path,
based on the configure/vchan setup, and update only the per-transfer
items here, to get better performance.
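
For illustration, a rough sketch of that split, reusing the names from
this patch; the hdr_tmpl[] member added to struct cnxk_dpi_queue_conf
is hypothetical and not part of this patch:

    /* Slow path, e.g. at the end of cnxk_dmadev_vchan_setup().
     * Everything written here depends only on the vchan configuration.
     * Assumes a hypothetical "uint64_t hdr_tmpl[4];" member in
     * struct cnxk_dpi_queue_conf (the header occupies 4 command words).
     */
    union dpi_instr_hdr_s *hdr = (union dpi_instr_hdr_s *)dpivf->conf.hdr_tmpl;

    memset(dpivf->conf.hdr_tmpl, 0, sizeof(dpivf->conf.hdr_tmpl));
    hdr->s.xtype = dpivf->conf.direction;
    hdr->s.pt = DPI_HDR_PT_ZBW_CA;
    if (hdr->s.xtype <= 1) /* pvfe is set for inbound and outbound only */
            hdr->s.pvfe = 1;
    /* fport/lport also depend only on the direction */
    if (hdr->s.xtype == DPI_XTYPE_INBOUND) {
            hdr->s.fport = dpivf->conf.dst_port & 0x3;
            hdr->s.lport = dpivf->conf.src_port & 0x3;
    } else {
            hdr->s.fport = dpivf->conf.src_port & 0x3;
            hdr->s.lport = dpivf->conf.dst_port & 0x3;
    }

    /* Fast path, in cnxk_dmadev_copy(): copy the 4-word template and
     * patch only the per-transfer fields, instead of zeroing all of
     * cmd[] and rebuilding the header from scratch.
     */
    uint64_t cmd[DPI_MAX_CMD_SIZE];
    union dpi_instr_hdr_s *header = (union dpi_instr_hdr_s *)&cmd[0];

    memcpy(cmd, dpivf->conf.hdr_tmpl, sizeof(dpivf->conf.hdr_tmpl));
    header->s.nfst = 1;
    header->s.nlst = 1;
    header->s.ptr = (uint64_t)comp_ptr;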



> <snip>
  
Radha Mohan Oct. 28, 2021, 6:18 p.m. UTC | #2
On Tue, Oct 26, 2021 at 1:49 AM Jerin Jacob <jerinjacobk@gmail.com> wrote:
>
> On Tue, Oct 26, 2021 at 9:43 AM Radha Mohan Chintakuntla
> <radhac@marvell.com> wrote:
> >
> > Add functions for the dmadev vchan setup and DMA operations.
> >
> > Signed-off-by: Radha Mohan Chintakuntla <radhac@marvell.com>
> > <snip>
> > +static int
> > +cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
> > +                rte_iova_t dst, uint32_t length, uint64_t flags)
> > +{
> > +       uint64_t cmd[DPI_MAX_CMD_SIZE] = {0};
> > +       union dpi_instr_hdr_s *header = (union dpi_instr_hdr_s *)&cmd[0];
> > +       rte_iova_t fptr, lptr;
> > +       struct cnxk_dpi_vf_s *dpivf = dev_private;
> > +       struct cnxk_dpi_compl_s *comp_ptr;
> > +       int num_words = 0;
> > +       int rc;
> > +
> > +       RTE_SET_USED(vchan);
> > +
> > +       header->s.xtype = dpivf->conf.direction;
> > +       header->s.pt = DPI_HDR_PT_ZBW_CA;
> > +       comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
> > +       comp_ptr->cdata = DPI_REQ_CDATA;
> > +       header->s.ptr = (uint64_t)comp_ptr;
> > +       STRM_INC(dpivf->conf.c_desc);
> > +
> > +       /* pvfe should be set for inbound and outbound only */
> > +       if (header->s.xtype <= 1)
> > +               header->s.pvfe = 1;
> > +       num_words += 4;
> > +
> > +       header->s.nfst = 1;
> > +       header->s.nlst = 1;
>
> The zero-filling of cmd and the rest of this header filling can be
> moved to the slow path.
>
> Please change the logic to populate the static items in the slow path,
> based on the configure/vchan setup, and update only the per-transfer
> items here, to get better performance.
>
These are instruction header values that we are filling. If you look
at it, there is really only one 64-bit field that can be filled
beforehand, i.e. in the slow path in vchan_setup().
The rest of the header can only be filled here, e.g. nlst and nfst
(the number of pointers to be DMA'ed) and the completion pointer. So
just for that I do not see value in moving the code around.

<snip>
  
Jerin Jacob Oct. 29, 2021, 2:54 p.m. UTC | #3
On Thu, Oct 28, 2021 at 11:48 PM Radha Mohan <mohun106@gmail.com> wrote:
>
> On Tue, Oct 26, 2021 at 1:49 AM Jerin Jacob <jerinjacobk@gmail.com> wrote:
> >
> > On Tue, Oct 26, 2021 at 9:43 AM Radha Mohan Chintakuntla
> > <radhac@marvell.com> wrote:
> > >
> > > Add functions for the dmadev vchan setup and DMA operations.
> > >
> > > Signed-off-by: Radha Mohan Chintakuntla <radhac@marvell.com>

> > > +static int
> > > +cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
> > > +                rte_iova_t dst, uint32_t length, uint64_t flags)
> > > +{
> > > +       uint64_t cmd[DPI_MAX_CMD_SIZE] = {0};
> > > +       union dpi_instr_hdr_s *header = (union dpi_instr_hdr_s *)&cmd[0];
> > > +       rte_iova_t fptr, lptr;
> > > +       struct cnxk_dpi_vf_s *dpivf = dev_private;
> > > +       struct cnxk_dpi_compl_s *comp_ptr;
> > > +       int num_words = 0;
> > > +       int rc;
> > > +
> > > +       RTE_SET_USED(vchan);
> > > +
> > > +       header->s.xtype = dpivf->conf.direction;
> > > +       header->s.pt = DPI_HDR_PT_ZBW_CA;
> > > +       comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
> > > +       comp_ptr->cdata = DPI_REQ_CDATA;
> > > +       header->s.ptr = (uint64_t)comp_ptr;
> > > +       STRM_INC(dpivf->conf.c_desc);
> > > +
> > > +       /* pvfe should be set for inbound and outbound only */
> > > +       if (header->s.xtype <= 1)
> > > +               header->s.pvfe = 1;
> > > +       num_words += 4;
> > > +
> > > +       header->s.nfst = 1;
> > > +       header->s.nlst = 1;
> >
> > The zero-filling of cmd and the rest of this header filling can be
> > moved to the slow path.
> >
> > Please change the logic to populate the static items in the slow path,
> > based on the configure/vchan setup, and update only the per-transfer
> > items here, to get better performance.
> >
> These are instruction header values that we are filling. If you look
> at it, there is really only one 64-bit field that can be filled
> beforehand, i.e. in the slow path in vchan_setup().
> The rest of the header can only be filled here, e.g. nlst and nfst
> (the number of pointers to be DMA'ed) and the completion pointer. So
> just for that I do not see value in moving the code around.

Two things,

1) By doing it like below,
> > > +       header->s.nfst = 1;
> > > +       header->s.nlst = 1;

it will generate multiple stores. One option is to have a local u64
variable, form the required descriptor in it, and write it in one shot
(see the sketch at the end of this message). It is a standard
optimization strategy in the fast path.

2) uint64_t cmd[DPI_MAX_CMD_SIZE] = {0}; will result in a memset of
the 64-word cmd buffer. That is the reason creating a template based
on the vchan makes sense.

Moving to a template-based scheme looks like it needs a lot of rework
in the driver, so I will leave you to decide performance vs. other
aspects, as you are maintaining the driver.
No strong opinion.


>
> <snip>
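
As a sketch of the single-store idea from point 1 above: form the word
in a local variable and store it once. The HDR_*_SHIFT macros are
invented for the example and do not match the real dpi_instr_hdr_s bit
layout:

    uint64_t w0 = 0;

    w0 |= (uint64_t)dpivf->conf.direction << HDR_XTYPE_SHIFT;
    w0 |= (uint64_t)DPI_HDR_PT_ZBW_CA << HDR_PT_SHIFT;
    w0 |= 1ULL << HDR_NFST_SHIFT;
    w0 |= 1ULL << HDR_NLST_SHIFT;
    cmd[0] = w0; /* one store instead of several bitfield read-modify-writes */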
  
Radha Mohan Oct. 29, 2021, 6:02 p.m. UTC | #4
On Fri, Oct 29, 2021 at 7:54 AM Jerin Jacob <jerinjacobk@gmail.com> wrote:
>
> On Thu, Oct 28, 2021 at 11:48 PM Radha Mohan <mohun106@gmail.com> wrote:
> >
> > On Tue, Oct 26, 2021 at 1:49 AM Jerin Jacob <jerinjacobk@gmail.com> wrote:
> > >
> > > On Tue, Oct 26, 2021 at 9:43 AM Radha Mohan Chintakuntla
> > > <radhac@marvell.com> wrote:
> > > >
> > > > Add functions for the dmadev vchan setup and DMA operations.
> > > >
> > > > Signed-off-by: Radha Mohan Chintakuntla <radhac@marvell.com>
>
> > > > +static int
> > > > +cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
> > > > +                rte_iova_t dst, uint32_t length, uint64_t flags)
> > > > +{
> > > > +       uint64_t cmd[DPI_MAX_CMD_SIZE] = {0};
> > > > +       union dpi_instr_hdr_s *header = (union dpi_instr_hdr_s *)&cmd[0];
> > > > +       rte_iova_t fptr, lptr;
> > > > +       struct cnxk_dpi_vf_s *dpivf = dev_private;
> > > > +       struct cnxk_dpi_compl_s *comp_ptr;
> > > > +       int num_words = 0;
> > > > +       int rc;
> > > > +
> > > > +       RTE_SET_USED(vchan);
> > > > +
> > > > +       header->s.xtype = dpivf->conf.direction;
> > > > +       header->s.pt = DPI_HDR_PT_ZBW_CA;
> > > > +       comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
> > > > +       comp_ptr->cdata = DPI_REQ_CDATA;
> > > > +       header->s.ptr = (uint64_t)comp_ptr;
> > > > +       STRM_INC(dpivf->conf.c_desc);
> > > > +
> > > > +       /* pvfe should be set for inbound and outbound only */
> > > > +       if (header->s.xtype <= 1)
> > > > +               header->s.pvfe = 1;
> > > > +       num_words += 4;
> > > > +
> > > > +       header->s.nfst = 1;
> > > > +       header->s.nlst = 1;
> > >
> > > The zero-filling of cmd and the rest of this header filling can be
> > > moved to the slow path.
> > >
> > > Please change the logic to populate the static items in the slow path,
> > > based on the configure/vchan setup, and update only the per-transfer
> > > items here, to get better performance.
> > >
> > These are instruction header values that we are filling. If you look
> > at it, there is really only one 64-bit field that can be filled
> > beforehand, i.e. in the slow path in vchan_setup().
> > The rest of the header can only be filled here, e.g. nlst and nfst
> > (the number of pointers to be DMA'ed) and the completion pointer. So
> > just for that I do not see value in moving the code around.
>
> Two things,
>
> 1) By doing it like below,
> > > > +       header->s.nfst = 1;
> > > > +       header->s.nlst = 1;
>
> it will generate multiple stores.

No, it won't in this case. Here is how the compiler generated the
writes to the 64-bit fields of the header:

 7a4:   d2e00821        mov     x1, #0x41000000000000           // #18295873486192640

> One option is to have a local u64
> variable, form the required descriptor in it, and write it in one
> shot. It is a standard optimization strategy in the fast path.
Maybe not here.

>
> 2) uint64_t cmd[DPI_MAX_CMD_SIZE] = {0}; will result in a memset of
> the 64-word cmd buffer. That is the reason creating a template based
> on the vchan makes sense.
>
> Moving to a template-based scheme looks like it needs a lot of rework
> in the driver, so I will leave you to decide performance vs. other
> aspects, as you are maintaining the driver.
> No strong opinion.
>
OK, understood. We'll do a v2 with some improvements.
>
> >
> > <snip>
  

Patch

diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
index 620766743d..8434579aa2 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.c
+++ b/drivers/dma/cnxk/cnxk_dmadev.c
@@ -18,6 +18,321 @@
 #include <roc_api.h>
 #include <cnxk_dmadev.h>
 
+static int
+cnxk_dmadev_info_get(const struct rte_dma_dev *dev,
+		     struct rte_dma_info *dev_info, uint32_t size)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(size);
+
+	dev_info->max_vchans = 1;
+	dev_info->nb_vchans = 1;
+	dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
+		RTE_DMA_CAPA_MEM_TO_DEV | RTE_DMA_CAPA_DEV_TO_MEM |
+		RTE_DMA_CAPA_OPS_COPY;
+	dev_info->max_desc = DPI_MAX_DESC;
+	dev_info->min_desc = 1;
+	dev_info->max_sges = DPI_MAX_POINTER;
+
+	return 0;
+}
+
+static int
+cnxk_dmadev_configure(struct rte_dma_dev *dev,
+		      const struct rte_dma_conf *conf, uint32_t conf_sz)
+{
+	struct cnxk_dpi_vf_s *dpivf = NULL;
+	int rc = 0;
+
+	RTE_SET_USED(conf);
+	RTE_SET_USED(conf_sz);
+	dpivf = dev->fp_obj->dev_private;
+	rc = roc_dpi_queue_configure(&dpivf->rdpi);
+	if (rc < 0)
+		plt_err("DMA queue configure failed err = %d", rc);
+
+	return rc;
+}
+
+static int
+cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
+			const struct rte_dma_vchan_conf *conf,
+			uint32_t conf_sz)
+{
+	struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
+	struct cnxk_dpi_compl_s *comp_data;
+	int i;
+
+	RTE_SET_USED(vchan);
+	RTE_SET_USED(conf_sz);
+
+	switch (conf->direction) {
+	case RTE_DMA_DIR_DEV_TO_MEM:
+		dpivf->conf.direction = DPI_XTYPE_INBOUND;
+		dpivf->conf.src_port = conf->src_port.pcie.coreid;
+		dpivf->conf.dst_port = 0;
+		break;
+	case RTE_DMA_DIR_MEM_TO_DEV:
+		dpivf->conf.direction = DPI_XTYPE_OUTBOUND;
+		dpivf->conf.src_port = 0;
+		dpivf->conf.dst_port = conf->dst_port.pcie.coreid;
+		break;
+	case RTE_DMA_DIR_MEM_TO_MEM:
+		dpivf->conf.direction = DPI_XTYPE_INTERNAL_ONLY;
+		dpivf->conf.src_port = 0;
+		dpivf->conf.dst_port = 0;
+		break;
+	case RTE_DMA_DIR_DEV_TO_DEV:
+		dpivf->conf.direction = DPI_XTYPE_EXTERNAL_ONLY;
+		dpivf->conf.src_port = conf->src_port.pcie.coreid;
+		dpivf->conf.dst_port = conf->dst_port.pcie.coreid;
+		break;
+	}
+
+	for (i = 0; i < conf->nb_desc; i++) {
+		comp_data = rte_zmalloc(NULL, sizeof(*comp_data), 0);
+		dpivf->conf.c_desc.compl_ptr[i] = comp_data;
+	}
+	dpivf->conf.c_desc.max_cnt = DPI_MAX_DESC;
+	dpivf->conf.c_desc.head = 0;
+	dpivf->conf.c_desc.tail = 0;
+
+	return 0;
+}
+
+static int
+cnxk_dmadev_start(struct rte_dma_dev *dev)
+{
+	struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
+
+	roc_dpi_queue_start(&dpivf->rdpi);
+
+	return 0;
+}
+
+static int
+cnxk_dmadev_stop(struct rte_dma_dev *dev)
+{
+	struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
+
+	roc_dpi_queue_stop(&dpivf->rdpi);
+
+	return 0;
+}
+
+static int
+cnxk_dmadev_close(struct rte_dma_dev *dev)
+{
+	struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
+
+	roc_dpi_queue_stop(&dpivf->rdpi);
+	roc_dpi_dev_fini(&dpivf->rdpi);
+
+	return 0;
+}
+
+static inline int
+__dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, int cmd_count)
+{
+	uint64_t *ptr = dpi->chunk_base;
+
+	if ((cmd_count < DPI_MIN_CMD_SIZE) || (cmd_count > DPI_MAX_CMD_SIZE) ||
+	    cmds == NULL)
+		return -EINVAL;
+
+	/*
+	 * Normally there is plenty of room in the current buffer for the
+	 * command
+	 */
+	if (dpi->chunk_head + cmd_count < dpi->pool_size_m1) {
+		ptr += dpi->chunk_head;
+		dpi->chunk_head += cmd_count;
+		while (cmd_count--)
+			*ptr++ = *cmds++;
+	} else {
+		int count;
+		uint64_t *new_buff = dpi->chunk_next;
+
+		dpi->chunk_next =
+			(void *)roc_npa_aura_op_alloc(dpi->aura_handle, 0);
+		if (!dpi->chunk_next) {
+			plt_err("Failed to alloc next buffer from NPA");
+			return -ENOMEM;
+		}
+
+		/*
+		 * Figure out how many cmd words will fit in this buffer.
+		 * One location will be needed for the next buffer pointer.
+		 */
+		count = dpi->pool_size_m1 - dpi->chunk_head;
+		ptr += dpi->chunk_head;
+		cmd_count -= count;
+		while (count--)
+			*ptr++ = *cmds++;
+
+		/*
+		 * chunk next ptr is 2 DWORDS
+		 * second DWORD is reserved.
+		 */
+		*ptr++ = (uint64_t)new_buff;
+		*ptr = 0;
+
+		/*
+		 * The current buffer is full and has a link to the next
+		 * buffers. Time to write the rest of the commands into the new
+		 * buffer.
+		 */
+		dpi->chunk_base = new_buff;
+		dpi->chunk_head = cmd_count;
+		ptr = new_buff;
+		while (cmd_count--)
+			*ptr++ = *cmds++;
+
+		/* queue index may be greater than pool size */
+		if (dpi->chunk_head >= dpi->pool_size_m1) {
+			new_buff = dpi->chunk_next;
+			dpi->chunk_next =
+				(void *)roc_npa_aura_op_alloc(dpi->aura_handle,
+							      0);
+			if (!dpi->chunk_next) {
+				plt_err("Failed to alloc next buffer from NPA");
+				return -ENOMEM;
+			}
+			/* Write next buffer address */
+			*ptr = (uint64_t)new_buff;
+			dpi->chunk_base = new_buff;
+			dpi->chunk_head = 0;
+		}
+	}
+
+	return 0;
+}
+
+static int
+cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
+		 rte_iova_t dst, uint32_t length, uint64_t flags)
+{
+	uint64_t cmd[DPI_MAX_CMD_SIZE] = {0};
+	union dpi_instr_hdr_s *header = (union dpi_instr_hdr_s *)&cmd[0];
+	rte_iova_t fptr, lptr;
+	struct cnxk_dpi_vf_s *dpivf = dev_private;
+	struct cnxk_dpi_compl_s *comp_ptr;
+	int num_words = 0;
+	int rc;
+
+	RTE_SET_USED(vchan);
+
+	header->s.xtype = dpivf->conf.direction;
+	header->s.pt = DPI_HDR_PT_ZBW_CA;
+	comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
+	comp_ptr->cdata = DPI_REQ_CDATA;
+	header->s.ptr = (uint64_t)comp_ptr;
+	STRM_INC(dpivf->conf.c_desc);
+
+	/* pvfe should be set for inbound and outbound only */
+	if (header->s.xtype <= 1)
+		header->s.pvfe = 1;
+	num_words += 4;
+
+	header->s.nfst = 1;
+	header->s.nlst = 1;
+	/*
+	 * For inbound case, src pointers are last pointers.
+	 * For all other cases, src pointers are first pointers.
+	 */
+	if (header->s.xtype == DPI_XTYPE_INBOUND) {
+		fptr = dst;
+		lptr = src;
+		header->s.fport = dpivf->conf.dst_port & 0x3;
+		header->s.lport = dpivf->conf.src_port & 0x3;
+	} else {
+		fptr = src;
+		lptr = dst;
+		header->s.fport = dpivf->conf.src_port & 0x3;
+		header->s.lport = dpivf->conf.dst_port & 0x3;
+	}
+
+	cmd[num_words++] = length;
+	cmd[num_words++] = fptr;
+	cmd[num_words++] = length;
+	cmd[num_words++] = lptr;
+
+	rc = __dpi_queue_write(&dpivf->rdpi, cmd, num_words);
+	if (!rc) {
+		if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
+			rte_wmb();
+			plt_write64(num_words,
+				    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+		}
+		dpivf->num_words = num_words;
+	}
+
+	return rc;
+}
+
+static uint16_t
+cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls,
+		      uint16_t *last_idx, bool *has_error)
+{
+	struct cnxk_dpi_vf_s *dpivf = dev_private;
+	int cnt;
+
+	RTE_SET_USED(vchan);
+	RTE_SET_USED(last_idx);
+	RTE_SET_USED(has_error);
+	for (cnt = 0; cnt < nb_cpls; cnt++) {
+		struct cnxk_dpi_compl_s *comp_ptr =
+			dpivf->conf.c_desc.compl_ptr[cnt];
+
+		if (comp_ptr->cdata)
+			break;
+	}
+
+	dpivf->conf.c_desc.tail = cnt;
+
+	return cnt;
+}
+
+static uint16_t
+cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan,
+			     const uint16_t nb_cpls, uint16_t *last_idx,
+			     enum rte_dma_status_code *status)
+{
+	struct cnxk_dpi_vf_s *dpivf = dev_private;
+	int cnt;
+
+	RTE_SET_USED(vchan);
+	RTE_SET_USED(last_idx);
+	for (cnt = 0; cnt < nb_cpls; cnt++) {
+		struct cnxk_dpi_compl_s *comp_ptr =
+			dpivf->conf.c_desc.compl_ptr[cnt];
+		status[cnt] = comp_ptr->cdata;
+	}
+
+	dpivf->conf.c_desc.tail = 0;
+	return cnt;
+}
+
+static int
+cnxk_dmadev_submit(void *dev_private, uint16_t vchan __rte_unused)
+{
+	struct cnxk_dpi_vf_s *dpivf = dev_private;
+
+	rte_wmb();
+	plt_write64(dpivf->num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+
+	return 0;
+}
+
+static const struct rte_dma_dev_ops cnxk_dmadev_ops = {
+	.dev_info_get = cnxk_dmadev_info_get,
+	.dev_configure = cnxk_dmadev_configure,
+	.dev_start = cnxk_dmadev_start,
+	.dev_stop = cnxk_dmadev_stop,
+	.vchan_setup = cnxk_dmadev_vchan_setup,
+	.dev_close = cnxk_dmadev_close,
+};
+
 static int
 cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		  struct rte_pci_device *pci_dev)
@@ -50,6 +365,12 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused,
 
 	dmadev->device = &pci_dev->device;
 	dmadev->fp_obj->dev_private = dpivf;
+	dmadev->dev_ops = &cnxk_dmadev_ops;
+
+	dmadev->fp_obj->copy = cnxk_dmadev_copy;
+	dmadev->fp_obj->submit = cnxk_dmadev_submit;
+	dmadev->fp_obj->completed = cnxk_dmadev_completed;
+	dmadev->fp_obj->completed_status = cnxk_dmadev_completed_status;
 
 	rdpi = &dpivf->rdpi;
 
diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h
index 9e0bb7b2ce..ce301a5945 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.h
+++ b/drivers/dma/cnxk/cnxk_dmadev.h
@@ -4,8 +4,61 @@ 
 #ifndef _CNXK_DMADEV_H_
 #define _CNXK_DMADEV_H_
 
+#define DPI_MAX_POINTER		15
+#define DPI_QUEUE_STOP		0x0
+#define DPI_QUEUE_START		0x1
+#define STRM_INC(s)		((s).tail = ((s).tail + 1) % (s).max_cnt)
+#define DPI_MAX_DESC		DPI_MAX_POINTER
+
+/* DPI Transfer Type, pointer type in DPI_DMA_INSTR_HDR_S[XTYPE] */
+#define DPI_XTYPE_OUTBOUND      (0)
+#define DPI_XTYPE_INBOUND       (1)
+#define DPI_XTYPE_INTERNAL_ONLY (2)
+#define DPI_XTYPE_EXTERNAL_ONLY (3)
+#define DPI_XTYPE_MASK		0x3
+#define DPI_HDR_PT_ZBW_CA	0x0
+#define DPI_HDR_PT_ZBW_NC	0x1
+#define DPI_HDR_PT_WQP		0x2
+#define DPI_HDR_PT_WQP_NOSTATUS	0x0
+#define DPI_HDR_PT_WQP_STATUSCA	0x1
+#define DPI_HDR_PT_WQP_STATUSNC	0x3
+#define DPI_HDR_PT_CNT		0x3
+#define DPI_HDR_PT_MASK		0x3
+#define DPI_W0_TT_MASK		0x3
+#define DPI_W0_GRP_MASK		0x3FF
+
+/* Completion data is set to 0xFF when a request is submitted; upon
+ * successful completion the engine overwrites it with the status.
+ */
+#define DPI_REQ_CDATA		0xFF
+
+#define DPI_MIN_CMD_SIZE	8
+#define DPI_MAX_CMD_SIZE	64
+
+struct cnxk_dpi_compl_s {
+	uint64_t cdata;
+	void *cb_data;
+};
+
+struct cnxk_dpi_cdesc_data_s {
+	struct cnxk_dpi_compl_s *compl_ptr[DPI_MAX_DESC];
+	uint16_t max_cnt;
+	uint16_t head;
+	uint16_t tail;
+};
+
+struct cnxk_dpi_queue_conf {
+	uint8_t direction;
+	uint8_t src_port;
+	uint8_t dst_port;
+	uint64_t comp_ptr;
+	struct cnxk_dpi_cdesc_data_s c_desc;
+};
+
 struct cnxk_dpi_vf_s {
 	struct roc_dpi rdpi;
+	struct cnxk_dpi_queue_conf conf;
+	uint32_t num_words;
 };
 
 #endif
diff --git a/drivers/dma/cnxk/version.map b/drivers/dma/cnxk/version.map
new file mode 100644
index 0000000000..4a76d1d52d
--- /dev/null
+++ b/drivers/dma/cnxk/version.map
@@ -0,0 +1,3 @@ 
+DPDK_21 {
+	local: *;
+};