[v5,07/12] net/nfp: add flower ctrl VNIC related logics

Message ID 1659681155-16525-8-git-send-email-chaoyong.he@corigine.com (mailing list archive)
State Changes Requested, archived
Delegated to: Andrew Rybchenko
Headers
Series: preparation for the rte_flow offload of nfp PMD

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Chaoyong He Aug. 5, 2022, 6:32 a.m. UTC
  This commit adds the setup/start logic for the ctrl vNIC. This vNIC
is used by the PMD and flower firmware as a communication channel
between driver and firmware. In the case of OVS it is also used to
communicate flow statistics from hardware to the driver.

A rte_eth device is not exposed to DPDK for this vNIC as it is strictly
used internally by flower logic. Rx and Tx logic will be added later for
this vNIC.

Signed-off-by: Chaoyong He <chaoyong.he@corigine.com>
Reviewed-by: Niklas Söderlund <niklas.soderlund@corigine.com>
---
 drivers/net/nfp/flower/nfp_flower.c | 385 +++++++++++++++++++++++++++++++++++-
 drivers/net/nfp/flower/nfp_flower.h |   6 +
 2 files changed, 388 insertions(+), 3 deletions(-)
  

Comments

Andrew Rybchenko Aug. 5, 2022, 1:05 p.m. UTC | #1
On 8/5/22 09:32, Chaoyong He wrote:
> This commit adds the setup/start logic for the ctrl vNIC. This vNIC

"This commit adds" -> "Add"

> is used by the PMD and flower firmware as a communication channel
> between driver and firmware. In the case of OVS it is also used to
> communicate flow statistics from hardware to the driver.
> 
> A rte_eth device is not exposed to DPDK for this vNIC as it is strictly
> used internally by flower logic. Rx and Tx logic will be added later for
> this vNIC.
> 
> Signed-off-by: Chaoyong He <chaoyong.he@corigine.com>
> Reviewed-by: Niklas Söderlund <niklas.soderlund@corigine.com>
> ---
>   drivers/net/nfp/flower/nfp_flower.c | 385 +++++++++++++++++++++++++++++++++++-
>   drivers/net/nfp/flower/nfp_flower.h |   6 +
>   2 files changed, 388 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/nfp/flower/nfp_flower.c b/drivers/net/nfp/flower/nfp_flower.c
> index 2498020..51df504 100644
> --- a/drivers/net/nfp/flower/nfp_flower.c
> +++ b/drivers/net/nfp/flower/nfp_flower.c
> @@ -26,6 +26,10 @@
>   #define MEMPOOL_CACHE_SIZE 512
>   #define DEFAULT_FLBUF_SIZE 9216
>   
> +#define CTRL_VNIC_NB_DESC 64
> +#define CTRL_VNIC_RX_FREE_THRESH 32
> +#define CTRL_VNIC_TX_FREE_THRESH 32
> +
>   /*
>    * Simple dev ops functions for the flower PF. Because a rte_device is exposed
>    * to DPDK the flower logic also makes use of helper functions like
> @@ -543,6 +547,302 @@
>   	return ret;
>   }
>   
> +static void
> +nfp_flower_cleanup_ctrl_vnic(struct nfp_net_hw *hw)
> +{
> +	uint32_t i;
> +	struct nfp_net_rxq *rxq;
> +	struct nfp_net_txq *txq;
> +	struct rte_eth_dev *eth_dev;
> +
> +	eth_dev = hw->eth_dev;
> +
> +	for (i = 0; i < hw->max_tx_queues; i++) {
> +		txq = eth_dev->data->tx_queues[i];
> +		if (txq) {

Compare vs NULL as you do in other cases and as DPDK coding style
recommends.
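
E.g., a minimal sketch of the preferred form (the same pattern applies to the
other spots flagged below):

	txq = eth_dev->data->tx_queues[i];
	if (txq != NULL) {
		rte_free(txq->txbufs);
		rte_eth_dma_zone_free(eth_dev, "ctrl_tx_ring", i);
		rte_free(txq);
	}

and likewise "if (ret != 0)" for the integer returns.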

> +			rte_free(txq->txbufs);
> +			rte_eth_dma_zone_free(eth_dev, "ctrl_tx_ring", i);
> +			rte_free(txq);
> +		}
> +	}
> +
> +	for (i = 0; i < hw->max_rx_queues; i++) {
> +		rxq = eth_dev->data->rx_queues[i];
> +		if (rxq) {

Compare vs NULL

> +			rte_free(rxq->rxbufs);
> +			rte_eth_dma_zone_free(eth_dev, "ctrl_rx_ring", i);
> +			rte_free(rxq);
> +		}
> +	}
> +
> +	rte_free(eth_dev->data->tx_queues);
> +	rte_free(eth_dev->data->rx_queues);
> +	rte_free(eth_dev->data);
> +	rte_free(eth_dev);
> +}
> +
> +static int
> +nfp_flower_init_ctrl_vnic(struct nfp_net_hw *hw)
> +{
> +	uint32_t i;
> +	int ret = 0;
> +	uint16_t nb_desc;
> +	unsigned int numa_node;
> +	struct rte_mempool *mp;
> +	uint16_t rx_free_thresh;
> +	uint16_t tx_free_thresh;
> +	struct nfp_net_rxq *rxq;
> +	struct nfp_net_txq *txq;
> +	struct nfp_pf_dev *pf_dev;
> +	struct rte_eth_dev *eth_dev;
> +	const struct rte_memzone *tz;
> +	struct nfp_app_flower *app_flower;
> +
> +	/* Hardcoded values for now */
> +	nb_desc = CTRL_VNIC_NB_DESC;
> +	rx_free_thresh = CTRL_VNIC_RX_FREE_THRESH;

What's the point of introducing a variable that is only used
once below?
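
I.e. the constant could simply be used at the assignment site (sketch):

	rxq->rx_free_thresh = CTRL_VNIC_RX_FREE_THRESH;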

> +	tx_free_thresh = CTRL_VNIC_TX_FREE_THRESH;

Same here.

> +	numa_node = rte_socket_id();
> +
> +	/* Set up some pointers here for ease of use */
> +	pf_dev = hw->pf_dev;
> +	app_flower = NFP_APP_PRIV_TO_APP_FLOWER(pf_dev->app_priv);
> +
> +	ret = nfp_flower_init_vnic_common(hw, "ctrl_vnic");
> +	if (ret)

Compare vs 0

> +		goto done;
> +
> +	/* Allocate memory for the eth_dev of the vNIC */
> +	hw->eth_dev = rte_zmalloc("ctrl_vnic_eth_dev",

Why not rte_eth_dev_allocate()? Isn't an ethdev?
Why do you bypass the ethdev layer in this case completely and do
everything yourself?
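
For reference, the usual ethdev-layer flow in a driver looks roughly like
this (a sketch only; the port name is illustrative):

	struct rte_eth_dev *eth_dev;

	/* Allocates the port, its data area and a port_id in one step */
	eth_dev = rte_eth_dev_allocate("nfp_ctrl_vnic");
	if (eth_dev == NULL)
		return -ENOMEM;

	eth_dev->data->dev_private = hw;
	/* ... queue and ops setup ... */
	rte_eth_dev_probing_finish(eth_dev);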

> +		sizeof(struct rte_eth_dev), RTE_CACHE_LINE_SIZE);
> +	if (hw->eth_dev == NULL) {
> +		ret = -ENOMEM;
> +		goto done;
> +	}
> +
> +	/* Grab the pointer to the newly created rte_eth_dev here */
> +	eth_dev = hw->eth_dev;
> +
> +	/* Also allocate memory for the data part of the eth_dev */
> +	eth_dev->data = rte_zmalloc("ctrl_vnic_eth_dev_data",
> +		sizeof(struct rte_eth_dev_data), RTE_CACHE_LINE_SIZE);
> +	if (eth_dev->data == NULL) {
> +		ret = -ENOMEM;
> +		goto eth_dev_cleanup;
> +	}
> +
> +	eth_dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues",
> +		sizeof(eth_dev->data->rx_queues[0]) * hw->max_rx_queues,
> +		RTE_CACHE_LINE_SIZE);
> +	if (eth_dev->data->rx_queues == NULL) {
> +		PMD_INIT_LOG(ERR, "rte_zmalloc failed for ctrl vnic rx queues");
> +		ret = -ENOMEM;
> +		goto dev_data_cleanup;
> +	}
> +
> +	eth_dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues",
> +		sizeof(eth_dev->data->tx_queues[0]) * hw->max_tx_queues,
> +		RTE_CACHE_LINE_SIZE);
> +	if (eth_dev->data->tx_queues == NULL) {
> +		PMD_INIT_LOG(ERR, "rte_zmalloc failed for ctrl vnic tx queues");
> +		ret = -ENOMEM;
> +		goto rx_queue_cleanup;
> +	}
> +
> +	eth_dev->device = &pf_dev->pci_dev->device;
> +	eth_dev->data->nb_tx_queues = hw->max_tx_queues;
> +	eth_dev->data->nb_rx_queues = hw->max_rx_queues;
> +	eth_dev->data->dev_private = hw;
> +
> +	/* Create a mbuf pool for the vNIC */
> +	app_flower->ctrl_pktmbuf_pool = rte_pktmbuf_pool_create("ctrl_mbuf_pool",
> +		4 * nb_desc, 64, 0, 9216, numa_node);
> +	if (app_flower->ctrl_pktmbuf_pool == NULL) {
> +		PMD_INIT_LOG(ERR, "create mbuf pool for ctrl vnic failed");
> +		ret = -ENOMEM;
> +		goto tx_queue_cleanup;
> +	}
> +
> +	mp = app_flower->ctrl_pktmbuf_pool;
> +
> +	/* Set up the Rx queues */
> +	PMD_INIT_LOG(INFO, "Configuring flower ctrl vNIC Rx queue");
> +	for (i = 0; i < hw->max_rx_queues; i++) {
> +		/* Hardcoded number of desc to 64 */
> +		rxq = rte_zmalloc_socket("ethdev RX queue",
> +			sizeof(struct nfp_net_rxq), RTE_CACHE_LINE_SIZE,
> +			numa_node);
> +		if (rxq == NULL) {
> +			PMD_DRV_LOG(ERR, "Error allocating rxq");
> +			ret = -ENOMEM;
> +			goto rx_queue_setup_cleanup;
> +		}
> +
> +		eth_dev->data->rx_queues[i] = rxq;
> +
> +		/* Hw queues mapping based on firmware configuration */
> +		rxq->qidx = i;
> +		rxq->fl_qcidx = i * hw->stride_rx;
> +		rxq->rx_qcidx = rxq->fl_qcidx + (hw->stride_rx - 1);
> +		rxq->qcp_fl = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->fl_qcidx);
> +		rxq->qcp_rx = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->rx_qcidx);
> +
> +		/*
> +		 * Tracking mbuf size for detecting a potential mbuf overflow due to
> +		 * RX offset
> +		 */
> +		rxq->mem_pool = mp;
> +		rxq->mbuf_size = rxq->mem_pool->elt_size;
> +		rxq->mbuf_size -= (sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
> +		hw->flbufsz = rxq->mbuf_size;
> +
> +		rxq->rx_count = nb_desc;
> +		rxq->rx_free_thresh = rx_free_thresh;
> +		rxq->drop_en = 1;
> +
> +		/*
> +		 * Allocate RX ring hardware descriptors. A memzone large enough to
> +		 * handle the maximum ring size is allocated in order to allow for
> +		 * resizing in later calls to the queue setup function.
> +		 */
> +		tz = rte_eth_dma_zone_reserve(eth_dev, "ctrl_rx_ring", i,
> +			sizeof(struct nfp_net_rx_desc) * NFP_NET_MAX_RX_DESC,
> +			NFP_MEMZONE_ALIGN, numa_node);
> +		if (tz == NULL) {
> +			PMD_DRV_LOG(ERR, "Error allocating rx dma");
> +			rte_free(rxq);
> +			ret = -ENOMEM;
> +			goto rx_queue_setup_cleanup;
> +		}
> +
> +		/* Saving physical and virtual addresses for the RX ring */
> +		rxq->dma = (uint64_t)tz->iova;
> +		rxq->rxds = (struct nfp_net_rx_desc *)tz->addr;
> +
> +		/* mbuf pointers array for referencing mbufs linked to RX descriptors */
> +		rxq->rxbufs = rte_zmalloc_socket("rxq->rxbufs",
> +			sizeof(*rxq->rxbufs) * nb_desc, RTE_CACHE_LINE_SIZE,
> +			numa_node);
> +		if (rxq->rxbufs == NULL) {
> +			rte_eth_dma_zone_free(eth_dev, "ctrl_rx_ring", i);
> +			rte_free(rxq);
> +			ret = -ENOMEM;
> +			goto rx_queue_setup_cleanup;
> +		}
> +
> +		nfp_net_reset_rx_queue(rxq);
> +
> +		rxq->hw = hw;
> +
> +		/*
> +		 * Telling the HW about the physical address of the RX ring and number
> +		 * of descriptors in log2 format
> +		 */
> +		nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(i), rxq->dma);
> +		nn_cfg_writeb(hw, NFP_NET_CFG_RXR_SZ(i), rte_log2_u32(nb_desc));
> +	}
> +
> +	/* Now the Tx queues */
> +	PMD_INIT_LOG(INFO, "Configuring flower ctrl vNIC Tx queue");
> +	for (i = 0; i < hw->max_tx_queues; i++) {
> +		/* Hardcoded number of desc to 64 */
> +		/* Allocating tx queue data structure */
> +		txq = rte_zmalloc_socket("ethdev TX queue",
> +			sizeof(struct nfp_net_txq), RTE_CACHE_LINE_SIZE,
> +			numa_node);
> +		if (txq == NULL) {
> +			PMD_DRV_LOG(ERR, "Error allocating txq");
> +			ret = -ENOMEM;
> +			goto tx_queue_setup_cleanup;
> +		}
> +
> +		eth_dev->data->tx_queues[i] = txq;
> +
> +		/*
> +		 * Allocate TX ring hardware descriptors. A memzone large enough to
> +		 * handle the maximum ring size is allocated in order to allow for
> +		 * resizing in later calls to the queue setup function.
> +		 */
> +		tz = rte_eth_dma_zone_reserve(eth_dev, "ctrl_tx_ring", i,
> +			sizeof(struct nfp_net_nfd3_tx_desc) * NFP_NET_MAX_TX_DESC,
> +			NFP_MEMZONE_ALIGN, numa_node);
> +		if (tz == NULL) {
> +			PMD_DRV_LOG(ERR, "Error allocating tx dma");
> +			rte_free(txq);
> +			ret = -ENOMEM;
> +			goto tx_queue_setup_cleanup;
> +		}
> +
> +		txq->tx_count = nb_desc;
> +		txq->tx_free_thresh = tx_free_thresh;
> +		txq->tx_pthresh = DEFAULT_TX_PTHRESH;
> +		txq->tx_hthresh = DEFAULT_TX_HTHRESH;
> +		txq->tx_wthresh = DEFAULT_TX_WTHRESH;
> +
> +		/* queue mapping based on firmware configuration */
> +		txq->qidx = i;
> +		txq->tx_qcidx = i * hw->stride_tx;
> +		txq->qcp_q = hw->tx_bar + NFP_QCP_QUEUE_OFF(txq->tx_qcidx);
> +
> +		/* Saving physical and virtual addresses for the TX ring */
> +		txq->dma = (uint64_t)tz->iova;
> +		txq->txds = (struct nfp_net_nfd3_tx_desc *)tz->addr;
> +
> +		/* mbuf pointers array for referencing mbufs linked to TX descriptors */
> +		txq->txbufs = rte_zmalloc_socket("txq->txbufs",
> +			sizeof(*txq->txbufs) * nb_desc, RTE_CACHE_LINE_SIZE,
> +			numa_node);
> +		if (txq->txbufs == NULL) {
> +			rte_eth_dma_zone_free(eth_dev, "ctrl_tx_ring", i);
> +			rte_free(txq);
> +			ret = -ENOMEM;
> +			goto tx_queue_setup_cleanup;
> +		}
> +
> +		nfp_net_reset_tx_queue(txq);
> +
> +		txq->hw = hw;
> +
> +		/*
> +		 * Telling the HW about the physical address of the TX ring and number
> +		 * of descriptors in log2 format
> +		 */
> +		nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(i), txq->dma);
> +		nn_cfg_writeb(hw, NFP_NET_CFG_TXR_SZ(i), rte_log2_u32(nb_desc));
> +	}
> +
> +	return 0;
> +
> +tx_queue_setup_cleanup:
> +	for (i = 0; i < hw->max_tx_queues; i++) {
> +		txq = eth_dev->data->tx_queues[i];
> +		if (txq) {

Compare vs NULL

> +			rte_free(txq->txbufs);
> +			rte_eth_dma_zone_free(eth_dev, "ctrl_tx_ring", i);
> +			rte_free(txq);
> +		}
> +	}
> +rx_queue_setup_cleanup:
> +	for (i = 0; i < hw->max_rx_queues; i++) {
> +		rxq = eth_dev->data->rx_queues[i];
> +		if (rxq) {

Compare vs NULL

> +			rte_free(rxq->rxbufs);
> +			rte_eth_dma_zone_free(eth_dev, "ctrl_rx_ring", i);
> +			rte_free(rxq);
> +		}
> +	}
> +tx_queue_cleanup:
> +	rte_free(eth_dev->data->tx_queues);
> +rx_queue_cleanup:
> +	rte_free(eth_dev->data->rx_queues);
> +dev_data_cleanup:
> +	rte_free(eth_dev->data);
> +eth_dev_cleanup:
> +	rte_free(eth_dev);
> +done:
> +	return ret;
> +}
> +
>   static int
>   nfp_flower_start_pf_vnic(struct nfp_net_hw *hw)
>   {
> @@ -561,12 +861,57 @@
>   	return 0;
>   }
>   
> +static int
> +nfp_flower_start_ctrl_vnic(struct nfp_net_hw *hw)
> +{
> +	int ret;
> +	uint32_t update;
> +	uint32_t new_ctrl;
> +	struct rte_eth_dev *dev;
> +
> +	dev = hw->eth_dev;
> +
> +	/* Disabling queues just in case... */
> +	nfp_net_disable_queues(dev);
> +
> +	/* Enabling the required queues in the device */
> +	nfp_net_enable_queues(dev);
> +
> +	/* Writing configuration parameters in the device */
> +	nfp_net_params_setup(hw);
> +
> +	new_ctrl = NFP_NET_CFG_CTRL_ENABLE;
> +	update = NFP_NET_CFG_UPDATE_GEN | NFP_NET_CFG_UPDATE_RING |
> +		 NFP_NET_CFG_UPDATE_MSIX;
> +
> +	rte_wmb();
> +
> +	/* If an error occurs during reconfig, avoid changing the hw state */
> +	ret = nfp_net_reconfig(hw, new_ctrl, update);
> +	if (ret) {

Compare vs 0

> +		PMD_INIT_LOG(ERR, "Failed to reconfig ctrl vnic");
> +		return -EIO;
> +	}
> +
> +	hw->ctrl = new_ctrl;
> +
> +	/* Setup the freelist ring */
> +	ret = nfp_net_rx_freelist_setup(dev);
> +	if (ret) {

Compare vs 0

> +		PMD_INIT_LOG(ERR, "Error with flower ctrl vNIC freelist setup");
> +		return -EIO;
> +	}
> +
> +	return 0;
> +}
> +
>   int
>   nfp_init_app_flower(struct nfp_pf_dev *pf_dev)
>   {
>   	int ret;
>   	unsigned int numa_node;
>   	struct nfp_net_hw *pf_hw;
> +	struct nfp_net_hw *ctrl_hw;
>   	struct nfp_app_flower *app_flower;
>   
>   	numa_node = rte_socket_id();
> @@ -612,29 +957,63 @@
>   	pf_hw->pf_dev = pf_dev;
>   	pf_hw->cpp = pf_dev->cpp;
>   
> +	/* The ctrl vNIC struct comes directly after the PF one */
> +	app_flower->ctrl_hw = pf_hw + 1;
> +	ctrl_hw = app_flower->ctrl_hw;
> +
> +	/* Map the ctrl vNIC ctrl bar */
> +	ctrl_hw->ctrl_bar = nfp_rtsym_map(pf_dev->sym_tbl, "_pf0_net_ctrl_bar",
> +		32768, &ctrl_hw->ctrl_area);
> +	if (ctrl_hw->ctrl_bar == NULL) {
> +		PMD_INIT_LOG(ERR, "Cloud not map the ctrl vNIC ctrl bar");
> +		ret = -ENODEV;
> +		goto pf_cpp_area_cleanup;
> +	}
> +
> +	/* Now populate the ctrl vNIC */
> +	ctrl_hw->pf_dev = pf_dev;
> +	ctrl_hw->cpp = pf_dev->cpp;
> +
>   	ret = nfp_flower_init_pf_vnic(app_flower->pf_hw);
>   	if (ret) {
>   		PMD_INIT_LOG(ERR, "Could not initialize flower PF vNIC");
> -		goto pf_cpp_area_cleanup;
> +		goto ctrl_cpp_area_cleanup;
> +	}
> +
> +	ret = nfp_flower_init_ctrl_vnic(app_flower->ctrl_hw);
> +	if (ret) {

Compare vs 0

> +		PMD_INIT_LOG(ERR, "Could not initialize flower ctrl vNIC");
> +		goto pf_vnic_cleanup;
>   	}
>   
>   	/* Start the PF vNIC */
>   	ret = nfp_flower_start_pf_vnic(app_flower->pf_hw);
>   	if (ret) {
>   		PMD_INIT_LOG(ERR, "Could not start flower PF vNIC");
> -		goto pf_vnic_cleanup;
> +		goto ctrl_vnic_cleanup;
> +	}
> +
> +	/* Start the ctrl vNIC */
> +	ret = nfp_flower_start_ctrl_vnic(app_flower->ctrl_hw);
> +	if (ret) {

Compare vs 0

> +		PMD_INIT_LOG(ERR, "Could not start flower ctrl vNIC");
> +		goto ctrl_vnic_cleanup;
>   	}
>   
>   	/* Start up flower services */
>   	if (nfp_flower_enable_services(app_flower)) {
>   		ret = -ESRCH;
> -		goto pf_vnic_cleanup;
> +		goto ctrl_vnic_cleanup;
>   	}
>   
>   	return 0;
>   
> +ctrl_vnic_cleanup:
> +	nfp_flower_cleanup_ctrl_vnic(app_flower->ctrl_hw);
>   pf_vnic_cleanup:
>   	nfp_flower_cleanup_pf_vnic(app_flower->pf_hw);
> +ctrl_cpp_area_cleanup:
> +	nfp_cpp_area_free(ctrl_hw->ctrl_area);
>   pf_cpp_area_cleanup:
>   	nfp_cpp_area_free(pf_dev->ctrl_area);
>   eth_tbl_cleanup:
> diff --git a/drivers/net/nfp/flower/nfp_flower.h b/drivers/net/nfp/flower/nfp_flower.h
> index f6fd4eb..f11ef6d 100644
> --- a/drivers/net/nfp/flower/nfp_flower.h
> +++ b/drivers/net/nfp/flower/nfp_flower.h
> @@ -21,6 +21,12 @@ struct nfp_app_flower {
>   	/* Pointer to the PF vNIC */
>   	struct nfp_net_hw *pf_hw;
>   
> +	/* Pointer to a mempool for the ctrl vNIC */
> +	struct rte_mempool *ctrl_pktmbuf_pool;
> +
> +	/* Pointer to the ctrl vNIC */
> +	struct nfp_net_hw *ctrl_hw;
> +
>   	/* the eth table as reported by firmware */
>   	struct nfp_eth_table *nfp_eth_table;
>   };
  
Chaoyong He Aug. 8, 2022, 11:32 a.m. UTC | #2
> On 8/5/22 09:32, Chaoyong He wrote:
> [...]
> > +	/* Allocate memory for the eth_dev of the vNIC */
> > +	hw->eth_dev = rte_zmalloc("ctrl_vnic_eth_dev",
> 
> Why not rte_eth_dev_allocate()? Isn't an ethdev?
> Why do you bypass the ethdev layer in this case completely and do everything
> yourself?

Here we create an ethdev local to the nfp PMD; we don't want the user to be aware of it at all.
If we used rte_eth_dev_allocate() to create it, it would be in the 'rte_ethdev_devices[]' array, which is not what we want.

  
Stephen Hemminger Aug. 8, 2022, 2:45 p.m. UTC | #3
On Mon, 8 Aug 2022 11:32:30 +0000
Chaoyong He <chaoyong.he@corigine.com> wrote:

> > > +		goto done;
> > > +
> > > +	/* Allocate memory for the eth_dev of the vNIC */
> > > +	hw->eth_dev = rte_zmalloc("ctrl_vnic_eth_dev",  
> > 
> > Why not rte_eth_dev_allocate()? Isn't an ethdev?
> > Why do you bypass the ethdev layer in this case completely and do everything
> > yourself?  
> 
> Here we create an ethdev local to the nfp PMD; we don't want the user to be aware of it at all.
> If we used rte_eth_dev_allocate() to create it, it would be in the 'rte_ethdev_devices[]' array, which is not what we want.

Having a floating ethdev does open the code and users up to a number of potential bugs.
What is the value of port_id on that ethdev? What is the mechanism to ensure it doesn't
conflict with other ones in the system?
  
Chaoyong He Aug. 10, 2022, 1:51 a.m. UTC | #4
> On Mon, 8 Aug 2022 11:32:30 +0000
> Chaoyong He <chaoyong.he@corigine.com> wrote:
> 
> > > > +		goto done;
> > > > +
> > > > +	/* Allocate memory for the eth_dev of the vNIC */
> > > > +	hw->eth_dev = rte_zmalloc("ctrl_vnic_eth_dev",
> > >
> > > Why not rte_eth_dev_allocate()? Isn't an ethdev?
> > > Why do you bypass the ethdev layer in this case completely and do
> > > everything yourself?
> >
> > Here we create an ethdev local to the nfp PMD; we don't want the user
> > to be aware of it at all.
> > If we used rte_eth_dev_allocate() to create it, it would be in the
> > 'rte_ethdev_devices[]' array, which is not what we want.
> 
> Having a floating ethdev does open the code and users up to a number of
> potential bugs.
> What is the value of port_id on that ethdev? What is the mechanism to
> ensure it doesn't conflict with other ones in the system?

The 'port_id' is the 'Device [external] port identifier', which is related to the
'rte_ethdev_devices[]' array, I think.
Here the ethdev we created is not exposed to the user and is not in the 'rte_ethdev_devices[]'
array, so it can't be invoked by the user at all.
And we access this ethdev through a pointer in the `struct nfp_net_hw`,
so I think there should be no conflict with other ports in the system.
  
Stephen Hemminger Aug. 10, 2022, 7:39 p.m. UTC | #5
On Wed, 10 Aug 2022 01:51:55 +0000
Chaoyong He <chaoyong.he@corigine.com> wrote:

> > On Mon, 8 Aug 2022 11:32:30 +0000
> > Chaoyong He <chaoyong.he@corigine.com> wrote:
> >   
> > > > > +		goto done;
> > > > > +
> > > > > +	/* Allocate memory for the eth_dev of the vNIC */
> > > > > +	hw->eth_dev = rte_zmalloc("ctrl_vnic_eth_dev",  
> > > >
> > > > Why not rte_eth_dev_allocate()? Isn't an ethdev?
> > > > Why do you bypass the ethdev layer in this case completely and do
> > > > everything yourself?  
> > >
> > > Here we create an ethdev local to the nfp PMD; we don't want the user
> > > to be aware of it at all.
> > > If we used rte_eth_dev_allocate() to create it, it would be in the
> > > 'rte_ethdev_devices[]' array, which is not what we want.
> > 
> > Having a floating ethdev does open the code and users up to a number of
> > potential bugs.
> > What is the value of port_id on that ethdev? What is the mechanism to
> > ensure it doesn't conflict with other ones in the system?
> 
> The 'port_id' is the 'Device [external] port identifier', which is related to the
> 'rte_ethdev_devices[]' array, I think.
> Here the ethdev we created is not exposed to the user and is not in the 'rte_ethdev_devices[]'
> array, so it can't be invoked by the user at all.
> And we access this ethdev through a pointer in the `struct nfp_net_hw`,
> so I think there should be no conflict with other ports in the system.

DPDK already has a port ownership framework to deal with internal
ethernet device ports. Why was this not used?
  
Chaoyong He Aug. 11, 2022, 1:26 a.m. UTC | #6
> On Wed, 10 Aug 2022 01:51:55 +0000
> Chaoyong He <chaoyong.he@corigine.com> wrote:
> 
> > > On Mon, 8 Aug 2022 11:32:30 +0000
> > > Chaoyong He <chaoyong.he@corigine.com> wrote:
> > >
> > > > > > +		goto done;
> > > > > > +
> > > > > > +	/* Allocate memory for the eth_dev of the vNIC */
> > > > > > +	hw->eth_dev = rte_zmalloc("ctrl_vnic_eth_dev",
> > > > >
> > > > > Why not rte_eth_dev_allocate()? Isn't an ethdev?
> > > > > Why do you bypass the ethdev layer in this case completely and do
> > > > > everything yourself?
> > > >
> > > > Here we create an ethdev local to the nfp PMD; we don't want the
> > > > user to be aware of it at all.
> > > > If we used rte_eth_dev_allocate() to create it, it would be in the
> > > > 'rte_ethdev_devices[]' array, which is not what we want.
> > >
> > > Having a floating ethdev does open the code and users up to a number
> > > of potential bugs.
> > > What is the value of port_id on that ethdev? What is the mechanism
> > > to ensure it doesn't conflict with other ones in the system?
> >
> > The 'port_id' is the 'Device [external] port identifier', which is
> > related to the 'rte_ethdev_devices[]' array, I think.
> > Here the ethdev we created is not exposed to the user and is not in the
> > 'rte_ethdev_devices[]' array, so it can't be invoked by the user at all.
> > And we access this ethdev through a pointer in the `struct nfp_net_hw`,
> > so I think there should be no conflict with other ports in the system.
> 
> DPDK already has a port ownership framework to deal with internal ethernet
> device ports. Why was this not used?

Sorry, I had no knowledge of this framework before. Any documentation link or
explanation of this framework would be greatly appreciated. Thanks!
  
Stephen Hemminger Aug. 11, 2022, 4:24 a.m. UTC | #7
On Thu, 11 Aug 2022 01:26:49 +0000
Chaoyong He <chaoyong.he@corigine.com> wrote:

> > > The 'port_id' is the 'Device [external] port identifier', which is
> > > related to the 'rte_ethdev_devices[]' array, I think.
> > > Here the ethdev we created is not exposed to the user and is not in the
> > > 'rte_ethdev_devices[]' array, so it can't be invoked by the user at all.
> > > And we access this ethdev through a pointer in the `struct nfp_net_hw`,
> > > so I think there should be no conflict with other ports in the system.
> > 
> > DPDK already has a port ownership framework to deal with internal ethernet
> > device ports. Why was this not used?  
> 
> Sorry, I had no knowledge of this framework before. Any documentation link or
> explanation of this framework would be greatly appreciated. Thanks!

It is part of ethdev https://doc.dpdk.org/api/rte__ethdev_8h.html

See rte_eth_dev_owner_new, rte_eth_dev_owner_set, etc
https://doc.dpdk.org/api/rte__ethdev_8h.html#ad6817cc801bf0faa566f52d382214457
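
For illustration, claiming an internal port would look roughly like this
(a sketch only; error handling omitted, "internal_port_id" and the owner
name are placeholders):

	struct rte_eth_dev_owner owner = { 0 };
	uint64_t owner_id;

	/* Get a unique owner id and claim the internal port with it */
	rte_eth_dev_owner_new(&owner_id);
	owner.id = owner_id;
	snprintf(owner.name, sizeof(owner.name), "nfp_flower");
	rte_eth_dev_owner_set(internal_port_id, &owner);

	/*
	 * RTE_ETH_FOREACH_DEV() only iterates over ports with no owner,
	 * so the claimed port is no longer visible to generic applications.
	 */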
  
Chaoyong He Aug. 11, 2022, 6:31 a.m. UTC | #8
> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Thursday, August 11, 2022 12:25 PM
> To: Chaoyong He <chaoyong.he@corigine.com>
> Cc: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>; Niklas
> Soderlund <niklas.soderlund@corigine.com>; dev@dpdk.org
> Subject: Re: [PATCH v5 07/12] net/nfp: add flower ctrl VNIC related logics
> 
> On Thu, 11 Aug 2022 01:26:49 +0000
> Chaoyong He <chaoyong.he@corigine.com> wrote:
> 
> > > > The 'port_id' is the 'Device [external] port identifier', which is
> > > > related to the 'rte_ethdev_devices[]' array, I think.
> > > > Here the ethdev we created is not exposed to the user and is not in
> > > > the 'rte_ethdev_devices[]' array, so it can't be invoked by the user
> > > > at all.
> > > > And we access this ethdev through a pointer in the `struct
> > > > nfp_net_hw`, so I think there should be no conflict with other ports
> > > > in the system.
> > >
> > > DPDK already has a port ownership framework to deal with internal
> > > ethernet device ports. Why was this not used?
> >
> > Sorry, I had no knowledge of this framework before. Any documentation
> > link or explanation of this framework would be greatly appreciated. Thanks!
> 
> It is part of ethdev https://doc.dpdk.org/api/rte__ethdev_8h.html
> 
> See rte_eth_dev_owner_new, rte_eth_dev_owner_set, etc
> https://doc.dpdk.org/api/rte__ethdev_8h.html#ad6817cc801bf0faa566f52d3
> 82214457

Thank you very much!

If the app uses the rte_eth_dev_owner_* APIs to check ownership first, that does
protect the internal ethdev ports.
But right now, ovs-dpdk doesn't seem to use these APIs at all, and it can use a 'port_id' to
get any ethdev port in the rte_ethdev_devices[] array.
So maybe it's a good idea to keep our original logic and keep an eye on this area; once
ovs-dpdk uses the rte_eth_dev_owner_* APIs, we'll update the logic here accordingly.

Thanks again!
  
Stephen Hemminger Aug. 11, 2022, 3:07 p.m. UTC | #9
On Thu, 11 Aug 2022 06:31:31 +0000
Chaoyong He <chaoyong.he@corigine.com> wrote:

> > -----Original Message-----
> > From: Stephen Hemminger <stephen@networkplumber.org>
> > Sent: Thursday, August 11, 2022 12:25 PM
> > To: Chaoyong He <chaoyong.he@corigine.com>
> > Cc: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>; Niklas
> > Soderlund <niklas.soderlund@corigine.com>; dev@dpdk.org
> > Subject: Re: [PATCH v5 07/12] net/nfp: add flower ctrl VNIC related logics
> > 
> > On Thu, 11 Aug 2022 01:26:49 +0000
> > Chaoyong He <chaoyong.he@corigine.com> wrote:
> >   
> > > > > The 'port_id' is the 'Device [external] port identifier', which is
> > > > > related to the 'rte_ethdev_devices[]' array, I think.
> > > > > Here the ethdev we created is not exposed to the user and is not in
> > > > > the 'rte_ethdev_devices[]' array, so it can't be invoked by the user
> > > > > at all.
> > > > > And we access this ethdev through a pointer in the `struct
> > > > > nfp_net_hw`, so I think there should be no conflict with other ports
> > > > > in the system.
> > > >
> > > > DPDK already has a port ownership framework to deal with internal
> > > > ethernet device ports. Why was this not used?  
> > >
> > > Sorry, I had no knowledge of this framework before. Any documentation
> > > link or explanation of this framework would be greatly appreciated. Thanks!
> > 
> > It is part of ethdev https://doc.dpdk.org/api/rte__ethdev_8h.html
> > 
> > See rte_eth_dev_owner_new, rte_eth_dev_owner_set, etc
> > https://doc.dpdk.org/api/rte__ethdev_8h.html#ad6817cc801bf0faa566f52d3
> > 82214457  
> 
> Thank you very much!
> 
> If the app uses the rte_eth_dev_owner_* APIs to check ownership first, that does
> protect the internal ethdev ports.
> But right now, ovs-dpdk doesn't seem to use these APIs at all, and it can use a 'port_id' to
> get any ethdev port in the rte_ethdev_devices[] array.
> So maybe it's a good idea to keep our original logic and keep an eye on this area; once
> ovs-dpdk uses the rte_eth_dev_owner_* APIs, we'll update the logic here accordingly.
> 
> Thanks again!

Once a device is owned by something, it no longer shows up in the FOREACH and other
iterators, so ovs-dpdk should be OK. This mechanism is how the bonding, failsafe, and netvsc
drivers handle sub-devices. Therefore OVS should be smart enough to handle it.
  

Patch

diff --git a/drivers/net/nfp/flower/nfp_flower.c b/drivers/net/nfp/flower/nfp_flower.c
index 2498020..51df504 100644
--- a/drivers/net/nfp/flower/nfp_flower.c
+++ b/drivers/net/nfp/flower/nfp_flower.c
@@ -26,6 +26,10 @@ 
 #define MEMPOOL_CACHE_SIZE 512
 #define DEFAULT_FLBUF_SIZE 9216
 
+#define CTRL_VNIC_NB_DESC 64
+#define CTRL_VNIC_RX_FREE_THRESH 32
+#define CTRL_VNIC_TX_FREE_THRESH 32
+
 /*
  * Simple dev ops functions for the flower PF. Because a rte_device is exposed
  * to DPDK the flower logic also makes use of helper functions like
@@ -543,6 +547,302 @@ 
 	return ret;
 }
 
+static void
+nfp_flower_cleanup_ctrl_vnic(struct nfp_net_hw *hw)
+{
+	uint32_t i;
+	struct nfp_net_rxq *rxq;
+	struct nfp_net_txq *txq;
+	struct rte_eth_dev *eth_dev;
+
+	eth_dev = hw->eth_dev;
+
+	for (i = 0; i < hw->max_tx_queues; i++) {
+		txq = eth_dev->data->tx_queues[i];
+		if (txq) {
+			rte_free(txq->txbufs);
+			rte_eth_dma_zone_free(eth_dev, "ctrl_tx_ring", i);
+			rte_free(txq);
+		}
+	}
+
+	for (i = 0; i < hw->max_rx_queues; i++) {
+		rxq = eth_dev->data->rx_queues[i];
+		if (rxq) {
+			rte_free(rxq->rxbufs);
+			rte_eth_dma_zone_free(eth_dev, "ctrl_rx_ring", i);
+			rte_free(rxq);
+		}
+	}
+
+	rte_free(eth_dev->data->tx_queues);
+	rte_free(eth_dev->data->rx_queues);
+	rte_free(eth_dev->data);
+	rte_free(eth_dev);
+}
+
+static int
+nfp_flower_init_ctrl_vnic(struct nfp_net_hw *hw)
+{
+	uint32_t i;
+	int ret = 0;
+	uint16_t nb_desc;
+	unsigned int numa_node;
+	struct rte_mempool *mp;
+	uint16_t rx_free_thresh;
+	uint16_t tx_free_thresh;
+	struct nfp_net_rxq *rxq;
+	struct nfp_net_txq *txq;
+	struct nfp_pf_dev *pf_dev;
+	struct rte_eth_dev *eth_dev;
+	const struct rte_memzone *tz;
+	struct nfp_app_flower *app_flower;
+
+	/* Hardcoded values for now */
+	nb_desc = CTRL_VNIC_NB_DESC;
+	rx_free_thresh = CTRL_VNIC_RX_FREE_THRESH;
+	tx_free_thresh = CTRL_VNIC_TX_FREE_THRESH;
+	numa_node = rte_socket_id();
+
+	/* Set up some pointers here for ease of use */
+	pf_dev = hw->pf_dev;
+	app_flower = NFP_APP_PRIV_TO_APP_FLOWER(pf_dev->app_priv);
+
+	ret = nfp_flower_init_vnic_common(hw, "ctrl_vnic");
+	if (ret)
+		goto done;
+
+	/* Allocate memory for the eth_dev of the vNIC */
+	hw->eth_dev = rte_zmalloc("ctrl_vnic_eth_dev",
+		sizeof(struct rte_eth_dev), RTE_CACHE_LINE_SIZE);
+	if (hw->eth_dev == NULL) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	/* Grab the pointer to the newly created rte_eth_dev here */
+	eth_dev = hw->eth_dev;
+
+	/* Also allocate memory for the data part of the eth_dev */
+	eth_dev->data = rte_zmalloc("ctrl_vnic_eth_dev_data",
+		sizeof(struct rte_eth_dev_data), RTE_CACHE_LINE_SIZE);
+	if (eth_dev->data == NULL) {
+		ret = -ENOMEM;
+		goto eth_dev_cleanup;
+	}
+
+	eth_dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues",
+		sizeof(eth_dev->data->rx_queues[0]) * hw->max_rx_queues,
+		RTE_CACHE_LINE_SIZE);
+	if (eth_dev->data->rx_queues == NULL) {
+		PMD_INIT_LOG(ERR, "rte_zmalloc failed for ctrl vnic rx queues");
+		ret = -ENOMEM;
+		goto dev_data_cleanup;
+	}
+
+	eth_dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues",
+		sizeof(eth_dev->data->tx_queues[0]) * hw->max_tx_queues,
+		RTE_CACHE_LINE_SIZE);
+	if (eth_dev->data->tx_queues == NULL) {
+		PMD_INIT_LOG(ERR, "rte_zmalloc failed for ctrl vnic tx queues");
+		ret = -ENOMEM;
+		goto rx_queue_cleanup;
+	}
+
+	eth_dev->device = &pf_dev->pci_dev->device;
+	eth_dev->data->nb_tx_queues = hw->max_tx_queues;
+	eth_dev->data->nb_rx_queues = hw->max_rx_queues;
+	eth_dev->data->dev_private = hw;
+
+	/* Create a mbuf pool for the vNIC */
+	app_flower->ctrl_pktmbuf_pool = rte_pktmbuf_pool_create("ctrl_mbuf_pool",
+		4 * nb_desc, 64, 0, 9216, numa_node);
+	if (app_flower->ctrl_pktmbuf_pool == NULL) {
+		PMD_INIT_LOG(ERR, "create mbuf pool for ctrl vnic failed");
+		ret = -ENOMEM;
+		goto tx_queue_cleanup;
+	}
+
+	mp = app_flower->ctrl_pktmbuf_pool;
+
+	/* Set up the Rx queues */
+	PMD_INIT_LOG(INFO, "Configuring flower ctrl vNIC Rx queue");
+	for (i = 0; i < hw->max_rx_queues; i++) {
+		/* Hardcoded number of desc to 64 */
+		rxq = rte_zmalloc_socket("ethdev RX queue",
+			sizeof(struct nfp_net_rxq), RTE_CACHE_LINE_SIZE,
+			numa_node);
+		if (rxq == NULL) {
+			PMD_DRV_LOG(ERR, "Error allocating rxq");
+			ret = -ENOMEM;
+			goto rx_queue_setup_cleanup;
+		}
+
+		eth_dev->data->rx_queues[i] = rxq;
+
+		/* Hw queues mapping based on firmware configuration */
+		rxq->qidx = i;
+		rxq->fl_qcidx = i * hw->stride_rx;
+		rxq->rx_qcidx = rxq->fl_qcidx + (hw->stride_rx - 1);
+		rxq->qcp_fl = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->fl_qcidx);
+		rxq->qcp_rx = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->rx_qcidx);
+
+		/*
+		 * Tracking mbuf size for detecting a potential mbuf overflow due to
+		 * RX offset
+		 */
+		rxq->mem_pool = mp;
+		rxq->mbuf_size = rxq->mem_pool->elt_size;
+		rxq->mbuf_size -= (sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
+		hw->flbufsz = rxq->mbuf_size;
+
+		rxq->rx_count = nb_desc;
+		rxq->rx_free_thresh = rx_free_thresh;
+		rxq->drop_en = 1;
+
+		/*
+		 * Allocate RX ring hardware descriptors. A memzone large enough to
+		 * handle the maximum ring size is allocated in order to allow for
+		 * resizing in later calls to the queue setup function.
+		 */
+		tz = rte_eth_dma_zone_reserve(eth_dev, "ctrl_rx_ring", i,
+			sizeof(struct nfp_net_rx_desc) * NFP_NET_MAX_RX_DESC,
+			NFP_MEMZONE_ALIGN, numa_node);
+		if (tz == NULL) {
+			PMD_DRV_LOG(ERR, "Error allocating rx dma");
+			rte_free(rxq);
+			ret = -ENOMEM;
+			goto rx_queue_setup_cleanup;
+		}
+
+		/* Saving physical and virtual addresses for the RX ring */
+		rxq->dma = (uint64_t)tz->iova;
+		rxq->rxds = (struct nfp_net_rx_desc *)tz->addr;
+
+		/* mbuf pointers array for referencing mbufs linked to RX descriptors */
+		rxq->rxbufs = rte_zmalloc_socket("rxq->rxbufs",
+			sizeof(*rxq->rxbufs) * nb_desc, RTE_CACHE_LINE_SIZE,
+			numa_node);
+		if (rxq->rxbufs == NULL) {
+			rte_eth_dma_zone_free(eth_dev, "ctrl_rx_ring", i);
+			rte_free(rxq);
+			ret = -ENOMEM;
+			goto rx_queue_setup_cleanup;
+		}
+
+		nfp_net_reset_rx_queue(rxq);
+
+		rxq->hw = hw;
+
+		/*
+		 * Telling the HW about the physical address of the RX ring and number
+		 * of descriptors in log2 format
+		 */
+		nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(i), rxq->dma);
+		nn_cfg_writeb(hw, NFP_NET_CFG_RXR_SZ(i), rte_log2_u32(nb_desc));
+	}
+
+	/* Now the Tx queues */
+	PMD_INIT_LOG(INFO, "Configuring flower ctrl vNIC Tx queue");
+	for (i = 0; i < hw->max_tx_queues; i++) {
+		/* Hardcoded number of desc to 64 */
+		/* Allocating tx queue data structure */
+		txq = rte_zmalloc_socket("ethdev TX queue",
+			sizeof(struct nfp_net_txq), RTE_CACHE_LINE_SIZE,
+			numa_node);
+		if (txq == NULL) {
+			PMD_DRV_LOG(ERR, "Error allocating txq");
+			ret = -ENOMEM;
+			goto tx_queue_setup_cleanup;
+		}
+
+		eth_dev->data->tx_queues[i] = txq;
+
+		/*
+		 * Allocate TX ring hardware descriptors. A memzone large enough to
+		 * handle the maximum ring size is allocated in order to allow for
+		 * resizing in later calls to the queue setup function.
+		 */
+		tz = rte_eth_dma_zone_reserve(eth_dev, "ctrl_tx_ring", i,
+			sizeof(struct nfp_net_nfd3_tx_desc) * NFP_NET_MAX_TX_DESC,
+			NFP_MEMZONE_ALIGN, numa_node);
+		if (tz == NULL) {
+			PMD_DRV_LOG(ERR, "Error allocating tx dma");
+			rte_free(txq);
+			ret = -ENOMEM;
+			goto tx_queue_setup_cleanup;
+		}
+
+		txq->tx_count = nb_desc;
+		txq->tx_free_thresh = tx_free_thresh;
+		txq->tx_pthresh = DEFAULT_TX_PTHRESH;
+		txq->tx_hthresh = DEFAULT_TX_HTHRESH;
+		txq->tx_wthresh = DEFAULT_TX_WTHRESH;
+
+		/* queue mapping based on firmware configuration */
+		txq->qidx = i;
+		txq->tx_qcidx = i * hw->stride_tx;
+		txq->qcp_q = hw->tx_bar + NFP_QCP_QUEUE_OFF(txq->tx_qcidx);
+
+		/* Saving physical and virtual addresses for the TX ring */
+		txq->dma = (uint64_t)tz->iova;
+		txq->txds = (struct nfp_net_nfd3_tx_desc *)tz->addr;
+
+		/* mbuf pointers array for referencing mbufs linked to TX descriptors */
+		txq->txbufs = rte_zmalloc_socket("txq->txbufs",
+			sizeof(*txq->txbufs) * nb_desc, RTE_CACHE_LINE_SIZE,
+			numa_node);
+		if (txq->txbufs == NULL) {
+			rte_eth_dma_zone_free(eth_dev, "ctrl_tx_ring", i);
+			rte_free(txq);
+			ret = -ENOMEM;
+			goto tx_queue_setup_cleanup;
+		}
+
+		nfp_net_reset_tx_queue(txq);
+
+		txq->hw = hw;
+
+		/*
+		 * Telling the HW about the physical address of the TX ring and number
+		 * of descriptors in log2 format
+		 */
+		nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(i), txq->dma);
+		nn_cfg_writeb(hw, NFP_NET_CFG_TXR_SZ(i), rte_log2_u32(nb_desc));
+	}
+
+	return 0;
+
+tx_queue_setup_cleanup:
+	for (i = 0; i < hw->max_tx_queues; i++) {
+		txq = eth_dev->data->tx_queues[i];
+		if (txq) {
+			rte_free(txq->txbufs);
+			rte_eth_dma_zone_free(eth_dev, "ctrl_tx_ring", i);
+			rte_free(txq);
+		}
+	}
+rx_queue_setup_cleanup:
+	for (i = 0; i < hw->max_rx_queues; i++) {
+		rxq = eth_dev->data->rx_queues[i];
+		if (rxq) {
+			rte_free(rxq->rxbufs);
+			rte_eth_dma_zone_free(eth_dev, "ctrl_rx_ring", i);
+			rte_free(rxq);
+		}
+	}
+tx_queue_cleanup:
+	rte_free(eth_dev->data->tx_queues);
+rx_queue_cleanup:
+	rte_free(eth_dev->data->rx_queues);
+dev_data_cleanup:
+	rte_free(eth_dev->data);
+eth_dev_cleanup:
+	rte_free(eth_dev);
+done:
+	return ret;
+}
+
 static int
 nfp_flower_start_pf_vnic(struct nfp_net_hw *hw)
 {
@@ -561,12 +861,57 @@ 
 	return 0;
 }
 
+static int
+nfp_flower_start_ctrl_vnic(struct nfp_net_hw *hw)
+{
+	int ret;
+	uint32_t update;
+	uint32_t new_ctrl;
+	struct rte_eth_dev *dev;
+
+	dev = hw->eth_dev;
+
+	/* Disabling queues just in case... */
+	nfp_net_disable_queues(dev);
+
+	/* Enabling the required queues in the device */
+	nfp_net_enable_queues(dev);
+
+	/* Writing configuration parameters in the device */
+	nfp_net_params_setup(hw);
+
+	new_ctrl = NFP_NET_CFG_CTRL_ENABLE;
+	update = NFP_NET_CFG_UPDATE_GEN | NFP_NET_CFG_UPDATE_RING |
+		 NFP_NET_CFG_UPDATE_MSIX;
+
+	rte_wmb();
+
+	/* If an error occurs during reconfig, avoid changing the hw state */
+	ret = nfp_net_reconfig(hw, new_ctrl, update);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Failed to reconfig ctrl vnic");
+		return -EIO;
+	}
+
+	hw->ctrl = new_ctrl;
+
+	/* Setup the freelist ring */
+	ret = nfp_net_rx_freelist_setup(dev);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Error with flower ctrl vNIC freelist setup");
+		return -EIO;
+	}
+
+	return 0;
+}
+
 int
 nfp_init_app_flower(struct nfp_pf_dev *pf_dev)
 {
 	int ret;
 	unsigned int numa_node;
 	struct nfp_net_hw *pf_hw;
+	struct nfp_net_hw *ctrl_hw;
 	struct nfp_app_flower *app_flower;
 
 	numa_node = rte_socket_id();
@@ -612,29 +957,63 @@ 
 	pf_hw->pf_dev = pf_dev;
 	pf_hw->cpp = pf_dev->cpp;
 
+	/* The ctrl vNIC struct comes directly after the PF one */
+	app_flower->ctrl_hw = pf_hw + 1;
+	ctrl_hw = app_flower->ctrl_hw;
+
+	/* Map the ctrl vNIC ctrl bar */
+	ctrl_hw->ctrl_bar = nfp_rtsym_map(pf_dev->sym_tbl, "_pf0_net_ctrl_bar",
+		32768, &ctrl_hw->ctrl_area);
+	if (ctrl_hw->ctrl_bar == NULL) {
+		PMD_INIT_LOG(ERR, "Cloud not map the ctrl vNIC ctrl bar");
+		ret = -ENODEV;
+		goto pf_cpp_area_cleanup;
+	}
+
+	/* Now populate the ctrl vNIC */
+	ctrl_hw->pf_dev = pf_dev;
+	ctrl_hw->cpp = pf_dev->cpp;
+
 	ret = nfp_flower_init_pf_vnic(app_flower->pf_hw);
 	if (ret) {
 		PMD_INIT_LOG(ERR, "Could not initialize flower PF vNIC");
-		goto pf_cpp_area_cleanup;
+		goto ctrl_cpp_area_cleanup;
+	}
+
+	ret = nfp_flower_init_ctrl_vnic(app_flower->ctrl_hw);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Could not initialize flower ctrl vNIC");
+		goto pf_vnic_cleanup;
 	}
 
 	/* Start the PF vNIC */
 	ret = nfp_flower_start_pf_vnic(app_flower->pf_hw);
 	if (ret) {
 		PMD_INIT_LOG(ERR, "Could not start flower PF vNIC");
-		goto pf_vnic_cleanup;
+		goto ctrl_vnic_cleanup;
+	}
+
+	/* Start the ctrl vNIC */
+	ret = nfp_flower_start_ctrl_vnic(app_flower->ctrl_hw);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Could not start flower ctrl vNIC");
+		goto ctrl_vnic_cleanup;
 	}
 
 	/* Start up flower services */
 	if (nfp_flower_enable_services(app_flower)) {
 		ret = -ESRCH;
-		goto pf_vnic_cleanup;
+		goto ctrl_vnic_cleanup;
 	}
 
 	return 0;
 
+ctrl_vnic_cleanup:
+	nfp_flower_cleanup_ctrl_vnic(app_flower->ctrl_hw);
 pf_vnic_cleanup:
 	nfp_flower_cleanup_pf_vnic(app_flower->pf_hw);
+ctrl_cpp_area_cleanup:
+	nfp_cpp_area_free(ctrl_hw->ctrl_area);
 pf_cpp_area_cleanup:
 	nfp_cpp_area_free(pf_dev->ctrl_area);
 eth_tbl_cleanup:
diff --git a/drivers/net/nfp/flower/nfp_flower.h b/drivers/net/nfp/flower/nfp_flower.h
index f6fd4eb..f11ef6d 100644
--- a/drivers/net/nfp/flower/nfp_flower.h
+++ b/drivers/net/nfp/flower/nfp_flower.h
@@ -21,6 +21,12 @@  struct nfp_app_flower {
 	/* Pointer to the PF vNIC */
 	struct nfp_net_hw *pf_hw;
 
+	/* Pointer to a mempool for the ctrl vNIC */
+	struct rte_mempool *ctrl_pktmbuf_pool;
+
+	/* Pointer to the ctrl vNIC */
+	struct nfp_net_hw *ctrl_hw;
+
 	/* the eth table as reported by firmware */
 	struct nfp_eth_table *nfp_eth_table;
 };