[dpdk-dev,v4,02/12] vhost: support multiple queues in virtio dev

Message ID 1439366567-3402-3-git-send-email-changchun.ouyang@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Ouyang Changchun Aug. 12, 2015, 8:02 a.m. UTC
  Each virtio device could have multiple queues, say 2 or 4, at most 8.
Enabling this feature allows a virtio device/port on the guest to use
different vCPUs to receive/transmit packets from/to each queue.

In multiple-queue mode, virtio device readiness means all queues of
this virtio device are ready; cleaning up/destroying a virtio device also
requires clearing all queues belonging to it.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
Changes in v4:
  - rebase and fix conflicts
  - resolve comments
  - init each virtq pair if mq is on

Changes in v3:
  - fix coding style
  - check virtqueue idx validity

Changes in v2:
  - remove the q_num_set api
  - add the qp_num_get api
  - determine the queue pair num from qemu message
  - rework for reset owner message handler
  - dynamically alloc mem for dev virtqueue
  - queue pair num could be 0x8000
  - fix checkpatch errors

 lib/librte_vhost/rte_virtio_net.h             |  10 +-
 lib/librte_vhost/vhost-net.h                  |   1 +
 lib/librte_vhost/vhost_rxtx.c                 |  52 +++++---
 lib/librte_vhost/vhost_user/vhost-net-user.c  |   4 +-
 lib/librte_vhost/vhost_user/virtio-net-user.c |  76 +++++++++---
 lib/librte_vhost/vhost_user/virtio-net-user.h |   2 +
 lib/librte_vhost/virtio-net.c                 | 165 +++++++++++++++++---------
 7 files changed, 222 insertions(+), 88 deletions(-)
  

Comments

Flavio Leitner Aug. 13, 2015, 12:52 p.m. UTC | #1
On Wed, Aug 12, 2015 at 04:02:37PM +0800, Ouyang Changchun wrote:
> Each virtio device could have multiple queues, say 2 or 4, at most 8.
> Enabling this feature allows virtio device/port on guest has the ability to
> use different vCPU to receive/transmit packets from/to each queue.
> 
> In multiple queues mode, virtio device readiness means all queues of
> this virtio device are ready, cleanup/destroy a virtio device also
> requires clearing all queues belong to it.
> 
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
> Changes in v4:
>   - rebase and fix conflicts
>   - resolve comments
>   - init each virtq pair if mq is on
> 
> Changes in v3:
>   - fix coding style
>   - check virtqueue idx validity
> 
> Changes in v2:
>   - remove the q_num_set api
>   - add the qp_num_get api
>   - determine the queue pair num from qemu message
>   - rework for reset owner message handler
>   - dynamically alloc mem for dev virtqueue
>   - queue pair num could be 0x8000
>   - fix checkpatch errors
> 
>  lib/librte_vhost/rte_virtio_net.h             |  10 +-
>  lib/librte_vhost/vhost-net.h                  |   1 +
>  lib/librte_vhost/vhost_rxtx.c                 |  52 +++++---
>  lib/librte_vhost/vhost_user/vhost-net-user.c  |   4 +-
>  lib/librte_vhost/vhost_user/virtio-net-user.c |  76 +++++++++---
>  lib/librte_vhost/vhost_user/virtio-net-user.h |   2 +
>  lib/librte_vhost/virtio-net.c                 | 165 +++++++++++++++++---------
>  7 files changed, 222 insertions(+), 88 deletions(-)
> 
> diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
> index b9bf320..d9e887f 100644
> --- a/lib/librte_vhost/rte_virtio_net.h
> +++ b/lib/librte_vhost/rte_virtio_net.h
> @@ -59,7 +59,6 @@ struct rte_mbuf;
>  /* Backend value set by guest. */
>  #define VIRTIO_DEV_STOPPED -1
>  
> -
>  /* Enum for virtqueue management. */
>  enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
>  
> @@ -96,13 +95,14 @@ struct vhost_virtqueue {
>   * Device structure contains all configuration information relating to the device.
>   */
>  struct virtio_net {
> -	struct vhost_virtqueue	*virtqueue[VIRTIO_QNUM];	/**< Contains all virtqueue information. */
>  	struct virtio_memory	*mem;		/**< QEMU memory and memory region information. */
> +	struct vhost_virtqueue	**virtqueue;    /**< Contains all virtqueue information. */
>  	uint64_t		features;	/**< Negotiated feature set. */
>  	uint64_t		device_fh;	/**< device identifier. */
>  	uint32_t		flags;		/**< Device flags. Only used to check if device is running on data core. */
>  #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
>  	char			ifname[IF_NAME_SZ];	/**< Name of the tap device or socket path. */
> +	uint32_t		virt_qp_nb;
>  	void			*priv;		/**< private context */
>  } __rte_cache_aligned;
>  
> @@ -235,4 +235,10 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>  uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>  	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
>  
> +/**
> + * This function get the queue pair number of one vhost device.
> + * @return
> + *  num of queue pair of specified virtio device.
> + */
> +uint16_t rte_vhost_qp_num_get(struct virtio_net *dev);
>  #endif /* _VIRTIO_NET_H_ */
> diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
> index c69b60b..7dff14d 100644
> --- a/lib/librte_vhost/vhost-net.h
> +++ b/lib/librte_vhost/vhost-net.h
> @@ -115,4 +115,5 @@ struct vhost_net_device_ops {
>  
>  
>  struct vhost_net_device_ops const *get_virtio_net_callbacks(void);
> +int alloc_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx);
>  #endif /* _VHOST_NET_CDEV_H_ */
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index 0d07338..db4ad88 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -43,6 +43,18 @@
>  #define MAX_PKT_BURST 32
>  
>  /**
> + * Check the virtqueue idx validility,
> + * return 1 if pass, otherwise 0.
> + */
> +static inline uint8_t __attribute__((always_inline))
> +check_virtqueue_idx(uint16_t virtq_idx, uint8_t is_tx, uint32_t virtq_num)
> +{
> +	if ((is_tx ^ (virtq_idx & 0x1)) || (virtq_idx >= virtq_num))
> +		return 0;
> +	return 1;
> +}
> +
> +/**
>   * This function adds buffers to the virtio devices RX virtqueue. Buffers can
>   * be received from the physical port or from another virtio device. A packet
>   * count is returned to indicate the number of packets that are succesfully
> @@ -68,12 +80,15 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	uint8_t success = 0;
>  
>  	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
> -	if (unlikely(queue_id != VIRTIO_RXQ)) {
> -		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
> +	if (unlikely(check_virtqueue_idx(queue_id, 0,
> +		VIRTIO_QNUM * dev->virt_qp_nb) == 0)) {
> +		RTE_LOG(ERR, VHOST_DATA,
> +			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
> +			 __func__, dev->device_fh, queue_id);
>  		return 0;
>  	}
>  
> -	vq = dev->virtqueue[VIRTIO_RXQ];
> +	vq = dev->virtqueue[queue_id];
>  	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
>  
>  	/*
> @@ -235,8 +250,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  }
>  
>  static inline uint32_t __attribute__((always_inline))
> -copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
> -	uint16_t res_end_idx, struct rte_mbuf *pkt)
> +copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t queue_id,
> +	uint16_t res_base_idx, uint16_t res_end_idx,
> +	struct rte_mbuf *pkt)
>  {
>  	uint32_t vec_idx = 0;
>  	uint32_t entry_success = 0;
> @@ -264,8 +280,9 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
>  	 * Convert from gpa to vva
>  	 * (guest physical addr -> vhost virtual addr)
>  	 */
> -	vq = dev->virtqueue[VIRTIO_RXQ];
> -	vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
> +	vq = dev->virtqueue[queue_id];
> +	vb_addr =
> +		gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
>  	vb_hdr_addr = vb_addr;
>  
>  	/* Prefetch buffer address. */
> @@ -464,11 +481,15 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
>  
>  	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
>  		dev->device_fh);
> -	if (unlikely(queue_id != VIRTIO_RXQ)) {
> -		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
> +	if (unlikely(check_virtqueue_idx(queue_id, 0,
> +		VIRTIO_QNUM * dev->virt_qp_nb) == 0)) {
> +		RTE_LOG(ERR, VHOST_DATA,
> +			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
> +			 __func__, dev->device_fh, queue_id);
> +		return 0;
>  	}
>  
> -	vq = dev->virtqueue[VIRTIO_RXQ];
> +	vq = dev->virtqueue[queue_id];
>  	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
>  
>  	if (count == 0)
> @@ -509,7 +530,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
>  							res_cur_idx);
>  		} while (success == 0);
>  
> -		entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
> +		entry_success = copy_from_mbuf_to_vring(dev, queue_id, res_base_idx,
>  			res_cur_idx, pkts[pkt_idx]);
>  
>  		rte_compiler_barrier();
> @@ -559,12 +580,15 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>  	uint16_t free_entries, entry_success = 0;
>  	uint16_t avail_idx;
>  
> -	if (unlikely(queue_id != VIRTIO_TXQ)) {
> -		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
> +	if (unlikely(check_virtqueue_idx(queue_id, 1,
> +		VIRTIO_QNUM * dev->virt_qp_nb) == 0)) {
> +		RTE_LOG(ERR, VHOST_DATA,
> +			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
> +			 __func__, dev->device_fh, queue_id);
>  		return 0;
>  	}
>  
> -	vq = dev->virtqueue[VIRTIO_TXQ];
> +	vq = dev->virtqueue[queue_id];
>  	avail_idx =  *((volatile uint16_t *)&vq->avail->idx);
>  
>  	/* If there are no available buffers then return. */
> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
> index f406a94..3d7c373 100644
> --- a/lib/librte_vhost/vhost_user/vhost-net-user.c
> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
> @@ -383,7 +383,9 @@ vserver_message_handler(int connfd, void *dat, int *remove)
>  		ops->set_owner(ctx);
>  		break;
>  	case VHOST_USER_RESET_OWNER:
> -		ops->reset_owner(ctx);
> +		RTE_LOG(INFO, VHOST_CONFIG,
> +			"(%"PRIu64") VHOST_NET_RESET_OWNER\n", ctx.fh);
> +		user_reset_owner(ctx, &msg.payload.state);
>  		break;
>  
>  	case VHOST_USER_SET_MEM_TABLE:
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
> index c1ffc38..4c1d4df 100644
> --- a/lib/librte_vhost/vhost_user/virtio-net-user.c
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> @@ -209,30 +209,46 @@ static int
>  virtio_is_ready(struct virtio_net *dev)
>  {
>  	struct vhost_virtqueue *rvq, *tvq;
> +	uint32_t q_idx;
>  
>  	/* mq support in future.*/
> -	rvq = dev->virtqueue[VIRTIO_RXQ];
> -	tvq = dev->virtqueue[VIRTIO_TXQ];
> -	if (rvq && tvq && rvq->desc && tvq->desc &&
> -		(rvq->kickfd != (eventfd_t)-1) &&
> -		(rvq->callfd != (eventfd_t)-1) &&
> -		(tvq->kickfd != (eventfd_t)-1) &&
> -		(tvq->callfd != (eventfd_t)-1)) {
> -		RTE_LOG(INFO, VHOST_CONFIG,
> -			"virtio is now ready for processing.\n");
> -		return 1;
> +	for (q_idx = 0; q_idx < dev->virt_qp_nb; q_idx++) {
> +		uint32_t virt_rx_q_idx = q_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> +		uint32_t virt_tx_q_idx = q_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> +
> +		rvq = dev->virtqueue[virt_rx_q_idx];
> +		tvq = dev->virtqueue[virt_tx_q_idx];
> +		if ((rvq == NULL) || (tvq == NULL) ||
> +			(rvq->desc == NULL) || (tvq->desc == NULL) ||
> +			(rvq->kickfd == (eventfd_t)-1) ||
> +			(rvq->callfd == (eventfd_t)-1) ||
> +			(tvq->kickfd == (eventfd_t)-1) ||
> +			(tvq->callfd == (eventfd_t)-1)) {
> +			RTE_LOG(INFO, VHOST_CONFIG,
> +				"virtio isn't ready for processing.\n");
> +			return 0;
> +		}
>  	}
>  	RTE_LOG(INFO, VHOST_CONFIG,
> -		"virtio isn't ready for processing.\n");
> -	return 0;
> +		"virtio is now ready for processing.\n");
> +	return 1;
>  }
>  
>  void
>  user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>  {
>  	struct vhost_vring_file file;
> +	struct virtio_net *dev = get_device(ctx);
> +	uint32_t cur_qp_idx;
>  
>  	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> +	cur_qp_idx = file.index >> 1;
> +
> +	if (dev->virt_qp_nb < cur_qp_idx + 1) {
> +		if (alloc_vring_queue_pair(dev, cur_qp_idx) == 0)
> +			dev->virt_qp_nb = cur_qp_idx + 1;

Looks like it is missing vring initialization here.

	if (dev->virt_qp_nb < cur_qp_idx + 1) {
		if (alloc_vring_queue_pair(dev, cur_qp_idx) == 0) {
			dev->virt_qp_nb = cur_qp_idx + 1;
			init_vring_queue_pair(dev, cur_qp_idx);
		}
	}


fbl


> +	}
> +
>  	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
>  		file.fd = -1;
>  	else
> @@ -290,13 +306,37 @@ user_get_vring_base(struct vhost_device_ctx ctx,
>  	 * sent and only sent in vhost_vring_stop.
>  	 * TODO: cleanup the vring, it isn't usable since here.
>  	 */
> -	if (((int)dev->virtqueue[VIRTIO_RXQ]->kickfd) >= 0) {
> -		close(dev->virtqueue[VIRTIO_RXQ]->kickfd);
> -		dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
> +	if (((int)dev->virtqueue[state->index]->kickfd) >= 0) {
> +		close(dev->virtqueue[state->index]->kickfd);
> +		dev->virtqueue[state->index]->kickfd = (eventfd_t)-1;
>  	}
> -	if (((int)dev->virtqueue[VIRTIO_TXQ]->kickfd) >= 0) {
> -		close(dev->virtqueue[VIRTIO_TXQ]->kickfd);
> -		dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
> +
> +	return 0;
> +}
> +
> +/*
> + * when virtio is stopped, qemu will send us the RESET_OWNER message.
> + */
> +int
> +user_reset_owner(struct vhost_device_ctx ctx,
> +	struct vhost_vring_state *state)
> +{
> +	struct virtio_net *dev = get_device(ctx);
> +
> +	/* We have to stop the queue (virtio) if it is running. */
> +	if (dev->flags & VIRTIO_DEV_RUNNING)
> +		notify_ops->destroy_device(dev);
> +
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"reset owner --- state idx:%d state num:%d\n", state->index, state->num);
> +	/*
> +	 * Based on current qemu vhost-user implementation, this message is
> +	 * sent and only sent in vhost_net_stop_one.
> +	 * TODO: cleanup the vring, it isn't usable since here.
> +	 */
> +	if (((int)dev->virtqueue[state->index]->kickfd) >= 0) {
> +		close(dev->virtqueue[state->index]->kickfd);
> +		dev->virtqueue[state->index]->kickfd = (eventfd_t)-1;
>  	}
>  
>  	return 0;
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
> index df24860..2429836 100644
> --- a/lib/librte_vhost/vhost_user/virtio-net-user.h
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
> @@ -46,4 +46,6 @@ void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
>  int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
>  
>  void user_destroy_device(struct vhost_device_ctx);
> +
> +int user_reset_owner(struct vhost_device_ctx ctx, struct vhost_vring_state *state);
>  #endif
> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> index b520ec5..2a4b791 100644
> --- a/lib/librte_vhost/virtio-net.c
> +++ b/lib/librte_vhost/virtio-net.c
> @@ -71,9 +71,10 @@ static struct virtio_net_config_ll *ll_root;
>  #define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
>  				(1ULL << VIRTIO_NET_F_CTRL_VQ) | \
>  				(1ULL << VIRTIO_NET_F_CTRL_RX) | \
> -				(1ULL << VHOST_F_LOG_ALL))
> -static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
> +				(1ULL << VHOST_F_LOG_ALL) | \
> +				(1ULL << VIRTIO_NET_F_MQ))
>  
> +static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
>  
>  /*
>   * Converts QEMU virtual address to Vhost virtual address. This function is
> @@ -182,6 +183,8 @@ add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
>  static void
>  cleanup_device(struct virtio_net *dev)
>  {
> +	uint32_t qp_idx;
> +
>  	/* Unmap QEMU memory file if mapped. */
>  	if (dev->mem) {
>  		munmap((void *)(uintptr_t)dev->mem->mapped_address,
> @@ -190,14 +193,18 @@ cleanup_device(struct virtio_net *dev)
>  	}
>  
>  	/* Close any event notifiers opened by device. */
> -	if ((int)dev->virtqueue[VIRTIO_RXQ]->callfd >= 0)
> -		close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
> -	if ((int)dev->virtqueue[VIRTIO_RXQ]->kickfd >= 0)
> -		close((int)dev->virtqueue[VIRTIO_RXQ]->kickfd);
> -	if ((int)dev->virtqueue[VIRTIO_TXQ]->callfd >= 0)
> -		close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
> -	if ((int)dev->virtqueue[VIRTIO_TXQ]->kickfd >= 0)
> -		close((int)dev->virtqueue[VIRTIO_TXQ]->kickfd);
> +	for (qp_idx = 0; qp_idx < dev->virt_qp_nb; qp_idx++) {
> +		uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> +		uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> +		if ((int)dev->virtqueue[virt_rx_q_idx]->callfd >= 0)
> +			close((int)dev->virtqueue[virt_rx_q_idx]->callfd);
> +		if ((int)dev->virtqueue[virt_rx_q_idx]->kickfd >= 0)
> +			close((int)dev->virtqueue[virt_rx_q_idx]->kickfd);
> +		if ((int)dev->virtqueue[virt_tx_q_idx]->callfd >= 0)
> +			close((int)dev->virtqueue[virt_tx_q_idx]->callfd);
> +		if ((int)dev->virtqueue[virt_tx_q_idx]->kickfd >= 0)
> +			close((int)dev->virtqueue[virt_tx_q_idx]->kickfd);
> +	}
>  }
>  
>  /*
> @@ -206,9 +213,17 @@ cleanup_device(struct virtio_net *dev)
>  static void
>  free_device(struct virtio_net_config_ll *ll_dev)
>  {
> -	/* Free any malloc'd memory */
> -	rte_free(ll_dev->dev.virtqueue[VIRTIO_RXQ]);
> -	rte_free(ll_dev->dev.virtqueue[VIRTIO_TXQ]);
> +	uint32_t qp_idx;
> +
> +	/*
> +	 * Free any malloc'd memory.
> +	 */
> +	/* Free every queue pair. */
> +	for (qp_idx = 0; qp_idx < ll_dev->dev.virt_qp_nb; qp_idx++) {
> +		uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> +		rte_free(ll_dev->dev.virtqueue[virt_rx_q_idx]);
> +	}
> +	rte_free(ll_dev->dev.virtqueue);
>  	rte_free(ll_dev);
>  }
>  
> @@ -242,6 +257,27 @@ rm_config_ll_entry(struct virtio_net_config_ll *ll_dev,
>  }
>  
>  /*
> + *  Initialise all variables in vring queue pair.
> + */
> +static void
> +init_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx)
> +{
> +	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> +	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> +	memset(dev->virtqueue[virt_rx_q_idx], 0, sizeof(struct vhost_virtqueue));
> +	memset(dev->virtqueue[virt_tx_q_idx], 0, sizeof(struct vhost_virtqueue));
> +
> +	dev->virtqueue[virt_rx_q_idx]->kickfd = (eventfd_t)-1;
> +	dev->virtqueue[virt_rx_q_idx]->callfd = (eventfd_t)-1;
> +	dev->virtqueue[virt_tx_q_idx]->kickfd = (eventfd_t)-1;
> +	dev->virtqueue[virt_tx_q_idx]->callfd = (eventfd_t)-1;
> +
> +	/* Backends are set to -1 indicating an inactive device. */
> +	dev->virtqueue[virt_rx_q_idx]->backend = VIRTIO_DEV_STOPPED;
> +	dev->virtqueue[virt_tx_q_idx]->backend = VIRTIO_DEV_STOPPED;
> +}
> +
> +/*
>   *  Initialise all variables in device structure.
>   */
>  static void
> @@ -258,17 +294,34 @@ init_device(struct virtio_net *dev)
>  	/* Set everything to 0. */
>  	memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
>  		(sizeof(struct virtio_net) - (size_t)vq_offset));
> -	memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue));
> -	memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue));
>  
> -	dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
> -	dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> -	dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
> -	dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> +	init_vring_queue_pair(dev, 0);
> +	dev->virt_qp_nb = 1;
> +}
> +
> +/*
> + *  Alloc mem for vring queue pair.
> + */
> +int
> +alloc_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx)
> +{
> +	struct vhost_virtqueue *virtqueue = NULL;
> +	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> +	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
>  
> -	/* Backends are set to -1 indicating an inactive device. */
> -	dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
> -	dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
> +	virtqueue = rte_malloc(NULL, sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0);
> +	if (virtqueue == NULL) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"Failed to allocate memory for virt qp:%d.\n", qp_idx);
> +		return -1;
> +	}
> +
> +	dev->virtqueue[virt_rx_q_idx] = virtqueue;
> +	dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
> +
> +	init_vring_queue_pair(dev, qp_idx);
> +
> +	return 0;
>  }
>  
>  /*
> @@ -280,7 +333,6 @@ static int
>  new_device(struct vhost_device_ctx ctx)
>  {
>  	struct virtio_net_config_ll *new_ll_dev;
> -	struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
>  
>  	/* Setup device and virtqueues. */
>  	new_ll_dev = rte_malloc(NULL, sizeof(struct virtio_net_config_ll), 0);
> @@ -291,28 +343,22 @@ new_device(struct vhost_device_ctx ctx)
>  		return -1;
>  	}
>  
> -	virtqueue_rx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> -	if (virtqueue_rx == NULL) {
> -		rte_free(new_ll_dev);
> +	new_ll_dev->dev.virtqueue =
> +		rte_malloc(NULL, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX * sizeof(struct vhost_virtqueue *), 0);
> +	if (new_ll_dev->dev.virtqueue == NULL) {
>  		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") Failed to allocate memory for rxq.\n",
> +			"(%"PRIu64") Failed to allocate memory for dev.virtqueue.\n",
>  			ctx.fh);
> +		rte_free(new_ll_dev);
>  		return -1;
>  	}
>  
> -	virtqueue_tx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> -	if (virtqueue_tx == NULL) {
> -		rte_free(virtqueue_rx);
> +	if (alloc_vring_queue_pair(&new_ll_dev->dev, 0) == -1) {
> +		rte_free(new_ll_dev->dev.virtqueue);
>  		rte_free(new_ll_dev);
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") Failed to allocate memory for txq.\n",
> -			ctx.fh);
>  		return -1;
>  	}
>  
> -	new_ll_dev->dev.virtqueue[VIRTIO_RXQ] = virtqueue_rx;
> -	new_ll_dev->dev.virtqueue[VIRTIO_TXQ] = virtqueue_tx;
> -
>  	/* Initialise device and virtqueues. */
>  	init_device(&new_ll_dev->dev);
>  
> @@ -396,7 +442,7 @@ set_owner(struct vhost_device_ctx ctx)
>   * Called from CUSE IOCTL: VHOST_RESET_OWNER
>   */
>  static int
> -reset_owner(struct vhost_device_ctx ctx)
> +reset_owner(__rte_unused struct vhost_device_ctx ctx)
>  {
>  	struct virtio_net_config_ll *ll_dev;
>  
> @@ -434,6 +480,7 @@ static int
>  set_features(struct vhost_device_ctx ctx, uint64_t *pu)
>  {
>  	struct virtio_net *dev;
> +	uint32_t q_idx;
>  
>  	dev = get_device(ctx);
>  	if (dev == NULL)
> @@ -445,22 +492,26 @@ set_features(struct vhost_device_ctx ctx, uint64_t *pu)
>  	dev->features = *pu;
>  
>  	/* Set the vhost_hlen depending on if VIRTIO_NET_F_MRG_RXBUF is set. */
> -	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") Mergeable RX buffers enabled\n",
> -			dev->device_fh);
> -		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
> -			sizeof(struct virtio_net_hdr_mrg_rxbuf);
> -		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
> -			sizeof(struct virtio_net_hdr_mrg_rxbuf);
> -	} else {
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") Mergeable RX buffers disabled\n",
> -			dev->device_fh);
> -		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
> -			sizeof(struct virtio_net_hdr);
> -		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
> -			sizeof(struct virtio_net_hdr);
> +	for (q_idx = 0; q_idx < dev->virt_qp_nb; q_idx++) {
> +		uint32_t virt_rx_q_idx = q_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> +		uint32_t virt_tx_q_idx = q_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> +		if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
> +			LOG_DEBUG(VHOST_CONFIG,
> +				"(%"PRIu64") Mergeable RX buffers enabled\n",
> +				dev->device_fh);
> +			dev->virtqueue[virt_rx_q_idx]->vhost_hlen =
> +				sizeof(struct virtio_net_hdr_mrg_rxbuf);
> +			dev->virtqueue[virt_tx_q_idx]->vhost_hlen =
> +				sizeof(struct virtio_net_hdr_mrg_rxbuf);
> +		} else {
> +			LOG_DEBUG(VHOST_CONFIG,
> +				"(%"PRIu64") Mergeable RX buffers disabled\n",
> +				dev->device_fh);
> +			dev->virtqueue[virt_rx_q_idx]->vhost_hlen =
> +				sizeof(struct virtio_net_hdr);
> +			dev->virtqueue[virt_tx_q_idx]->vhost_hlen =
> +				sizeof(struct virtio_net_hdr);
> +		}
>  	}
>  	return 0;
>  }
> @@ -826,6 +877,14 @@ int rte_vhost_feature_enable(uint64_t feature_mask)
>  	return -1;
>  }
>  
> +uint16_t rte_vhost_qp_num_get(struct virtio_net *dev)
> +{
> +	if (dev == NULL)
> +		return 0;
> +
> +	return dev->virt_qp_nb;
> +}
> +
>  /*
>   * Register ops so that we can add/remove device to data core.
>   */
> -- 
> 1.8.4.2
>
  
Ouyang Changchun Aug. 14, 2015, 2:29 a.m. UTC | #2
Hi Flavio,

Thanks for your comments, see my response below.

> -----Original Message-----
> From: Flavio Leitner [mailto:fbl@sysclose.org]
> Sent: Thursday, August 13, 2015 8:52 PM
> To: Ouyang, Changchun
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 02/12] vhost: support multiple queues in
> virtio dev
> 
> On Wed, Aug 12, 2015 at 04:02:37PM +0800, Ouyang Changchun wrote:
> > Each virtio device could have multiple queues, say 2 or 4, at most 8.
> > Enabling this feature allows virtio device/port on guest has the
> > ability to use different vCPU to receive/transmit packets from/to each
> queue.
> >
> > In multiple queues mode, virtio device readiness means all queues of
> > this virtio device are ready, cleanup/destroy a virtio device also
> > requires clearing all queues belong to it.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> > Changes in v4:
> >   - rebase and fix conflicts
> >   - resolve comments
> >   - init each virtq pair if mq is on
> >
> > Changes in v3:
> >   - fix coding style
> >   - check virtqueue idx validity
> >
> > Changes in v2:
> >   - remove the q_num_set api
> >   - add the qp_num_get api
> >   - determine the queue pair num from qemu message
> >   - rework for reset owner message handler
> >   - dynamically alloc mem for dev virtqueue
> >   - queue pair num could be 0x8000
> >   - fix checkpatch errors
> >
> >  lib/librte_vhost/rte_virtio_net.h             |  10 +-
> >  lib/librte_vhost/vhost-net.h                  |   1 +
> >  lib/librte_vhost/vhost_rxtx.c                 |  52 +++++---
> >  lib/librte_vhost/vhost_user/vhost-net-user.c  |   4 +-
> >  lib/librte_vhost/vhost_user/virtio-net-user.c |  76 +++++++++---
> >  lib/librte_vhost/vhost_user/virtio-net-user.h |   2 +
> >  lib/librte_vhost/virtio-net.c                 | 165 +++++++++++++++++---------
> >  7 files changed, 222 insertions(+), 88 deletions(-)
> >
> > diff --git a/lib/librte_vhost/rte_virtio_net.h
> > b/lib/librte_vhost/rte_virtio_net.h
> > index b9bf320..d9e887f 100644
> > --- a/lib/librte_vhost/rte_virtio_net.h
> > +++ b/lib/librte_vhost/rte_virtio_net.h
> > @@ -59,7 +59,6 @@ struct rte_mbuf;
> >  /* Backend value set by guest. */
> >  #define VIRTIO_DEV_STOPPED -1
> >
> > -
> >  /* Enum for virtqueue management. */
> >  enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
> >
> > @@ -96,13 +95,14 @@ struct vhost_virtqueue {
> >   * Device structure contains all configuration information relating to the
> device.
> >   */
> >  struct virtio_net {
> > -	struct vhost_virtqueue	*virtqueue[VIRTIO_QNUM];	/**< Contains
> all virtqueue information. */
> >  	struct virtio_memory	*mem;		/**< QEMU memory and
> memory region information. */
> > +	struct vhost_virtqueue	**virtqueue;    /**< Contains all virtqueue
> information. */
> >  	uint64_t		features;	/**< Negotiated feature set.
> */
> >  	uint64_t		device_fh;	/**< device identifier. */
> >  	uint32_t		flags;		/**< Device flags. Only used
> to check if device is running on data core. */
> >  #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
> >  	char			ifname[IF_NAME_SZ];	/**< Name of the tap
> device or socket path. */
> > +	uint32_t		virt_qp_nb;
> >  	void			*priv;		/**< private context */
> >  } __rte_cache_aligned;
> >
> > @@ -235,4 +235,10 @@ uint16_t rte_vhost_enqueue_burst(struct
> > virtio_net *dev, uint16_t queue_id,  uint16_t
> rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> >  	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t
> > count);
> >
> > +/**
> > + * This function get the queue pair number of one vhost device.
> > + * @return
> > + *  num of queue pair of specified virtio device.
> > + */
> > +uint16_t rte_vhost_qp_num_get(struct virtio_net *dev);
> >  #endif /* _VIRTIO_NET_H_ */
> > diff --git a/lib/librte_vhost/vhost-net.h
> > b/lib/librte_vhost/vhost-net.h index c69b60b..7dff14d 100644
> > --- a/lib/librte_vhost/vhost-net.h
> > +++ b/lib/librte_vhost/vhost-net.h
> > @@ -115,4 +115,5 @@ struct vhost_net_device_ops {
> >
> >
> >  struct vhost_net_device_ops const *get_virtio_net_callbacks(void);
> > +int alloc_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx);
> >  #endif /* _VHOST_NET_CDEV_H_ */
> > diff --git a/lib/librte_vhost/vhost_rxtx.c
> > b/lib/librte_vhost/vhost_rxtx.c index 0d07338..db4ad88 100644
> > --- a/lib/librte_vhost/vhost_rxtx.c
> > +++ b/lib/librte_vhost/vhost_rxtx.c
> > @@ -43,6 +43,18 @@
> >  #define MAX_PKT_BURST 32
> >
> >  /**
> > + * Check the virtqueue idx validility,
> > + * return 1 if pass, otherwise 0.
> > + */
> > +static inline uint8_t __attribute__((always_inline))
> > +check_virtqueue_idx(uint16_t virtq_idx, uint8_t is_tx, uint32_t
> > +virtq_num) {
> > +	if ((is_tx ^ (virtq_idx & 0x1)) || (virtq_idx >= virtq_num))
> > +		return 0;
> > +	return 1;
> > +}
> > +
> > +/**
> >   * This function adds buffers to the virtio devices RX virtqueue. Buffers can
> >   * be received from the physical port or from another virtio device. A
> packet
> >   * count is returned to indicate the number of packets that are
> > succesfully @@ -68,12 +80,15 @@ virtio_dev_rx(struct virtio_net *dev,
> uint16_t queue_id,
> >  	uint8_t success = 0;
> >
> >  	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev-
> >device_fh);
> > -	if (unlikely(queue_id != VIRTIO_RXQ)) {
> > -		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this
> version.\n");
> > +	if (unlikely(check_virtqueue_idx(queue_id, 0,
> > +		VIRTIO_QNUM * dev->virt_qp_nb) == 0)) {
> > +		RTE_LOG(ERR, VHOST_DATA,
> > +			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
> > +			 __func__, dev->device_fh, queue_id);
> >  		return 0;
> >  	}
> >
> > -	vq = dev->virtqueue[VIRTIO_RXQ];
> > +	vq = dev->virtqueue[queue_id];
> >  	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
> >
> >  	/*
> > @@ -235,8 +250,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> > queue_id,  }
> >
> >  static inline uint32_t __attribute__((always_inline))
> > -copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
> > -	uint16_t res_end_idx, struct rte_mbuf *pkt)
> > +copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t queue_id,
> > +	uint16_t res_base_idx, uint16_t res_end_idx,
> > +	struct rte_mbuf *pkt)
> >  {
> >  	uint32_t vec_idx = 0;
> >  	uint32_t entry_success = 0;
> > @@ -264,8 +280,9 @@ copy_from_mbuf_to_vring(struct virtio_net *dev,
> uint16_t res_base_idx,
> >  	 * Convert from gpa to vva
> >  	 * (guest physical addr -> vhost virtual addr)
> >  	 */
> > -	vq = dev->virtqueue[VIRTIO_RXQ];
> > -	vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
> > +	vq = dev->virtqueue[queue_id];
> > +	vb_addr =
> > +		gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
> >  	vb_hdr_addr = vb_addr;
> >
> >  	/* Prefetch buffer address. */
> > @@ -464,11 +481,15 @@ virtio_dev_merge_rx(struct virtio_net *dev,
> > uint16_t queue_id,
> >
> >  	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
> >  		dev->device_fh);
> > -	if (unlikely(queue_id != VIRTIO_RXQ)) {
> > -		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this
> version.\n");
> > +	if (unlikely(check_virtqueue_idx(queue_id, 0,
> > +		VIRTIO_QNUM * dev->virt_qp_nb) == 0)) {
> > +		RTE_LOG(ERR, VHOST_DATA,
> > +			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
> > +			 __func__, dev->device_fh, queue_id);
> > +		return 0;
> >  	}
> >
> > -	vq = dev->virtqueue[VIRTIO_RXQ];
> > +	vq = dev->virtqueue[queue_id];
> >  	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
> >
> >  	if (count == 0)
> > @@ -509,7 +530,7 @@ virtio_dev_merge_rx(struct virtio_net *dev,
> uint16_t queue_id,
> >  							res_cur_idx);
> >  		} while (success == 0);
> >
> > -		entry_success = copy_from_mbuf_to_vring(dev,
> res_base_idx,
> > +		entry_success = copy_from_mbuf_to_vring(dev, queue_id,
> > +res_base_idx,
> >  			res_cur_idx, pkts[pkt_idx]);
> >
> >  		rte_compiler_barrier();
> > @@ -559,12 +580,15 @@ rte_vhost_dequeue_burst(struct virtio_net *dev,
> uint16_t queue_id,
> >  	uint16_t free_entries, entry_success = 0;
> >  	uint16_t avail_idx;
> >
> > -	if (unlikely(queue_id != VIRTIO_TXQ)) {
> > -		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this
> version.\n");
> > +	if (unlikely(check_virtqueue_idx(queue_id, 1,
> > +		VIRTIO_QNUM * dev->virt_qp_nb) == 0)) {
> > +		RTE_LOG(ERR, VHOST_DATA,
> > +			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
> > +			 __func__, dev->device_fh, queue_id);
> >  		return 0;
> >  	}
> >
> > -	vq = dev->virtqueue[VIRTIO_TXQ];
> > +	vq = dev->virtqueue[queue_id];
> >  	avail_idx =  *((volatile uint16_t *)&vq->avail->idx);
> >
> >  	/* If there are no available buffers then return. */ diff --git
> > a/lib/librte_vhost/vhost_user/vhost-net-user.c
> > b/lib/librte_vhost/vhost_user/vhost-net-user.c
> > index f406a94..3d7c373 100644
> > --- a/lib/librte_vhost/vhost_user/vhost-net-user.c
> > +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
> > @@ -383,7 +383,9 @@ vserver_message_handler(int connfd, void *dat, int
> *remove)
> >  		ops->set_owner(ctx);
> >  		break;
> >  	case VHOST_USER_RESET_OWNER:
> > -		ops->reset_owner(ctx);
> > +		RTE_LOG(INFO, VHOST_CONFIG,
> > +			"(%"PRIu64") VHOST_NET_RESET_OWNER\n", ctx.fh);
> > +		user_reset_owner(ctx, &msg.payload.state);
> >  		break;
> >
> >  	case VHOST_USER_SET_MEM_TABLE:
> > diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c
> > b/lib/librte_vhost/vhost_user/virtio-net-user.c
> > index c1ffc38..4c1d4df 100644
> > --- a/lib/librte_vhost/vhost_user/virtio-net-user.c
> > +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> > @@ -209,30 +209,46 @@ static int
> >  virtio_is_ready(struct virtio_net *dev)  {
> >  	struct vhost_virtqueue *rvq, *tvq;
> > +	uint32_t q_idx;
> >
> >  	/* mq support in future.*/
> > -	rvq = dev->virtqueue[VIRTIO_RXQ];
> > -	tvq = dev->virtqueue[VIRTIO_TXQ];
> > -	if (rvq && tvq && rvq->desc && tvq->desc &&
> > -		(rvq->kickfd != (eventfd_t)-1) &&
> > -		(rvq->callfd != (eventfd_t)-1) &&
> > -		(tvq->kickfd != (eventfd_t)-1) &&
> > -		(tvq->callfd != (eventfd_t)-1)) {
> > -		RTE_LOG(INFO, VHOST_CONFIG,
> > -			"virtio is now ready for processing.\n");
> > -		return 1;
> > +	for (q_idx = 0; q_idx < dev->virt_qp_nb; q_idx++) {
> > +		uint32_t virt_rx_q_idx = q_idx * VIRTIO_QNUM +
> VIRTIO_RXQ;
> > +		uint32_t virt_tx_q_idx = q_idx * VIRTIO_QNUM +
> VIRTIO_TXQ;
> > +
> > +		rvq = dev->virtqueue[virt_rx_q_idx];
> > +		tvq = dev->virtqueue[virt_tx_q_idx];
> > +		if ((rvq == NULL) || (tvq == NULL) ||
> > +			(rvq->desc == NULL) || (tvq->desc == NULL) ||
> > +			(rvq->kickfd == (eventfd_t)-1) ||
> > +			(rvq->callfd == (eventfd_t)-1) ||
> > +			(tvq->kickfd == (eventfd_t)-1) ||
> > +			(tvq->callfd == (eventfd_t)-1)) {
> > +			RTE_LOG(INFO, VHOST_CONFIG,
> > +				"virtio isn't ready for processing.\n");
> > +			return 0;
> > +		}
> >  	}
> >  	RTE_LOG(INFO, VHOST_CONFIG,
> > -		"virtio isn't ready for processing.\n");
> > -	return 0;
> > +		"virtio is now ready for processing.\n");
> > +	return 1;
> >  }
> >
> >  void
> >  user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg
> > *pmsg)  {
> >  	struct vhost_vring_file file;
> > +	struct virtio_net *dev = get_device(ctx);
> > +	uint32_t cur_qp_idx;
> >
> >  	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> > +	cur_qp_idx = file.index >> 1;
> > +
> > +	if (dev->virt_qp_nb < cur_qp_idx + 1) {
> > +		if (alloc_vring_queue_pair(dev, cur_qp_idx) == 0)
> > +			dev->virt_qp_nb = cur_qp_idx + 1;
> 
> Looks like it is missing vring initialization here.
> 
> 	if (dev->virt_qp_nb < cur_qp_idx + 1) {
> 		if (alloc_vring_queue_pair(dev, cur_qp_idx) == 0) {
> 			dev->virt_qp_nb = cur_qp_idx + 1;
> 			init_vring_queue_pair(dev, cur_qp_idx);

I have called init_vring_queue_pair inside the function alloc_vring_queue_pair,
so it has the same effect as your suggestion.

Thanks again
Changchun

> 		}
> 	}
> 
> 
> fbl
> 
> 
> > +	}
> > +
> >  	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
> >  		file.fd = -1;
> >  	else
> > @@ -290,13 +306,37 @@ user_get_vring_base(struct vhost_device_ctx ctx,
> >  	 * sent and only sent in vhost_vring_stop.
> >  	 * TODO: cleanup the vring, it isn't usable since here.
> >  	 */
> > -	if (((int)dev->virtqueue[VIRTIO_RXQ]->kickfd) >= 0) {
> > -		close(dev->virtqueue[VIRTIO_RXQ]->kickfd);
> > -		dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
> > +	if (((int)dev->virtqueue[state->index]->kickfd) >= 0) {
> > +		close(dev->virtqueue[state->index]->kickfd);
> > +		dev->virtqueue[state->index]->kickfd = (eventfd_t)-1;
> >  	}
> > -	if (((int)dev->virtqueue[VIRTIO_TXQ]->kickfd) >= 0) {
> > -		close(dev->virtqueue[VIRTIO_TXQ]->kickfd);
> > -		dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
> > +
> > +	return 0;
> > +}
> > +
> > +/*
> > + * when virtio is stopped, qemu will send us the RESET_OWNER message.
> > + */
> > +int
> > +user_reset_owner(struct vhost_device_ctx ctx,
> > +	struct vhost_vring_state *state)
> > +{
> > +	struct virtio_net *dev = get_device(ctx);
> > +
> > +	/* We have to stop the queue (virtio) if it is running. */
> > +	if (dev->flags & VIRTIO_DEV_RUNNING)
> > +		notify_ops->destroy_device(dev);
> > +
> > +	RTE_LOG(INFO, VHOST_CONFIG,
> > +		"reset owner --- state idx:%d state num:%d\n", state->index,
> state->num);
> > +	/*
> > +	 * Based on current qemu vhost-user implementation, this message
> is
> > +	 * sent and only sent in vhost_net_stop_one.
> > +	 * TODO: cleanup the vring, it isn't usable since here.
> > +	 */
> > +	if (((int)dev->virtqueue[state->index]->kickfd) >= 0) {
> > +		close(dev->virtqueue[state->index]->kickfd);
> > +		dev->virtqueue[state->index]->kickfd = (eventfd_t)-1;
> >  	}
> >
> >  	return 0;
> > diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h
> > b/lib/librte_vhost/vhost_user/virtio-net-user.h
> > index df24860..2429836 100644
> > --- a/lib/librte_vhost/vhost_user/virtio-net-user.h
> > +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
> > @@ -46,4 +46,6 @@ void user_set_vring_kick(struct vhost_device_ctx,
> > struct VhostUserMsg *);  int user_get_vring_base(struct
> > vhost_device_ctx, struct vhost_vring_state *);
> >
> >  void user_destroy_device(struct vhost_device_ctx);
> > +
> > +int user_reset_owner(struct vhost_device_ctx ctx, struct
> > +vhost_vring_state *state);
> >  #endif
> > diff --git a/lib/librte_vhost/virtio-net.c
> > b/lib/librte_vhost/virtio-net.c index b520ec5..2a4b791 100644
> > --- a/lib/librte_vhost/virtio-net.c
> > +++ b/lib/librte_vhost/virtio-net.c
> > @@ -71,9 +71,10 @@ static struct virtio_net_config_ll *ll_root;
> > #define VHOST_SUPPORTED_FEATURES ((1ULL <<
> VIRTIO_NET_F_MRG_RXBUF) | \
> >  				(1ULL << VIRTIO_NET_F_CTRL_VQ) | \
> >  				(1ULL << VIRTIO_NET_F_CTRL_RX) | \
> > -				(1ULL << VHOST_F_LOG_ALL))
> > -static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
> > +				(1ULL << VHOST_F_LOG_ALL) | \
> > +				(1ULL << VIRTIO_NET_F_MQ))
> >
> > +static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
> >
> >  /*
> >   * Converts QEMU virtual address to Vhost virtual address. This
> > function is @@ -182,6 +183,8 @@ add_config_ll_entry(struct
> > virtio_net_config_ll *new_ll_dev)  static void  cleanup_device(struct
> > virtio_net *dev)  {
> > +	uint32_t qp_idx;
> > +
> >  	/* Unmap QEMU memory file if mapped. */
> >  	if (dev->mem) {
> >  		munmap((void *)(uintptr_t)dev->mem->mapped_address,
> > @@ -190,14 +193,18 @@ cleanup_device(struct virtio_net *dev)
> >  	}
> >
> >  	/* Close any event notifiers opened by device. */
> > -	if ((int)dev->virtqueue[VIRTIO_RXQ]->callfd >= 0)
> > -		close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
> > -	if ((int)dev->virtqueue[VIRTIO_RXQ]->kickfd >= 0)
> > -		close((int)dev->virtqueue[VIRTIO_RXQ]->kickfd);
> > -	if ((int)dev->virtqueue[VIRTIO_TXQ]->callfd >= 0)
> > -		close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
> > -	if ((int)dev->virtqueue[VIRTIO_TXQ]->kickfd >= 0)
> > -		close((int)dev->virtqueue[VIRTIO_TXQ]->kickfd);
> > +	for (qp_idx = 0; qp_idx < dev->virt_qp_nb; qp_idx++) {
> > +		uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM +
> VIRTIO_RXQ;
> > +		uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM +
> VIRTIO_TXQ;
> > +		if ((int)dev->virtqueue[virt_rx_q_idx]->callfd >= 0)
> > +			close((int)dev->virtqueue[virt_rx_q_idx]->callfd);
> > +		if ((int)dev->virtqueue[virt_rx_q_idx]->kickfd >= 0)
> > +			close((int)dev->virtqueue[virt_rx_q_idx]->kickfd);
> > +		if ((int)dev->virtqueue[virt_tx_q_idx]->callfd >= 0)
> > +			close((int)dev->virtqueue[virt_tx_q_idx]->callfd);
> > +		if ((int)dev->virtqueue[virt_tx_q_idx]->kickfd >= 0)
> > +			close((int)dev->virtqueue[virt_tx_q_idx]->kickfd);
> > +	}
> >  }
> >
> >  /*
> > @@ -206,9 +213,17 @@ cleanup_device(struct virtio_net *dev)  static
> > void  free_device(struct virtio_net_config_ll *ll_dev)  {
> > -	/* Free any malloc'd memory */
> > -	rte_free(ll_dev->dev.virtqueue[VIRTIO_RXQ]);
> > -	rte_free(ll_dev->dev.virtqueue[VIRTIO_TXQ]);
> > +	uint32_t qp_idx;
> > +
> > +	/*
> > +	 * Free any malloc'd memory.
> > +	 */
> > +	/* Free every queue pair. */
> > +	for (qp_idx = 0; qp_idx < ll_dev->dev.virt_qp_nb; qp_idx++) {
> > +		uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM +
> VIRTIO_RXQ;
> > +		rte_free(ll_dev->dev.virtqueue[virt_rx_q_idx]);
> > +	}
> > +	rte_free(ll_dev->dev.virtqueue);
> >  	rte_free(ll_dev);
> >  }
> >
> > @@ -242,6 +257,27 @@ rm_config_ll_entry(struct virtio_net_config_ll
> > *ll_dev,  }
> >
> >  /*
> > + *  Initialise all variables in vring queue pair.
> > + */
> > +static void
> > +init_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx) {
> > +	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> > +	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> > +	memset(dev->virtqueue[virt_rx_q_idx], 0, sizeof(struct
> vhost_virtqueue));
> > +	memset(dev->virtqueue[virt_tx_q_idx], 0, sizeof(struct
> > +vhost_virtqueue));
> > +
> > +	dev->virtqueue[virt_rx_q_idx]->kickfd = (eventfd_t)-1;
> > +	dev->virtqueue[virt_rx_q_idx]->callfd = (eventfd_t)-1;
> > +	dev->virtqueue[virt_tx_q_idx]->kickfd = (eventfd_t)-1;
> > +	dev->virtqueue[virt_tx_q_idx]->callfd = (eventfd_t)-1;
> > +
> > +	/* Backends are set to -1 indicating an inactive device. */
> > +	dev->virtqueue[virt_rx_q_idx]->backend = VIRTIO_DEV_STOPPED;
> > +	dev->virtqueue[virt_tx_q_idx]->backend = VIRTIO_DEV_STOPPED; }
> > +
> > +/*
> >   *  Initialise all variables in device structure.
> >   */
> >  static void
> > @@ -258,17 +294,34 @@ init_device(struct virtio_net *dev)
> >  	/* Set everything to 0. */
> >  	memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
> >  		(sizeof(struct virtio_net) - (size_t)vq_offset));
> > -	memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct
> vhost_virtqueue));
> > -	memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct
> vhost_virtqueue));
> >
> > -	dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
> > -	dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> > -	dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
> > -	dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> > +	init_vring_queue_pair(dev, 0);
> > +	dev->virt_qp_nb = 1;
> > +}
> > +
> > +/*
> > + *  Alloc mem for vring queue pair.
> > + */
> > +int
> > +alloc_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx) {
> > +	struct vhost_virtqueue *virtqueue = NULL;
> > +	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> > +	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> >
> > -	/* Backends are set to -1 indicating an inactive device. */
> > -	dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
> > -	dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
> > +	virtqueue = rte_malloc(NULL, sizeof(struct vhost_virtqueue) *
> VIRTIO_QNUM, 0);
> > +	if (virtqueue == NULL) {
> > +		RTE_LOG(ERR, VHOST_CONFIG,
> > +			"Failed to allocate memory for virt qp:%d.\n",
> qp_idx);
> > +		return -1;
> > +	}
> > +
> > +	dev->virtqueue[virt_rx_q_idx] = virtqueue;
> > +	dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
> > +
> > +	init_vring_queue_pair(dev, qp_idx);
> > +
> > +	return 0;
> >  }
> >
> >  /*
> > @@ -280,7 +333,6 @@ static int
> >  new_device(struct vhost_device_ctx ctx)  {
> >  	struct virtio_net_config_ll *new_ll_dev;
> > -	struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
> >
> >  	/* Setup device and virtqueues. */
> >  	new_ll_dev = rte_malloc(NULL, sizeof(struct virtio_net_config_ll),
> > 0); @@ -291,28 +343,22 @@ new_device(struct vhost_device_ctx ctx)
> >  		return -1;
> >  	}
> >
> > -	virtqueue_rx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> > -	if (virtqueue_rx == NULL) {
> > -		rte_free(new_ll_dev);
> > +	new_ll_dev->dev.virtqueue =
> > +		rte_malloc(NULL, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX *
> sizeof(struct vhost_virtqueue *), 0);
> > +	if (new_ll_dev->dev.virtqueue == NULL) {
> >  		RTE_LOG(ERR, VHOST_CONFIG,
> > -			"(%"PRIu64") Failed to allocate memory for rxq.\n",
> > +			"(%"PRIu64") Failed to allocate memory for
> dev.virtqueue.\n",
> >  			ctx.fh);
> > +		rte_free(new_ll_dev);
> >  		return -1;
> >  	}
> >
> > -	virtqueue_tx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> > -	if (virtqueue_tx == NULL) {
> > -		rte_free(virtqueue_rx);
> > +	if (alloc_vring_queue_pair(&new_ll_dev->dev, 0) == -1) {
> > +		rte_free(new_ll_dev->dev.virtqueue);
> >  		rte_free(new_ll_dev);
> > -		RTE_LOG(ERR, VHOST_CONFIG,
> > -			"(%"PRIu64") Failed to allocate memory for txq.\n",
> > -			ctx.fh);
> >  		return -1;
> >  	}
> >
> > -	new_ll_dev->dev.virtqueue[VIRTIO_RXQ] = virtqueue_rx;
> > -	new_ll_dev->dev.virtqueue[VIRTIO_TXQ] = virtqueue_tx;
> > -
> >  	/* Initialise device and virtqueues. */
> >  	init_device(&new_ll_dev->dev);
> >
> > @@ -396,7 +442,7 @@ set_owner(struct vhost_device_ctx ctx)
> >   * Called from CUSE IOCTL: VHOST_RESET_OWNER
> >   */
> >  static int
> > -reset_owner(struct vhost_device_ctx ctx)
> > +reset_owner(__rte_unused struct vhost_device_ctx ctx)
> >  {
> >  	struct virtio_net_config_ll *ll_dev;
> >
> > @@ -434,6 +480,7 @@ static int
> >  set_features(struct vhost_device_ctx ctx, uint64_t *pu)  {
> >  	struct virtio_net *dev;
> > +	uint32_t q_idx;
> >
> >  	dev = get_device(ctx);
> >  	if (dev == NULL)
> > @@ -445,22 +492,26 @@ set_features(struct vhost_device_ctx ctx,
> uint64_t *pu)
> >  	dev->features = *pu;
> >
> >  	/* Set the vhost_hlen depending on if VIRTIO_NET_F_MRG_RXBUF
> is set. */
> > -	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
> > -		LOG_DEBUG(VHOST_CONFIG,
> > -			"(%"PRIu64") Mergeable RX buffers enabled\n",
> > -			dev->device_fh);
> > -		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
> > -			sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > -		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
> > -			sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > -	} else {
> > -		LOG_DEBUG(VHOST_CONFIG,
> > -			"(%"PRIu64") Mergeable RX buffers disabled\n",
> > -			dev->device_fh);
> > -		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
> > -			sizeof(struct virtio_net_hdr);
> > -		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
> > -			sizeof(struct virtio_net_hdr);
> > +	for (q_idx = 0; q_idx < dev->virt_qp_nb; q_idx++) {
> > +		uint32_t virt_rx_q_idx = q_idx * VIRTIO_QNUM +
> VIRTIO_RXQ;
> > +		uint32_t virt_tx_q_idx = q_idx * VIRTIO_QNUM +
> VIRTIO_TXQ;
> > +		if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
> > +			LOG_DEBUG(VHOST_CONFIG,
> > +				"(%"PRIu64") Mergeable RX buffers
> enabled\n",
> > +				dev->device_fh);
> > +			dev->virtqueue[virt_rx_q_idx]->vhost_hlen =
> > +				sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > +			dev->virtqueue[virt_tx_q_idx]->vhost_hlen =
> > +				sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > +		} else {
> > +			LOG_DEBUG(VHOST_CONFIG,
> > +				"(%"PRIu64") Mergeable RX buffers
> disabled\n",
> > +				dev->device_fh);
> > +			dev->virtqueue[virt_rx_q_idx]->vhost_hlen =
> > +				sizeof(struct virtio_net_hdr);
> > +			dev->virtqueue[virt_tx_q_idx]->vhost_hlen =
> > +				sizeof(struct virtio_net_hdr);
> > +		}
> >  	}
> >  	return 0;
> >  }
> > @@ -826,6 +877,14 @@ int rte_vhost_feature_enable(uint64_t
> feature_mask)
> >  	return -1;
> >  }
> >
> > +uint16_t rte_vhost_qp_num_get(struct virtio_net *dev) {
> > +	if (dev == NULL)
> > +		return 0;
> > +
> > +	return dev->virt_qp_nb;
> > +}
> > +
> >  /*
> >   * Register ops so that we can add/remove device to data core.
> >   */
> > --
> > 1.8.4.2
> >
  
Flavio Leitner Aug. 14, 2015, 12:16 p.m. UTC | #3
On Fri, Aug 14, 2015 at 02:29:51AM +0000, Ouyang, Changchun wrote:
> > -----Original Message-----
> > From: Flavio Leitner [mailto:fbl@sysclose.org]
> > Sent: Thursday, August 13, 2015 8:52 PM
> > To: Ouyang, Changchun
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v4 02/12] vhost: support multiple queues in
> > virtio dev
> > 
> > On Wed, Aug 12, 2015 at 04:02:37PM +0800, Ouyang Changchun wrote:
> > >  	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> > > +	cur_qp_idx = file.index >> 1;
> > > +
> > > +	if (dev->virt_qp_nb < cur_qp_idx + 1) {
> > > +		if (alloc_vring_queue_pair(dev, cur_qp_idx) == 0)
> > > +			dev->virt_qp_nb = cur_qp_idx + 1;
> > 
> > Looks like it is missing vring initialization here.
> > 
> > 	if (dev->virt_qp_nb < cur_qp_idx + 1) {
> > 		if (alloc_vring_queue_pair(dev, cur_qp_idx) == 0) {
> > 			dev->virt_qp_nb = cur_qp_idx + 1;
> > 			init_vring_queue_pair(dev, cur_qp_idx);
> 
> I have called the init_vring_queue_pair inside function alloc_vring_queue_pair,
> It has same effect as your suggestion.

Yup, I missed that.
Thanks!
fbl
  
Yuanhan Liu Aug. 19, 2015, 3:52 a.m. UTC | #4
Hi Changchun,

On Wed, Aug 12, 2015 at 04:02:37PM +0800, Ouyang Changchun wrote:
> Each virtio device could have multiple queues, say 2 or 4, at most 8.
> Enabling this feature allows virtio device/port on guest has the ability to
> use different vCPU to receive/transmit packets from/to each queue.
> 
> In multiple queues mode, virtio device readiness means all queues of
> this virtio device are ready, cleanup/destroy a virtio device also
> requires clearing all queues belong to it.
> 
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
[snip ..]
>  /*
> + *  Initialise all variables in vring queue pair.
> + */
> +static void
> +init_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx)
> +{
> +	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> +	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> +	memset(dev->virtqueue[virt_rx_q_idx], 0, sizeof(struct vhost_virtqueue));
> +	memset(dev->virtqueue[virt_tx_q_idx], 0, sizeof(struct vhost_virtqueue));
> +
> +	dev->virtqueue[virt_rx_q_idx]->kickfd = (eventfd_t)-1;
> +	dev->virtqueue[virt_rx_q_idx]->callfd = (eventfd_t)-1;
> +	dev->virtqueue[virt_tx_q_idx]->kickfd = (eventfd_t)-1;
> +	dev->virtqueue[virt_tx_q_idx]->callfd = (eventfd_t)-1;
> +
> +	/* Backends are set to -1 indicating an inactive device. */
> +	dev->virtqueue[virt_rx_q_idx]->backend = VIRTIO_DEV_STOPPED;
> +	dev->virtqueue[virt_tx_q_idx]->backend = VIRTIO_DEV_STOPPED;
> +}
> +
> +/*
>   *  Initialise all variables in device structure.
>   */
>  static void
> @@ -258,17 +294,34 @@ init_device(struct virtio_net *dev)
>  	/* Set everything to 0. */

There is a trick here. Let me fill the context first:

283 static void
284 init_device(struct virtio_net *dev)
285 {
286         uint64_t vq_offset;
287
288         /*
289          * Virtqueues have already been malloced so
290          * we don't want to set them to NULL.
291          */
292         vq_offset = offsetof(struct virtio_net, mem);
293
294         /* Set everything to 0. */
295         memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
296                 (sizeof(struct virtio_net) - (size_t)vq_offset));
297
298         init_vring_queue_pair(dev, 0);

This piece of code's intention is to memset everything to zero, except
the `virtqueue' field, for, as the comment stated, we have already
allocated virtqueue.

It works only when `virtqueue' field is before `mem' field, and it was
before:

    struct virtio_net {
            struct vhost_virtqueue  *virtqueue[VIRTIO_QNUM];        /**< Contains all virtqueue information. */
            struct virtio_memory    *mem;           /**< QEMU memory and memory region information. */
            ...

After this patch, it becomes:

    struct virtio_net {
            struct virtio_memory    *mem;           /**< QEMU memory and memory region information. */
            struct vhost_virtqueue  **virtqueue;    /**< Contains all virtqueue information. */
            ...


Which actually wipes everything inside `struct virtio_net`, resulting in
setting `virtqueue' to NULL as well.

While reading the code (without your patch applied), I thought that it's
error-prone, as it is very likely that someone else besides the author
doesn't know such an undocumented rule. And you just gave me an example :)

Huawei, I'm proposing a fix to call rte_zmalloc() for allocating new_ll_dev
to get rid of such issue. What do you think?

	--yliu



>  	memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
>  		(sizeof(struct virtio_net) - (size_t)vq_offset));
> -	memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue));
> -	memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue));
>  
> -	dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
> -	dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> -	dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
> -	dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> +	init_vring_queue_pair(dev, 0);
> +	dev->virt_qp_nb = 1;
> +}
> +
> +/*
> + *  Alloc mem for vring queue pair.
> + */
> +int
> +alloc_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx)
> +{
> +	struct vhost_virtqueue *virtqueue = NULL;
> +	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> +	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
>  
> -	/* Backends are set to -1 indicating an inactive device. */
> -	dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
> -	dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
> +	virtqueue = rte_malloc(NULL, sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0);
> +	if (virtqueue == NULL) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"Failed to allocate memory for virt qp:%d.\n", qp_idx);
> +		return -1;
> +	}
> +
> +	dev->virtqueue[virt_rx_q_idx] = virtqueue;
> +	dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
> +
> +	init_vring_queue_pair(dev, qp_idx);
> +
> +	return 0;
>  }
>  
>  /*
> @@ -280,7 +333,6 @@ static int
>  new_device(struct vhost_device_ctx ctx)
>  {
>  	struct virtio_net_config_ll *new_ll_dev;
> -	struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
>  
>  	/* Setup device and virtqueues. */
>  	new_ll_dev = rte_malloc(NULL, sizeof(struct virtio_net_config_ll), 0);
> @@ -291,28 +343,22 @@ new_device(struct vhost_device_ctx ctx)
>  		return -1;
>  	}
>  
> -	virtqueue_rx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> -	if (virtqueue_rx == NULL) {
> -		rte_free(new_ll_dev);
> +	new_ll_dev->dev.virtqueue =
> +		rte_malloc(NULL, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX * sizeof(struct vhost_virtqueue *), 0);
> +	if (new_ll_dev->dev.virtqueue == NULL) {
>  		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") Failed to allocate memory for rxq.\n",
> +			"(%"PRIu64") Failed to allocate memory for dev.virtqueue.\n",
>  			ctx.fh);
> +		rte_free(new_ll_dev);
>  		return -1;
>  	}
>  
> -	virtqueue_tx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> -	if (virtqueue_tx == NULL) {
> -		rte_free(virtqueue_rx);
> +	if (alloc_vring_queue_pair(&new_ll_dev->dev, 0) == -1) {
> +		rte_free(new_ll_dev->dev.virtqueue);
>  		rte_free(new_ll_dev);
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") Failed to allocate memory for txq.\n",
> -			ctx.fh);
>  		return -1;
>  	}
>  
> -	new_ll_dev->dev.virtqueue[VIRTIO_RXQ] = virtqueue_rx;
> -	new_ll_dev->dev.virtqueue[VIRTIO_TXQ] = virtqueue_tx;
> -
>  	/* Initialise device and virtqueues. */
>  	init_device(&new_ll_dev->dev);
>  
> @@ -396,7 +442,7 @@ set_owner(struct vhost_device_ctx ctx)
>   * Called from CUSE IOCTL: VHOST_RESET_OWNER
>   */
>  static int
> -reset_owner(struct vhost_device_ctx ctx)
> +reset_owner(__rte_unused struct vhost_device_ctx ctx)
>  {
>  	struct virtio_net_config_ll *ll_dev;
>  
> @@ -434,6 +480,7 @@ static int
>  set_features(struct vhost_device_ctx ctx, uint64_t *pu)
>  {
>  	struct virtio_net *dev;
> +	uint32_t q_idx;
>  
>  	dev = get_device(ctx);
>  	if (dev == NULL)
> @@ -445,22 +492,26 @@ set_features(struct vhost_device_ctx ctx, uint64_t *pu)
>  	dev->features = *pu;
>  
>  	/* Set the vhost_hlen depending on if VIRTIO_NET_F_MRG_RXBUF is set. */
> -	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") Mergeable RX buffers enabled\n",
> -			dev->device_fh);
> -		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
> -			sizeof(struct virtio_net_hdr_mrg_rxbuf);
> -		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
> -			sizeof(struct virtio_net_hdr_mrg_rxbuf);
> -	} else {
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") Mergeable RX buffers disabled\n",
> -			dev->device_fh);
> -		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
> -			sizeof(struct virtio_net_hdr);
> -		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
> -			sizeof(struct virtio_net_hdr);
> +	for (q_idx = 0; q_idx < dev->virt_qp_nb; q_idx++) {
> +		uint32_t virt_rx_q_idx = q_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> +		uint32_t virt_tx_q_idx = q_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> +		if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
> +			LOG_DEBUG(VHOST_CONFIG,
> +				"(%"PRIu64") Mergeable RX buffers enabled\n",
> +				dev->device_fh);
> +			dev->virtqueue[virt_rx_q_idx]->vhost_hlen =
> +				sizeof(struct virtio_net_hdr_mrg_rxbuf);
> +			dev->virtqueue[virt_tx_q_idx]->vhost_hlen =
> +				sizeof(struct virtio_net_hdr_mrg_rxbuf);
> +		} else {
> +			LOG_DEBUG(VHOST_CONFIG,
> +				"(%"PRIu64") Mergeable RX buffers disabled\n",
> +				dev->device_fh);
> +			dev->virtqueue[virt_rx_q_idx]->vhost_hlen =
> +				sizeof(struct virtio_net_hdr);
> +			dev->virtqueue[virt_tx_q_idx]->vhost_hlen =
> +				sizeof(struct virtio_net_hdr);
> +		}
>  	}
>  	return 0;
>  }
> @@ -826,6 +877,14 @@ int rte_vhost_feature_enable(uint64_t feature_mask)
>  	return -1;
>  }
>  
> +uint16_t rte_vhost_qp_num_get(struct virtio_net *dev)
> +{
> +	if (dev == NULL)
> +		return 0;
> +
> +	return dev->virt_qp_nb;
> +}
> +
>  /*
>   * Register ops so that we can add/remove device to data core.
>   */
> -- 
> 1.8.4.2
>
  
Ouyang Changchun Aug. 19, 2015, 5:54 a.m. UTC | #5
Hi Yuanhan,

> -----Original Message-----
> From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> Sent: Wednesday, August 19, 2015 11:53 AM
> To: Ouyang, Changchun
> Cc: dev@dpdk.org; Xie, Huawei
> Subject: Re: [dpdk-dev] [PATCH v4 02/12] vhost: support multiple queues in
> virtio dev
> 
> Hi Changchun,
> 
> On Wed, Aug 12, 2015 at 04:02:37PM +0800, Ouyang Changchun wrote:
> > Each virtio device could have multiple queues, say 2 or 4, at most 8.
> > Enabling this feature allows virtio device/port on guest has the
> > ability to use different vCPU to receive/transmit packets from/to each
> queue.
> >
> > In multiple queues mode, virtio device readiness means all queues of
> > this virtio device are ready, cleanup/destroy a virtio device also
> > requires clearing all queues belong to it.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> [snip ..]
> >  /*
> > + *  Initialise all variables in vring queue pair.
> > + */
> > +static void
> > +init_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx) {
> > +	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> > +	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> > +	memset(dev->virtqueue[virt_rx_q_idx], 0, sizeof(struct
> vhost_virtqueue));
> > +	memset(dev->virtqueue[virt_tx_q_idx], 0, sizeof(struct
> > +vhost_virtqueue));
> > +
> > +	dev->virtqueue[virt_rx_q_idx]->kickfd = (eventfd_t)-1;
> > +	dev->virtqueue[virt_rx_q_idx]->callfd = (eventfd_t)-1;
> > +	dev->virtqueue[virt_tx_q_idx]->kickfd = (eventfd_t)-1;
> > +	dev->virtqueue[virt_tx_q_idx]->callfd = (eventfd_t)-1;
> > +
> > +	/* Backends are set to -1 indicating an inactive device. */
> > +	dev->virtqueue[virt_rx_q_idx]->backend = VIRTIO_DEV_STOPPED;
> > +	dev->virtqueue[virt_tx_q_idx]->backend = VIRTIO_DEV_STOPPED; }
> > +
> > +/*
> >   *  Initialise all variables in device structure.
> >   */
> >  static void
> > @@ -258,17 +294,34 @@ init_device(struct virtio_net *dev)
> >  	/* Set everything to 0. */
> 
> There is a trick here. Let me fill the context first:
> 
> 283 static void
> 284 init_device(struct virtio_net *dev)
> 285 {
> 286         uint64_t vq_offset;
> 287
> 288         /*
> 289          * Virtqueues have already been malloced so
> 290          * we don't want to set them to NULL.
> 291          */
> 292         vq_offset = offsetof(struct virtio_net, mem);
> 293
> 294         /* Set everything to 0. */
> 295         memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
> 296                 (sizeof(struct virtio_net) - (size_t)vq_offset));
> 297
> 298         init_vring_queue_pair(dev, 0);
> 
> This piece of code's intention is to memset everything to zero, except the
> `virtqueue' field, for, as the comment stated, we have already allocated
> virtqueue.
> 
> It works only when `virtqueue' field is before `mem' field, and it was
> before:
> 
>     struct virtio_net {
>             struct vhost_virtqueue  *virtqueue[VIRTIO_QNUM];        /**< Contains
> all virtqueue information. */
>             struct virtio_memory    *mem;           /**< QEMU memory and memory
> region information. */
>             ...
> 
> After this patch, it becomes:
> 
>     struct virtio_net {
>             struct virtio_memory    *mem;           /**< QEMU memory and memory
> region information. */
>             struct vhost_virtqueue  **virtqueue;    /**< Contains all virtqueue
> information. */
>             ...
> 
> 
> Which actually wipes all stuff inside `struct virtio_net`, resulting to setting
> `virtqueue' to NULL as well.
> 
> While reading the code(without you patch applied), I thought that it's error-
> prone, as it is very likely that someone else besides the author doesn't know
> such undocumented rule. And you just gave me an example :)
> 
> Huawei, I'm proposing a fix to call rte_zmalloc() for allocating new_ll_dev to
> get rid of such issue. What do you think?
> 
> 	--yliu
> 
> 

I suggest you first finish reviewing the later patch:
[PATCH v4 04/12] vhost: set memory layout for multiple queues mode.
After you finish reviewing that patch, I think you will change your mind :-)

That patch resolves what you are concerned about.

> 
> >  	memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
> >  		(sizeof(struct virtio_net) - (size_t)vq_offset));
> > -	memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct
> vhost_virtqueue));
> > -	memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct
> vhost_virtqueue));
> >
> > -	dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
> > -	dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> > -	dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
> > -	dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> > +	init_vring_queue_pair(dev, 0);
> > +	dev->virt_qp_nb = 1;
> > +}
> > +
> > +/*
> > + *  Alloc mem for vring queue pair.
> > + */
> > +int
> > +alloc_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx) {
> > +	struct vhost_virtqueue *virtqueue = NULL;
> > +	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> > +	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> >
> > -	/* Backends are set to -1 indicating an inactive device. */
> > -	dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
> > -	dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
> > +	virtqueue = rte_malloc(NULL, sizeof(struct vhost_virtqueue) *
> VIRTIO_QNUM, 0);
> > +	if (virtqueue == NULL) {
> > +		RTE_LOG(ERR, VHOST_CONFIG,
> > +			"Failed to allocate memory for virt qp:%d.\n",
> qp_idx);
> > +		return -1;
> > +	}
> > +
> > +	dev->virtqueue[virt_rx_q_idx] = virtqueue;
> > +	dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
> > +
> > +	init_vring_queue_pair(dev, qp_idx);
> > +
> > +	return 0;
> >  }
> >
> >  /*
> > @@ -280,7 +333,6 @@ static int
> >  new_device(struct vhost_device_ctx ctx)  {
> >  	struct virtio_net_config_ll *new_ll_dev;
> > -	struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
> >
> >  	/* Setup device and virtqueues. */
> >  	new_ll_dev = rte_malloc(NULL, sizeof(struct virtio_net_config_ll),
> > 0); @@ -291,28 +343,22 @@ new_device(struct vhost_device_ctx ctx)
> >  		return -1;
> >  	}
> >
> > -	virtqueue_rx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> > -	if (virtqueue_rx == NULL) {
> > -		rte_free(new_ll_dev);
> > +	new_ll_dev->dev.virtqueue =
> > +		rte_malloc(NULL, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX *
> sizeof(struct vhost_virtqueue *), 0);
> > +	if (new_ll_dev->dev.virtqueue == NULL) {
> >  		RTE_LOG(ERR, VHOST_CONFIG,
> > -			"(%"PRIu64") Failed to allocate memory for rxq.\n",
> > +			"(%"PRIu64") Failed to allocate memory for
> dev.virtqueue.\n",
> >  			ctx.fh);
> > +		rte_free(new_ll_dev);
> >  		return -1;
> >  	}
> >
> > -	virtqueue_tx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> > -	if (virtqueue_tx == NULL) {
> > -		rte_free(virtqueue_rx);
> > +	if (alloc_vring_queue_pair(&new_ll_dev->dev, 0) == -1) {
> > +		rte_free(new_ll_dev->dev.virtqueue);
> >  		rte_free(new_ll_dev);
> > -		RTE_LOG(ERR, VHOST_CONFIG,
> > -			"(%"PRIu64") Failed to allocate memory for txq.\n",
> > -			ctx.fh);
> >  		return -1;
> >  	}
> >
> > -	new_ll_dev->dev.virtqueue[VIRTIO_RXQ] = virtqueue_rx;
> > -	new_ll_dev->dev.virtqueue[VIRTIO_TXQ] = virtqueue_tx;
> > -
> >  	/* Initialise device and virtqueues. */
> >  	init_device(&new_ll_dev->dev);
> >
> > @@ -396,7 +442,7 @@ set_owner(struct vhost_device_ctx ctx)
> >   * Called from CUSE IOCTL: VHOST_RESET_OWNER
> >   */
> >  static int
> > -reset_owner(struct vhost_device_ctx ctx)
> > +reset_owner(__rte_unused struct vhost_device_ctx ctx)
> >  {
> >  	struct virtio_net_config_ll *ll_dev;
> >
> > @@ -434,6 +480,7 @@ static int
> >  set_features(struct vhost_device_ctx ctx, uint64_t *pu)  {
> >  	struct virtio_net *dev;
> > +	uint32_t q_idx;
> >
> >  	dev = get_device(ctx);
> >  	if (dev == NULL)
> > @@ -445,22 +492,26 @@ set_features(struct vhost_device_ctx ctx,
> uint64_t *pu)
> >  	dev->features = *pu;
> >
> >  	/* Set the vhost_hlen depending on if VIRTIO_NET_F_MRG_RXBUF
> is set. */
> > -	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
> > -		LOG_DEBUG(VHOST_CONFIG,
> > -			"(%"PRIu64") Mergeable RX buffers enabled\n",
> > -			dev->device_fh);
> > -		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
> > -			sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > -		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
> > -			sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > -	} else {
> > -		LOG_DEBUG(VHOST_CONFIG,
> > -			"(%"PRIu64") Mergeable RX buffers disabled\n",
> > -			dev->device_fh);
> > -		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
> > -			sizeof(struct virtio_net_hdr);
> > -		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
> > -			sizeof(struct virtio_net_hdr);
> > +	for (q_idx = 0; q_idx < dev->virt_qp_nb; q_idx++) {
> > +		uint32_t virt_rx_q_idx = q_idx * VIRTIO_QNUM +
> VIRTIO_RXQ;
> > +		uint32_t virt_tx_q_idx = q_idx * VIRTIO_QNUM +
> VIRTIO_TXQ;
> > +		if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
> > +			LOG_DEBUG(VHOST_CONFIG,
> > +				"(%"PRIu64") Mergeable RX buffers
> enabled\n",
> > +				dev->device_fh);
> > +			dev->virtqueue[virt_rx_q_idx]->vhost_hlen =
> > +				sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > +			dev->virtqueue[virt_tx_q_idx]->vhost_hlen =
> > +				sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > +		} else {
> > +			LOG_DEBUG(VHOST_CONFIG,
> > +				"(%"PRIu64") Mergeable RX buffers
> disabled\n",
> > +				dev->device_fh);
> > +			dev->virtqueue[virt_rx_q_idx]->vhost_hlen =
> > +				sizeof(struct virtio_net_hdr);
> > +			dev->virtqueue[virt_tx_q_idx]->vhost_hlen =
> > +				sizeof(struct virtio_net_hdr);
> > +		}
> >  	}
> >  	return 0;
> >  }
> > @@ -826,6 +877,14 @@ int rte_vhost_feature_enable(uint64_t
> feature_mask)
> >  	return -1;
> >  }
> >
> > +uint16_t rte_vhost_qp_num_get(struct virtio_net *dev) {
> > +	if (dev == NULL)
> > +		return 0;
> > +
> > +	return dev->virt_qp_nb;
> > +}
> > +
> >  /*
> >   * Register ops so that we can add/remove device to data core.
> >   */
> > --
> > 1.8.4.2
> >
  
Yuanhan Liu Aug. 19, 2015, 6:28 a.m. UTC | #6
On Wed, Aug 19, 2015 at 05:54:09AM +0000, Ouyang, Changchun wrote:
> Hi Yuanhan,
> 
> > -----Original Message-----
> > From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> > Sent: Wednesday, August 19, 2015 11:53 AM
> > To: Ouyang, Changchun
> > Cc: dev@dpdk.org; Xie, Huawei
> > Subject: Re: [dpdk-dev] [PATCH v4 02/12] vhost: support multiple queues in
> > virtio dev
> > 
> > Hi Changchun,
> > 
> > On Wed, Aug 12, 2015 at 04:02:37PM +0800, Ouyang Changchun wrote:
> > > Each virtio device could have multiple queues, say 2 or 4, at most 8.
> > > Enabling this feature allows virtio device/port on guest has the
> > > ability to use different vCPU to receive/transmit packets from/to each
> > queue.
> > >
> > > In multiple queues mode, virtio device readiness means all queues of
> > > this virtio device are ready, cleanup/destroy a virtio device also
> > > requires clearing all queues belong to it.
> > >
> > > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > > ---
> > [snip ..]
> > >  /*
> > > + *  Initialise all variables in vring queue pair.
> > > + */
> > > +static void
> > > +init_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx) {
> > > +	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> > > +	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> > > +	memset(dev->virtqueue[virt_rx_q_idx], 0, sizeof(struct
> > vhost_virtqueue));
> > > +	memset(dev->virtqueue[virt_tx_q_idx], 0, sizeof(struct
> > > +vhost_virtqueue));
> > > +
> > > +	dev->virtqueue[virt_rx_q_idx]->kickfd = (eventfd_t)-1;
> > > +	dev->virtqueue[virt_rx_q_idx]->callfd = (eventfd_t)-1;
> > > +	dev->virtqueue[virt_tx_q_idx]->kickfd = (eventfd_t)-1;
> > > +	dev->virtqueue[virt_tx_q_idx]->callfd = (eventfd_t)-1;
> > > +
> > > +	/* Backends are set to -1 indicating an inactive device. */
> > > +	dev->virtqueue[virt_rx_q_idx]->backend = VIRTIO_DEV_STOPPED;
> > > +	dev->virtqueue[virt_tx_q_idx]->backend = VIRTIO_DEV_STOPPED; }
> > > +
> > > +/*
> > >   *  Initialise all variables in device structure.
> > >   */
> > >  static void
> > > @@ -258,17 +294,34 @@ init_device(struct virtio_net *dev)
> > >  	/* Set everything to 0. */
> > 
> > There is a trick here. Let me fill the context first:
> > 
> > 283 static void
> > 284 init_device(struct virtio_net *dev)
> > 285 {
> > 286         uint64_t vq_offset;
> > 287
> > 288         /*
> > 289          * Virtqueues have already been malloced so
> > 290          * we don't want to set them to NULL.
> > 291          */
> > 292         vq_offset = offsetof(struct virtio_net, mem);
> > 293
> > 294         /* Set everything to 0. */
> > 295         memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
> > 296                 (sizeof(struct virtio_net) - (size_t)vq_offset));
> > 297
> > 298         init_vring_queue_pair(dev, 0);
> > 
> > This piece of code's intention is to memset everything to zero, except the
> > `virtqueue' field, for, as the comment stated, we have already allocated
> > virtqueue.
> > 
> > It works only when `virtqueue' field is before `mem' field, and it was
> > before:
> > 
> >     struct virtio_net {
> >             struct vhost_virtqueue  *virtqueue[VIRTIO_QNUM];        /**< Contains
> > all virtqueue information. */
> >             struct virtio_memory    *mem;           /**< QEMU memory and memory
> > region information. */
> >             ...
> > 
> > After this patch, it becomes:
> > 
> >     struct virtio_net {
> >             struct virtio_memory    *mem;           /**< QEMU memory and memory
> > region information. */
> >             struct vhost_virtqueue  **virtqueue;    /**< Contains all virtqueue
> > information. */
> >             ...
> > 
> > 
> > Which actually wipes all stuff inside `struct virtio_net`, resulting to setting
> > `virtqueue' to NULL as well.
> > 
> > While reading the code (without your patch applied), I thought that it's error-
> > prone, as it is very likely that someone else besides the author doesn't know
> > such an undocumented rule. And you just gave me an example :)
> > 
> > Huawei, I'm proposing a fix to call rte_zmalloc() for allocating new_ll_dev to
> > get rid of such issue. What do you think?
> > 
> > 	--yliu
> > 
> > 
> 
> Suggest you finish the latter patch review:
> [PATCH v4 04/12] vhost: set memory layout for multiple queues mode,
> After you finish reviewing this patch, I think you will change your mind :-)
> 
> This patch resolve what you concern.

Sorry that I hadn't gone that far yet. And yes, I see. I found that you
moved the barrier to `features' field, which puts the `virtqueue' field
back to the "do not set to zero" zone.

It's still an undocumented rule, and hence, error prone, IMO. But, you
reminded me that init_device() will be invoked elsewhere as well (reset_owner()).
Hence, my solution won't work, either.

I'm wondering about saving the `virtqueue' (and `mem_arr' from patch 04/12)
explicitly before memset() and restoring them after that, to get rid of
the undocumented rule. It may become uglier with more and more fields
need to be saved this way. But judging that we have two fields so far,
I'm kind of okay to that.

What do you think then? If that doesn't work, we should add some comments
inside the virtio_net struct at least, or even add a build time check.

	--yliu

> > 
> > >  	memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
> > >  		(sizeof(struct virtio_net) - (size_t)vq_offset));
> > > -	memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct
> > vhost_virtqueue));
> > > -	memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct
> > vhost_virtqueue));
> > >
> > > -	dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
> > > -	dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> > > -	dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
> > > -	dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> > > +	init_vring_queue_pair(dev, 0);
> > > +	dev->virt_qp_nb = 1;
> > > +}
> > > +
> > > +/*
> > > + *  Alloc mem for vring queue pair.
> > > + */
> > > +int
> > > +alloc_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx) {
> > > +	struct vhost_virtqueue *virtqueue = NULL;
> > > +	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> > > +	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> > >
> > > -	/* Backends are set to -1 indicating an inactive device. */
> > > -	dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
> > > -	dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
> > > +	virtqueue = rte_malloc(NULL, sizeof(struct vhost_virtqueue) *
> > VIRTIO_QNUM, 0);
> > > +	if (virtqueue == NULL) {
> > > +		RTE_LOG(ERR, VHOST_CONFIG,
> > > +			"Failed to allocate memory for virt qp:%d.\n",
> > qp_idx);
> > > +		return -1;
> > > +	}
> > > +
> > > +	dev->virtqueue[virt_rx_q_idx] = virtqueue;
> > > +	dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
> > > +
> > > +	init_vring_queue_pair(dev, qp_idx);
> > > +
> > > +	return 0;
> > >  }
> > >
> > >  /*
> > > @@ -280,7 +333,6 @@ static int
> > >  new_device(struct vhost_device_ctx ctx)  {
> > >  	struct virtio_net_config_ll *new_ll_dev;
> > > -	struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
> > >
> > >  	/* Setup device and virtqueues. */
> > >  	new_ll_dev = rte_malloc(NULL, sizeof(struct virtio_net_config_ll),
> > > 0); @@ -291,28 +343,22 @@ new_device(struct vhost_device_ctx ctx)
> > >  		return -1;
> > >  	}
> > >
> > > -	virtqueue_rx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> > > -	if (virtqueue_rx == NULL) {
> > > -		rte_free(new_ll_dev);
> > > +	new_ll_dev->dev.virtqueue =
> > > +		rte_malloc(NULL, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX *
> > sizeof(struct vhost_virtqueue *), 0);
> > > +	if (new_ll_dev->dev.virtqueue == NULL) {
> > >  		RTE_LOG(ERR, VHOST_CONFIG,
> > > -			"(%"PRIu64") Failed to allocate memory for rxq.\n",
> > > +			"(%"PRIu64") Failed to allocate memory for
> > dev.virtqueue.\n",
> > >  			ctx.fh);
> > > +		rte_free(new_ll_dev);
> > >  		return -1;
> > >  	}
> > >
> > > -	virtqueue_tx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> > > -	if (virtqueue_tx == NULL) {
> > > -		rte_free(virtqueue_rx);
> > > +	if (alloc_vring_queue_pair(&new_ll_dev->dev, 0) == -1) {
> > > +		rte_free(new_ll_dev->dev.virtqueue);
> > >  		rte_free(new_ll_dev);
> > > -		RTE_LOG(ERR, VHOST_CONFIG,
> > > -			"(%"PRIu64") Failed to allocate memory for txq.\n",
> > > -			ctx.fh);
> > >  		return -1;
> > >  	}
> > >
> > > -	new_ll_dev->dev.virtqueue[VIRTIO_RXQ] = virtqueue_rx;
> > > -	new_ll_dev->dev.virtqueue[VIRTIO_TXQ] = virtqueue_tx;
> > > -
> > >  	/* Initialise device and virtqueues. */
> > >  	init_device(&new_ll_dev->dev);
> > >
> > > @@ -396,7 +442,7 @@ set_owner(struct vhost_device_ctx ctx)
> > >   * Called from CUSE IOCTL: VHOST_RESET_OWNER
> > >   */
> > >  static int
> > > -reset_owner(struct vhost_device_ctx ctx)
> > > +reset_owner(__rte_unused struct vhost_device_ctx ctx)
> > >  {
> > >  	struct virtio_net_config_ll *ll_dev;
> > >
> > > @@ -434,6 +480,7 @@ static int
> > >  set_features(struct vhost_device_ctx ctx, uint64_t *pu)  {
> > >  	struct virtio_net *dev;
> > > +	uint32_t q_idx;
> > >
> > >  	dev = get_device(ctx);
> > >  	if (dev == NULL)
> > > @@ -445,22 +492,26 @@ set_features(struct vhost_device_ctx ctx,
> > uint64_t *pu)
> > >  	dev->features = *pu;
> > >
> > >  	/* Set the vhost_hlen depending on if VIRTIO_NET_F_MRG_RXBUF
> > is set. */
> > > -	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
> > > -		LOG_DEBUG(VHOST_CONFIG,
> > > -			"(%"PRIu64") Mergeable RX buffers enabled\n",
> > > -			dev->device_fh);
> > > -		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
> > > -			sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > > -		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
> > > -			sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > > -	} else {
> > > -		LOG_DEBUG(VHOST_CONFIG,
> > > -			"(%"PRIu64") Mergeable RX buffers disabled\n",
> > > -			dev->device_fh);
> > > -		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
> > > -			sizeof(struct virtio_net_hdr);
> > > -		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
> > > -			sizeof(struct virtio_net_hdr);
> > > +	for (q_idx = 0; q_idx < dev->virt_qp_nb; q_idx++) {
> > > +		uint32_t virt_rx_q_idx = q_idx * VIRTIO_QNUM +
> > VIRTIO_RXQ;
> > > +		uint32_t virt_tx_q_idx = q_idx * VIRTIO_QNUM +
> > VIRTIO_TXQ;
> > > +		if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
> > > +			LOG_DEBUG(VHOST_CONFIG,
> > > +				"(%"PRIu64") Mergeable RX buffers
> > enabled\n",
> > > +				dev->device_fh);
> > > +			dev->virtqueue[virt_rx_q_idx]->vhost_hlen =
> > > +				sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > > +			dev->virtqueue[virt_tx_q_idx]->vhost_hlen =
> > > +				sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > > +		} else {
> > > +			LOG_DEBUG(VHOST_CONFIG,
> > > +				"(%"PRIu64") Mergeable RX buffers
> > disabled\n",
> > > +				dev->device_fh);
> > > +			dev->virtqueue[virt_rx_q_idx]->vhost_hlen =
> > > +				sizeof(struct virtio_net_hdr);
> > > +			dev->virtqueue[virt_tx_q_idx]->vhost_hlen =
> > > +				sizeof(struct virtio_net_hdr);
> > > +		}
> > >  	}
> > >  	return 0;
> > >  }
> > > @@ -826,6 +877,14 @@ int rte_vhost_feature_enable(uint64_t
> > feature_mask)
> > >  	return -1;
> > >  }
> > >
> > > +uint16_t rte_vhost_qp_num_get(struct virtio_net *dev) {
> > > +	if (dev == NULL)
> > > +		return 0;
> > > +
> > > +	return dev->virt_qp_nb;
> > > +}
> > > +
> > >  /*
> > >   * Register ops so that we can add/remove device to data core.
> > >   */
> > > --
> > > 1.8.4.2
> > >
  
Yuanhan Liu Aug. 19, 2015, 6:39 a.m. UTC | #7
On Wed, Aug 19, 2015 at 02:28:51PM +0800, Yuanhan Liu wrote:
> On Wed, Aug 19, 2015 at 05:54:09AM +0000, Ouyang, Changchun wrote:
> > Hi Yuanhan,
> > 
> > > -----Original Message-----
> > > From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> > > Sent: Wednesday, August 19, 2015 11:53 AM
> > > To: Ouyang, Changchun
> > > Cc: dev@dpdk.org; Xie, Huawei
> > > Subject: Re: [dpdk-dev] [PATCH v4 02/12] vhost: support multiple queues in
> > > virtio dev
> > > 
> > > Hi Changchun,
> > > 
> > > On Wed, Aug 12, 2015 at 04:02:37PM +0800, Ouyang Changchun wrote:
> > > > Each virtio device could have multiple queues, say 2 or 4, at most 8.
> > > > Enabling this feature allows virtio device/port on guest has the
> > > > ability to use different vCPU to receive/transmit packets from/to each
> > > queue.
> > > >
> > > > In multiple queues mode, virtio device readiness means all queues of
> > > > this virtio device are ready, cleanup/destroy a virtio device also
> > > > requires clearing all queues belong to it.
> > > >
> > > > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > > > ---
> > > [snip ..]
> > > >  /*
> > > > + *  Initialise all variables in vring queue pair.
> > > > + */
> > > > +static void
> > > > +init_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx) {
> > > > +	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> > > > +	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> > > > +	memset(dev->virtqueue[virt_rx_q_idx], 0, sizeof(struct
> > > vhost_virtqueue));
> > > > +	memset(dev->virtqueue[virt_tx_q_idx], 0, sizeof(struct
> > > > +vhost_virtqueue));
> > > > +
> > > > +	dev->virtqueue[virt_rx_q_idx]->kickfd = (eventfd_t)-1;
> > > > +	dev->virtqueue[virt_rx_q_idx]->callfd = (eventfd_t)-1;
> > > > +	dev->virtqueue[virt_tx_q_idx]->kickfd = (eventfd_t)-1;
> > > > +	dev->virtqueue[virt_tx_q_idx]->callfd = (eventfd_t)-1;
> > > > +
> > > > +	/* Backends are set to -1 indicating an inactive device. */
> > > > +	dev->virtqueue[virt_rx_q_idx]->backend = VIRTIO_DEV_STOPPED;
> > > > +	dev->virtqueue[virt_tx_q_idx]->backend = VIRTIO_DEV_STOPPED; }
> > > > +
> > > > +/*
> > > >   *  Initialise all variables in device structure.
> > > >   */
> > > >  static void
> > > > @@ -258,17 +294,34 @@ init_device(struct virtio_net *dev)
> > > >  	/* Set everything to 0. */
> > > 
> > > There is a trick here. Let me fill the context first:
> > > 
> > > 283 static void
> > > 284 init_device(struct virtio_net *dev)
> > > 285 {
> > > 286         uint64_t vq_offset;
> > > 287
> > > 288         /*
> > > 289          * Virtqueues have already been malloced so
> > > 290          * we don't want to set them to NULL.
> > > 291          */
> > > 292         vq_offset = offsetof(struct virtio_net, mem);
> > > 293
> > > 294         /* Set everything to 0. */
> > > 295         memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
> > > 296                 (sizeof(struct virtio_net) - (size_t)vq_offset));
> > > 297
> > > 298         init_vring_queue_pair(dev, 0);
> > > 
> > > This piece of code's intention is to memset everything to zero, except the
> > > `virtqueue' field, for, as the comment stated, we have already allocated
> > > virtqueue.
> > > 
> > > It works only when `virtqueue' field is before `mem' field, and it was
> > > before:
> > > 
> > >     struct virtio_net {
> > >             struct vhost_virtqueue  *virtqueue[VIRTIO_QNUM];        /**< Contains
> > > all virtqueue information. */
> > >             struct virtio_memory    *mem;           /**< QEMU memory and memory
> > > region information. */
> > >             ...
> > > 
> > > After this patch, it becomes:
> > > 
> > >     struct virtio_net {
> > >             struct virtio_memory    *mem;           /**< QEMU memory and memory
> > > region information. */
> > >             struct vhost_virtqueue  **virtqueue;    /**< Contains all virtqueue
> > > information. */
> > >             ...
> > > 
> > > 
> > > Which actually wipes all stuff inside `struct virtio_net`, resulting to setting
> > > `virtqueue' to NULL as well.
> > > 
> > > While reading the code (without your patch applied), I thought that it's error-
> > > prone, as it is very likely that someone else besides the author doesn't know
> > > such an undocumented rule. And you just gave me an example :)
> > > 
> > > Huawei, I'm proposing a fix to call rte_zmalloc() for allocating new_ll_dev to
> > > get rid of such issue. What do you think?
> > > 
> > > 	--yliu
> > > 
> > > 
> > 
> > Suggest you finish the latter patch review:
> > [PATCH v4 04/12] vhost: set memory layout for multiple queues mode,
> > After you finish reviewing this patch, I think you will change your mind :-)
> > 
> > This patch resolve what you concern.
> 
> Sorry that I hadn't gone that far yet. And yes, I see. I found that you
> moved the barrier to `features' field, which puts the `virtqueue' field
> back to the "do not set to zero" zone.
> 
> It's still an undocumented rule, and hence, error prone, IMO. But, you
> reminded me that init_device() will be invoked elsewhere as well (reset_owner()).
> Hence, my solution won't work, either.
> 
> I'm wondering about saving the `virtqueue' (and `mem_arr' from patch 04/12)
> explicitly before memset() and restoring them after that, to get rid of
> the undocumented rule. It may become uglier with more and more fields
> need to be saved this way. But judging that we have two fields so far,
> I'm kind of okay to that.

I thought it again. Nah.., let's forget that. It's not flexible, for
array fields like "virtqueue[]".

	--yliu
> 
> What do you think then? If that doesn't work, we should add some comments
> inside the virtio_net struct at least, or even add a build time check.
> 
> 	--yliu
> 
> > > 
> > > >  	memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
> > > >  		(sizeof(struct virtio_net) - (size_t)vq_offset));
> > > > -	memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct
> > > vhost_virtqueue));
> > > > -	memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct
> > > vhost_virtqueue));
> > > >
> > > > -	dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
> > > > -	dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> > > > -	dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
> > > > -	dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> > > > +	init_vring_queue_pair(dev, 0);
> > > > +	dev->virt_qp_nb = 1;
> > > > +}
> > > > +
> > > > +/*
> > > > + *  Alloc mem for vring queue pair.
> > > > + */
> > > > +int
> > > > +alloc_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx) {
> > > > +	struct vhost_virtqueue *virtqueue = NULL;
> > > > +	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> > > > +	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> > > >
> > > > -	/* Backends are set to -1 indicating an inactive device. */
> > > > -	dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
> > > > -	dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
> > > > +	virtqueue = rte_malloc(NULL, sizeof(struct vhost_virtqueue) *
> > > VIRTIO_QNUM, 0);
> > > > +	if (virtqueue == NULL) {
> > > > +		RTE_LOG(ERR, VHOST_CONFIG,
> > > > +			"Failed to allocate memory for virt qp:%d.\n",
> > > qp_idx);
> > > > +		return -1;
> > > > +	}
> > > > +
> > > > +	dev->virtqueue[virt_rx_q_idx] = virtqueue;
> > > > +	dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
> > > > +
> > > > +	init_vring_queue_pair(dev, qp_idx);
> > > > +
> > > > +	return 0;
> > > >  }
> > > >
> > > >  /*
> > > > @@ -280,7 +333,6 @@ static int
> > > >  new_device(struct vhost_device_ctx ctx)  {
> > > >  	struct virtio_net_config_ll *new_ll_dev;
> > > > -	struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
> > > >
> > > >  	/* Setup device and virtqueues. */
> > > >  	new_ll_dev = rte_malloc(NULL, sizeof(struct virtio_net_config_ll),
> > > > 0); @@ -291,28 +343,22 @@ new_device(struct vhost_device_ctx ctx)
> > > >  		return -1;
> > > >  	}
> > > >
> > > > -	virtqueue_rx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> > > > -	if (virtqueue_rx == NULL) {
> > > > -		rte_free(new_ll_dev);
> > > > +	new_ll_dev->dev.virtqueue =
> > > > +		rte_malloc(NULL, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX *
> > > sizeof(struct vhost_virtqueue *), 0);
> > > > +	if (new_ll_dev->dev.virtqueue == NULL) {
> > > >  		RTE_LOG(ERR, VHOST_CONFIG,
> > > > -			"(%"PRIu64") Failed to allocate memory for rxq.\n",
> > > > +			"(%"PRIu64") Failed to allocate memory for
> > > dev.virtqueue.\n",
> > > >  			ctx.fh);
> > > > +		rte_free(new_ll_dev);
> > > >  		return -1;
> > > >  	}
> > > >
> > > > -	virtqueue_tx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> > > > -	if (virtqueue_tx == NULL) {
> > > > -		rte_free(virtqueue_rx);
> > > > +	if (alloc_vring_queue_pair(&new_ll_dev->dev, 0) == -1) {
> > > > +		rte_free(new_ll_dev->dev.virtqueue);
> > > >  		rte_free(new_ll_dev);
> > > > -		RTE_LOG(ERR, VHOST_CONFIG,
> > > > -			"(%"PRIu64") Failed to allocate memory for txq.\n",
> > > > -			ctx.fh);
> > > >  		return -1;
> > > >  	}
> > > >
> > > > -	new_ll_dev->dev.virtqueue[VIRTIO_RXQ] = virtqueue_rx;
> > > > -	new_ll_dev->dev.virtqueue[VIRTIO_TXQ] = virtqueue_tx;
> > > > -
> > > >  	/* Initialise device and virtqueues. */
> > > >  	init_device(&new_ll_dev->dev);
> > > >
> > > > @@ -396,7 +442,7 @@ set_owner(struct vhost_device_ctx ctx)
> > > >   * Called from CUSE IOCTL: VHOST_RESET_OWNER
> > > >   */
> > > >  static int
> > > > -reset_owner(struct vhost_device_ctx ctx)
> > > > +reset_owner(__rte_unused struct vhost_device_ctx ctx)
> > > >  {
> > > >  	struct virtio_net_config_ll *ll_dev;
> > > >
> > > > @@ -434,6 +480,7 @@ static int
> > > >  set_features(struct vhost_device_ctx ctx, uint64_t *pu)  {
> > > >  	struct virtio_net *dev;
> > > > +	uint32_t q_idx;
> > > >
> > > >  	dev = get_device(ctx);
> > > >  	if (dev == NULL)
> > > > @@ -445,22 +492,26 @@ set_features(struct vhost_device_ctx ctx,
> > > uint64_t *pu)
> > > >  	dev->features = *pu;
> > > >
> > > >  	/* Set the vhost_hlen depending on if VIRTIO_NET_F_MRG_RXBUF
> > > is set. */
> > > > -	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
> > > > -		LOG_DEBUG(VHOST_CONFIG,
> > > > -			"(%"PRIu64") Mergeable RX buffers enabled\n",
> > > > -			dev->device_fh);
> > > > -		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
> > > > -			sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > > > -		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
> > > > -			sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > > > -	} else {
> > > > -		LOG_DEBUG(VHOST_CONFIG,
> > > > -			"(%"PRIu64") Mergeable RX buffers disabled\n",
> > > > -			dev->device_fh);
> > > > -		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
> > > > -			sizeof(struct virtio_net_hdr);
> > > > -		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
> > > > -			sizeof(struct virtio_net_hdr);
> > > > +	for (q_idx = 0; q_idx < dev->virt_qp_nb; q_idx++) {
> > > > +		uint32_t virt_rx_q_idx = q_idx * VIRTIO_QNUM +
> > > VIRTIO_RXQ;
> > > > +		uint32_t virt_tx_q_idx = q_idx * VIRTIO_QNUM +
> > > VIRTIO_TXQ;
> > > > +		if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
> > > > +			LOG_DEBUG(VHOST_CONFIG,
> > > > +				"(%"PRIu64") Mergeable RX buffers
> > > enabled\n",
> > > > +				dev->device_fh);
> > > > +			dev->virtqueue[virt_rx_q_idx]->vhost_hlen =
> > > > +				sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > > > +			dev->virtqueue[virt_tx_q_idx]->vhost_hlen =
> > > > +				sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > > > +		} else {
> > > > +			LOG_DEBUG(VHOST_CONFIG,
> > > > +				"(%"PRIu64") Mergeable RX buffers
> > > disabled\n",
> > > > +				dev->device_fh);
> > > > +			dev->virtqueue[virt_rx_q_idx]->vhost_hlen =
> > > > +				sizeof(struct virtio_net_hdr);
> > > > +			dev->virtqueue[virt_tx_q_idx]->vhost_hlen =
> > > > +				sizeof(struct virtio_net_hdr);
> > > > +		}
> > > >  	}
> > > >  	return 0;
> > > >  }
> > > > @@ -826,6 +877,14 @@ int rte_vhost_feature_enable(uint64_t
> > > feature_mask)
> > > >  	return -1;
> > > >  }
> > > >
> > > > +uint16_t rte_vhost_qp_num_get(struct virtio_net *dev) {
> > > > +	if (dev == NULL)
> > > > +		return 0;
> > > > +
> > > > +	return dev->virt_qp_nb;
> > > > +}
> > > > +
> > > >  /*
> > > >   * Register ops so that we can add/remove device to data core.
> > > >   */
> > > > --
> > > > 1.8.4.2
> > > >
  
Tetsuya Mukawa Sept. 3, 2015, 2:27 a.m. UTC | #8
On 2015/08/12 17:02, Ouyang Changchun wrote:
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
> index df24860..2429836 100644
> --- a/lib/librte_vhost/vhost_user/virtio-net-user.h
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
> @@ -46,4 +46,6 @@ void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
>
>  /*
> @@ -206,9 +213,17 @@ cleanup_device(struct virtio_net *dev)
>  static void
>  free_device(struct virtio_net_config_ll *ll_dev)
>  {
> -	/* Free any malloc'd memory */
> -	rte_free(ll_dev->dev.virtqueue[VIRTIO_RXQ]);
> -	rte_free(ll_dev->dev.virtqueue[VIRTIO_TXQ]);
> +	uint32_t qp_idx;
> +
> +	/*
> +	 * Free any malloc'd memory.
> +	 */
> +	/* Free every queue pair. */
> +	for (qp_idx = 0; qp_idx < ll_dev->dev.virt_qp_nb; qp_idx++) {
> +		uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> +		rte_free(ll_dev->dev.virtqueue[virt_rx_q_idx]);

Hi Changchun,

Should we free tx queue also here?

Thanks,
Tetsuya

> +	}
> +	rte_free(ll_dev->dev.virtqueue);
>  	rte_free(ll_dev);
>  }
>  
>
  
Ouyang Changchun Sept. 6, 2015, 2:25 a.m. UTC | #9
Hi Tetsuya,

> -----Original Message-----
> From: Tetsuya Mukawa [mailto:mukawa@igel.co.jp]
> Sent: Thursday, September 3, 2015 10:27 AM
> To: dev@dpdk.org; Ouyang, Changchun
> Subject: Re: [dpdk-dev] [PATCH v4 02/12] vhost: support multiple queues in
> virtio dev
> 
> On 2015/08/12 17:02, Ouyang Changchun wrote:
> > diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h
> > b/lib/librte_vhost/vhost_user/virtio-net-user.h
> > index df24860..2429836 100644
> > --- a/lib/librte_vhost/vhost_user/virtio-net-user.h
> > +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
> > @@ -46,4 +46,6 @@ void user_set_vring_kick(struct vhost_device_ctx,
> > struct VhostUserMsg *);
> >
> >  /*
> > @@ -206,9 +213,17 @@ cleanup_device(struct virtio_net *dev)  static
> > void  free_device(struct virtio_net_config_ll *ll_dev)  {
> > -	/* Free any malloc'd memory */
> > -	rte_free(ll_dev->dev.virtqueue[VIRTIO_RXQ]);
> > -	rte_free(ll_dev->dev.virtqueue[VIRTIO_TXQ]);
> > +	uint32_t qp_idx;
> > +
> > +	/*
> > +	 * Free any malloc'd memory.
> > +	 */
> > +	/* Free every queue pair. */
> > +	for (qp_idx = 0; qp_idx < ll_dev->dev.virt_qp_nb; qp_idx++) {
> > +		uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM +
> VIRTIO_RXQ;
> > +		rte_free(ll_dev->dev.virtqueue[virt_rx_q_idx]);
> 
> Hi Changchun,
> 
> Should we free tx queue also here?
>

We don't need to do it, as we allocate once for both the rx and tx queues.
Thus, we allocate once, free once.
Pls see the following code snippet:

+ *  Alloc mem for vring queue pair.
+ */
+int
+alloc_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx) {
+	struct vhost_virtqueue *virtqueue = NULL;
+	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
+	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
 
-	/* Backends are set to -1 indicating an inactive device. */
-	dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
-	dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
+	virtqueue = rte_malloc(NULL, sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0);
+	if (virtqueue == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to allocate memory for virt qp:%d.\n", qp_idx);
+		return -1;
+	}
+
+	dev->virtqueue[virt_rx_q_idx] = virtqueue;
+	dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
+
+	init_vring_queue_pair(dev, qp_idx);
+
+	return 0;
 }

Thanks
Changchun

> 
> > +	}
> > +	rte_free(ll_dev->dev.virtqueue);
> >  	rte_free(ll_dev);
> >  }
> >
> >
  

Patch

diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index b9bf320..d9e887f 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -59,7 +59,6 @@  struct rte_mbuf;
 /* Backend value set by guest. */
 #define VIRTIO_DEV_STOPPED -1
 
-
 /* Enum for virtqueue management. */
 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 
@@ -96,13 +95,14 @@  struct vhost_virtqueue {
  * Device structure contains all configuration information relating to the device.
  */
 struct virtio_net {
-	struct vhost_virtqueue	*virtqueue[VIRTIO_QNUM];	/**< Contains all virtqueue information. */
 	struct virtio_memory	*mem;		/**< QEMU memory and memory region information. */
+	struct vhost_virtqueue	**virtqueue;    /**< Contains all virtqueue information. */
 	uint64_t		features;	/**< Negotiated feature set. */
 	uint64_t		device_fh;	/**< device identifier. */
 	uint32_t		flags;		/**< Device flags. Only used to check if device is running on data core. */
 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
 	char			ifname[IF_NAME_SZ];	/**< Name of the tap device or socket path. */
+	uint32_t		virt_qp_nb;
 	void			*priv;		/**< private context */
 } __rte_cache_aligned;
 
@@ -235,4 +235,10 @@  uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
 uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
 
+/**
+ * This function gets the queue pair number of one vhost device.
+ * @return
+ *  number of queue pairs of the specified virtio device.
+ */
+uint16_t rte_vhost_qp_num_get(struct virtio_net *dev);
 #endif /* _VIRTIO_NET_H_ */
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index c69b60b..7dff14d 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -115,4 +115,5 @@  struct vhost_net_device_ops {
 
 
 struct vhost_net_device_ops const *get_virtio_net_callbacks(void);
+int alloc_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx);
 #endif /* _VHOST_NET_CDEV_H_ */
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 0d07338..db4ad88 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -43,6 +43,18 @@ 
 #define MAX_PKT_BURST 32
 
 /**
+ * Check the virtqueue idx validity,
+ * return 1 if pass, otherwise 0.
+ */
+static inline uint8_t __attribute__((always_inline))
+check_virtqueue_idx(uint16_t virtq_idx, uint8_t is_tx, uint32_t virtq_num)
+{
+	if ((is_tx ^ (virtq_idx & 0x1)) || (virtq_idx >= virtq_num))
+		return 0;
+	return 1;
+}
+
+/**
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtio device. A packet
  * count is returned to indicate the number of packets that are succesfully
@@ -68,12 +80,15 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	uint8_t success = 0;
 
 	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
-	if (unlikely(queue_id != VIRTIO_RXQ)) {
-		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
+	if (unlikely(check_virtqueue_idx(queue_id, 0,
+		VIRTIO_QNUM * dev->virt_qp_nb) == 0)) {
+		RTE_LOG(ERR, VHOST_DATA,
+			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+			 __func__, dev->device_fh, queue_id);
 		return 0;
 	}
 
-	vq = dev->virtqueue[VIRTIO_RXQ];
+	vq = dev->virtqueue[queue_id];
 	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
 
 	/*
@@ -235,8 +250,9 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 }
 
 static inline uint32_t __attribute__((always_inline))
-copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
-	uint16_t res_end_idx, struct rte_mbuf *pkt)
+copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t queue_id,
+	uint16_t res_base_idx, uint16_t res_end_idx,
+	struct rte_mbuf *pkt)
 {
 	uint32_t vec_idx = 0;
 	uint32_t entry_success = 0;
@@ -264,8 +280,9 @@  copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	 * Convert from gpa to vva
 	 * (guest physical addr -> vhost virtual addr)
 	 */
-	vq = dev->virtqueue[VIRTIO_RXQ];
-	vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+	vq = dev->virtqueue[queue_id];
+	vb_addr =
+		gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 	vb_hdr_addr = vb_addr;
 
 	/* Prefetch buffer address. */
@@ -464,11 +481,15 @@  virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 
 	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
 		dev->device_fh);
-	if (unlikely(queue_id != VIRTIO_RXQ)) {
-		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
+	if (unlikely(check_virtqueue_idx(queue_id, 0,
+		VIRTIO_QNUM * dev->virt_qp_nb) == 0)) {
+		RTE_LOG(ERR, VHOST_DATA,
+			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+			 __func__, dev->device_fh, queue_id);
+		return 0;
 	}
 
-	vq = dev->virtqueue[VIRTIO_RXQ];
+	vq = dev->virtqueue[queue_id];
 	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
 
 	if (count == 0)
@@ -509,7 +530,7 @@  virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 							res_cur_idx);
 		} while (success == 0);
 
-		entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
+		entry_success = copy_from_mbuf_to_vring(dev, queue_id, res_base_idx,
 			res_cur_idx, pkts[pkt_idx]);
 
 		rte_compiler_barrier();
@@ -559,12 +580,15 @@  rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 	uint16_t free_entries, entry_success = 0;
 	uint16_t avail_idx;
 
-	if (unlikely(queue_id != VIRTIO_TXQ)) {
-		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
+	if (unlikely(check_virtqueue_idx(queue_id, 1,
+		VIRTIO_QNUM * dev->virt_qp_nb) == 0)) {
+		RTE_LOG(ERR, VHOST_DATA,
+			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+			 __func__, dev->device_fh, queue_id);
 		return 0;
 	}
 
-	vq = dev->virtqueue[VIRTIO_TXQ];
+	vq = dev->virtqueue[queue_id];
 	avail_idx =  *((volatile uint16_t *)&vq->avail->idx);
 
 	/* If there are no available buffers then return. */
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index f406a94..3d7c373 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -383,7 +383,9 @@  vserver_message_handler(int connfd, void *dat, int *remove)
 		ops->set_owner(ctx);
 		break;
 	case VHOST_USER_RESET_OWNER:
-		ops->reset_owner(ctx);
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"(%"PRIu64") VHOST_NET_RESET_OWNER\n", ctx.fh);
+		user_reset_owner(ctx, &msg.payload.state);
 		break;
 
 	case VHOST_USER_SET_MEM_TABLE:
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
index c1ffc38..4c1d4df 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -209,30 +209,46 @@  static int
 virtio_is_ready(struct virtio_net *dev)
 {
 	struct vhost_virtqueue *rvq, *tvq;
+	uint32_t q_idx;
 
 	/* mq support in future.*/
-	rvq = dev->virtqueue[VIRTIO_RXQ];
-	tvq = dev->virtqueue[VIRTIO_TXQ];
-	if (rvq && tvq && rvq->desc && tvq->desc &&
-		(rvq->kickfd != (eventfd_t)-1) &&
-		(rvq->callfd != (eventfd_t)-1) &&
-		(tvq->kickfd != (eventfd_t)-1) &&
-		(tvq->callfd != (eventfd_t)-1)) {
-		RTE_LOG(INFO, VHOST_CONFIG,
-			"virtio is now ready for processing.\n");
-		return 1;
+	for (q_idx = 0; q_idx < dev->virt_qp_nb; q_idx++) {
+		uint32_t virt_rx_q_idx = q_idx * VIRTIO_QNUM + VIRTIO_RXQ;
+		uint32_t virt_tx_q_idx = q_idx * VIRTIO_QNUM + VIRTIO_TXQ;
+
+		rvq = dev->virtqueue[virt_rx_q_idx];
+		tvq = dev->virtqueue[virt_tx_q_idx];
+		if ((rvq == NULL) || (tvq == NULL) ||
+			(rvq->desc == NULL) || (tvq->desc == NULL) ||
+			(rvq->kickfd == (eventfd_t)-1) ||
+			(rvq->callfd == (eventfd_t)-1) ||
+			(tvq->kickfd == (eventfd_t)-1) ||
+			(tvq->callfd == (eventfd_t)-1)) {
+			RTE_LOG(INFO, VHOST_CONFIG,
+				"virtio isn't ready for processing.\n");
+			return 0;
+		}
 	}
 	RTE_LOG(INFO, VHOST_CONFIG,
-		"virtio isn't ready for processing.\n");
-	return 0;
+		"virtio is now ready for processing.\n");
+	return 1;
 }
 
 void
 user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
 {
 	struct vhost_vring_file file;
+	struct virtio_net *dev = get_device(ctx);
+	uint32_t cur_qp_idx;
 
 	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	cur_qp_idx = file.index >> 1;
+
+	if (dev->virt_qp_nb < cur_qp_idx + 1) {
+		if (alloc_vring_queue_pair(dev, cur_qp_idx) == 0)
+			dev->virt_qp_nb = cur_qp_idx + 1;
+	}
+
 	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
 		file.fd = -1;
 	else
@@ -290,13 +306,37 @@  user_get_vring_base(struct vhost_device_ctx ctx,
 	 * sent and only sent in vhost_vring_stop.
 	 * TODO: cleanup the vring, it isn't usable since here.
 	 */
-	if (((int)dev->virtqueue[VIRTIO_RXQ]->kickfd) >= 0) {
-		close(dev->virtqueue[VIRTIO_RXQ]->kickfd);
-		dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
+	if (((int)dev->virtqueue[state->index]->kickfd) >= 0) {
+		close(dev->virtqueue[state->index]->kickfd);
+		dev->virtqueue[state->index]->kickfd = (eventfd_t)-1;
 	}
-	if (((int)dev->virtqueue[VIRTIO_TXQ]->kickfd) >= 0) {
-		close(dev->virtqueue[VIRTIO_TXQ]->kickfd);
-		dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
+
+	return 0;
+}
+
+/*
+ * when virtio is stopped, qemu will send us the RESET_OWNER message.
+ */
+int
+user_reset_owner(struct vhost_device_ctx ctx,
+	struct vhost_vring_state *state)
+{
+	struct virtio_net *dev = get_device(ctx);
+
+	/* We have to stop the queue (virtio) if it is running. */
+	if (dev->flags & VIRTIO_DEV_RUNNING)
+		notify_ops->destroy_device(dev);
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"reset owner --- state idx:%d state num:%d\n", state->index, state->num);
+	/*
+	 * Based on current qemu vhost-user implementation, this message is
+	 * sent and only sent in vhost_net_stop_one.
+	 * TODO: cleanup the vring, it isn't usable since here.
+	 */
+	if (((int)dev->virtqueue[state->index]->kickfd) >= 0) {
+		close(dev->virtqueue[state->index]->kickfd);
+		dev->virtqueue[state->index]->kickfd = (eventfd_t)-1;
 	}
 
 	return 0;
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
index df24860..2429836 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.h
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
@@ -46,4 +46,6 @@  void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
 int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
 
 void user_destroy_device(struct vhost_device_ctx);
+
+int user_reset_owner(struct vhost_device_ctx ctx, struct vhost_vring_state *state);
 #endif
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index b520ec5..2a4b791 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -71,9 +71,10 @@  static struct virtio_net_config_ll *ll_root;
 #define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
 				(1ULL << VIRTIO_NET_F_CTRL_VQ) | \
 				(1ULL << VIRTIO_NET_F_CTRL_RX) | \
-				(1ULL << VHOST_F_LOG_ALL))
-static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
+				(1ULL << VHOST_F_LOG_ALL) | \
+				(1ULL << VIRTIO_NET_F_MQ))
 
+static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
 
 /*
  * Converts QEMU virtual address to Vhost virtual address. This function is
@@ -182,6 +183,8 @@  add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
 static void
 cleanup_device(struct virtio_net *dev)
 {
+	uint32_t qp_idx;
+
 	/* Unmap QEMU memory file if mapped. */
 	if (dev->mem) {
 		munmap((void *)(uintptr_t)dev->mem->mapped_address,
@@ -190,14 +193,18 @@  cleanup_device(struct virtio_net *dev)
 	}
 
 	/* Close any event notifiers opened by device. */
-	if ((int)dev->virtqueue[VIRTIO_RXQ]->callfd >= 0)
-		close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
-	if ((int)dev->virtqueue[VIRTIO_RXQ]->kickfd >= 0)
-		close((int)dev->virtqueue[VIRTIO_RXQ]->kickfd);
-	if ((int)dev->virtqueue[VIRTIO_TXQ]->callfd >= 0)
-		close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
-	if ((int)dev->virtqueue[VIRTIO_TXQ]->kickfd >= 0)
-		close((int)dev->virtqueue[VIRTIO_TXQ]->kickfd);
+	for (qp_idx = 0; qp_idx < dev->virt_qp_nb; qp_idx++) {
+		uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
+		uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
+		if ((int)dev->virtqueue[virt_rx_q_idx]->callfd >= 0)
+			close((int)dev->virtqueue[virt_rx_q_idx]->callfd);
+		if ((int)dev->virtqueue[virt_rx_q_idx]->kickfd >= 0)
+			close((int)dev->virtqueue[virt_rx_q_idx]->kickfd);
+		if ((int)dev->virtqueue[virt_tx_q_idx]->callfd >= 0)
+			close((int)dev->virtqueue[virt_tx_q_idx]->callfd);
+		if ((int)dev->virtqueue[virt_tx_q_idx]->kickfd >= 0)
+			close((int)dev->virtqueue[virt_tx_q_idx]->kickfd);
+	}
 }
 
 /*
@@ -206,9 +213,17 @@  cleanup_device(struct virtio_net *dev)
 static void
 free_device(struct virtio_net_config_ll *ll_dev)
 {
-	/* Free any malloc'd memory */
-	rte_free(ll_dev->dev.virtqueue[VIRTIO_RXQ]);
-	rte_free(ll_dev->dev.virtqueue[VIRTIO_TXQ]);
+	uint32_t qp_idx;
+
+	/*
+	 * Free any malloc'd memory.
+	 */
+	/* Free every queue pair. */
+	for (qp_idx = 0; qp_idx < ll_dev->dev.virt_qp_nb; qp_idx++) {
+		uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
+		rte_free(ll_dev->dev.virtqueue[virt_rx_q_idx]);
+	}
+	rte_free(ll_dev->dev.virtqueue);
 	rte_free(ll_dev);
 }
 
@@ -242,6 +257,27 @@  rm_config_ll_entry(struct virtio_net_config_ll *ll_dev,
 }
 
 /*
+ *  Initialise all variables in vring queue pair.
+ */
+static void
+init_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx)
+{
+	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
+	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
+	memset(dev->virtqueue[virt_rx_q_idx], 0, sizeof(struct vhost_virtqueue));
+	memset(dev->virtqueue[virt_tx_q_idx], 0, sizeof(struct vhost_virtqueue));
+
+	dev->virtqueue[virt_rx_q_idx]->kickfd = (eventfd_t)-1;
+	dev->virtqueue[virt_rx_q_idx]->callfd = (eventfd_t)-1;
+	dev->virtqueue[virt_tx_q_idx]->kickfd = (eventfd_t)-1;
+	dev->virtqueue[virt_tx_q_idx]->callfd = (eventfd_t)-1;
+
+	/* Backends are set to -1 indicating an inactive device. */
+	dev->virtqueue[virt_rx_q_idx]->backend = VIRTIO_DEV_STOPPED;
+	dev->virtqueue[virt_tx_q_idx]->backend = VIRTIO_DEV_STOPPED;
+}
+
+/*
  *  Initialise all variables in device structure.
  */
 static void
@@ -258,17 +294,34 @@  init_device(struct virtio_net *dev)
 	/* Set everything to 0. */
 	memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
 		(sizeof(struct virtio_net) - (size_t)vq_offset));
-	memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue));
-	memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue));
 
-	dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
-	dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
-	dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
-	dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
+	init_vring_queue_pair(dev, 0);
+	dev->virt_qp_nb = 1;
+}
+
+/*
+ *  Alloc mem for vring queue pair.
+ */
+int
+alloc_vring_queue_pair(struct virtio_net *dev, uint16_t qp_idx)
+{
+	struct vhost_virtqueue *virtqueue = NULL;
+	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
+	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
 
-	/* Backends are set to -1 indicating an inactive device. */
-	dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
-	dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
+	virtqueue = rte_malloc(NULL, sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0);
+	if (virtqueue == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to allocate memory for virt qp:%d.\n", qp_idx);
+		return -1;
+	}
+
+	dev->virtqueue[virt_rx_q_idx] = virtqueue;
+	dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
+
+	init_vring_queue_pair(dev, qp_idx);
+
+	return 0;
 }
 
 /*
@@ -280,7 +333,6 @@  static int
 new_device(struct vhost_device_ctx ctx)
 {
 	struct virtio_net_config_ll *new_ll_dev;
-	struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
 
 	/* Setup device and virtqueues. */
 	new_ll_dev = rte_malloc(NULL, sizeof(struct virtio_net_config_ll), 0);
@@ -291,28 +343,22 @@  new_device(struct vhost_device_ctx ctx)
 		return -1;
 	}
 
-	virtqueue_rx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
-	if (virtqueue_rx == NULL) {
-		rte_free(new_ll_dev);
+	new_ll_dev->dev.virtqueue =
+		rte_malloc(NULL, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX * sizeof(struct vhost_virtqueue *), 0);
+	if (new_ll_dev->dev.virtqueue == NULL) {
 		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to allocate memory for rxq.\n",
+			"(%"PRIu64") Failed to allocate memory for dev.virtqueue.\n",
 			ctx.fh);
+		rte_free(new_ll_dev);
 		return -1;
 	}
 
-	virtqueue_tx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
-	if (virtqueue_tx == NULL) {
-		rte_free(virtqueue_rx);
+	if (alloc_vring_queue_pair(&new_ll_dev->dev, 0) == -1) {
+		rte_free(new_ll_dev->dev.virtqueue);
 		rte_free(new_ll_dev);
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to allocate memory for txq.\n",
-			ctx.fh);
 		return -1;
 	}
 
-	new_ll_dev->dev.virtqueue[VIRTIO_RXQ] = virtqueue_rx;
-	new_ll_dev->dev.virtqueue[VIRTIO_TXQ] = virtqueue_tx;
-
 	/* Initialise device and virtqueues. */
 	init_device(&new_ll_dev->dev);
 
@@ -396,7 +442,7 @@  set_owner(struct vhost_device_ctx ctx)
  * Called from CUSE IOCTL: VHOST_RESET_OWNER
  */
 static int
-reset_owner(struct vhost_device_ctx ctx)
+reset_owner(__rte_unused struct vhost_device_ctx ctx)
 {
 	struct virtio_net_config_ll *ll_dev;
 
@@ -434,6 +480,7 @@  static int
 set_features(struct vhost_device_ctx ctx, uint64_t *pu)
 {
 	struct virtio_net *dev;
+	uint32_t q_idx;
 
 	dev = get_device(ctx);
 	if (dev == NULL)
@@ -445,22 +492,26 @@  set_features(struct vhost_device_ctx ctx, uint64_t *pu)
 	dev->features = *pu;
 
 	/* Set the vhost_hlen depending on if VIRTIO_NET_F_MRG_RXBUF is set. */
-	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") Mergeable RX buffers enabled\n",
-			dev->device_fh);
-		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
-			sizeof(struct virtio_net_hdr_mrg_rxbuf);
-		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
-			sizeof(struct virtio_net_hdr_mrg_rxbuf);
-	} else {
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") Mergeable RX buffers disabled\n",
-			dev->device_fh);
-		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
-			sizeof(struct virtio_net_hdr);
-		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
-			sizeof(struct virtio_net_hdr);
+	for (q_idx = 0; q_idx < dev->virt_qp_nb; q_idx++) {
+		uint32_t virt_rx_q_idx = q_idx * VIRTIO_QNUM + VIRTIO_RXQ;
+		uint32_t virt_tx_q_idx = q_idx * VIRTIO_QNUM + VIRTIO_TXQ;
+		if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
+			LOG_DEBUG(VHOST_CONFIG,
+				"(%"PRIu64") Mergeable RX buffers enabled\n",
+				dev->device_fh);
+			dev->virtqueue[virt_rx_q_idx]->vhost_hlen =
+				sizeof(struct virtio_net_hdr_mrg_rxbuf);
+			dev->virtqueue[virt_tx_q_idx]->vhost_hlen =
+				sizeof(struct virtio_net_hdr_mrg_rxbuf);
+		} else {
+			LOG_DEBUG(VHOST_CONFIG,
+				"(%"PRIu64") Mergeable RX buffers disabled\n",
+				dev->device_fh);
+			dev->virtqueue[virt_rx_q_idx]->vhost_hlen =
+				sizeof(struct virtio_net_hdr);
+			dev->virtqueue[virt_tx_q_idx]->vhost_hlen =
+				sizeof(struct virtio_net_hdr);
+		}
 	}
 	return 0;
 }
@@ -826,6 +877,14 @@  int rte_vhost_feature_enable(uint64_t feature_mask)
 	return -1;
 }
 
+uint16_t rte_vhost_qp_num_get(struct virtio_net *dev)
+{
+	if (dev == NULL)
+		return 0;
+
+	return dev->virt_qp_nb;
+}
+
 /*
  * Register ops so that we can add/remove device to data core.
  */