[v2,2/6] net/mlx5: add support for two ports hairpin mode
diff mbox series

Message ID 1603375597-430528-3-git-send-email-bingz@nvidia.com
State Superseded
Delegated to: Raslan Darawsheh
Headers show
Series
  • add two ports hairpin mode support in mlx5 PMD
Related show

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Bing Zhao Oct. 22, 2020, 2:06 p.m. UTC
In order to support hairpin between two ports, mlx5 PMD needs to
implement the functions and provide them as the function pointers.

The bind and unbind functions are executed per port pair. All the
hairpin queues between the two ports must have the same attributes
at queue setup. Different configurations among queue pairs of the
same two ports are not supported. Two ports are allowed to have
hairpin in one direction only.

In order to set up the connection between two queues, peer Rx queue
HW information must be fetched via the internal RTE API and the queue
information could be used to modify the SQ object. Then the RQ object
will be modified with the Tx queue HW information. The reverse
operation is not supported right now.

When disconnecting a queue pair, the SQ and RQ objects should be reset
without any peer HW information. The unbinding operation will try to
disconnect all Tx queues of the port from the Rx queues of the peer
port.

Tx explicit mode attribute will be saved and used when creating a
hairpin flow.

Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c |  10 +
 drivers/net/mlx5/mlx5.h          |  19 ++
 drivers/net/mlx5/mlx5_rxtx.h     |   2 +
 drivers/net/mlx5/mlx5_trigger.c  | 611 ++++++++++++++++++++++++++++++++++++++-
 4 files changed, 640 insertions(+), 2 deletions(-)

Comments

Slava Ovsiienko Oct. 26, 2020, 9:29 a.m. UTC | #1
> -----Original Message-----
> From: Bing Zhao <bingz@nvidia.com>
> Sent: Thursday, October 22, 2020 17:07
> To: viacheslavo@mellanox.com; matan@mellanox.com
> Cc: dev@dpdk.org; Ori Kam <orika@nvidia.com>; Raslan Darawsheh
> <rasland@nvidia.com>
> Subject: [PATCH v2 2/6] net/mlx5: add support for two ports hairpin mode
> 
> In order to support hairpin between two ports, mlx5 PMD needs to implement
> the functions and provide them as the function pointers.
> 
> The bind and unbind functions are executed per port pairs. All the hairpin
> queues between the two ports should have the same attributes during queues
> setup. Different configurations among queue pairs from the same ports are not
> supported. It is allowed that two ports only have one direction hairpin.
> 
> In order to set up the connection between two queues, peer Rx queue HW
> information must be fetched via the internal RTE API and the queue
> information could be used to modify the SQ object. Then the RQ object will be
> modified with the Tx queue HW information. The reverse operation is not
> supported right now.
> 
> When disconnecting the queues pair, SQ and RQ object should be reset
> without any peer HW information. The unbinding operation will try to
> disconnect all Tx queues from the port from the Rx queues of the peer port.
> 
> Tx explicit mode attribute will be saved and used when creating a hairpin flow.
> 
> Signed-off-by: Bing Zhao <bingz@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

> ---
>  drivers/net/mlx5/linux/mlx5_os.c |  10 +
>  drivers/net/mlx5/mlx5.h          |  19 ++
>  drivers/net/mlx5/mlx5_rxtx.h     |   2 +
>  drivers/net/mlx5/mlx5_trigger.c  | 611
> ++++++++++++++++++++++++++++++++++++++-
>  4 files changed, 640 insertions(+), 2 deletions(-)
>

Patch
diff mbox series

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 40f9446..83a8b56 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -2552,6 +2552,11 @@ 
 	.get_module_eeprom = mlx5_get_module_eeprom,
 	.hairpin_cap_get = mlx5_hairpin_cap_get,
 	.mtr_ops_get = mlx5_flow_meter_ops_get,
+	.hairpin_bind = mlx5_hairpin_bind,
+	.hairpin_unbind = mlx5_hairpin_unbind,
+	.hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update,
+	.hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind,
+	.hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind,
 };
 
 /* Available operations from secondary process. */
@@ -2630,4 +2635,9 @@ 
 	.get_module_eeprom = mlx5_get_module_eeprom,
 	.hairpin_cap_get = mlx5_hairpin_cap_get,
 	.mtr_ops_get = mlx5_flow_meter_ops_get,
+	.hairpin_bind = mlx5_hairpin_bind,
+	.hairpin_unbind = mlx5_hairpin_unbind,
+	.hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update,
+	.hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind,
+	.hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind,
 };
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c9d5d71..38d0977 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -891,6 +891,14 @@  struct mlx5_priv {
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
 #define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
 
+/* Hairpin queue information exchanged between the two queues of a pair. */
+struct rte_hairpin_peer_info {
+	uint32_t qp_id; /* Id of the queue HW object (SQ or RQ). */
+	uint32_t vhca_id; /* vhca_id from the device HCA attributes. */
+	uint16_t peer_q; /* Queue index from hairpin_conf.peers[0]. */
+	uint16_t tx_explicit; /* Copy of hairpin_conf.tx_explicit. */
+	uint16_t manual_bind; /* Copy of hairpin_conf.manual_bind. */
+};
+
 /* mlx5.c */
 
 int mlx5_getenv_int(const char *);
@@ -1041,6 +1049,17 @@  void mlx5_vlan_vmwa_acquire(struct rte_eth_dev *dev,
 int mlx5_traffic_enable(struct rte_eth_dev *dev);
 void mlx5_traffic_disable(struct rte_eth_dev *dev);
 int mlx5_traffic_restart(struct rte_eth_dev *dev);
+int mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
+				   struct rte_hairpin_peer_info *current_info,
+				   struct rte_hairpin_peer_info *peer_info,
+				   uint32_t direction);
+int mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
+				 struct rte_hairpin_peer_info *peer_info,
+				 uint32_t direction);
+int mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
+				   uint32_t direction);
+int mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port);
+int mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port);
 
 /* mlx5_flow.c */
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index b243b6f..b50b643 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -184,6 +184,7 @@  struct mlx5_rxq_ctrl {
 	void *wq_umem; /* WQ buffer registration info. */
 	void *cq_umem; /* CQ buffer registration info. */
 	struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */
+	uint32_t hairpin_status; /* Hairpin binding status. */
 };
 
 /* TX queue send local data. */
@@ -280,6 +281,7 @@  struct mlx5_txq_ctrl {
 	void *bf_reg; /* BlueFlame register from Verbs. */
 	uint16_t dump_file_n; /* Number of dump files. */
 	struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */
+	uint32_t hairpin_status; /* Hairpin binding status. */
 	struct mlx5_txq_data txq; /* Data path structure. */
 	/* Must be the last field in the structure, contains elts[]. */
 };
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7735f02..800645e 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -203,7 +203,7 @@ 
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_hairpin_bind(struct rte_eth_dev *dev)
+mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
@@ -281,6 +281,613 @@ 
 	return -rte_errno;
 }
 
+/*
+ * Report the SW & HW attributes of one hairpin queue of this port, so that
+ * the other side of the hairpin pair can use them for binding.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param peer_queue
+ *   Index of the queue whose information is requested.
+ * @param current_info
+ *   Information of the requesting queue, input; ignored by this function.
+ * @param peer_info
+ *   Structure filled with the queue information, output.
+ * @param direction
+ *   Zero to report a Tx queue, any other value to report an Rx queue.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
+			       struct rte_hairpin_peer_info *current_info,
+			       struct rte_hairpin_peer_info *peer_info,
+			       uint32_t direction)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	const struct rte_eth_hairpin_conf *hp_conf;
+	struct mlx5_rxq_ctrl *rxq;
+
+	(void)current_info;
+	/* Queue objects are only looked up on a started port. */
+	if (dev->data->dev_started == 0) {
+		rte_errno = EBUSY;
+		DRV_LOG(ERR, "peer port %u is not started",
+			dev->data->port_id);
+		return -rte_errno;
+	}
+	if (direction == 0) {
+		/* This port is the egress side: report the Tx queue. */
+		struct mlx5_txq_ctrl *txq = mlx5_txq_get(dev, peer_queue);
+
+		if (txq == NULL) {
+			rte_errno = EINVAL;
+			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
+				dev->data->port_id, peer_queue);
+			return -rte_errno;
+		}
+		if (txq->type != MLX5_TXQ_TYPE_HAIRPIN) {
+			rte_errno = EINVAL;
+			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
+				dev->data->port_id, peer_queue);
+		} else if (txq->obj == NULL || txq->obj->sq == NULL) {
+			rte_errno = ENOMEM;
+			DRV_LOG(ERR, "port %u no Txq object found: %d",
+				dev->data->port_id, peer_queue);
+		} else {
+			hp_conf = &txq->hairpin_conf;
+			peer_info->qp_id = txq->obj->sq->id;
+			peer_info->vhca_id = priv->config.hca_attr.vhca_id;
+			/* 1-to-1 mapping, only the first peer is used. */
+			peer_info->peer_q = hp_conf->peers[0].queue;
+			peer_info->tx_explicit = hp_conf->tx_explicit;
+			peer_info->manual_bind = hp_conf->manual_bind;
+			mlx5_txq_release(dev, peer_queue);
+			return 0;
+		}
+		/* Common failure exit: drop the reference taken above. */
+		mlx5_txq_release(dev, peer_queue);
+		return -rte_errno;
+	}
+	/* This port is the ingress side: report the Rx queue. */
+	rxq = mlx5_rxq_get(dev, peer_queue);
+	if (rxq == NULL) {
+		rte_errno = EINVAL;
+		DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
+			dev->data->port_id, peer_queue);
+		return -rte_errno;
+	}
+	if (rxq->type != MLX5_RXQ_TYPE_HAIRPIN) {
+		rte_errno = EINVAL;
+		DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
+			dev->data->port_id, peer_queue);
+	} else if (rxq->obj == NULL || rxq->obj->rq == NULL) {
+		rte_errno = ENOMEM;
+		DRV_LOG(ERR, "port %u no Rxq object found: %d",
+			dev->data->port_id, peer_queue);
+	} else {
+		hp_conf = &rxq->hairpin_conf;
+		peer_info->qp_id = rxq->obj->rq->id;
+		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
+		peer_info->peer_q = hp_conf->peers[0].queue;
+		peer_info->tx_explicit = hp_conf->tx_explicit;
+		peer_info->manual_bind = hp_conf->manual_bind;
+		mlx5_rxq_release(dev, peer_queue);
+		return 0;
+	}
+	/* Common failure exit: drop the reference taken above. */
+	mlx5_rxq_release(dev, peer_queue);
+	return -rte_errno;
+}
+
+/*
+ * Program one hairpin queue of this port with the HW information of its peer.
+ * A complete pair needs two calls, one for the Tx queue and one for the Rx
+ * queue. Binding a queue that is already bound succeeds without any change.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param cur_queue
+ *   Index of the queue whose HW configuration is changed.
+ * @param peer_info
+ *   Pointer to information of the peer queue.
+ * @param direction
+ *   Zero to configure the RxQ, any other value to configure the TxQ.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
+			     struct rte_hairpin_peer_info *peer_info,
+			     uint32_t direction)
+{
+	struct mlx5_rxq_ctrl *rxq;
+	int rc;
+
+	/* The peer must have been configured to point back at this queue. */
+	if (peer_info->peer_q != cur_queue) {
+		rte_errno = EINVAL;
+		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
+			dev->data->port_id, cur_queue, peer_info->peer_q);
+		return -rte_errno;
+	}
+	if (direction != 0) {
+		struct mlx5_txq_ctrl *txq = mlx5_txq_get(dev, cur_queue);
+
+		if (txq == NULL) {
+			rte_errno = EINVAL;
+			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
+				dev->data->port_id, cur_queue);
+			return -rte_errno;
+		}
+		if (txq->type != MLX5_TXQ_TYPE_HAIRPIN) {
+			rte_errno = EINVAL;
+			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
+				dev->data->port_id, cur_queue);
+		} else if (txq->obj == NULL || txq->obj->sq == NULL) {
+			rte_errno = ENOMEM;
+			DRV_LOG(ERR, "port %u no Txq object found: %d",
+				dev->data->port_id, cur_queue);
+		} else if (txq->hairpin_status != 0) {
+			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
+				dev->data->port_id, cur_queue);
+			mlx5_txq_release(dev, cur_queue);
+			return 0;
+		} else if (peer_info->tx_explicit !=
+			   txq->hairpin_conf.tx_explicit) {
+			/* Both ends must agree on the Tx flow rule mode. */
+			rte_errno = EINVAL;
+			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
+				" mismatch", dev->data->port_id, cur_queue);
+		} else if (peer_info->manual_bind !=
+			   txq->hairpin_conf.manual_bind) {
+			/* Both ends must agree on the binding mode. */
+			rte_errno = EINVAL;
+			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
+				" mismatch", dev->data->port_id, cur_queue);
+		} else {
+			/* Move the SQ from reset to ready with the peer RQ. */
+			struct mlx5_devx_modify_sq_attr sq_attr = {
+				.state = MLX5_SQC_STATE_RDY,
+				.sq_state = MLX5_SQC_STATE_RST,
+				.hairpin_peer_rq = peer_info->qp_id,
+				.hairpin_peer_vhca = peer_info->vhca_id,
+			};
+
+			rc = mlx5_devx_cmd_modify_sq(txq->obj->sq, &sq_attr);
+			if (rc == 0)
+				txq->hairpin_status = 1;
+			mlx5_txq_release(dev, cur_queue);
+			return rc;
+		}
+		/* Common failure exit: drop the reference taken above. */
+		mlx5_txq_release(dev, cur_queue);
+		return -rte_errno;
+	}
+	rxq = mlx5_rxq_get(dev, cur_queue);
+	if (rxq == NULL) {
+		rte_errno = EINVAL;
+		DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
+			dev->data->port_id, cur_queue);
+		return -rte_errno;
+	}
+	if (rxq->type != MLX5_RXQ_TYPE_HAIRPIN) {
+		rte_errno = EINVAL;
+		DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
+			dev->data->port_id, cur_queue);
+	} else if (rxq->obj == NULL || rxq->obj->rq == NULL) {
+		rte_errno = ENOMEM;
+		DRV_LOG(ERR, "port %u no Rxq object found: %d",
+			dev->data->port_id, cur_queue);
+	} else if (rxq->hairpin_status != 0) {
+		DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
+			dev->data->port_id, cur_queue);
+		mlx5_rxq_release(dev, cur_queue);
+		return 0;
+	} else if (peer_info->tx_explicit != rxq->hairpin_conf.tx_explicit) {
+		rte_errno = EINVAL;
+		DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
+			" mismatch", dev->data->port_id, cur_queue);
+	} else if (peer_info->manual_bind != rxq->hairpin_conf.manual_bind) {
+		rte_errno = EINVAL;
+		DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
+			" mismatch", dev->data->port_id, cur_queue);
+	} else {
+		/* Move the RQ from reset to ready with the peer SQ. */
+		struct mlx5_devx_modify_rq_attr rq_attr = {
+			.state = MLX5_SQC_STATE_RDY,
+			.rq_state = MLX5_SQC_STATE_RST,
+			.hairpin_peer_sq = peer_info->qp_id,
+			.hairpin_peer_vhca = peer_info->vhca_id,
+		};
+
+		rc = mlx5_devx_cmd_modify_rq(rxq->obj->rq, &rq_attr);
+		if (rc == 0)
+			rxq->hairpin_status = 1;
+		mlx5_rxq_release(dev, cur_queue);
+		return rc;
+	}
+	/* Common failure exit: drop the reference taken above. */
+	mlx5_rxq_release(dev, cur_queue);
+	return -rte_errno;
+}
+
+/*
+ * Reset the HW configuration of one hairpin queue of this port.
+ * A complete pair needs two calls, one for the Tx queue and one for the Rx
+ * queue. Unbinding a queue that is not bound succeeds without any change.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param cur_queue
+ *   Index of the queue whose HW configuration is reset.
+ * @param direction
+ *   Zero to reset the RxQ, any other value to reset the TxQ.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
+			       uint32_t direction)
+{
+	struct mlx5_rxq_ctrl *rxq;
+	int rc;
+
+	if (direction != 0) {
+		struct mlx5_txq_ctrl *txq = mlx5_txq_get(dev, cur_queue);
+
+		if (txq == NULL) {
+			rte_errno = EINVAL;
+			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
+				dev->data->port_id, cur_queue);
+			return -rte_errno;
+		}
+		if (txq->type != MLX5_TXQ_TYPE_HAIRPIN) {
+			rte_errno = EINVAL;
+			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
+				dev->data->port_id, cur_queue);
+		} else if (txq->obj == NULL || txq->obj->sq == NULL) {
+			rte_errno = ENOMEM;
+			DRV_LOG(ERR, "port %u no Txq object found: %d",
+				dev->data->port_id, cur_queue);
+		} else if (txq->hairpin_status == 0) {
+			/* Nothing to undo for a queue that is not bound. */
+			mlx5_txq_release(dev, cur_queue);
+			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
+				dev->data->port_id, cur_queue);
+			return 0;
+		} else {
+			/* Peer fields stay zero: the SQ loses its peer. */
+			struct mlx5_devx_modify_sq_attr sq_attr = {
+				.state = MLX5_SQC_STATE_RST,
+				.sq_state = MLX5_SQC_STATE_RST,
+			};
+
+			rc = mlx5_devx_cmd_modify_sq(txq->obj->sq, &sq_attr);
+			if (rc == 0)
+				txq->hairpin_status = 0;
+			mlx5_txq_release(dev, cur_queue);
+			return rc;
+		}
+		/* Common failure exit: drop the reference taken above. */
+		mlx5_txq_release(dev, cur_queue);
+		return -rte_errno;
+	}
+	rxq = mlx5_rxq_get(dev, cur_queue);
+	if (rxq == NULL) {
+		rte_errno = EINVAL;
+		DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
+			dev->data->port_id, cur_queue);
+		return -rte_errno;
+	}
+	if (rxq->type != MLX5_RXQ_TYPE_HAIRPIN) {
+		rte_errno = EINVAL;
+		DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
+			dev->data->port_id, cur_queue);
+	} else if (rxq->obj == NULL || rxq->obj->rq == NULL) {
+		rte_errno = ENOMEM;
+		DRV_LOG(ERR, "port %u no Rxq object found: %d",
+			dev->data->port_id, cur_queue);
+	} else if (rxq->hairpin_status == 0) {
+		/* Nothing to undo for a queue that is not bound. */
+		mlx5_rxq_release(dev, cur_queue);
+		DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
+			dev->data->port_id, cur_queue);
+		return 0;
+	} else {
+		/* Peer fields stay zero: the RQ loses its peer. */
+		struct mlx5_devx_modify_rq_attr rq_attr = {
+			.state = MLX5_SQC_STATE_RST,
+			.rq_state = MLX5_SQC_STATE_RST,
+		};
+
+		rc = mlx5_devx_cmd_modify_rq(rxq->obj->rq, &rq_attr);
+		if (rc == 0)
+			rxq->hairpin_status = 0;
+		mlx5_rxq_release(dev, cur_queue);
+		return rc;
+	}
+	/* Common failure exit: drop the reference taken above. */
+	mlx5_rxq_release(dev, cur_queue);
+	return -rte_errno;
+}
+
+/*
+ * Bind the hairpin port pairs, from the Tx to the peer Rx.
+ * This function only supports to bind the Tx to one Rx.
+ *
+ * A first pass checks that all hairpin Tx queues peered with rx_port share
+ * the same binding and Tx flow modes. A second pass binds each such Tx queue
+ * and its peer Rx queue. If the second pass fails, every queue pair visited
+ * so far (including the failing one) is unbound again.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param rx_port
+ *   Port identifier of the Rx port.
+ *
+ * @return
+ *   0 on success (also when no Tx queue is peered with rx_port), a negative
+ *   errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	int ret = 0;
+	struct mlx5_txq_ctrl *txq_ctrl;
+	uint32_t i;
+	struct rte_hairpin_peer_info peer;
+	struct rte_hairpin_peer_info cur;
+	const struct rte_eth_hairpin_conf *conf;
+	uint16_t num_q = 0;
+	uint16_t local_port = priv->dev_data->port_id;
+	/* Set from the first matching queue; zeroed to keep compilers quiet. */
+	uint32_t manual = 0;
+	uint32_t explicit = 0;
+	uint16_t rx_queue;
+
+	/*
+	 * Before binding TxQ to peer RxQ, first round loop will be used for
+	 * checking the queues' configuration consistency. This would be a
+	 * little time consuming but better than doing the rollback.
+	 */
+	for (i = 0; i != priv->txqs_n; i++) {
+		txq_ctrl = mlx5_txq_get(dev, i);
+		if (!txq_ctrl)
+			continue;
+		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
+			mlx5_txq_release(dev, i);
+			continue;
+		}
+		/*
+		 * All hairpin Tx queues of a single port that connected to the
+		 * same peer Rx port should have the same "auto binding" and
+		 * "implicit Tx flow" modes.
+		 * Peer consistency checking will be done in per queue binding.
+		 */
+		conf = &txq_ctrl->hairpin_conf;
+		if (conf->peers[0].port == rx_port) {
+			if (num_q == 0) {
+				manual = conf->manual_bind;
+				explicit = conf->tx_explicit;
+			} else {
+				if (manual != conf->manual_bind ||
+				    explicit != conf->tx_explicit) {
+					mlx5_txq_release(dev, i);
+					rte_errno = EINVAL;
+					DRV_LOG(ERR, "port %u queue %d mode"
+						" mismatch: %u %u, %u %u",
+						local_port, i, manual,
+						conf->manual_bind, explicit,
+						conf->tx_explicit);
+					return -rte_errno;
+				}
+			}
+			num_q++;
+		}
+		mlx5_txq_release(dev, i);
+	}
+	/* Once no queue is configured, success is returned directly. */
+	if (num_q == 0)
+		return ret;
+	/* All the hairpin TX queues need to be traversed again. */
+	for (i = 0; i != priv->txqs_n; i++) {
+		txq_ctrl = mlx5_txq_get(dev, i);
+		if (!txq_ctrl)
+			continue;
+		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
+			mlx5_txq_release(dev, i);
+			continue;
+		}
+		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
+			mlx5_txq_release(dev, i);
+			continue;
+		}
+		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
+		/*
+		 * Fetch peer RxQ's information.
+		 * No need to pass the information of the current queue.
+		 */
+		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
+							NULL, &peer, 1);
+		if (ret != 0) {
+			mlx5_txq_release(dev, i);
+			goto error;
+		}
+		/* Accessing its own device, inside mlx5 PMD. */
+		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
+		if (ret != 0) {
+			mlx5_txq_release(dev, i);
+			goto error;
+		}
+		/* Pass TxQ's information to peer RxQ and try binding. */
+		cur.peer_q = rx_queue;
+		cur.qp_id = txq_ctrl->obj->sq->id;
+		cur.vhca_id = priv->config.hca_attr.vhca_id;
+		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
+		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
+		/*
+		 * In order to access another device in a proper way, RTE level
+		 * private function is needed.
+		 */
+		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
+						      &cur, 0);
+		if (ret != 0) {
+			mlx5_txq_release(dev, i);
+			goto error;
+		}
+		mlx5_txq_release(dev, i);
+	}
+	return 0;
+error:
+	/*
+	 * Do roll-back process for the queues already bound.
+	 * The walk goes from the failing queue i (which may be half bound)
+	 * down to queue 0 inclusive. Only hairpin Tx queues peered with
+	 * rx_port are touched, so unrelated Rx queues of the peer port are
+	 * never unbound by mistake.
+	 * No need to check the return value of the queue unbind function.
+	 */
+	do {
+		txq_ctrl = mlx5_txq_get(dev, i);
+		if (txq_ctrl == NULL)
+			continue; /* Still evaluates the loop condition. */
+		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
+		    txq_ctrl->hairpin_conf.peers[0].port == rx_port) {
+			rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
+			rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
+			mlx5_hairpin_queue_peer_unbind(dev, i, 1);
+		}
+		mlx5_txq_release(dev, i);
+	} while (i-- != 0);
+	return ret;
+}
+
+/*
+ * Unbind the hairpin port pair: the HW configuration of both devices is
+ * cleared and the binding status is reset for all the queues used between
+ * them.
+ * This function only supports to unbind the Tx from one Rx.
+ * Only manually bound queues may be unbound here; a queue configured for
+ * auto-bind mode makes the call fail with EINVAL.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param rx_port
+ *   Port identifier of the Rx port.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_txq_ctrl *txq_ctrl;
+	uint32_t i;
+	int ret;
+	uint16_t cur_port = priv->dev_data->port_id;
+
+	for (i = 0; i != priv->txqs_n; i++) {
+		uint16_t rx_queue;
+
+		txq_ctrl = mlx5_txq_get(dev, i);
+		if (!txq_ctrl)
+			continue;
+		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
+			mlx5_txq_release(dev, i);
+			continue;
+		}
+		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
+			mlx5_txq_release(dev, i);
+			continue;
+		}
+		/*
+		 * Indeed, only the first used queue needs to be checked.
+		 * manual_bind == 0 means auto-bind mode, which must not be
+		 * torn down through the explicit unbind call.
+		 */
+		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
+			rte_errno = EINVAL;
+			DRV_LOG(ERR, "port %u and port %u is in auto-bind mode",
+				cur_port, rx_port);
+			mlx5_txq_release(dev, i);
+			return -rte_errno;
+		}
+		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
+		mlx5_txq_release(dev, i);
+		/* Reset the peer Rx queue first, then the local Tx queue. */
+		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
+		if (ret) {
+			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
+				rx_port, rx_queue);
+			return ret;
+		}
+		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
+		if (ret) {
+			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
+				cur_port, i);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Bind the hairpin Tx queues of this port to one Rx port, or to every port
+ * visited by RTE_ETH_FOREACH_DEV when rx_port is RTE_MAX_ETHPORTS. In the
+ * latter mode a failure unbinds the ports with a lower identifier than the
+ * failing one before the error is returned.
+ * @see mlx5_hairpin_bind_single_port()
+ */
+int
+mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
+{
+	int rc = 0;
+	uint16_t port;
+	uint16_t bound;
+
+	/*
+	 * An Rx port without hairpin configuration towards this port is
+	 * skipped inside the single port function. The started state of the
+	 * peer device is only checked when its queue information is fetched.
+	 */
+	if (rx_port != RTE_MAX_ETHPORTS)
+		return mlx5_hairpin_bind_single_port(dev, rx_port);
+	RTE_ETH_FOREACH_DEV(port) {
+		rc = mlx5_hairpin_bind_single_port(dev, port);
+		if (rc != 0)
+			goto rollback;
+	}
+	return rc;
+rollback:
+	RTE_ETH_FOREACH_DEV(bound) {
+		if (bound < port)
+			mlx5_hairpin_unbind_single_port(dev, bound);
+	}
+	return rc;
+}
+
+/*
+ * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
+ * When all ports are requested, the walk stops at the first port that fails
+ * to unbind and its error is returned.
+ * @see mlx5_hairpin_unbind_single_port()
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param rx_port
+ *   Port identifier of the Rx port, or RTE_MAX_ETHPORTS for all ports.
+ *
+ * @return
+ *   0 on success, otherwise the error of the failing single port unbind.
+ */
+int
+mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
+{
+	int ret = 0;
+	uint16_t p;
+
+	if (rx_port == RTE_MAX_ETHPORTS) {
+		RTE_ETH_FOREACH_DEV(p) {
+			ret = mlx5_hairpin_unbind_single_port(dev, p);
+			if (ret != 0)
+				return ret;
+		}
+	} else {
+		/* Must unbind here: the bind helper would re-create the pair. */
+		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
+	}
+	return ret;
+}
+
 /**
  * DPDK callback to start the device.
  *
@@ -332,7 +939,7 @@ 
 			dev->data->port_id, strerror(rte_errno));
 		goto error;
 	}
-	ret = mlx5_hairpin_bind(dev);
+	ret = mlx5_hairpin_auto_bind(dev);
 	if (ret) {
 		DRV_LOG(ERR, "port %u hairpin binding failed: %s",
 			dev->data->port_id, strerror(rte_errno));