diff mbox

[dpdk-dev,v2,09/11] enic: optimize the Tx function

Message ID 1464071579-30072-10-git-send-email-johndale@cisco.com (mailing list archive)
State Superseded, archived
Delegated to: Bruce Richardson
Headers show

Commit Message

John Daley May 24, 2016, 6:32 a.m. UTC
Reduce host CPU overhead of Tx packet processing:
* Use local variables inside the per-packet loop instead of fields in structs.
* Factor bookkeeping and conditionals out of the per-packet loop where
  possible.
* Post buffers to the NIC once per burst, with each burst capped at 64 packets.

Signed-off-by: Nelson Escobar <neescoba@cisco.com>
Signed-off-by: John Daley <johndale@cisco.com>
---
 drivers/net/enic/base/vnic_wq.h |   1 +
 drivers/net/enic/enic_res.h     |   2 +-
 drivers/net/enic/enic_rxtx.c    | 167 +++++++++++++++++++---------------------
 3 files changed, 83 insertions(+), 87 deletions(-)

Comments

Piotr Azarewicz May 30, 2016, 10:05 a.m. UTC | #1
Hi,

>  uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  	uint16_t nb_pkts)
>  {
>  	uint16_t index;
> -	unsigned int frags;
> -	unsigned int pkt_len;
> -	unsigned int seg_len;
> -	unsigned int inc_len;
> +	unsigned int pkt_len, data_len;
>  	unsigned int nb_segs;
> -	struct rte_mbuf *tx_pkt, *next_tx_pkt;
> +	struct rte_mbuf *tx_pkt;
>  	struct vnic_wq *wq = (struct vnic_wq *)tx_queue;
>  	struct enic *enic = vnic_dev_priv(wq->vdev);
>  	unsigned short vlan_id;
>  	unsigned short ol_flags;

The ol_flags variable declared above should be uint64_t.

> -	uint8_t last_seg, eop;
> -	unsigned int host_tx_descs = 0;
> +	unsigned int wq_desc_avail;
> +	int head_idx;
> +	struct vnic_wq_buf *buf;
> +	unsigned int hw_ip_cksum_enabled;
> +	unsigned int desc_count;
> +	struct wq_enet_desc *descs, *desc_p, desc_tmp;
> +	uint16_t mss;
> +	uint8_t vlan_tag_insert;
> +	uint8_t eop;
> +	uint64_t bus_addr;
> 
> +	enic_cleanup_wq(enic, wq);
> +	wq_desc_avail = vnic_wq_desc_avail(wq);
> +	head_idx = wq->head_idx;
> +	desc_count = wq->ring.desc_count;
> +
> +	nb_pkts = RTE_MIN(nb_pkts, ENIC_TX_XMIT_MAX);
> +
> +	hw_ip_cksum_enabled = enic->hw_ip_checksum;
>  	for (index = 0; index < nb_pkts; index++) {
>  		tx_pkt = *tx_pkts++;
> -		inc_len = 0;
>  		nb_segs = tx_pkt->nb_segs;
> -		if (nb_segs > vnic_wq_desc_avail(wq)) {
> +		if (nb_segs > wq_desc_avail) {
>  			if (index > 0)
> -				enic_post_wq_index(wq);
> -
> -			/* wq cleanup and try again */
> -			if (!enic_cleanup_wq(enic, wq) ||
> -				(nb_segs > vnic_wq_desc_avail(wq))) {
> -				return index;
> -			}
> +				goto post;
> +			goto done;
>  		}
> 
>  		pkt_len = tx_pkt->pkt_len;
> +		data_len = tx_pkt->data_len;
>  		vlan_id = tx_pkt->vlan_tci;
>  		ol_flags = tx_pkt->ol_flags;

Otherwise you may lose a lot of the flags in this assignment.
Piotr

> -		for (frags = 0; inc_len < pkt_len; frags++) {
> -			if (!tx_pkt)
> -				break;
> -			next_tx_pkt = tx_pkt->next;
> -			seg_len = tx_pkt->data_len;
> -			inc_len += seg_len;
> -
> -			host_tx_descs++;
> -			last_seg = 0;
> -			eop = 0;
> -			if ((pkt_len == inc_len) || !next_tx_pkt) {
> -				eop = 1;
> -				/* post if last packet in batch or > thresh */
> -				if ((index == (nb_pkts - 1)) ||
> -				   (host_tx_descs > ENIC_TX_POST_THRESH))
> {
> -					last_seg = 1;
> -					host_tx_descs = 0;
> -				}
> +
> +		mss = 0;
> +		vlan_tag_insert = 0;
> +		bus_addr = (dma_addr_t)
> +			   (tx_pkt->buf_physaddr + tx_pkt->data_off);
> +
> +		descs = (struct wq_enet_desc *)wq->ring.descs;
> +		desc_p = descs + head_idx;
> +
> +		eop = (data_len == pkt_len);
> +
> +		if (ol_flags & PKT_TX_VLAN_PKT)
> +			vlan_tag_insert = 1;
> +
> +		if (hw_ip_cksum_enabled && (ol_flags &
> PKT_TX_IP_CKSUM))
> +			mss |= ENIC_CALC_IP_CKSUM;
> +
> +		if (hw_ip_cksum_enabled && (ol_flags &
> PKT_TX_TCP_UDP_CKSUM))
> +			mss |= ENIC_CALC_TCP_UDP_CKSUM;
> +
> +		wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, 0,
> 0, eop,
> +				 eop, 0, vlan_tag_insert, vlan_id, 0);
> +
> +		*desc_p = desc_tmp;
> +		buf = &wq->bufs[head_idx];
> +		buf->mb = (void *)tx_pkt;
> +		head_idx = enic_ring_incr(desc_count, head_idx);
> +		wq_desc_avail--;
> +
> +		if (!eop) {
> +			for (tx_pkt = tx_pkt->next; tx_pkt; tx_pkt =
> +			    tx_pkt->next) {
> +				data_len = tx_pkt->data_len;
> +
> +				if (tx_pkt->next == NULL)
> +					eop = 1;
> +				desc_p = descs + head_idx;
> +				bus_addr = (dma_addr_t)(tx_pkt-
> >buf_physaddr
> +					   + tx_pkt->data_off);
> +				wq_enet_desc_enc((struct wq_enet_desc *)
> +						 &desc_tmp, bus_addr,
> data_len,
> +						 mss, 0, 0, eop, eop, 0,
> +						 vlan_tag_insert, vlan_id, 0);
> +
> +				*desc_p = desc_tmp;
> +				buf = &wq->bufs[head_idx];
> +				buf->mb = (void *)tx_pkt;
> +				head_idx = enic_ring_incr(desc_count,
> head_idx);
> +				wq_desc_avail--;
>  			}
> -			enic_send_pkt(enic, wq, tx_pkt, (unsigned
> short)seg_len,
> -				      !frags, eop, last_seg, ol_flags, vlan_id);
> -			tx_pkt = next_tx_pkt;
>  		}
>  	}
> + post:
> +	rte_wmb();
> +	iowrite32(head_idx, &wq->ctrl->posted_index);
> + done:
> +	wq->ring.desc_avail = wq_desc_avail;
> +	wq->head_idx = head_idx;
> 
> -	enic_cleanup_wq(enic, wq);
>  	return index;
>  }
> +
> +
> --
> 2.7.0
diff mbox

Patch

diff --git a/drivers/net/enic/base/vnic_wq.h b/drivers/net/enic/base/vnic_wq.h
index 689b81c..7a66813 100644
--- a/drivers/net/enic/base/vnic_wq.h
+++ b/drivers/net/enic/base/vnic_wq.h
@@ -67,6 +67,7 @@  struct vnic_wq_ctrl {
 
 /* 16 bytes */
 struct vnic_wq_buf {
+	struct rte_mempool *pool;
 	void *mb;
 };
 
diff --git a/drivers/net/enic/enic_res.h b/drivers/net/enic/enic_res.h
index 955db71..3c8e303 100644
--- a/drivers/net/enic/enic_res.h
+++ b/drivers/net/enic/enic_res.h
@@ -53,7 +53,7 @@ 
 
 #define ENIC_NON_TSO_MAX_DESC		16
 #define ENIC_DEFAULT_RX_FREE_THRESH	32
-#define ENIC_TX_POST_THRESH		(ENIC_MIN_WQ_DESCS / 2)
+#define ENIC_TX_XMIT_MAX		64
 
 #define ENIC_SETTING(enic, f) ((enic->config.flags & VENETF_##f) ? 1 : 0)
 
diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c
index ec8d90a..ba15670 100644
--- a/drivers/net/enic/enic_rxtx.c
+++ b/drivers/net/enic/enic_rxtx.c
@@ -374,114 +374,109 @@  unsigned int enic_cleanup_wq(__rte_unused struct enic *enic, struct vnic_wq *wq)
 	return 0;
 }
 
-void enic_post_wq_index(struct vnic_wq *wq)
-{
-	enic_vnic_post_wq_index(wq);
-}
-
-void enic_send_pkt(struct enic *enic, struct vnic_wq *wq,
-		   struct rte_mbuf *tx_pkt, unsigned short len,
-		   uint8_t sop, uint8_t eop, uint8_t cq_entry,
-		   uint16_t ol_flags, uint16_t vlan_tag)
-{
-	struct wq_enet_desc *desc, *descs;
-	uint16_t mss = 0;
-	uint8_t vlan_tag_insert = 0;
-	uint64_t bus_addr = (dma_addr_t)
-	    (tx_pkt->buf_physaddr + tx_pkt->data_off);
-
-	descs = (struct wq_enet_desc *)wq->ring.descs;
-	desc = descs + wq->head_idx;
-
-	if (sop) {
-		if (ol_flags & PKT_TX_VLAN_PKT)
-			vlan_tag_insert = 1;
-
-		if (enic->hw_ip_checksum) {
-			if (ol_flags & PKT_TX_IP_CKSUM)
-				mss |= ENIC_CALC_IP_CKSUM;
-
-			if (ol_flags & PKT_TX_TCP_UDP_CKSUM)
-				mss |= ENIC_CALC_TCP_UDP_CKSUM;
-		}
-	}
-
-	wq_enet_desc_enc(desc,
-		bus_addr,
-		len,
-		mss,
-		0 /* header_length */,
-		0 /* offload_mode WQ_ENET_OFFLOAD_MODE_CSUM */,
-		eop,
-		cq_entry,
-		0 /* fcoe_encap */,
-		vlan_tag_insert,
-		vlan_tag,
-		0 /* loopback */);
-
-	enic_vnic_post_wq(wq, (void *)tx_pkt, cq_entry);
-}
-
 uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint16_t nb_pkts)
 {
 	uint16_t index;
-	unsigned int frags;
-	unsigned int pkt_len;
-	unsigned int seg_len;
-	unsigned int inc_len;
+	unsigned int pkt_len, data_len;
 	unsigned int nb_segs;
-	struct rte_mbuf *tx_pkt, *next_tx_pkt;
+	struct rte_mbuf *tx_pkt;
 	struct vnic_wq *wq = (struct vnic_wq *)tx_queue;
 	struct enic *enic = vnic_dev_priv(wq->vdev);
 	unsigned short vlan_id;
 	unsigned short ol_flags;
-	uint8_t last_seg, eop;
-	unsigned int host_tx_descs = 0;
+	unsigned int wq_desc_avail;
+	int head_idx;
+	struct vnic_wq_buf *buf;
+	unsigned int hw_ip_cksum_enabled;
+	unsigned int desc_count;
+	struct wq_enet_desc *descs, *desc_p, desc_tmp;
+	uint16_t mss;
+	uint8_t vlan_tag_insert;
+	uint8_t eop;
+	uint64_t bus_addr;
 
+	enic_cleanup_wq(enic, wq);
+	wq_desc_avail = vnic_wq_desc_avail(wq);
+	head_idx = wq->head_idx;
+	desc_count = wq->ring.desc_count;
+
+	nb_pkts = RTE_MIN(nb_pkts, ENIC_TX_XMIT_MAX);
+
+	hw_ip_cksum_enabled = enic->hw_ip_checksum;
 	for (index = 0; index < nb_pkts; index++) {
 		tx_pkt = *tx_pkts++;
-		inc_len = 0;
 		nb_segs = tx_pkt->nb_segs;
-		if (nb_segs > vnic_wq_desc_avail(wq)) {
+		if (nb_segs > wq_desc_avail) {
 			if (index > 0)
-				enic_post_wq_index(wq);
-
-			/* wq cleanup and try again */
-			if (!enic_cleanup_wq(enic, wq) ||
-				(nb_segs > vnic_wq_desc_avail(wq))) {
-				return index;
-			}
+				goto post;
+			goto done;
 		}
 
 		pkt_len = tx_pkt->pkt_len;
+		data_len = tx_pkt->data_len;
 		vlan_id = tx_pkt->vlan_tci;
 		ol_flags = tx_pkt->ol_flags;
-		for (frags = 0; inc_len < pkt_len; frags++) {
-			if (!tx_pkt)
-				break;
-			next_tx_pkt = tx_pkt->next;
-			seg_len = tx_pkt->data_len;
-			inc_len += seg_len;
-
-			host_tx_descs++;
-			last_seg = 0;
-			eop = 0;
-			if ((pkt_len == inc_len) || !next_tx_pkt) {
-				eop = 1;
-				/* post if last packet in batch or > thresh */
-				if ((index == (nb_pkts - 1)) ||
-				   (host_tx_descs > ENIC_TX_POST_THRESH)) {
-					last_seg = 1;
-					host_tx_descs = 0;
-				}
+
+		mss = 0;
+		vlan_tag_insert = 0;
+		bus_addr = (dma_addr_t)
+			   (tx_pkt->buf_physaddr + tx_pkt->data_off);
+
+		descs = (struct wq_enet_desc *)wq->ring.descs;
+		desc_p = descs + head_idx;
+
+		eop = (data_len == pkt_len);
+
+		if (ol_flags & PKT_TX_VLAN_PKT)
+			vlan_tag_insert = 1;
+
+		if (hw_ip_cksum_enabled && (ol_flags & PKT_TX_IP_CKSUM))
+			mss |= ENIC_CALC_IP_CKSUM;
+
+		if (hw_ip_cksum_enabled && (ol_flags & PKT_TX_TCP_UDP_CKSUM))
+			mss |= ENIC_CALC_TCP_UDP_CKSUM;
+
+		wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, 0, 0, eop,
+				 eop, 0, vlan_tag_insert, vlan_id, 0);
+
+		*desc_p = desc_tmp;
+		buf = &wq->bufs[head_idx];
+		buf->mb = (void *)tx_pkt;
+		head_idx = enic_ring_incr(desc_count, head_idx);
+		wq_desc_avail--;
+
+		if (!eop) {
+			for (tx_pkt = tx_pkt->next; tx_pkt; tx_pkt =
+			    tx_pkt->next) {
+				data_len = tx_pkt->data_len;
+
+				if (tx_pkt->next == NULL)
+					eop = 1;
+				desc_p = descs + head_idx;
+				bus_addr = (dma_addr_t)(tx_pkt->buf_physaddr
+					   + tx_pkt->data_off);
+				wq_enet_desc_enc((struct wq_enet_desc *)
+						 &desc_tmp, bus_addr, data_len,
+						 mss, 0, 0, eop, eop, 0,
+						 vlan_tag_insert, vlan_id, 0);
+
+				*desc_p = desc_tmp;
+				buf = &wq->bufs[head_idx];
+				buf->mb = (void *)tx_pkt;
+				head_idx = enic_ring_incr(desc_count, head_idx);
+				wq_desc_avail--;
 			}
-			enic_send_pkt(enic, wq, tx_pkt, (unsigned short)seg_len,
-				      !frags, eop, last_seg, ol_flags, vlan_id);
-			tx_pkt = next_tx_pkt;
 		}
 	}
+ post:
+	rte_wmb();
+	iowrite32(head_idx, &wq->ctrl->posted_index);
+ done:
+	wq->ring.desc_avail = wq_desc_avail;
+	wq->head_idx = head_idx;
 
-	enic_cleanup_wq(enic, wq);
 	return index;
 }
+
+