[v4] gro : ipv6 changes to support GRO for TCP/ipv6

Message ID 20230606145802.53671-1-kumaraparamesh92@gmail.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series [v4] gro : ipv6 changes to support GRO for TCP/ipv6 |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/github-robot: build success github build: passed
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-aarch-unit-testing success Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS

Commit Message

Kumara Parameshwaran June 6, 2023, 2:58 p.m. UTC
  The patch adds GRO support for TCP/ipv6 packets. This does not
include the support for vxlan, udp ipv6 packets.

Signed-off-by: Kumara Parameshwaran <kumaraparamesh92@gmail.com>
---
v1:
	* Changes to support GRO for TCP/ipv6 packets. This does not include
	  vxlan changes. 
	* The GRO is performed only for ipv6 packets that do not contain
	  extension headers.
	* The logic for the TCP coalescing remains the same, in ipv6 header 
	  the source address, destination address, flow label, version fields 
	  are expected to be the same. 
	* Re-organised the code to reuse certain tcp functions for both ipv4 and 
	  ipv6 flows.
v2:
	* Fix comments in gro_tcp6.h header file. 

v3:
	* Address review comments to fix code duplication for v4 and v6

v4:
	* Addresses review comments for v3, do not use callbacks 

 lib/gro/gro_tcp.c        | 128 +++++++++++++++++++
 lib/gro/gro_tcp.h        | 209 ++++++++++++++++++++++++++++++
 lib/gro/gro_tcp4.c       | 182 ++++++---------------------
 lib/gro/gro_tcp4.h       | 168 +------------------------
 lib/gro/gro_tcp6.c       | 266 +++++++++++++++++++++++++++++++++++++++
 lib/gro/gro_tcp6.h       | 163 ++++++++++++++++++++++++
 lib/gro/gro_vxlan_tcp4.c |  21 ++--
 lib/gro/gro_vxlan_tcp4.h |   3 +-
 lib/gro/meson.build      |   2 +
 lib/gro/rte_gro.c        |  83 +++++++++---
 lib/gro/rte_gro.h        |   3 +
 11 files changed, 895 insertions(+), 333 deletions(-)
 create mode 100644 lib/gro/gro_tcp.c
 create mode 100644 lib/gro/gro_tcp.h
 create mode 100644 lib/gro/gro_tcp6.c
 create mode 100644 lib/gro/gro_tcp6.h
  

Comments

Hu, Jiayu June 8, 2023, 4:05 a.m. UTC | #1
Hi Kumara,

Please see replies inline.

In addition, you need to update the programmer guide in generic_receive_offload_lib.rst,
and release note release_23_07.rst.

Thanks,
Jiayu

> -----Original Message-----
> From: Kumara Parameshwaran <kumaraparamesh92@gmail.com>
> Sent: Tuesday, June 6, 2023 10:58 PM
> To: Hu, Jiayu <jiayu.hu@intel.com>
> Cc: dev@dpdk.org; Kumara Parameshwaran
> <kumaraparamesh92@gmail.com>
> Subject: [PATCH v4] gro : ipv6 changes to support GRO for TCP/ipv6
> 
> The patch adds GRO support for TCP/ipv6 packets. This does not include the
> support for vxlan, udp ipv6 packets.
> 
> Signed-off-by: Kumara Parameshwaran <kumaraparamesh92@gmail.com>> 
> diff --git a/lib/gro/gro_tcp.c b/lib/gro/gro_tcp.c new file mode 100644 index
> 0000000000..02a7d0f8c5
> --- /dev/null
> +++ b/lib/gro/gro_tcp.c

For gro_tcp.c and gro_tcp.h, it's better to add "_internal" in the file name.

> @@ -0,0 +1,128 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2017 Intel Corporation
> + */
> +#include <rte_malloc.h>
> +#include <rte_mbuf.h>
> +#include <rte_ethdev.h>
> +
> +#include "gro_tcp.h"
> +
> +static inline uint32_t
> +find_an_empty_item(struct gro_tcp_item *items,
> +	uint32_t max_item_num)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < max_item_num; i++)
> +		if (items[i].firstseg == NULL)
> +			return i;
> +	return INVALID_ARRAY_INDEX;
> +}
> +
> +uint32_t
> +insert_new_tcp_item(struct rte_mbuf *pkt,
> +		struct gro_tcp_item *items,
> +		uint32_t *item_num,
> +		uint32_t max_item_num,
> +		uint64_t start_time,
> +		uint32_t prev_idx,
> +		uint32_t sent_seq,
> +		uint16_t ip_id,
> +		uint8_t is_atomic)

This function can be inline.

> +{
> +	uint32_t item_idx;
> +
> +	item_idx = find_an_empty_item(items, max_item_num);
> +	if (item_idx == INVALID_ARRAY_INDEX)
> +		return INVALID_ARRAY_INDEX;
> +
> +	items[item_idx].firstseg = pkt;
> +	items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt);
> +	items[item_idx].start_time = start_time;
> +	items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX;
> +	items[item_idx].sent_seq = sent_seq;
> +	items[item_idx].ip_id = ip_id;
> +	items[item_idx].nb_merged = 1;
> +	items[item_idx].is_atomic = is_atomic;
> +	(*item_num) += 1;
> +
> +	/* if the previous packet exists, chain them together. */
> +	if (prev_idx != INVALID_ARRAY_INDEX) {
> +		items[item_idx].next_pkt_idx =
> +			items[prev_idx].next_pkt_idx;
> +		items[prev_idx].next_pkt_idx = item_idx;
> +	}
> +
> +	return item_idx;
> +}
> +
> +uint32_t
> +delete_tcp_item(struct gro_tcp_item *items, uint32_t item_idx,
> +		uint32_t *item_num,
> +		uint32_t prev_item_idx)
> +{
> +	uint32_t next_idx = items[item_idx].next_pkt_idx;
> +
> +	/* NULL indicates an empty item */
> +	items[item_idx].firstseg = NULL;
> +	(*item_num) -= 1;
> +	if (prev_item_idx != INVALID_ARRAY_INDEX)
> +		items[prev_item_idx].next_pkt_idx = next_idx;
> +
> +	return next_idx;
> +}

This function can be inline.

> +
> +int32_t
> +gro_process_tcp_item(struct rte_mbuf *pkt,
> +	struct rte_tcp_hdr *tcp_hdr,
> +	int32_t tcp_dl,
> +	struct gro_tcp_item *items,
> +	uint32_t item_idx,
> +	uint32_t *item_num,
> +	uint32_t max_item_num,
> +	uint16_t ip_id,
> +	uint8_t is_atomic,
> +	uint64_t start_time)

It is for internal use, so it's better to remove "gro_" from the function name.

> +{
> +	uint32_t cur_idx;
> +	uint32_t prev_idx;
> +	int cmp;
> +	uint32_t sent_seq;
> +
> +	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> +	/*
> +	 * Check all packets in the flow and try to find a neighbor for
> +	 * the input packet.
> +	 */
> +	cur_idx = item_idx;
> +	prev_idx = cur_idx;
> +	do {
> +		cmp = check_seq_option(&items[cur_idx], tcp_hdr,
> +				sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
> +				is_atomic);
> +		if (cmp) {
> +			if (merge_two_tcp_packets(&items[cur_idx],
> +						pkt, cmp, sent_seq, ip_id, 0))
> +				return 1;
> +			/*
> +			 * Fail to merge the two packets, as the packet
> +			 * length is greater than the max value. Store
> +			 * the packet into the flow.
> +			 */
> +			if (insert_new_tcp_item(pkt, items, item_num,
> max_item_num, start_time, cur_idx,
> +						sent_seq, ip_id, is_atomic) ==
> +					INVALID_ARRAY_INDEX)
> +				return -1;
> +			return 0;
> +		}
> +		prev_idx = cur_idx;
> +		cur_idx = items[cur_idx].next_pkt_idx;
> +	} while (cur_idx != INVALID_ARRAY_INDEX);
> +
> +	/* Fail to find a neighbor, so store the packet into the flow. */
> +	if (insert_new_tcp_item(pkt, items, item_num, max_item_num,
> start_time, prev_idx, sent_seq,
> +				ip_id, is_atomic) == INVALID_ARRAY_INDEX)
> +		return -1;
> +
> +	return 0;
> +}
> diff --git a/lib/gro/gro_tcp.h b/lib/gro/gro_tcp.h new file mode 100644 index
> 0000000000..4b5b4eda9c
> --- /dev/null
> +++ b/lib/gro/gro_tcp.h
> @@ -0,0 +1,209 @@
> +#ifndef _GRO_TCP_H_
> +#define _GRO_TCP_H_
> +
> +#define INVALID_ARRAY_INDEX 0xffffffffUL
> +
> +#include <rte_tcp.h>
> +
> +/*
> + * The max length of a IPv4 packet, which includes the length of the L3
> + * header, the L4 header and the data payload.
> + */
> +#define MAX_IP_PKT_LENGTH UINT16_MAX
> +
> +/* The maximum TCP header length */
> +#define MAX_TCP_HLEN 60
> +#define INVALID_TCP_HDRLEN(len) \
> +	(((len) < sizeof(struct rte_tcp_hdr)) || ((len) > MAX_TCP_HLEN))
> +
> +struct gro_tcp_flow {

This structure name is confusing. In the upper layer, tcp4 and tcp6 have gro_tcp4_flow
and gro_tcp6_flow, which represent a flow. Inside gro_tcp4/6_flow, there are keys,
represented by struct tcp4/6_flow_key. But inside struct tcp4/6_flow_key, there is
struct gro_tcp_flow. Need to rename struct gro_tcp_flow, like common_tcp_flow_key.

> +	struct rte_ether_addr eth_saddr;
> +	struct rte_ether_addr eth_daddr;
> +	uint32_t recv_ack;
> +	uint16_t src_port;
> +	uint16_t dst_port;
> +};
> +
> +#define ASSIGN_TCP_FLOW_KEY(k1, k2) \

Ditto. The macro needs rename, like ASSIGN_COMMON_TCP_FLOW_KEY.

> +	do {\
> +		rte_ether_addr_copy(&(k1->eth_saddr), &(k2->eth_saddr)); \
> +		rte_ether_addr_copy(&(k1->eth_daddr), &(k2->eth_daddr));
> \
> +		k2->recv_ack = k1->recv_ack; \
> +		k2->src_port = k1->src_port; \
> +		k2->dst_port = k1->dst_port; \
> +	} while (0)
> +
> +struct gro_tcp_item {
> +	/*
> +	 * The first MBUF segment of the packet. If the value
> +	 * is NULL, it means the item is empty.
> +	 */
> +	struct rte_mbuf *firstseg;
> +	/* The last MBUF segment of the packet */
> +	struct rte_mbuf *lastseg;
> +	/*
> +	 * The time when the first packet is inserted into the table.
> +	 * This value won't be updated, even if the packet is merged
> +	 * with other packets.
> +	 */
> +	uint64_t start_time;
> +	/*
> +	 * next_pkt_idx is used to chain the packets that
> +	 * are in the same flow but can't be merged together
> +	 * (e.g. caused by packet reordering).
> +	 */
> +	uint32_t next_pkt_idx;
> +	/* TCP sequence number of the packet */
> +	uint32_t sent_seq;
> +	/* IPv4 ID of the packet */
> +	uint16_t ip_id;

The ip_id field is not used by tcp6. It's better to use a union to include ip_id for IPv4 and
a useless member for IPv6 with some comments to avoid confusion.

> +	/* the number of merged packets */
> +	uint16_t nb_merged;
> +	/* Indicate if IPv4 ID can be ignored */
> +	uint8_t is_atomic;
> +};
> +
> +uint32_t
> +insert_new_tcp_item(struct rte_mbuf *pkt,
> +		struct gro_tcp_item *items,
> +		uint32_t *item_num,
> +		uint32_t table_size,
> +		uint64_t start_time,
> +		uint32_t prev_idx,
> +		uint32_t sent_seq,
> +		uint16_t ip_id,
> +		uint8_t is_atomic);
> +
> +uint32_t
> +delete_tcp_item(struct gro_tcp_item *items,
> +		uint32_t item_idx,
> +		uint32_t *item_num,
> +		uint32_t prev_item_idx);
> +
> +int32_t
> +gro_process_tcp_item(struct rte_mbuf *pkt,
> +	struct rte_tcp_hdr *tcp_hdr,
> +	int32_t tcp_dl,
> +	struct gro_tcp_item *items,
> +	uint32_t item_idx,
> +	uint32_t *item_num,
> +	uint32_t table_size,
> +	uint16_t ip_id,
> +	uint8_t is_atomic,
> +	uint64_t start_time);
> +
> +/*
> + * Merge two TCP packets without updating checksums.
> + * If cmp is larger than 0, append the new packet to the
> + * original packet. Otherwise, pre-pend the new packet to
> + * the original packet.
> + */
> +static inline int
> +merge_two_tcp_packets(struct gro_tcp_item *item,
> +		struct rte_mbuf *pkt,
> +		int cmp,
> +		uint32_t sent_seq,
> +		uint16_t ip_id,
> +		uint16_t l2_offset)
> +{
> +	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
> +	uint16_t hdr_len, l2_len;
> +
> +	if (cmp > 0) {
> +		pkt_head = item->firstseg;
> +		pkt_tail = pkt;
> +	} else {
> +		pkt_head = pkt;
> +		pkt_tail = item->firstseg;
> +	}
> +
> +	/* check if the IPv4 packet length is greater than the max value */
> +	hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
> +		pkt_head->l4_len;
> +	l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
> +	if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
> +				hdr_len > MAX_IP_PKT_LENGTH))
> +		return 0;
> +
> +	/* remove the packet header for the tail packet */
> +	rte_pktmbuf_adj(pkt_tail, hdr_len);
> +
> +	/* chain two packets together */
> +	if (cmp > 0) {
> +		item->lastseg->next = pkt;
> +		item->lastseg = rte_pktmbuf_lastseg(pkt);
> +		/* update IP ID to the larger value */
> +		item->ip_id = ip_id;
> +	} else {
> +		lastseg = rte_pktmbuf_lastseg(pkt);
> +		lastseg->next = item->firstseg;
> +		item->firstseg = pkt;
> +		/* update sent_seq to the smaller value */
> +		item->sent_seq = sent_seq;
> +		item->ip_id = ip_id;
> +	}
> +	item->nb_merged++;
> +
> +	/* update MBUF metadata for the merged packet */
> +	pkt_head->nb_segs += pkt_tail->nb_segs;
> +	pkt_head->pkt_len += pkt_tail->pkt_len;
> +
> +	return 1;
> +}
> +
> +/*
> + * Check if two TCP/IPv4 packets are neighbors.
> + */
> +static inline int
> +check_seq_option(struct gro_tcp_item *item,
> +		struct rte_tcp_hdr *tcph,
> +		uint32_t sent_seq,
> +		uint16_t ip_id,
> +		uint16_t tcp_hl,
> +		uint16_t tcp_dl,
> +		uint16_t l2_offset,
> +		uint8_t is_atomic)
> +{
> +	struct rte_mbuf *pkt_orig = item->firstseg;
> +	char *iph_orig;
> +	struct rte_tcp_hdr *tcph_orig;
> +	uint16_t len, tcp_hl_orig;
> +
> +	iph_orig = (char *)(rte_pktmbuf_mtod(pkt_orig, char *) +
> +			l2_offset + pkt_orig->l2_len);
> +	tcph_orig = (struct rte_tcp_hdr *)(iph_orig + pkt_orig->l3_len);
> +	tcp_hl_orig = pkt_orig->l4_len;
> +
> +	/* Check if TCP option fields equal */
> +	len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct rte_tcp_hdr);
> +	if ((tcp_hl != tcp_hl_orig) || ((len > 0) &&
> +				(memcmp(tcph + 1, tcph_orig + 1,
> +					len) != 0)))
> +		return 0;
> +
> +	/* Don't merge packets whose DF bits are different */
> +	if (unlikely(item->is_atomic ^ is_atomic))
> +		return 0;
> +
> +	/* check if the two packets are neighbors */
> +	len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
> +		pkt_orig->l3_len - tcp_hl_orig;
> +	if ((sent_seq == item->sent_seq + len) && (is_atomic ||
> +				(ip_id == item->ip_id + 1)))
> +		/* append the new packet */
> +		return 1;
> +	else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
> +				(ip_id + item->nb_merged == item->ip_id)))
> +		/* pre-pend the new packet */
> +		return -1;
> +
> +	return 0;
> +}
> +
> +static inline int
> +is_same_tcp_flow(struct gro_tcp_flow *k1, struct gro_tcp_flow *k2) {
> +	return (!memcmp(k1, k2, sizeof(struct gro_tcp_flow))); }

I think this function needs a rename, as the result of this function cannot identify if they are the
same TCP flow.

> +
> +#endif
> diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c index
> 0014096e63..ffc33747c4 100644
> --- a/lib/gro/gro_tcp4.c
> +++ b/lib/gro/gro_tcp4.c
> @@ -7,6 +7,7 @@
>  #include <rte_ethdev.h>
> 
>  #include "gro_tcp4.h"
> +#include "gro_tcp.h"
> 
>  void *
>  gro_tcp4_tbl_create(uint16_t socket_id, @@ -30,7 +31,7 @@
> gro_tcp4_tbl_create(uint16_t socket_id,
>  	if (tbl == NULL)
>  		return NULL;
> 
> -	size = sizeof(struct gro_tcp4_item) * entries_num;
> +	size = sizeof(struct gro_tcp_item) * entries_num;
>  	tbl->items = rte_zmalloc_socket(__func__,
>  			size,
>  			RTE_CACHE_LINE_SIZE,
> @@ -71,18 +72,6 @@ gro_tcp4_tbl_destroy(void *tbl)
>  	rte_free(tcp_tbl);
>  }
> 
> -static inline uint32_t
> -find_an_empty_item(struct gro_tcp4_tbl *tbl) -{
> -	uint32_t i;
> -	uint32_t max_item_num = tbl->max_item_num;
> -
> -	for (i = 0; i < max_item_num; i++)
> -		if (tbl->items[i].firstseg == NULL)
> -			return i;
> -	return INVALID_ARRAY_INDEX;
> -}
> -
>  static inline uint32_t
>  find_an_empty_flow(struct gro_tcp4_tbl *tbl)  { @@ -95,56 +84,6 @@
> find_an_empty_flow(struct gro_tcp4_tbl *tbl)
>  	return INVALID_ARRAY_INDEX;
>  }
> 
> -static inline uint32_t
> -insert_new_item(struct gro_tcp4_tbl *tbl,
> -		struct rte_mbuf *pkt,
> -		uint64_t start_time,
> -		uint32_t prev_idx,
> -		uint32_t sent_seq,
> -		uint16_t ip_id,
> -		uint8_t is_atomic)
> -{
> -	uint32_t item_idx;
> -
> -	item_idx = find_an_empty_item(tbl);
> -	if (item_idx == INVALID_ARRAY_INDEX)
> -		return INVALID_ARRAY_INDEX;
> -
> -	tbl->items[item_idx].firstseg = pkt;
> -	tbl->items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt);
> -	tbl->items[item_idx].start_time = start_time;
> -	tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX;
> -	tbl->items[item_idx].sent_seq = sent_seq;
> -	tbl->items[item_idx].ip_id = ip_id;
> -	tbl->items[item_idx].nb_merged = 1;
> -	tbl->items[item_idx].is_atomic = is_atomic;
> -	tbl->item_num++;
> -
> -	/* if the previous packet exists, chain them together. */
> -	if (prev_idx != INVALID_ARRAY_INDEX) {
> -		tbl->items[item_idx].next_pkt_idx =
> -			tbl->items[prev_idx].next_pkt_idx;
> -		tbl->items[prev_idx].next_pkt_idx = item_idx;
> -	}
> -
> -	return item_idx;
> -}
> -
> -static inline uint32_t
> -delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
> -		uint32_t prev_item_idx)
> -{
> -	uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
> -
> -	/* NULL indicates an empty item */
> -	tbl->items[item_idx].firstseg = NULL;
> -	tbl->item_num--;
> -	if (prev_item_idx != INVALID_ARRAY_INDEX)
> -		tbl->items[prev_item_idx].next_pkt_idx = next_idx;
> -
> -	return next_idx;
> -}
> -
>  static inline uint32_t
>  insert_new_flow(struct gro_tcp4_tbl *tbl,
>  		struct tcp4_flow_key *src,
> @@ -159,13 +98,10 @@ insert_new_flow(struct gro_tcp4_tbl *tbl,
> 
>  	dst = &(tbl->flows[flow_idx].key);
> 
> -	rte_ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr));
> -	rte_ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr));
> +	ASSIGN_TCP_FLOW_KEY((&src->tcp_flow), (&dst->tcp_flow));
> +
>  	dst->ip_src_addr = src->ip_src_addr;
>  	dst->ip_dst_addr = src->ip_dst_addr;
> -	dst->recv_ack = src->recv_ack;
> -	dst->src_port = src->src_port;
> -	dst->dst_port = src->dst_port;
> 
>  	tbl->flows[flow_idx].start_index = item_idx;
>  	tbl->flow_num++;
> @@ -173,21 +109,6 @@ insert_new_flow(struct gro_tcp4_tbl *tbl,
>  	return flow_idx;
>  }
> 
> -/*
> - * update the packet length for the flushed packet.
> - */
> -static inline void
> -update_header(struct gro_tcp4_item *item) -{
> -	struct rte_ipv4_hdr *ipv4_hdr;
> -	struct rte_mbuf *pkt = item->firstseg;
> -
> -	ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
> -			pkt->l2_len);
> -	ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len -
> -			pkt->l2_len);
> -}
> -
>  int32_t
>  gro_tcp4_reassemble(struct rte_mbuf *pkt,
>  		struct gro_tcp4_tbl *tbl,
> @@ -195,16 +116,15 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,  {
>  	struct rte_ether_hdr *eth_hdr;
>  	struct rte_ipv4_hdr *ipv4_hdr;
> -	struct rte_tcp_hdr *tcp_hdr;
> -	uint32_t sent_seq;
>  	int32_t tcp_dl;
> +	struct rte_tcp_hdr *tcp_hdr;
>  	uint16_t ip_id, hdr_len, frag_off, ip_tlen;
>  	uint8_t is_atomic;
> +	uint32_t sent_seq;

No need to change tcp_hdr and sent_seq here.

> 
>  	struct tcp4_flow_key key;
> -	uint32_t cur_idx, prev_idx, item_idx;
> +	uint32_t item_idx;
>  	uint32_t i, max_flow_num, remaining_flow_num;
> -	int cmp;
>  	uint8_t find;
> 
>  	/*
> @@ -216,7 +136,7 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
> 
>  	eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
>  	ipv4_hdr = (struct rte_ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
> -	tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
> +	tcp_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_tcp_hdr *,
> +pkt->l2_len + pkt->l3_len);
>  	hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
> 
>  	/*
> @@ -230,7 +150,6 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
>  	ip_tlen = rte_be_to_cpu_16(ipv4_hdr->total_length);
>  	if (pkt->pkt_len > (uint32_t)(ip_tlen + pkt->l2_len))
>  		rte_pktmbuf_trim(pkt, pkt->pkt_len - ip_tlen - pkt->l2_len);
> -
>  	/*
>  	 * Don't process the packet whose payload length is less than or
>  	 * equal to 0.
> @@ -239,6 +158,13 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
>  	if (tcp_dl <= 0)
>  		return -1;
> 
> +	rte_ether_addr_copy(&(eth_hdr->src_addr),
> &(key.tcp_flow.eth_saddr));
> +	rte_ether_addr_copy(&(eth_hdr->dst_addr),
> &(key.tcp_flow.eth_daddr));
> +	key.ip_src_addr = ipv4_hdr->src_addr;
> +	key.ip_dst_addr = ipv4_hdr->dst_addr;
> +	key.tcp_flow.src_port = tcp_hdr->src_port;
> +	key.tcp_flow.dst_port = tcp_hdr->dst_port;
> +	key.tcp_flow.recv_ack = tcp_hdr->recv_ack;
>  	/*
>  	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
>  	 * whose DF bit is 1, IPv4 ID is ignored.
> @@ -246,15 +172,6 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
>  	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
>  	is_atomic = (frag_off & RTE_IPV4_HDR_DF_FLAG) ==
> RTE_IPV4_HDR_DF_FLAG;
>  	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
> -	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> -
> -	rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.eth_saddr));
> -	rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.eth_daddr));
> -	key.ip_src_addr = ipv4_hdr->src_addr;
> -	key.ip_dst_addr = ipv4_hdr->dst_addr;
> -	key.src_port = tcp_hdr->src_port;
> -	key.dst_port = tcp_hdr->dst_port;
> -	key.recv_ack = tcp_hdr->recv_ack;
> 
>  	/* Search for a matched flow. */
>  	max_flow_num = tbl->max_flow_num;
> @@ -270,63 +187,44 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
>  		}
>  	}
> 
> -	/*
> -	 * Fail to find a matched flow. Insert a new flow and store the
> -	 * packet into the flow.
> -	 */
>  	if (find == 0) {
> -		item_idx = insert_new_item(tbl, pkt, start_time,
> -				INVALID_ARRAY_INDEX, sent_seq, ip_id,
> -				is_atomic);
> +		sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> +		item_idx = insert_new_tcp_item(pkt, tbl->items, &tbl-
> >item_num, tbl->max_item_num, start_time,
> +						INVALID_ARRAY_INDEX,
> sent_seq, ip_id,
> +						is_atomic);
>  		if (item_idx == INVALID_ARRAY_INDEX)
>  			return -1;
>  		if (insert_new_flow(tbl, &key, item_idx) ==
> -				INVALID_ARRAY_INDEX) {
> +			INVALID_ARRAY_INDEX) {
>  			/*
>  			 * Fail to insert a new flow, so delete the
>  			 * stored packet.
> -			 */
> -			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
> +			*/
> +			delete_tcp_item(tbl->items, item_idx, &tbl-
> >item_num,
> +INVALID_ARRAY_INDEX);
>  			return -1;
>  		}
>  		return 0;
>  	}
> +	item_idx = tbl->flows[i].start_index;

No need to update item_idx, and you can directly pass tbl->flows[i].start_index to
gro_process_tcp_item(). And same in gro_tcp6_reassemble().

> 
> -	/*
> -	 * Check all packets in the flow and try to find a neighbor for
> -	 * the input packet.
> -	 */
> -	cur_idx = tbl->flows[i].start_index;
> -	prev_idx = cur_idx;
> -	do {
> -		cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
> -				sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
> -				is_atomic);
> -		if (cmp) {
> -			if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
> -						pkt, cmp, sent_seq, ip_id, 0))
> -				return 1;
> -			/*
> -			 * Fail to merge the two packets, as the packet
> -			 * length is greater than the max value. Store
> -			 * the packet into the flow.
> -			 */
> -			if (insert_new_item(tbl, pkt, start_time, cur_idx,
> -						sent_seq, ip_id, is_atomic) ==
> -					INVALID_ARRAY_INDEX)
> -				return -1;
> -			return 0;
> -		}
> -		prev_idx = cur_idx;
> -		cur_idx = tbl->items[cur_idx].next_pkt_idx;
> -	} while (cur_idx != INVALID_ARRAY_INDEX);
> +	return gro_process_tcp_item(pkt, tcp_hdr, tcp_dl, tbl->items,
> item_idx,
> +						&tbl->item_num, tbl-
> >max_item_num,
> +						ip_id, is_atomic, start_time);
> +}
> 
> -	/* Fail to find a neighbor, so store the packet into the flow. */
> -	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
> -				ip_id, is_atomic) == INVALID_ARRAY_INDEX)
> -		return -1;
  
Kumara Parameshwaran June 8, 2023, 4:52 p.m. UTC | #2
Hi Jiayu,

Thanks for the quick review comments. Will address the review comments.
Require clarification in one of the comments. Please find it inline.

On Thu, Jun 8, 2023 at 9:35 AM Hu, Jiayu <jiayu.hu@intel.com> wrote:

> Hi Kumara,
>
> Please see replies inline.
>
> In addition, you need to update the programmer guide in
> generic_receive_offload_lib.rst,
> and release note release_23_07.rst.
>
> Thanks,
> Jiayu
>
> > -----Original Message-----
> > From: Kumara Parameshwaran <kumaraparamesh92@gmail.com>
> > Sent: Tuesday, June 6, 2023 10:58 PM
> > To: Hu, Jiayu <jiayu.hu@intel.com>
> > Cc: dev@dpdk.org; Kumara Parameshwaran
> > <kumaraparamesh92@gmail.com>
> > Subject: [PATCH v4] gro : ipv6 changes to support GRO for TCP/ipv6
> >
> > The patch adds GRO support for TCP/ipv6 packets. This does not include
> the
> > support for vxlan, udp ipv6 packets.
> >
> > Signed-off-by: Kumara Parameshwaran <kumaraparamesh92@gmail.com>>
> > diff --git a/lib/gro/gro_tcp.c b/lib/gro/gro_tcp.c new file mode 100644
> index
> > 0000000000..02a7d0f8c5
> > --- /dev/null
> > +++ b/lib/gro/gro_tcp.c
>
> For gro_tcp.c and gro_tcp.h, it's better to add "_internal" in the file
> name.
>
> > @@ -0,0 +1,128 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2017 Intel Corporation
> > + */
> > +#include <rte_malloc.h>
> > +#include <rte_mbuf.h>
> > +#include <rte_ethdev.h>
> > +
> > +#include "gro_tcp.h"
> > +
> > +static inline uint32_t
> > +find_an_empty_item(struct gro_tcp_item *items,
> > +     uint32_t max_item_num)
> > +{
> > +     uint32_t i;
> > +
> > +     for (i = 0; i < max_item_num; i++)
> > +             if (items[i].firstseg == NULL)
> > +                     return i;
> > +     return INVALID_ARRAY_INDEX;
> > +}
> > +
> > +uint32_t
> > +insert_new_tcp_item(struct rte_mbuf *pkt,
> > +             struct gro_tcp_item *items,
> > +             uint32_t *item_num,
> > +             uint32_t max_item_num,
> > +             uint64_t start_time,
> > +             uint32_t prev_idx,
> > +             uint32_t sent_seq,
> > +             uint16_t ip_id,
> > +             uint8_t is_atomic)
>
> This function can be inline.
>
> > +{
> > +     uint32_t item_idx;
> > +
> > +     item_idx = find_an_empty_item(items, max_item_num);
> > +     if (item_idx == INVALID_ARRAY_INDEX)
> > +             return INVALID_ARRAY_INDEX;
> > +
> > +     items[item_idx].firstseg = pkt;
> > +     items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt);
> > +     items[item_idx].start_time = start_time;
> > +     items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX;
> > +     items[item_idx].sent_seq = sent_seq;
> > +     items[item_idx].ip_id = ip_id;
> > +     items[item_idx].nb_merged = 1;
> > +     items[item_idx].is_atomic = is_atomic;
> > +     (*item_num) += 1;
> > +
> > +     /* if the previous packet exists, chain them together. */
> > +     if (prev_idx != INVALID_ARRAY_INDEX) {
> > +             items[item_idx].next_pkt_idx =
> > +                     items[prev_idx].next_pkt_idx;
> > +             items[prev_idx].next_pkt_idx = item_idx;
> > +     }
> > +
> > +     return item_idx;
> > +}
> > +
> > +uint32_t
> > +delete_tcp_item(struct gro_tcp_item *items, uint32_t item_idx,
> > +             uint32_t *item_num,
> > +             uint32_t prev_item_idx)
> > +{
> > +     uint32_t next_idx = items[item_idx].next_pkt_idx;
> > +
> > +     /* NULL indicates an empty item */
> > +     items[item_idx].firstseg = NULL;
> > +     (*item_num) -= 1;
> > +     if (prev_item_idx != INVALID_ARRAY_INDEX)
> > +             items[prev_item_idx].next_pkt_idx = next_idx;
> > +
> > +     return next_idx;
> > +}
>
> This function can be inline.
>
> > +
> > +int32_t
> > +gro_process_tcp_item(struct rte_mbuf *pkt,
> > +     struct rte_tcp_hdr *tcp_hdr,
> > +     int32_t tcp_dl,
> > +     struct gro_tcp_item *items,
> > +     uint32_t item_idx,
> > +     uint32_t *item_num,
> > +     uint32_t max_item_num,
> > +     uint16_t ip_id,
> > +     uint8_t is_atomic,
> > +     uint64_t start_time)
>
> It is for internal use, so it's better to remove "gro_" from the function
> name.
>
> > +{
> > +     uint32_t cur_idx;
> > +     uint32_t prev_idx;
> > +     int cmp;
> > +     uint32_t sent_seq;
> > +
> > +     sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> > +     /*
> > +      * Check all packets in the flow and try to find a neighbor for
> > +      * the input packet.
> > +      */
> > +     cur_idx = item_idx;
> > +     prev_idx = cur_idx;
> > +     do {
> > +             cmp = check_seq_option(&items[cur_idx], tcp_hdr,
> > +                             sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
> > +                             is_atomic);
> > +             if (cmp) {
> > +                     if (merge_two_tcp_packets(&items[cur_idx],
> > +                                             pkt, cmp, sent_seq, ip_id,
> 0))
> > +                             return 1;
> > +                     /*
> > +                      * Fail to merge the two packets, as the packet
> > +                      * length is greater than the max value. Store
> > +                      * the packet into the flow.
> > +                      */
> > +                     if (insert_new_tcp_item(pkt, items, item_num,
> > max_item_num, start_time, cur_idx,
> > +                                             sent_seq, ip_id,
> is_atomic) ==
> > +                                     INVALID_ARRAY_INDEX)
> > +                             return -1;
> > +                     return 0;
> > +             }
> > +             prev_idx = cur_idx;
> > +             cur_idx = items[cur_idx].next_pkt_idx;
> > +     } while (cur_idx != INVALID_ARRAY_INDEX);
> > +
> > +     /* Fail to find a neighbor, so store the packet into the flow. */
> > +     if (insert_new_tcp_item(pkt, items, item_num, max_item_num,
> > start_time, prev_idx, sent_seq,
> > +                             ip_id, is_atomic) == INVALID_ARRAY_INDEX)
> > +             return -1;
> > +
> > +     return 0;
> > +}
> > diff --git a/lib/gro/gro_tcp.h b/lib/gro/gro_tcp.h new file mode 100644
> index
> > 0000000000..4b5b4eda9c
> > --- /dev/null
> > +++ b/lib/gro/gro_tcp.h
> > @@ -0,0 +1,209 @@
> > +#ifndef _GRO_TCP_H_
> > +#define _GRO_TCP_H_
> > +
> > +#define INVALID_ARRAY_INDEX 0xffffffffUL
> > +
> > +#include <rte_tcp.h>
> > +
> > +/*
> > + * The max length of a IPv4 packet, which includes the length of the L3
> > + * header, the L4 header and the data payload.
> > + */
> > +#define MAX_IP_PKT_LENGTH UINT16_MAX
> > +
> > +/* The maximum TCP header length */
> > +#define MAX_TCP_HLEN 60
> > +#define INVALID_TCP_HDRLEN(len) \
> > +     (((len) < sizeof(struct rte_tcp_hdr)) || ((len) > MAX_TCP_HLEN))
> > +
> > +struct gro_tcp_flow {
>
> This structure name is confusing. In the upper layer, tcp4 and tcp6 have
> gro_tcp4_flow
> and gro_tcp6_flow, which represent a flow. Inside gro_tcp4/6_flow, there
> are keys,
> represented by struct tcp4/6_flow_key. But inside struct tcp4/6_flow_key,
> there is
> struct gro_tcp_flow. Need to rename struct gro_tcp_flow, like
> common_tcp_flow_key.
>
> > +     struct rte_ether_addr eth_saddr;
> > +     struct rte_ether_addr eth_daddr;
> > +     uint32_t recv_ack;
> > +     uint16_t src_port;
> > +     uint16_t dst_port;
> > +};
> > +
> > +#define ASSIGN_TCP_FLOW_KEY(k1, k2) \
>
> Ditto. The macro needs rename, like ASSIGN_COMMON_TCP_FLOW_KEY.
>
> > +     do {\
> > +             rte_ether_addr_copy(&(k1->eth_saddr), &(k2->eth_saddr)); \
> > +             rte_ether_addr_copy(&(k1->eth_daddr), &(k2->eth_daddr));
> > \
> > +             k2->recv_ack = k1->recv_ack; \
> > +             k2->src_port = k1->src_port; \
> > +             k2->dst_port = k1->dst_port; \
> > +     } while (0)
> > +
> > +struct gro_tcp_item {
> > +     /*
> > +      * The first MBUF segment of the packet. If the value
> > +      * is NULL, it means the item is empty.
> > +      */
> > +     struct rte_mbuf *firstseg;
> > +     /* The last MBUF segment of the packet */
> > +     struct rte_mbuf *lastseg;
> > +     /*
> > +      * The time when the first packet is inserted into the table.
> > +      * This value won't be updated, even if the packet is merged
> > +      * with other packets.
> > +      */
> > +     uint64_t start_time;
> > +     /*
> > +      * next_pkt_idx is used to chain the packets that
> > +      * are in the same flow but can't be merged together
> > +      * (e.g. caused by packet reordering).
> > +      */
> > +     uint32_t next_pkt_idx;
> > +     /* TCP sequence number of the packet */
> > +     uint32_t sent_seq;
> > +     /* IPv4 ID of the packet */
> > +     uint16_t ip_id;
>
> The ip_id field is not used by tcp6. It's better to use a union to
> include ip_id for IPv4 and
> an unused member for IPv6 with some comments to avoid confusion.
>
> > +     /* the number of merged packets */
> > +     uint16_t nb_merged;
> > +     /* Indicate if IPv4 ID can be ignored */
> > +     uint8_t is_atomic;
> > +};
> > +
> > +uint32_t
> > +insert_new_tcp_item(struct rte_mbuf *pkt,
> > +             struct gro_tcp_item *items,
> > +             uint32_t *item_num,
> > +             uint32_t table_size,
> > +             uint64_t start_time,
> > +             uint32_t prev_idx,
> > +             uint32_t sent_seq,
> > +             uint16_t ip_id,
> > +             uint8_t is_atomic);
> > +
> > +uint32_t
> > +delete_tcp_item(struct gro_tcp_item *items,
> > +             uint32_t item_idx,
> > +             uint32_t *item_num,
> > +             uint32_t prev_item_idx);
> > +
> > +int32_t
> > +gro_process_tcp_item(struct rte_mbuf *pkt,
> > +     struct rte_tcp_hdr *tcp_hdr,
> > +     int32_t tcp_dl,
> > +     struct gro_tcp_item *items,
> > +     uint32_t item_idx,
> > +     uint32_t *item_num,
> > +     uint32_t table_size,
> > +     uint16_t ip_id,
> > +     uint8_t is_atomic,
> > +     uint64_t start_time);
> > +
> > +/*
> > + * Merge two TCP packets without updating checksums.
> > + * If cmp is larger than 0, append the new packet to the
> > + * original packet. Otherwise, pre-pend the new packet to
> > + * the original packet.
> > + */
> > +static inline int
> > +merge_two_tcp_packets(struct gro_tcp_item *item,
> > +             struct rte_mbuf *pkt,
> > +             int cmp,
> > +             uint32_t sent_seq,
> > +             uint16_t ip_id,
> > +             uint16_t l2_offset)
> > +{
> > +     struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
> > +     uint16_t hdr_len, l2_len;
> > +
> > +     if (cmp > 0) {
> > +             pkt_head = item->firstseg;
> > +             pkt_tail = pkt;
> > +     } else {
> > +             pkt_head = pkt;
> > +             pkt_tail = item->firstseg;
> > +     }
> > +
> > +     /* check if the IPv4 packet length is greater than the max value */
> > +     hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
> > +             pkt_head->l4_len;
> > +     l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
> > +     if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
> > +                             hdr_len > MAX_IP_PKT_LENGTH))
> > +             return 0;
> > +
> > +     /* remove the packet header for the tail packet */
> > +     rte_pktmbuf_adj(pkt_tail, hdr_len);
> > +
> > +     /* chain two packets together */
> > +     if (cmp > 0) {
> > +             item->lastseg->next = pkt;
> > +             item->lastseg = rte_pktmbuf_lastseg(pkt);
> > +             /* update IP ID to the larger value */
> > +             item->ip_id = ip_id;
> > +     } else {
> > +             lastseg = rte_pktmbuf_lastseg(pkt);
> > +             lastseg->next = item->firstseg;
> > +             item->firstseg = pkt;
> > +             /* update sent_seq to the smaller value */
> > +             item->sent_seq = sent_seq;
> > +             item->ip_id = ip_id;
> > +     }
> > +     item->nb_merged++;
> > +
> > +     /* update MBUF metadata for the merged packet */
> > +     pkt_head->nb_segs += pkt_tail->nb_segs;
> > +     pkt_head->pkt_len += pkt_tail->pkt_len;
> > +
> > +     return 1;
> > +}
> > +
> > +/*
> > + * Check if two TCP/IPv4 packets are neighbors.
> > + */
> > +static inline int
> > +check_seq_option(struct gro_tcp_item *item,
> > +             struct rte_tcp_hdr *tcph,
> > +             uint32_t sent_seq,
> > +             uint16_t ip_id,
> > +             uint16_t tcp_hl,
> > +             uint16_t tcp_dl,
> > +             uint16_t l2_offset,
> > +             uint8_t is_atomic)
> > +{
> > +     struct rte_mbuf *pkt_orig = item->firstseg;
> > +     char *iph_orig;
> > +     struct rte_tcp_hdr *tcph_orig;
> > +     uint16_t len, tcp_hl_orig;
> > +
> > +     iph_orig = (char *)(rte_pktmbuf_mtod(pkt_orig, char *) +
> > +                     l2_offset + pkt_orig->l2_len);
> > +     tcph_orig = (struct rte_tcp_hdr *)(iph_orig + pkt_orig->l3_len);
> > +     tcp_hl_orig = pkt_orig->l4_len;
> > +
> > +     /* Check if TCP option fields equal */
> > +     len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct rte_tcp_hdr);
> > +     if ((tcp_hl != tcp_hl_orig) || ((len > 0) &&
> > +                             (memcmp(tcph + 1, tcph_orig + 1,
> > +                                     len) != 0)))
> > +             return 0;
> > +
> > +     /* Don't merge packets whose DF bits are different */
> > +     if (unlikely(item->is_atomic ^ is_atomic))
> > +             return 0;
> > +
> > +     /* check if the two packets are neighbors */
> > +     len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
> > +             pkt_orig->l3_len - tcp_hl_orig;
> > +     if ((sent_seq == item->sent_seq + len) && (is_atomic ||
> > +                             (ip_id == item->ip_id + 1)))
> > +             /* append the new packet */
> > +             return 1;
> > +     else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
> > +                             (ip_id + item->nb_merged == item->ip_id)))
> > +             /* pre-pend the new packet */
> > +             return -1;
> > +
> > +     return 0;
> > +}
> > +
> > +static inline int
> > +is_same_tcp_flow(struct gro_tcp_flow *k1, struct gro_tcp_flow *k2) {
> > +     return (!memcmp(k1, k2, sizeof(struct gro_tcp_flow))); }
>
> I think this function needs renaming, as the result of this function cannot
> determine whether they are
> the same TCP flow.
>
> > +
> > +#endif
> > diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c index
> > 0014096e63..ffc33747c4 100644
> > --- a/lib/gro/gro_tcp4.c
> > +++ b/lib/gro/gro_tcp4.c
> > @@ -7,6 +7,7 @@
> >  #include <rte_ethdev.h>
> >
> >  #include "gro_tcp4.h"
> > +#include "gro_tcp.h"
> >
> >  void *
> >  gro_tcp4_tbl_create(uint16_t socket_id, @@ -30,7 +31,7 @@
> > gro_tcp4_tbl_create(uint16_t socket_id,
> >       if (tbl == NULL)
> >               return NULL;
> >
> > -     size = sizeof(struct gro_tcp4_item) * entries_num;
> > +     size = sizeof(struct gro_tcp_item) * entries_num;
> >       tbl->items = rte_zmalloc_socket(__func__,
> >                       size,
> >                       RTE_CACHE_LINE_SIZE,
> > @@ -71,18 +72,6 @@ gro_tcp4_tbl_destroy(void *tbl)
> >       rte_free(tcp_tbl);
> >  }
> >
> > -static inline uint32_t
> > -find_an_empty_item(struct gro_tcp4_tbl *tbl) -{
> > -     uint32_t i;
> > -     uint32_t max_item_num = tbl->max_item_num;
> > -
> > -     for (i = 0; i < max_item_num; i++)
> > -             if (tbl->items[i].firstseg == NULL)
> > -                     return i;
> > -     return INVALID_ARRAY_INDEX;
> > -}
> > -
> >  static inline uint32_t
> >  find_an_empty_flow(struct gro_tcp4_tbl *tbl)  { @@ -95,56 +84,6 @@
> > find_an_empty_flow(struct gro_tcp4_tbl *tbl)
> >       return INVALID_ARRAY_INDEX;
> >  }
> >
> > -static inline uint32_t
> > -insert_new_item(struct gro_tcp4_tbl *tbl,
> > -             struct rte_mbuf *pkt,
> > -             uint64_t start_time,
> > -             uint32_t prev_idx,
> > -             uint32_t sent_seq,
> > -             uint16_t ip_id,
> > -             uint8_t is_atomic)
> > -{
> > -     uint32_t item_idx;
> > -
> > -     item_idx = find_an_empty_item(tbl);
> > -     if (item_idx == INVALID_ARRAY_INDEX)
> > -             return INVALID_ARRAY_INDEX;
> > -
> > -     tbl->items[item_idx].firstseg = pkt;
> > -     tbl->items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt);
> > -     tbl->items[item_idx].start_time = start_time;
> > -     tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX;
> > -     tbl->items[item_idx].sent_seq = sent_seq;
> > -     tbl->items[item_idx].ip_id = ip_id;
> > -     tbl->items[item_idx].nb_merged = 1;
> > -     tbl->items[item_idx].is_atomic = is_atomic;
> > -     tbl->item_num++;
> > -
> > -     /* if the previous packet exists, chain them together. */
> > -     if (prev_idx != INVALID_ARRAY_INDEX) {
> > -             tbl->items[item_idx].next_pkt_idx =
> > -                     tbl->items[prev_idx].next_pkt_idx;
> > -             tbl->items[prev_idx].next_pkt_idx = item_idx;
> > -     }
> > -
> > -     return item_idx;
> > -}
> > -
> > -static inline uint32_t
> > -delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
> > -             uint32_t prev_item_idx)
> > -{
> > -     uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
> > -
> > -     /* NULL indicates an empty item */
> > -     tbl->items[item_idx].firstseg = NULL;
> > -     tbl->item_num--;
> > -     if (prev_item_idx != INVALID_ARRAY_INDEX)
> > -             tbl->items[prev_item_idx].next_pkt_idx = next_idx;
> > -
> > -     return next_idx;
> > -}
> > -
> >  static inline uint32_t
> >  insert_new_flow(struct gro_tcp4_tbl *tbl,
> >               struct tcp4_flow_key *src,
> > @@ -159,13 +98,10 @@ insert_new_flow(struct gro_tcp4_tbl *tbl,
> >
> >       dst = &(tbl->flows[flow_idx].key);
> >
> > -     rte_ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr));
> > -     rte_ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr));
> > +     ASSIGN_TCP_FLOW_KEY((&src->tcp_flow), (&dst->tcp_flow));
> > +
> >       dst->ip_src_addr = src->ip_src_addr;
> >       dst->ip_dst_addr = src->ip_dst_addr;
> > -     dst->recv_ack = src->recv_ack;
> > -     dst->src_port = src->src_port;
> > -     dst->dst_port = src->dst_port;
> >
> >       tbl->flows[flow_idx].start_index = item_idx;
> >       tbl->flow_num++;
> > @@ -173,21 +109,6 @@ insert_new_flow(struct gro_tcp4_tbl *tbl,
> >       return flow_idx;
> >  }
> >
> > -/*
> > - * update the packet length for the flushed packet.
> > - */
> > -static inline void
> > -update_header(struct gro_tcp4_item *item) -{
> > -     struct rte_ipv4_hdr *ipv4_hdr;
> > -     struct rte_mbuf *pkt = item->firstseg;
> > -
> > -     ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
> > -                     pkt->l2_len);
> > -     ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len -
> > -                     pkt->l2_len);
> > -}
> > -
> >  int32_t
> >  gro_tcp4_reassemble(struct rte_mbuf *pkt,
> >               struct gro_tcp4_tbl *tbl,
> > @@ -195,16 +116,15 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,  {
> >       struct rte_ether_hdr *eth_hdr;
> >       struct rte_ipv4_hdr *ipv4_hdr;
> > -     struct rte_tcp_hdr *tcp_hdr;
> > -     uint32_t sent_seq;
> >       int32_t tcp_dl;
> > +     struct rte_tcp_hdr *tcp_hdr;
> >       uint16_t ip_id, hdr_len, frag_off, ip_tlen;
> >       uint8_t is_atomic;
> > +     uint32_t sent_seq;
>
> No need to change tcp_hdr and sent_seq here.
>
>> The flow matching is done in the function, and if the flow is not found,
>> insert_new_tcp_item is invoked from this function itself. Did you mean to
>> move that to process_tcp_item as well? If that is the case, we should
>> pass start_idx as INVALID_ARRAY_INDEX, and in process_tcp_item, if it is
>> INVALID_ARRAY_INDEX, do an insert_new_tcp_item and return, without doing
>> the sequence number checks etc.
>>
> >
> >       struct tcp4_flow_key key;
> > -     uint32_t cur_idx, prev_idx, item_idx;
> > +     uint32_t item_idx;
> >       uint32_t i, max_flow_num, remaining_flow_num;
> > -     int cmp;
> >       uint8_t find;
> >
> >       /*
> > @@ -216,7 +136,7 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
> >
> >       eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
> >       ipv4_hdr = (struct rte_ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
> > -     tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
> > +     tcp_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_tcp_hdr *,
> > +pkt->l2_len + pkt->l3_len);
> >       hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
> >
> >       /*
> > @@ -230,7 +150,6 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
> >       ip_tlen = rte_be_to_cpu_16(ipv4_hdr->total_length);
> >       if (pkt->pkt_len > (uint32_t)(ip_tlen + pkt->l2_len))
> >               rte_pktmbuf_trim(pkt, pkt->pkt_len - ip_tlen -
> pkt->l2_len);
> > -
> >       /*
> >        * Don't process the packet whose payload length is less than or
> >        * equal to 0.
> > @@ -239,6 +158,13 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
> >       if (tcp_dl <= 0)
> >               return -1;
> >
> > +     rte_ether_addr_copy(&(eth_hdr->src_addr),
> > &(key.tcp_flow.eth_saddr));
> > +     rte_ether_addr_copy(&(eth_hdr->dst_addr),
> > &(key.tcp_flow.eth_daddr));
> > +     key.ip_src_addr = ipv4_hdr->src_addr;
> > +     key.ip_dst_addr = ipv4_hdr->dst_addr;
> > +     key.tcp_flow.src_port = tcp_hdr->src_port;
> > +     key.tcp_flow.dst_port = tcp_hdr->dst_port;
> > +     key.tcp_flow.recv_ack = tcp_hdr->recv_ack;
> >       /*
> >        * Save IPv4 ID for the packet whose DF bit is 0. For the packet
> >        * whose DF bit is 1, IPv4 ID is ignored.
> > @@ -246,15 +172,6 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
> >       frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
> >       is_atomic = (frag_off & RTE_IPV4_HDR_DF_FLAG) ==
> > RTE_IPV4_HDR_DF_FLAG;
> >       ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
> > -     sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> > -
> > -     rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.eth_saddr));
> > -     rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.eth_daddr));
> > -     key.ip_src_addr = ipv4_hdr->src_addr;
> > -     key.ip_dst_addr = ipv4_hdr->dst_addr;
> > -     key.src_port = tcp_hdr->src_port;
> > -     key.dst_port = tcp_hdr->dst_port;
> > -     key.recv_ack = tcp_hdr->recv_ack;
> >
> >       /* Search for a matched flow. */
> >       max_flow_num = tbl->max_flow_num;
> > @@ -270,63 +187,44 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
> >               }
> >       }
> >
> > -     /*
> > -      * Fail to find a matched flow. Insert a new flow and store the
> > -      * packet into the flow.
> > -      */
> >       if (find == 0) {
> > -             item_idx = insert_new_item(tbl, pkt, start_time,
> > -                             INVALID_ARRAY_INDEX, sent_seq, ip_id,
> > -                             is_atomic);
> > +             sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> > +             item_idx = insert_new_tcp_item(pkt, tbl->items, &tbl-
> > >item_num, tbl->max_item_num, start_time,
> > +                                             INVALID_ARRAY_INDEX,
> > sent_seq, ip_id,
> > +                                             is_atomic);
> >               if (item_idx == INVALID_ARRAY_INDEX)
> >                       return -1;
> >               if (insert_new_flow(tbl, &key, item_idx) ==
> > -                             INVALID_ARRAY_INDEX) {
> > +                     INVALID_ARRAY_INDEX) {
> >                       /*
> >                        * Fail to insert a new flow, so delete the
> >                        * stored packet.
> > -                      */
> > -                     delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
> > +                     */
> > +                     delete_tcp_item(tbl->items, item_idx, &tbl-
> > >item_num,
> > +INVALID_ARRAY_INDEX);
> >                       return -1;
> >               }
> >               return 0;
> >       }
> > +     item_idx = tbl->flows[i].start_index;
>
> No need to update item_idx, and you can directly pass
> tbl->flows[i].start_index to
> gro_process_tcp_item(). And same in gro_tcp6_reassemble().
>
> >
> > -     /*
> > -      * Check all packets in the flow and try to find a neighbor for
> > -      * the input packet.
> > -      */
> > -     cur_idx = tbl->flows[i].start_index;
> > -     prev_idx = cur_idx;
> > -     do {
> > -             cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
> > -                             sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
> > -                             is_atomic);
> > -             if (cmp) {
> > -                     if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
> > -                                             pkt, cmp, sent_seq, ip_id,
> 0))
> > -                             return 1;
> > -                     /*
> > -                      * Fail to merge the two packets, as the packet
> > -                      * length is greater than the max value. Store
> > -                      * the packet into the flow.
> > -                      */
> > -                     if (insert_new_item(tbl, pkt, start_time, cur_idx,
> > -                                             sent_seq, ip_id,
> is_atomic) ==
> > -                                     INVALID_ARRAY_INDEX)
> > -                             return -1;
> > -                     return 0;
> > -             }
> > -             prev_idx = cur_idx;
> > -             cur_idx = tbl->items[cur_idx].next_pkt_idx;
> > -     } while (cur_idx != INVALID_ARRAY_INDEX);
> > +     return gro_process_tcp_item(pkt, tcp_hdr, tcp_dl, tbl->items,
> > item_idx,
> > +                                             &tbl->item_num, tbl-
> > >max_item_num,
> > +                                             ip_id, is_atomic,
> start_time);
> > +}
> >
> > -     /* Fail to find a neighbor, so store the packet into the flow. */
> > -     if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
> > -                             ip_id, is_atomic) == INVALID_ARRAY_INDEX)
> > -             return -1;
>
  
Hu, Jiayu June 9, 2023, 1:04 a.m. UTC | #3
Hi Kumara,

In your reply, the lines are not prefaced with “>”, so it is hard to find your replies.
So I am replying here. In gro_tcp4_reassemble(), I meant that there is no need to swap the order
of the tcp_hdr and sent_seq definition statements in the code, rather than not updating them.

Thanks,
Jiayu

From: kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com>
Sent: Friday, June 9, 2023 12:52 AM
To: Hu, Jiayu <jiayu.hu@intel.com>
Cc: dev@dpdk.org
Subject: Re: [PATCH v4] gro : ipv6 changes to support GRO for TCP/ipv6

Hi Jiayu,

Thanks for the quick review comments. I will address them. I require clarification on one of the comments; please find it inline.

On Thu, Jun 8, 2023 at 9:35 AM Hu, Jiayu <jiayu.hu@intel.com<mailto:jiayu.hu@intel.com>> wrote:
Hi Kumara,

Please see replies inline.

In addition, you need to update the programmer guide in generic_receive_offload_lib.rst,
and release note release_23_07.rst.

Thanks,
Jiayu

> -----Original Message-----
> From: Kumara Parameshwaran <kumaraparamesh92@gmail.com<mailto:kumaraparamesh92@gmail.com>>
> Sent: Tuesday, June 6, 2023 10:58 PM
> To: Hu, Jiayu <jiayu.hu@intel.com<mailto:jiayu.hu@intel.com>>
> Cc: dev@dpdk.org<mailto:dev@dpdk.org>; Kumara Parameshwaran
> <kumaraparamesh92@gmail.com<mailto:kumaraparamesh92@gmail.com>>
> Subject: [PATCH v4] gro : ipv6 changes to support GRO for TCP/ipv6
>
> The patch adds GRO support for TCP/ipv6 packets. This does not include the
> support for vxlan, udp ipv6 packets.
>
> Signed-off-by: Kumara Parameshwaran <kumaraparamesh92@gmail.com<mailto:kumaraparamesh92@gmail.com>>>
> diff --git a/lib/gro/gro_tcp.c b/lib/gro/gro_tcp.c new file mode 100644 index
> 0000000000..02a7d0f8c5
> --- /dev/null
> +++ b/lib/gro/gro_tcp.c

For gro_tcp.c and gro_tcp.h, it's better to add "_internal" to the file names.

> @@ -0,0 +1,128 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2017 Intel Corporation
> + */
> +#include <rte_malloc.h>
> +#include <rte_mbuf.h>
> +#include <rte_ethdev.h>
> +
> +#include "gro_tcp.h"
> +
> +static inline uint32_t
> +find_an_empty_item(struct gro_tcp_item *items,
> +     uint32_t max_item_num)
> +{
> +     uint32_t i;
> +
> +     for (i = 0; i < max_item_num; i++)
> +             if (items[i].firstseg == NULL)
> +                     return i;
> +     return INVALID_ARRAY_INDEX;
> +}
> +
> +uint32_t
> +insert_new_tcp_item(struct rte_mbuf *pkt,
> +             struct gro_tcp_item *items,
> +             uint32_t *item_num,
> +             uint32_t max_item_num,
> +             uint64_t start_time,
> +             uint32_t prev_idx,
> +             uint32_t sent_seq,
> +             uint16_t ip_id,
> +             uint8_t is_atomic)

This function can be inline.

> +{
> +     uint32_t item_idx;
> +
> +     item_idx = find_an_empty_item(items, max_item_num);
> +     if (item_idx == INVALID_ARRAY_INDEX)
> +             return INVALID_ARRAY_INDEX;
> +
> +     items[item_idx].firstseg = pkt;
> +     items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt);
> +     items[item_idx].start_time = start_time;
> +     items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX;
> +     items[item_idx].sent_seq = sent_seq;
> +     items[item_idx].ip_id = ip_id;
> +     items[item_idx].nb_merged = 1;
> +     items[item_idx].is_atomic = is_atomic;
> +     (*item_num) += 1;
> +
> +     /* if the previous packet exists, chain them together. */
> +     if (prev_idx != INVALID_ARRAY_INDEX) {
> +             items[item_idx].next_pkt_idx =
> +                     items[prev_idx].next_pkt_idx;
> +             items[prev_idx].next_pkt_idx = item_idx;
> +     }
> +
> +     return item_idx;
> +}
> +
> +uint32_t
> +delete_tcp_item(struct gro_tcp_item *items, uint32_t item_idx,
> +             uint32_t *item_num,
> +             uint32_t prev_item_idx)
> +{
> +     uint32_t next_idx = items[item_idx].next_pkt_idx;
> +
> +     /* NULL indicates an empty item */
> +     items[item_idx].firstseg = NULL;
> +     (*item_num) -= 1;
> +     if (prev_item_idx != INVALID_ARRAY_INDEX)
> +             items[prev_item_idx].next_pkt_idx = next_idx;
> +
> +     return next_idx;
> +}

This function can be inline.

> +
> +int32_t
> +gro_process_tcp_item(struct rte_mbuf *pkt,
> +     struct rte_tcp_hdr *tcp_hdr,
> +     int32_t tcp_dl,
> +     struct gro_tcp_item *items,
> +     uint32_t item_idx,
> +     uint32_t *item_num,
> +     uint32_t max_item_num,
> +     uint16_t ip_id,
> +     uint8_t is_atomic,
> +     uint64_t start_time)

It is for internal use, so it's better to remove "gro_" from the function name.

> +{
> +     uint32_t cur_idx;
> +     uint32_t prev_idx;
> +     int cmp;
> +     uint32_t sent_seq;
> +
> +     sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> +     /*
> +      * Check all packets in the flow and try to find a neighbor for
> +      * the input packet.
> +      */
> +     cur_idx = item_idx;
> +     prev_idx = cur_idx;
> +     do {
> +             cmp = check_seq_option(&items[cur_idx], tcp_hdr,
> +                             sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
> +                             is_atomic);
> +             if (cmp) {
> +                     if (merge_two_tcp_packets(&items[cur_idx],
> +                                             pkt, cmp, sent_seq, ip_id, 0))
> +                             return 1;
> +                     /*
> +                      * Fail to merge the two packets, as the packet
> +                      * length is greater than the max value. Store
> +                      * the packet into the flow.
> +                      */
> +                     if (insert_new_tcp_item(pkt, items, item_num,
> max_item_num, start_time, cur_idx,
> +                                             sent_seq, ip_id, is_atomic) ==
> +                                     INVALID_ARRAY_INDEX)
> +                             return -1;
> +                     return 0;
> +             }
> +             prev_idx = cur_idx;
> +             cur_idx = items[cur_idx].next_pkt_idx;
> +     } while (cur_idx != INVALID_ARRAY_INDEX);
> +
> +     /* Fail to find a neighbor, so store the packet into the flow. */
> +     if (insert_new_tcp_item(pkt, items, item_num, max_item_num,
> start_time, prev_idx, sent_seq,
> +                             ip_id, is_atomic) == INVALID_ARRAY_INDEX)
> +             return -1;
> +
> +     return 0;
> +}
> diff --git a/lib/gro/gro_tcp.h b/lib/gro/gro_tcp.h new file mode 100644 index
> 0000000000..4b5b4eda9c
> --- /dev/null
> +++ b/lib/gro/gro_tcp.h
> @@ -0,0 +1,209 @@
> +#ifndef _GRO_TCP_H_
> +#define _GRO_TCP_H_
> +
> +#define INVALID_ARRAY_INDEX 0xffffffffUL
> +
> +#include <rte_tcp.h>
> +
> +/*
> + * The max length of a IPv4 packet, which includes the length of the L3
> + * header, the L4 header and the data payload.
> + */
> +#define MAX_IP_PKT_LENGTH UINT16_MAX
> +
> +/* The maximum TCP header length */
> +#define MAX_TCP_HLEN 60
> +#define INVALID_TCP_HDRLEN(len) \
> +     (((len) < sizeof(struct rte_tcp_hdr)) || ((len) > MAX_TCP_HLEN))
> +
> +struct gro_tcp_flow {

This structure name is confusing. In the upper layer, tcp4 and tcp6 have gro_tcp4_flow
and gro_tcp6_flow, which represent a flow. Inside gro_tcp4/6_flow, there are keys,
represented by struct tcp4/6_flow_key. But inside struct tcp4/6_flow_key, there is
struct gro_tcp_flow. Struct gro_tcp_flow needs to be renamed, e.g. to common_tcp_flow_key.

> +     struct rte_ether_addr eth_saddr;
> +     struct rte_ether_addr eth_daddr;
> +     uint32_t recv_ack;
> +     uint16_t src_port;
> +     uint16_t dst_port;
> +};
> +
> +#define ASSIGN_TCP_FLOW_KEY(k1, k2) \

Ditto. The macro needs rename, like ASSIGN_COMMON_TCP_FLOW_KEY.

> +     do {\
> +             rte_ether_addr_copy(&(k1->eth_saddr), &(k2->eth_saddr)); \
> +             rte_ether_addr_copy(&(k1->eth_daddr), &(k2->eth_daddr));
> \
> +             k2->recv_ack = k1->recv_ack; \
> +             k2->src_port = k1->src_port; \
> +             k2->dst_port = k1->dst_port; \
> +     } while (0)
> +
> +struct gro_tcp_item {
> +     /*
> +      * The first MBUF segment of the packet. If the value
> +      * is NULL, it means the item is empty.
> +      */
> +     struct rte_mbuf *firstseg;
> +     /* The last MBUF segment of the packet */
> +     struct rte_mbuf *lastseg;
> +     /*
> +      * The time when the first packet is inserted into the table.
> +      * This value won't be updated, even if the packet is merged
> +      * with other packets.
> +      */
> +     uint64_t start_time;
> +     /*
> +      * next_pkt_idx is used to chain the packets that
> +      * are in the same flow but can't be merged together
> +      * (e.g. caused by packet reordering).
> +      */
> +     uint32_t next_pkt_idx;
> +     /* TCP sequence number of the packet */
> +     uint32_t sent_seq;
> +     /* IPv4 ID of the packet */
> +     uint16_t ip_id;

The ip_id field is not used by tcp6. It's better to use a union to include ip_id for IPv4 and
an unused member for IPv6 with some comments to avoid confusion.

> +     /* the number of merged packets */
> +     uint16_t nb_merged;
> +     /* Indicate if IPv4 ID can be ignored */
> +     uint8_t is_atomic;
> +};
> +
> +uint32_t
> +insert_new_tcp_item(struct rte_mbuf *pkt,
> +             struct gro_tcp_item *items,
> +             uint32_t *item_num,
> +             uint32_t table_size,
> +             uint64_t start_time,
> +             uint32_t prev_idx,
> +             uint32_t sent_seq,
> +             uint16_t ip_id,
> +             uint8_t is_atomic);
> +
> +uint32_t
> +delete_tcp_item(struct gro_tcp_item *items,
> +             uint32_t item_idx,
> +             uint32_t *item_num,
> +             uint32_t prev_item_idx);
> +
> +int32_t
> +gro_process_tcp_item(struct rte_mbuf *pkt,
> +     struct rte_tcp_hdr *tcp_hdr,
> +     int32_t tcp_dl,
> +     struct gro_tcp_item *items,
> +     uint32_t item_idx,
> +     uint32_t *item_num,
> +     uint32_t table_size,
> +     uint16_t ip_id,
> +     uint8_t is_atomic,
> +     uint64_t start_time);
> +
> +/*
> + * Merge two TCP packets without updating checksums.
> + * If cmp is larger than 0, append the new packet to the
> + * original packet. Otherwise, pre-pend the new packet to
> + * the original packet.
> + */
> +static inline int
> +merge_two_tcp_packets(struct gro_tcp_item *item,
> +             struct rte_mbuf *pkt,
> +             int cmp,
> +             uint32_t sent_seq,
> +             uint16_t ip_id,
> +             uint16_t l2_offset)
> +{
> +     struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
> +     uint16_t hdr_len, l2_len;
> +
> +     if (cmp > 0) {
> +             pkt_head = item->firstseg;
> +             pkt_tail = pkt;
> +     } else {
> +             pkt_head = pkt;
> +             pkt_tail = item->firstseg;
> +     }
> +
> +     /* check if the IPv4 packet length is greater than the max value */
> +     hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
> +             pkt_head->l4_len;
> +     l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
> +     if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
> +                             hdr_len > MAX_IP_PKT_LENGTH))
> +             return 0;
> +
> +     /* remove the packet header for the tail packet */
> +     rte_pktmbuf_adj(pkt_tail, hdr_len);
> +
> +     /* chain two packets together */
> +     if (cmp > 0) {
> +             item->lastseg->next = pkt;
> +             item->lastseg = rte_pktmbuf_lastseg(pkt);
> +             /* update IP ID to the larger value */
> +             item->ip_id = ip_id;
> +     } else {
> +             lastseg = rte_pktmbuf_lastseg(pkt);
> +             lastseg->next = item->firstseg;
> +             item->firstseg = pkt;
> +             /* update sent_seq to the smaller value */
> +             item->sent_seq = sent_seq;
> +             item->ip_id = ip_id;
> +     }
> +     item->nb_merged++;
> +
> +     /* update MBUF metadata for the merged packet */
> +     pkt_head->nb_segs += pkt_tail->nb_segs;
> +     pkt_head->pkt_len += pkt_tail->pkt_len;
> +
> +     return 1;
> +}
> +
> +/*
> + * Check if two TCP/IPv4 packets are neighbors.
> + */
> +static inline int
> +check_seq_option(struct gro_tcp_item *item,
> +             struct rte_tcp_hdr *tcph,
> +             uint32_t sent_seq,
> +             uint16_t ip_id,
> +             uint16_t tcp_hl,
> +             uint16_t tcp_dl,
> +             uint16_t l2_offset,
> +             uint8_t is_atomic)
> +{
> +     struct rte_mbuf *pkt_orig = item->firstseg;
> +     char *iph_orig;
> +     struct rte_tcp_hdr *tcph_orig;
> +     uint16_t len, tcp_hl_orig;
> +
> +     iph_orig = (char *)(rte_pktmbuf_mtod(pkt_orig, char *) +
> +                     l2_offset + pkt_orig->l2_len);
> +     tcph_orig = (struct rte_tcp_hdr *)(iph_orig + pkt_orig->l3_len);
> +     tcp_hl_orig = pkt_orig->l4_len;
> +
> +     /* Check if TCP option fields equal */
> +     len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct rte_tcp_hdr);
> +     if ((tcp_hl != tcp_hl_orig) || ((len > 0) &&
> +                             (memcmp(tcph + 1, tcph_orig + 1,
> +                                     len) != 0)))
> +             return 0;
> +
> +     /* Don't merge packets whose DF bits are different */
> +     if (unlikely(item->is_atomic ^ is_atomic))
> +             return 0;
> +
> +     /* check if the two packets are neighbors */
> +     len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
> +             pkt_orig->l3_len - tcp_hl_orig;
> +     if ((sent_seq == item->sent_seq + len) && (is_atomic ||
> +                             (ip_id == item->ip_id + 1)))
> +             /* append the new packet */
> +             return 1;
> +     else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
> +                             (ip_id + item->nb_merged == item->ip_id)))
> +             /* pre-pend the new packet */
> +             return -1;
> +
> +     return 0;
> +}
> +
> +static inline int
> +is_same_tcp_flow(struct gro_tcp_flow *k1, struct gro_tcp_flow *k2) {
> +     return (!memcmp(k1, k2, sizeof(struct gro_tcp_flow))); }

I think this function needs renaming, as the result of this function cannot determine whether they are
the same TCP flow.

> +
> +#endif
> diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c index
> 0014096e63..ffc33747c4 100644
> --- a/lib/gro/gro_tcp4.c
> +++ b/lib/gro/gro_tcp4.c
> @@ -7,6 +7,7 @@
>  #include <rte_ethdev.h>
>
>  #include "gro_tcp4.h"
> +#include "gro_tcp.h"
>
>  void *
>  gro_tcp4_tbl_create(uint16_t socket_id, @@ -30,7 +31,7 @@
> gro_tcp4_tbl_create(uint16_t socket_id,
>       if (tbl == NULL)
>               return NULL;
>
> -     size = sizeof(struct gro_tcp4_item) * entries_num;
> +     size = sizeof(struct gro_tcp_item) * entries_num;
>       tbl->items = rte_zmalloc_socket(__func__,
>                       size,
>                       RTE_CACHE_LINE_SIZE,
> @@ -71,18 +72,6 @@ gro_tcp4_tbl_destroy(void *tbl)
>       rte_free(tcp_tbl);
>  }
>
> -static inline uint32_t
> -find_an_empty_item(struct gro_tcp4_tbl *tbl) -{
> -     uint32_t i;
> -     uint32_t max_item_num = tbl->max_item_num;
> -
> -     for (i = 0; i < max_item_num; i++)
> -             if (tbl->items[i].firstseg == NULL)
> -                     return i;
> -     return INVALID_ARRAY_INDEX;
> -}
> -
>  static inline uint32_t
>  find_an_empty_flow(struct gro_tcp4_tbl *tbl)  { @@ -95,56 +84,6 @@
> find_an_empty_flow(struct gro_tcp4_tbl *tbl)
>       return INVALID_ARRAY_INDEX;
>  }
>
> -static inline uint32_t
> -insert_new_item(struct gro_tcp4_tbl *tbl,
> -             struct rte_mbuf *pkt,
> -             uint64_t start_time,
> -             uint32_t prev_idx,
> -             uint32_t sent_seq,
> -             uint16_t ip_id,
> -             uint8_t is_atomic)
> -{
> -     uint32_t item_idx;
> -
> -     item_idx = find_an_empty_item(tbl);
> -     if (item_idx == INVALID_ARRAY_INDEX)
> -             return INVALID_ARRAY_INDEX;
> -
> -     tbl->items[item_idx].firstseg = pkt;
> -     tbl->items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt);
> -     tbl->items[item_idx].start_time = start_time;
> -     tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX;
> -     tbl->items[item_idx].sent_seq = sent_seq;
> -     tbl->items[item_idx].ip_id = ip_id;
> -     tbl->items[item_idx].nb_merged = 1;
> -     tbl->items[item_idx].is_atomic = is_atomic;
> -     tbl->item_num++;
> -
> -     /* if the previous packet exists, chain them together. */
> -     if (prev_idx != INVALID_ARRAY_INDEX) {
> -             tbl->items[item_idx].next_pkt_idx =
> -                     tbl->items[prev_idx].next_pkt_idx;
> -             tbl->items[prev_idx].next_pkt_idx = item_idx;
> -     }
> -
> -     return item_idx;
> -}
> -
> -static inline uint32_t
> -delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
> -             uint32_t prev_item_idx)
> -{
> -     uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
> -
> -     /* NULL indicates an empty item */
> -     tbl->items[item_idx].firstseg = NULL;
> -     tbl->item_num--;
> -     if (prev_item_idx != INVALID_ARRAY_INDEX)
> -             tbl->items[prev_item_idx].next_pkt_idx = next_idx;
> -
> -     return next_idx;
> -}
> -
>  static inline uint32_t
>  insert_new_flow(struct gro_tcp4_tbl *tbl,
>               struct tcp4_flow_key *src,
> @@ -159,13 +98,10 @@ insert_new_flow(struct gro_tcp4_tbl *tbl,
>
>       dst = &(tbl->flows[flow_idx].key);
>
> -     rte_ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr));
> -     rte_ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr));
> +     ASSIGN_TCP_FLOW_KEY((&src->tcp_flow), (&dst->tcp_flow));
> +
>       dst->ip_src_addr = src->ip_src_addr;
>       dst->ip_dst_addr = src->ip_dst_addr;
> -     dst->recv_ack = src->recv_ack;
> -     dst->src_port = src->src_port;
> -     dst->dst_port = src->dst_port;
>
>       tbl->flows[flow_idx].start_index = item_idx;
>       tbl->flow_num++;
> @@ -173,21 +109,6 @@ insert_new_flow(struct gro_tcp4_tbl *tbl,
>       return flow_idx;
>  }
>
> -/*
> - * update the packet length for the flushed packet.
> - */
> -static inline void
> -update_header(struct gro_tcp4_item *item) -{
> -     struct rte_ipv4_hdr *ipv4_hdr;
> -     struct rte_mbuf *pkt = item->firstseg;
> -
> -     ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
> -                     pkt->l2_len);
> -     ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len -
> -                     pkt->l2_len);
> -}
> -
>  int32_t
>  gro_tcp4_reassemble(struct rte_mbuf *pkt,
>               struct gro_tcp4_tbl *tbl,
> @@ -195,16 +116,15 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,  {
>       struct rte_ether_hdr *eth_hdr;
>       struct rte_ipv4_hdr *ipv4_hdr;
> -     struct rte_tcp_hdr *tcp_hdr;
> -     uint32_t sent_seq;
>       int32_t tcp_dl;
> +     struct rte_tcp_hdr *tcp_hdr;
>       uint16_t ip_id, hdr_len, frag_off, ip_tlen;
>       uint8_t is_atomic;
> +     uint32_t sent_seq;

No need to change tcp_hdr and sent_seq here.
The flow matching is done in this function, and if the flow is not found, insert_new_tcp_item is invoked from this function itself. Did you mean to move that into process_tcp_item as well? If so, we should pass start_idx as INVALID_ARRAY_INDEX, and in process_tcp_item, when the index is INVALID_ARRAY_INDEX, call insert_new_tcp_item and return, without doing the sequence number checks etc.
>
>       struct tcp4_flow_key key;
> -     uint32_t cur_idx, prev_idx, item_idx;
> +     uint32_t item_idx;
>       uint32_t i, max_flow_num, remaining_flow_num;
> -     int cmp;
>       uint8_t find;
>
>       /*
> @@ -216,7 +136,7 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
>
>       eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
>       ipv4_hdr = (struct rte_ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
> -     tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
> +     tcp_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_tcp_hdr *,
> +pkt->l2_len + pkt->l3_len);
>       hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
>
>       /*
> @@ -230,7 +150,6 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
>       ip_tlen = rte_be_to_cpu_16(ipv4_hdr->total_length);
>       if (pkt->pkt_len > (uint32_t)(ip_tlen + pkt->l2_len))
>               rte_pktmbuf_trim(pkt, pkt->pkt_len - ip_tlen - pkt->l2_len);
> -
>       /*
>        * Don't process the packet whose payload length is less than or
>        * equal to 0.
> @@ -239,6 +158,13 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
>       if (tcp_dl <= 0)
>               return -1;
>
> +     rte_ether_addr_copy(&(eth_hdr->src_addr),
> &(key.tcp_flow.eth_saddr));
> +     rte_ether_addr_copy(&(eth_hdr->dst_addr),
> &(key.tcp_flow.eth_daddr));
> +     key.ip_src_addr = ipv4_hdr->src_addr;
> +     key.ip_dst_addr = ipv4_hdr->dst_addr;
> +     key.tcp_flow.src_port = tcp_hdr->src_port;
> +     key.tcp_flow.dst_port = tcp_hdr->dst_port;
> +     key.tcp_flow.recv_ack = tcp_hdr->recv_ack;
>       /*
>        * Save IPv4 ID for the packet whose DF bit is 0. For the packet
>        * whose DF bit is 1, IPv4 ID is ignored.
> @@ -246,15 +172,6 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
>       frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
>       is_atomic = (frag_off & RTE_IPV4_HDR_DF_FLAG) ==
> RTE_IPV4_HDR_DF_FLAG;
>       ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
> -     sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> -
> -     rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.eth_saddr));
> -     rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.eth_daddr));
> -     key.ip_src_addr = ipv4_hdr->src_addr;
> -     key.ip_dst_addr = ipv4_hdr->dst_addr;
> -     key.src_port = tcp_hdr->src_port;
> -     key.dst_port = tcp_hdr->dst_port;
> -     key.recv_ack = tcp_hdr->recv_ack;
>
>       /* Search for a matched flow. */
>       max_flow_num = tbl->max_flow_num;
> @@ -270,63 +187,44 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
>               }
>       }
>
> -     /*
> -      * Fail to find a matched flow. Insert a new flow and store the
> -      * packet into the flow.
> -      */
>       if (find == 0) {
> -             item_idx = insert_new_item(tbl, pkt, start_time,
> -                             INVALID_ARRAY_INDEX, sent_seq, ip_id,
> -                             is_atomic);
> +             sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> +             item_idx = insert_new_tcp_item(pkt, tbl->items, &tbl-
> >item_num, tbl->max_item_num, start_time,
> +                                             INVALID_ARRAY_INDEX,
> sent_seq, ip_id,
> +                                             is_atomic);
>               if (item_idx == INVALID_ARRAY_INDEX)
>                       return -1;
>               if (insert_new_flow(tbl, &key, item_idx) ==
> -                             INVALID_ARRAY_INDEX) {
> +                     INVALID_ARRAY_INDEX) {
>                       /*
>                        * Fail to insert a new flow, so delete the
>                        * stored packet.
> -                      */
> -                     delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
> +                     */
> +                     delete_tcp_item(tbl->items, item_idx, &tbl-
> >item_num,
> +INVALID_ARRAY_INDEX);
>                       return -1;
>               }
>               return 0;
>       }
> +     item_idx = tbl->flows[i].start_index;

No need to update item_idx, and you can directly pass tbl->flows[i].start_index to
gro_process_tcp_item(). And same in gro_tcp6_reassemble().

>
> -     /*
> -      * Check all packets in the flow and try to find a neighbor for
> -      * the input packet.
> -      */
> -     cur_idx = tbl->flows[i].start_index;
> -     prev_idx = cur_idx;
> -     do {
> -             cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
> -                             sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
> -                             is_atomic);
> -             if (cmp) {
> -                     if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
> -                                             pkt, cmp, sent_seq, ip_id, 0))
> -                             return 1;
> -                     /*
> -                      * Fail to merge the two packets, as the packet
> -                      * length is greater than the max value. Store
> -                      * the packet into the flow.
> -                      */
> -                     if (insert_new_item(tbl, pkt, start_time, cur_idx,
> -                                             sent_seq, ip_id, is_atomic) ==
> -                                     INVALID_ARRAY_INDEX)
> -                             return -1;
> -                     return 0;
> -             }
> -             prev_idx = cur_idx;
> -             cur_idx = tbl->items[cur_idx].next_pkt_idx;
> -     } while (cur_idx != INVALID_ARRAY_INDEX);
> +     return gro_process_tcp_item(pkt, tcp_hdr, tcp_dl, tbl->items,
> item_idx,
> +                                             &tbl->item_num, tbl-
> >max_item_num,
> +                                             ip_id, is_atomic, start_time);
> +}
>
> -     /* Fail to find a neighbor, so store the packet into the flow. */
> -     if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
> -                             ip_id, is_atomic) == INVALID_ARRAY_INDEX)
> -             return -1;
  

Patch

diff --git a/lib/gro/gro_tcp.c b/lib/gro/gro_tcp.c
new file mode 100644
index 0000000000..02a7d0f8c5
--- /dev/null
+++ b/lib/gro/gro_tcp.c
@@ -0,0 +1,128 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Intel Corporation
+ */
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+
+#include "gro_tcp.h"
+
+/*
+ * Return the index of the first unused slot in the item array, or
+ * INVALID_ARRAY_INDEX when every slot is occupied.
+ */
+static inline uint32_t
+find_an_empty_item(struct gro_tcp_item *items,
+	uint32_t max_item_num)
+{
+	uint32_t idx;
+
+	for (idx = 0; idx < max_item_num; idx++) {
+		/* a NULL firstseg marks a free slot */
+		if (items[idx].firstseg == NULL)
+			return idx;
+	}
+	return INVALID_ARRAY_INDEX;
+}
+
+/*
+ * Store a packet into a free slot of the item array and, when
+ * prev_idx is valid, link the new item into the chain right after
+ * prev_idx. Returns the index of the new item, or INVALID_ARRAY_INDEX
+ * when the item array is full.
+ */
+uint32_t
+insert_new_tcp_item(struct rte_mbuf *pkt,
+		struct gro_tcp_item *items,
+		uint32_t *item_num,
+		uint32_t max_item_num,
+		uint64_t start_time,
+		uint32_t prev_idx,
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint8_t is_atomic)
+{
+	uint32_t item_idx;
+
+	item_idx = find_an_empty_item(items, max_item_num);
+	if (item_idx == INVALID_ARRAY_INDEX)
+		return INVALID_ARRAY_INDEX;
+
+	items[item_idx].firstseg = pkt;
+	items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt);
+	items[item_idx].start_time = start_time;
+	items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX;
+	items[item_idx].sent_seq = sent_seq;
+	items[item_idx].ip_id = ip_id;
+	items[item_idx].nb_merged = 1;
+	items[item_idx].is_atomic = is_atomic;
+	(*item_num) += 1;
+
+	/* if the previous packet exists, chain them together. */
+	if (prev_idx != INVALID_ARRAY_INDEX) {
+		items[item_idx].next_pkt_idx =
+			items[prev_idx].next_pkt_idx;
+		items[prev_idx].next_pkt_idx = item_idx;
+	}
+
+	return item_idx;
+}
+
+/*
+ * Free the item at item_idx and unlink it from its packet chain.
+ * Returns the index of the next chained item (INVALID_ARRAY_INDEX if
+ * none) so callers can keep walking the chain.
+ */
+uint32_t
+delete_tcp_item(struct gro_tcp_item *items, uint32_t item_idx,
+		uint32_t *item_num,
+		uint32_t prev_item_idx)
+{
+	uint32_t next_idx = items[item_idx].next_pkt_idx;
+
+	/* NULL indicates an empty item */
+	items[item_idx].firstseg = NULL;
+	(*item_num) -= 1;
+	if (prev_item_idx != INVALID_ARRAY_INDEX)
+		items[prev_item_idx].next_pkt_idx = next_idx;
+
+	return next_idx;
+}
+
+/*
+ * Walk the packet chain of a matched TCP flow, starting at item_idx,
+ * and try to merge the input packet with a stored neighbor.
+ * Returns 1 when the packet is merged, 0 when it is stored as a new
+ * item in the flow, and -1 when the item array is full.
+ */
+int32_t
+gro_process_tcp_item(struct rte_mbuf *pkt,
+	struct rte_tcp_hdr *tcp_hdr,
+	int32_t tcp_dl,
+	struct gro_tcp_item *items,
+	uint32_t item_idx,
+	uint32_t *item_num,
+	uint32_t max_item_num,
+	uint16_t ip_id,
+	uint8_t is_atomic,
+	uint64_t start_time)
+{
+	uint32_t cur_idx;
+	uint32_t prev_idx;
+	int cmp;
+	uint32_t sent_seq;
+
+	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
+	/*
+	 * Check all packets in the flow and try to find a neighbor for
+	 * the input packet.
+	 */
+	cur_idx = item_idx;
+	prev_idx = cur_idx;
+	do {
+		cmp = check_seq_option(&items[cur_idx], tcp_hdr,
+				sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
+				is_atomic);
+		if (cmp) {
+			if (merge_two_tcp_packets(&items[cur_idx],
+						pkt, cmp, sent_seq, ip_id, 0))
+				return 1;
+			/*
+			 * Fail to merge the two packets, as the packet
+			 * length is greater than the max value. Store
+			 * the packet into the flow.
+			 */
+			if (insert_new_tcp_item(pkt, items, item_num,
+						max_item_num, start_time,
+						cur_idx, sent_seq, ip_id,
+						is_atomic) ==
+					INVALID_ARRAY_INDEX)
+				return -1;
+			return 0;
+		}
+		prev_idx = cur_idx;
+		cur_idx = items[cur_idx].next_pkt_idx;
+	} while (cur_idx != INVALID_ARRAY_INDEX);
+
+	/* Fail to find a neighbor, so store the packet into the flow. */
+	if (insert_new_tcp_item(pkt, items, item_num, max_item_num,
+				start_time, prev_idx, sent_seq, ip_id,
+				is_atomic) == INVALID_ARRAY_INDEX)
+		return -1;
+
+	return 0;
+}
diff --git a/lib/gro/gro_tcp.h b/lib/gro/gro_tcp.h
new file mode 100644
index 0000000000..4b5b4eda9c
--- /dev/null
+++ b/lib/gro/gro_tcp.h
@@ -0,0 +1,209 @@ 
+#ifndef _GRO_TCP_H_
+#define _GRO_TCP_H_
+
+#define INVALID_ARRAY_INDEX 0xffffffffUL
+
+#include <rte_tcp.h>
+
+/*
+ * The max length of a merged IP packet, which includes the length of
+ * the L3 header, the L4 header and the data payload.
+ */
+#define MAX_IP_PKT_LENGTH UINT16_MAX
+
+/* The maximum TCP header length */
+#define MAX_TCP_HLEN 60
+#define INVALID_TCP_HDRLEN(len) \
+	(((len) < sizeof(struct rte_tcp_hdr)) || ((len) > MAX_TCP_HLEN))
+
+/*
+ * L2 and L4 header fields of a TCP flow key that are common to the
+ * IPv4 and IPv6 reassembly tables; the IP addresses are kept in the
+ * per-version key structures that embed this one.
+ */
+struct gro_tcp_flow {
+	struct rte_ether_addr eth_saddr;
+	struct rte_ether_addr eth_daddr;
+	/* TCP acknowledgment number */
+	uint32_t recv_ack;
+	uint16_t src_port;
+	uint16_t dst_port;
+};
+
+/* Copy the common TCP flow fields from key k1 into key k2. */
+#define ASSIGN_TCP_FLOW_KEY(k1, k2) \
+	do {\
+		rte_ether_addr_copy(&((k1)->eth_saddr), &((k2)->eth_saddr)); \
+		rte_ether_addr_copy(&((k1)->eth_daddr), &((k2)->eth_daddr)); \
+		(k2)->recv_ack = (k1)->recv_ack; \
+		(k2)->src_port = (k1)->src_port; \
+		(k2)->dst_port = (k1)->dst_port; \
+	} while (0)
+
+/* A stored (possibly already merged) TCP packet in the item array. */
+struct gro_tcp_item {
+	/*
+	 * The first MBUF segment of the packet. If the value
+	 * is NULL, it means the item is empty.
+	 */
+	struct rte_mbuf *firstseg;
+	/* The last MBUF segment of the packet */
+	struct rte_mbuf *lastseg;
+	/*
+	 * The time when the first packet is inserted into the table.
+	 * This value won't be updated, even if the packet is merged
+	 * with other packets.
+	 */
+	uint64_t start_time;
+	/*
+	 * next_pkt_idx is used to chain the packets that
+	 * are in the same flow but can't be merged together
+	 * (e.g. caused by packet reordering).
+	 */
+	uint32_t next_pkt_idx;
+	/* TCP sequence number of the packet */
+	uint32_t sent_seq;
+	/* IPv4 ID of the packet; 0 when the ID is ignored (is_atomic) */
+	uint16_t ip_id;
+	/* the number of merged packets */
+	uint16_t nb_merged;
+	/* Indicate if IPv4 ID can be ignored */
+	uint8_t is_atomic;
+};
+
+/*
+ * Store a packet into a free slot of the item array, optionally
+ * chaining it after prev_idx. Returns the new item index, or
+ * INVALID_ARRAY_INDEX when the item array is full.
+ */
+uint32_t
+insert_new_tcp_item(struct rte_mbuf *pkt,
+		struct gro_tcp_item *items,
+		uint32_t *item_num,
+		uint32_t max_item_num,
+		uint64_t start_time,
+		uint32_t prev_idx,
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint8_t is_atomic);
+
+/*
+ * Free an item and unlink it from its packet chain. Returns the index
+ * of the next chained item.
+ */
+uint32_t
+delete_tcp_item(struct gro_tcp_item *items,
+		uint32_t item_idx,
+		uint32_t *item_num,
+		uint32_t prev_item_idx);
+
+/*
+ * Try to merge a packet into an existing flow chain starting at
+ * item_idx, or store it as a new item. Returns 1 on merge, 0 on
+ * store, -1 on failure.
+ */
+int32_t
+gro_process_tcp_item(struct rte_mbuf *pkt,
+	struct rte_tcp_hdr *tcp_hdr,
+	int32_t tcp_dl,
+	struct gro_tcp_item *items,
+	uint32_t item_idx,
+	uint32_t *item_num,
+	uint32_t max_item_num,
+	uint16_t ip_id,
+	uint8_t is_atomic,
+	uint64_t start_time);
+
+/*
+ * Merge two TCP packets without updating checksums.
+ * If cmp is larger than 0, append the new packet to the
+ * original packet. Otherwise, pre-pend the new packet to
+ * the original packet. Returns 1 on success, 0 when the merged
+ * packet would exceed MAX_IP_PKT_LENGTH.
+ */
+static inline int
+merge_two_tcp_packets(struct gro_tcp_item *item,
+		struct rte_mbuf *pkt,
+		int cmp,
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint16_t l2_offset)
+{
+	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
+	uint16_t hdr_len, l2_len;
+
+	if (cmp > 0) {
+		pkt_head = item->firstseg;
+		pkt_tail = pkt;
+	} else {
+		pkt_head = pkt;
+		pkt_tail = item->firstseg;
+	}
+
+	/* check if the merged packet length is greater than the max value */
+	hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
+		pkt_head->l4_len;
+	l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
+	if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
+				hdr_len > MAX_IP_PKT_LENGTH))
+		return 0;
+
+	/* remove the packet header for the tail packet */
+	rte_pktmbuf_adj(pkt_tail, hdr_len);
+
+	/* chain two packets together */
+	if (cmp > 0) {
+		item->lastseg->next = pkt;
+		item->lastseg = rte_pktmbuf_lastseg(pkt);
+		/* update IP ID to the larger value */
+		item->ip_id = ip_id;
+	} else {
+		lastseg = rte_pktmbuf_lastseg(pkt);
+		lastseg->next = item->firstseg;
+		item->firstseg = pkt;
+		/* update sent_seq to the smaller value */
+		item->sent_seq = sent_seq;
+		item->ip_id = ip_id;
+	}
+	item->nb_merged++;
+
+	/* update MBUF metadata for the merged packet */
+	pkt_head->nb_segs += pkt_tail->nb_segs;
+	pkt_head->pkt_len += pkt_tail->pkt_len;
+
+	return 1;
+}
+
+/*
+ * Check if two TCP packets are neighbors.
+ * Returns 1 when the input packet should be appended to the stored
+ * item, -1 when it should be pre-pended, and 0 when the packets
+ * cannot be merged (different TCP options, mismatched DF bits, or
+ * non-adjacent sequence numbers / IP IDs).
+ */
+static inline int
+check_seq_option(struct gro_tcp_item *item,
+		struct rte_tcp_hdr *tcph,
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint16_t tcp_hl,
+		uint16_t tcp_dl,
+		uint16_t l2_offset,
+		uint8_t is_atomic)
+{
+	struct rte_mbuf *pkt_orig = item->firstseg;
+	char *iph_orig;
+	struct rte_tcp_hdr *tcph_orig;
+	uint16_t len, tcp_hl_orig;
+
+	iph_orig = (char *)(rte_pktmbuf_mtod(pkt_orig, char *) +
+			l2_offset + pkt_orig->l2_len);
+	tcph_orig = (struct rte_tcp_hdr *)(iph_orig + pkt_orig->l3_len);
+	tcp_hl_orig = pkt_orig->l4_len;
+
+	/* Check if TCP option fields equal */
+	len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct rte_tcp_hdr);
+	if ((tcp_hl != tcp_hl_orig) || ((len > 0) &&
+				(memcmp(tcph + 1, tcph_orig + 1,
+					len) != 0)))
+		return 0;
+
+	/* Don't merge packets whose DF bits are different */
+	if (unlikely(item->is_atomic ^ is_atomic))
+		return 0;
+
+	/* check if the two packets are neighbors */
+	len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
+		pkt_orig->l3_len - tcp_hl_orig;
+	if ((sent_seq == item->sent_seq + len) && (is_atomic ||
+				(ip_id == item->ip_id + 1)))
+		/* append the new packet */
+		return 1;
+	else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
+				(ip_id + item->nb_merged == item->ip_id)))
+		/* pre-pend the new packet */
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Compare the common L2/L4 parts of two flow keys with memcmp.
+ * The IP addresses are not part of struct gro_tcp_flow, so callers
+ * must compare them separately to establish full flow equality.
+ * NOTE(review): memcmp-based equality assumes the struct has no
+ * uninitialized padding; the current layout (two 6-byte MAC
+ * addresses, one u32, two u16) appears padding-free on common
+ * ABIs — confirm if fields are ever added.
+ */
+static inline int
+is_same_tcp_flow(struct gro_tcp_flow *k1, struct gro_tcp_flow *k2)
+{
+	return (!memcmp(k1, k2, sizeof(struct gro_tcp_flow)));
+}
+
+#endif
diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c
index 0014096e63..ffc33747c4 100644
--- a/lib/gro/gro_tcp4.c
+++ b/lib/gro/gro_tcp4.c
@@ -7,6 +7,7 @@ 
 #include <rte_ethdev.h>
 
 #include "gro_tcp4.h"
+#include "gro_tcp.h"
 
 void *
 gro_tcp4_tbl_create(uint16_t socket_id,
@@ -30,7 +31,7 @@  gro_tcp4_tbl_create(uint16_t socket_id,
 	if (tbl == NULL)
 		return NULL;
 
-	size = sizeof(struct gro_tcp4_item) * entries_num;
+	size = sizeof(struct gro_tcp_item) * entries_num;
 	tbl->items = rte_zmalloc_socket(__func__,
 			size,
 			RTE_CACHE_LINE_SIZE,
@@ -71,18 +72,6 @@  gro_tcp4_tbl_destroy(void *tbl)
 	rte_free(tcp_tbl);
 }
 
-static inline uint32_t
-find_an_empty_item(struct gro_tcp4_tbl *tbl)
-{
-	uint32_t i;
-	uint32_t max_item_num = tbl->max_item_num;
-
-	for (i = 0; i < max_item_num; i++)
-		if (tbl->items[i].firstseg == NULL)
-			return i;
-	return INVALID_ARRAY_INDEX;
-}
-
 static inline uint32_t
 find_an_empty_flow(struct gro_tcp4_tbl *tbl)
 {
@@ -95,56 +84,6 @@  find_an_empty_flow(struct gro_tcp4_tbl *tbl)
 	return INVALID_ARRAY_INDEX;
 }
 
-static inline uint32_t
-insert_new_item(struct gro_tcp4_tbl *tbl,
-		struct rte_mbuf *pkt,
-		uint64_t start_time,
-		uint32_t prev_idx,
-		uint32_t sent_seq,
-		uint16_t ip_id,
-		uint8_t is_atomic)
-{
-	uint32_t item_idx;
-
-	item_idx = find_an_empty_item(tbl);
-	if (item_idx == INVALID_ARRAY_INDEX)
-		return INVALID_ARRAY_INDEX;
-
-	tbl->items[item_idx].firstseg = pkt;
-	tbl->items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt);
-	tbl->items[item_idx].start_time = start_time;
-	tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX;
-	tbl->items[item_idx].sent_seq = sent_seq;
-	tbl->items[item_idx].ip_id = ip_id;
-	tbl->items[item_idx].nb_merged = 1;
-	tbl->items[item_idx].is_atomic = is_atomic;
-	tbl->item_num++;
-
-	/* if the previous packet exists, chain them together. */
-	if (prev_idx != INVALID_ARRAY_INDEX) {
-		tbl->items[item_idx].next_pkt_idx =
-			tbl->items[prev_idx].next_pkt_idx;
-		tbl->items[prev_idx].next_pkt_idx = item_idx;
-	}
-
-	return item_idx;
-}
-
-static inline uint32_t
-delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
-		uint32_t prev_item_idx)
-{
-	uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
-
-	/* NULL indicates an empty item */
-	tbl->items[item_idx].firstseg = NULL;
-	tbl->item_num--;
-	if (prev_item_idx != INVALID_ARRAY_INDEX)
-		tbl->items[prev_item_idx].next_pkt_idx = next_idx;
-
-	return next_idx;
-}
-
 static inline uint32_t
 insert_new_flow(struct gro_tcp4_tbl *tbl,
 		struct tcp4_flow_key *src,
@@ -159,13 +98,10 @@  insert_new_flow(struct gro_tcp4_tbl *tbl,
 
 	dst = &(tbl->flows[flow_idx].key);
 
-	rte_ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr));
-	rte_ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr));
+	ASSIGN_TCP_FLOW_KEY((&src->tcp_flow), (&dst->tcp_flow));	
+
 	dst->ip_src_addr = src->ip_src_addr;
 	dst->ip_dst_addr = src->ip_dst_addr;
-	dst->recv_ack = src->recv_ack;
-	dst->src_port = src->src_port;
-	dst->dst_port = src->dst_port;
 
 	tbl->flows[flow_idx].start_index = item_idx;
 	tbl->flow_num++;
@@ -173,21 +109,6 @@  insert_new_flow(struct gro_tcp4_tbl *tbl,
 	return flow_idx;
 }
 
-/*
- * update the packet length for the flushed packet.
- */
-static inline void
-update_header(struct gro_tcp4_item *item)
-{
-	struct rte_ipv4_hdr *ipv4_hdr;
-	struct rte_mbuf *pkt = item->firstseg;
-
-	ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
-			pkt->l2_len);
-	ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len -
-			pkt->l2_len);
-}
-
 int32_t
 gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		struct gro_tcp4_tbl *tbl,
@@ -195,16 +116,15 @@  gro_tcp4_reassemble(struct rte_mbuf *pkt,
 {
 	struct rte_ether_hdr *eth_hdr;
 	struct rte_ipv4_hdr *ipv4_hdr;
-	struct rte_tcp_hdr *tcp_hdr;
-	uint32_t sent_seq;
 	int32_t tcp_dl;
+	struct rte_tcp_hdr *tcp_hdr;
 	uint16_t ip_id, hdr_len, frag_off, ip_tlen;
 	uint8_t is_atomic;
+	uint32_t sent_seq;
 
 	struct tcp4_flow_key key;
-	uint32_t cur_idx, prev_idx, item_idx;
+	uint32_t item_idx;
 	uint32_t i, max_flow_num, remaining_flow_num;
-	int cmp;
 	uint8_t find;
 
 	/*
@@ -216,7 +136,7 @@  gro_tcp4_reassemble(struct rte_mbuf *pkt,
 
 	eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
 	ipv4_hdr = (struct rte_ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
-	tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+	tcp_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_tcp_hdr *, pkt->l2_len + pkt->l3_len);
 	hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
 
 	/*
@@ -230,7 +150,6 @@  gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	ip_tlen = rte_be_to_cpu_16(ipv4_hdr->total_length);
 	if (pkt->pkt_len > (uint32_t)(ip_tlen + pkt->l2_len))
 		rte_pktmbuf_trim(pkt, pkt->pkt_len - ip_tlen - pkt->l2_len);
-
 	/*
 	 * Don't process the packet whose payload length is less than or
 	 * equal to 0.
@@ -239,6 +158,13 @@  gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	if (tcp_dl <= 0)
 		return -1;
 
+	rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.tcp_flow.eth_saddr));
+	rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.tcp_flow.eth_daddr));
+	key.ip_src_addr = ipv4_hdr->src_addr;
+	key.ip_dst_addr = ipv4_hdr->dst_addr;
+	key.tcp_flow.src_port = tcp_hdr->src_port;
+	key.tcp_flow.dst_port = tcp_hdr->dst_port;
+	key.tcp_flow.recv_ack = tcp_hdr->recv_ack;
 	/*
 	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
 	 * whose DF bit is 1, IPv4 ID is ignored.
@@ -246,15 +172,6 @@  gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
 	is_atomic = (frag_off & RTE_IPV4_HDR_DF_FLAG) == RTE_IPV4_HDR_DF_FLAG;
 	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
-	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
-
-	rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.eth_saddr));
-	rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.eth_daddr));
-	key.ip_src_addr = ipv4_hdr->src_addr;
-	key.ip_dst_addr = ipv4_hdr->dst_addr;
-	key.src_port = tcp_hdr->src_port;
-	key.dst_port = tcp_hdr->dst_port;
-	key.recv_ack = tcp_hdr->recv_ack;
 
 	/* Search for a matched flow. */
 	max_flow_num = tbl->max_flow_num;
@@ -270,63 +187,44 @@  gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		}
 	}
 
-	/*
-	 * Fail to find a matched flow. Insert a new flow and store the
-	 * packet into the flow.
-	 */
 	if (find == 0) {
-		item_idx = insert_new_item(tbl, pkt, start_time,
-				INVALID_ARRAY_INDEX, sent_seq, ip_id,
-				is_atomic);
+		sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);	
+		item_idx = insert_new_tcp_item(pkt, tbl->items, &tbl->item_num, tbl->max_item_num, start_time,
+						INVALID_ARRAY_INDEX, sent_seq, ip_id,
+						is_atomic);
 		if (item_idx == INVALID_ARRAY_INDEX)
 			return -1;
 		if (insert_new_flow(tbl, &key, item_idx) ==
-				INVALID_ARRAY_INDEX) {
+			INVALID_ARRAY_INDEX) {
 			/*
 			 * Fail to insert a new flow, so delete the
 			 * stored packet.
-			 */
-			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
+			*/
+			delete_tcp_item(tbl->items, item_idx, &tbl->item_num, INVALID_ARRAY_INDEX);
 			return -1;
 		}
 		return 0;
 	}
+	item_idx = tbl->flows[i].start_index;
 
-	/*
-	 * Check all packets in the flow and try to find a neighbor for
-	 * the input packet.
-	 */
-	cur_idx = tbl->flows[i].start_index;
-	prev_idx = cur_idx;
-	do {
-		cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
-				sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
-				is_atomic);
-		if (cmp) {
-			if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
-						pkt, cmp, sent_seq, ip_id, 0))
-				return 1;
-			/*
-			 * Fail to merge the two packets, as the packet
-			 * length is greater than the max value. Store
-			 * the packet into the flow.
-			 */
-			if (insert_new_item(tbl, pkt, start_time, cur_idx,
-						sent_seq, ip_id, is_atomic) ==
-					INVALID_ARRAY_INDEX)
-				return -1;
-			return 0;
-		}
-		prev_idx = cur_idx;
-		cur_idx = tbl->items[cur_idx].next_pkt_idx;
-	} while (cur_idx != INVALID_ARRAY_INDEX);
+	return gro_process_tcp_item(pkt, tcp_hdr, tcp_dl, tbl->items, item_idx,
+						&tbl->item_num, tbl->max_item_num, 
+						ip_id, is_atomic, start_time);
+}
 
-	/* Fail to find a neighbor, so store the packet into the flow. */
-	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
-				ip_id, is_atomic) == INVALID_ARRAY_INDEX)
-		return -1;
+/*
+ * update the packet length for the flushed packet.
+ */
+static inline void
+update_header(struct gro_tcp_item *item)
+{
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_mbuf *pkt = item->firstseg;
 
-	return 0;
+	ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			pkt->l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len -
+			pkt->l2_len);
 }
 
 uint16_t
@@ -353,7 +251,7 @@  gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
 				 * Delete the packet and get the next
 				 * packet in the flow.
 				 */
-				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+				j = delete_tcp_item(tbl->items, j, &tbl->item_num, INVALID_ARRAY_INDEX);
 				tbl->flows[i].start_index = j;
 				if (j == INVALID_ARRAY_INDEX)
 					tbl->flow_num--;
diff --git a/lib/gro/gro_tcp4.h b/lib/gro/gro_tcp4.h
index 212f97a042..8ff1eed096 100644
--- a/lib/gro/gro_tcp4.h
+++ b/lib/gro/gro_tcp4.h
@@ -5,32 +5,15 @@ 
 #ifndef _GRO_TCP4_H_
 #define _GRO_TCP4_H_
 
-#include <rte_tcp.h>
+#include <gro_tcp.h>
 
-#define INVALID_ARRAY_INDEX 0xffffffffUL
 #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
 
-/*
- * The max length of a IPv4 packet, which includes the length of the L3
- * header, the L4 header and the data payload.
- */
-#define MAX_IPV4_PKT_LENGTH UINT16_MAX
-
-/* The maximum TCP header length */
-#define MAX_TCP_HLEN 60
-#define INVALID_TCP_HDRLEN(len) \
-	(((len) < sizeof(struct rte_tcp_hdr)) || ((len) > MAX_TCP_HLEN))
-
 /* Header fields representing a TCP/IPv4 flow */
 struct tcp4_flow_key {
-	struct rte_ether_addr eth_saddr;
-	struct rte_ether_addr eth_daddr;
+	struct gro_tcp_flow tcp_flow;
 	uint32_t ip_src_addr;
 	uint32_t ip_dst_addr;
-
-	uint32_t recv_ack;
-	uint16_t src_port;
-	uint16_t dst_port;
 };
 
 struct gro_tcp4_flow {
@@ -42,42 +25,12 @@  struct gro_tcp4_flow {
 	uint32_t start_index;
 };
 
-struct gro_tcp4_item {
-	/*
-	 * The first MBUF segment of the packet. If the value
-	 * is NULL, it means the item is empty.
-	 */
-	struct rte_mbuf *firstseg;
-	/* The last MBUF segment of the packet */
-	struct rte_mbuf *lastseg;
-	/*
-	 * The time when the first packet is inserted into the table.
-	 * This value won't be updated, even if the packet is merged
-	 * with other packets.
-	 */
-	uint64_t start_time;
-	/*
-	 * next_pkt_idx is used to chain the packets that
-	 * are in the same flow but can't be merged together
-	 * (e.g. caused by packet reordering).
-	 */
-	uint32_t next_pkt_idx;
-	/* TCP sequence number of the packet */
-	uint32_t sent_seq;
-	/* IPv4 ID of the packet */
-	uint16_t ip_id;
-	/* the number of merged packets */
-	uint16_t nb_merged;
-	/* Indicate if IPv4 ID can be ignored */
-	uint8_t is_atomic;
-};
-
 /*
  * TCP/IPv4 reassembly table structure.
  */
 struct gro_tcp4_tbl {
 	/* item array */
-	struct gro_tcp4_item *items;
+	struct gro_tcp_item *items;
 	/* flow array */
 	struct gro_tcp4_flow *flows;
 	/* current item number */
@@ -186,120 +139,9 @@  uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
 static inline int
 is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
 {
-	return (rte_is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) &&
-			rte_is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) &&
-			(k1.ip_src_addr == k2.ip_src_addr) &&
+	return ((k1.ip_src_addr == k2.ip_src_addr) &&
 			(k1.ip_dst_addr == k2.ip_dst_addr) &&
-			(k1.recv_ack == k2.recv_ack) &&
-			(k1.src_port == k2.src_port) &&
-			(k1.dst_port == k2.dst_port));
+			is_same_tcp_flow(&k1.tcp_flow, &k2.tcp_flow));
 }
 
-/*
- * Merge two TCP/IPv4 packets without updating checksums.
- * If cmp is larger than 0, append the new packet to the
- * original packet. Otherwise, pre-pend the new packet to
- * the original packet.
- */
-static inline int
-merge_two_tcp4_packets(struct gro_tcp4_item *item,
-		struct rte_mbuf *pkt,
-		int cmp,
-		uint32_t sent_seq,
-		uint16_t ip_id,
-		uint16_t l2_offset)
-{
-	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
-	uint16_t hdr_len, l2_len;
-
-	if (cmp > 0) {
-		pkt_head = item->firstseg;
-		pkt_tail = pkt;
-	} else {
-		pkt_head = pkt;
-		pkt_tail = item->firstseg;
-	}
-
-	/* check if the IPv4 packet length is greater than the max value */
-	hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
-		pkt_head->l4_len;
-	l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
-	if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
-				hdr_len > MAX_IPV4_PKT_LENGTH))
-		return 0;
-
-	/* remove the packet header for the tail packet */
-	rte_pktmbuf_adj(pkt_tail, hdr_len);
-
-	/* chain two packets together */
-	if (cmp > 0) {
-		item->lastseg->next = pkt;
-		item->lastseg = rte_pktmbuf_lastseg(pkt);
-		/* update IP ID to the larger value */
-		item->ip_id = ip_id;
-	} else {
-		lastseg = rte_pktmbuf_lastseg(pkt);
-		lastseg->next = item->firstseg;
-		item->firstseg = pkt;
-		/* update sent_seq to the smaller value */
-		item->sent_seq = sent_seq;
-		item->ip_id = ip_id;
-	}
-	item->nb_merged++;
-
-	/* update MBUF metadata for the merged packet */
-	pkt_head->nb_segs += pkt_tail->nb_segs;
-	pkt_head->pkt_len += pkt_tail->pkt_len;
-
-	return 1;
-}
-
-/*
- * Check if two TCP/IPv4 packets are neighbors.
- */
-static inline int
-check_seq_option(struct gro_tcp4_item *item,
-		struct rte_tcp_hdr *tcph,
-		uint32_t sent_seq,
-		uint16_t ip_id,
-		uint16_t tcp_hl,
-		uint16_t tcp_dl,
-		uint16_t l2_offset,
-		uint8_t is_atomic)
-{
-	struct rte_mbuf *pkt_orig = item->firstseg;
-	struct rte_ipv4_hdr *iph_orig;
-	struct rte_tcp_hdr *tcph_orig;
-	uint16_t len, tcp_hl_orig;
-
-	iph_orig = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
-			l2_offset + pkt_orig->l2_len);
-	tcph_orig = (struct rte_tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
-	tcp_hl_orig = pkt_orig->l4_len;
-
-	/* Check if TCP option fields equal */
-	len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct rte_tcp_hdr);
-	if ((tcp_hl != tcp_hl_orig) || ((len > 0) &&
-				(memcmp(tcph + 1, tcph_orig + 1,
-					len) != 0)))
-		return 0;
-
-	/* Don't merge packets whose DF bits are different */
-	if (unlikely(item->is_atomic ^ is_atomic))
-		return 0;
-
-	/* check if the two packets are neighbors */
-	len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
-		pkt_orig->l3_len - tcp_hl_orig;
-	if ((sent_seq == item->sent_seq + len) && (is_atomic ||
-				(ip_id == item->ip_id + 1)))
-		/* append the new packet */
-		return 1;
-	else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
-				(ip_id + item->nb_merged == item->ip_id)))
-		/* pre-pend the new packet */
-		return -1;
-
-	return 0;
-}
 #endif
diff --git a/lib/gro/gro_tcp6.c b/lib/gro/gro_tcp6.c
new file mode 100644
index 0000000000..66b94b4123
--- /dev/null
+++ b/lib/gro/gro_tcp6.c
@@ -0,0 +1,266 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+
+#include "gro_tcp6.h"
+#include "gro_tcp.h"
+
+void *
+gro_tcp6_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow)
+{
+	struct gro_tcp6_tbl *tbl;
+	size_t size;
+	uint32_t entries_num, i;
+
+	entries_num = max_flow_num * max_item_per_flow;
+	entries_num = RTE_MIN(entries_num, GRO_TCP6_TBL_MAX_ITEM_NUM);
+
+	if (entries_num == 0)
+		return NULL;
+
+	tbl = rte_zmalloc_socket(__func__,
+			sizeof(struct gro_tcp6_tbl),
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl == NULL)
+		return NULL;
+
+	size = sizeof(struct gro_tcp_item) * entries_num;
+	tbl->items = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->items == NULL) {
+		rte_free(tbl);
+		return NULL;
+	}
+	tbl->max_item_num = entries_num;
+
+	size = sizeof(struct gro_tcp6_flow) * entries_num;
+	tbl->flows = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->flows == NULL) {
+		rte_free(tbl->items);
+		rte_free(tbl);
+		return NULL;
+	}
+	/* INVALID_ARRAY_INDEX indicates an empty flow */
+	for (i = 0; i < entries_num; i++)
+		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+	tbl->max_flow_num = entries_num;
+
+	return tbl;
+}
+
+void
+gro_tcp6_tbl_destroy(void *tbl)
+{
+	struct gro_tcp6_tbl *tcp_tbl = tbl;
+
+	if (tcp_tbl) {
+		rte_free(tcp_tbl->items);
+		rte_free(tcp_tbl->flows);
+	}
+	rte_free(tcp_tbl);
+}
+
+static inline uint32_t
+find_an_empty_flow(struct gro_tcp6_tbl *tbl)
+{
+	uint32_t i;
+	uint32_t max_flow_num = tbl->max_flow_num;
+
+	for (i = 0; i < max_flow_num; i++)
+		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+insert_new_flow(struct gro_tcp6_tbl *tbl,
+		struct tcp6_flow_key *src,
+		uint32_t item_idx)
+{
+	struct tcp6_flow_key *dst;
+	uint32_t flow_idx;
+
+	flow_idx = find_an_empty_flow(tbl);
+	if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
+		return INVALID_ARRAY_INDEX;
+
+	dst = &(tbl->flows[flow_idx].key);
+
+	ASSIGN_TCP_FLOW_KEY((&src->tcp_flow), (&dst->tcp_flow));
+	memcpy(&dst->src_addr[0], &src->src_addr[0], sizeof(dst->src_addr));
+	memcpy(&dst->dst_addr[0], &src->dst_addr[0], sizeof(dst->dst_addr));
+	dst->vtc_flow = src->vtc_flow;
+
+	tbl->flows[flow_idx].start_index = item_idx;
+	tbl->flow_num++;
+
+	return flow_idx;
+}
+
+/*
+ * update the packet length for the flushed packet.
+ */
+static inline void
+update_header(struct gro_tcp_item *item)
+{
+	struct rte_ipv6_hdr *ipv6_hdr;
+	struct rte_mbuf *pkt = item->firstseg;
+
+	ipv6_hdr = (struct rte_ipv6_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			pkt->l2_len);
+	ipv6_hdr->payload_len = rte_cpu_to_be_16(pkt->pkt_len -
+			pkt->l2_len - pkt->l3_len);
+}
+
+int32_t
+gro_tcp6_reassemble(struct rte_mbuf *pkt,
+		struct gro_tcp6_tbl *tbl,
+		uint64_t start_time)
+{
+	struct rte_ether_hdr *eth_hdr;
+	struct rte_ipv6_hdr *ipv6_hdr;
+	int32_t tcp_dl;
+	uint16_t ip_tlen;
+	struct tcp6_flow_key key;
+	uint32_t i, max_flow_num, remaining_flow_num;
+	uint32_t sent_seq;
+	struct rte_tcp_hdr *tcp_hdr;
+	uint8_t find;
+	uint32_t item_idx;
+	/*
+	 * Don't process the packet whose TCP header length is greater
+	 * than 60 bytes or less than 20 bytes.
+	 */
+	if (unlikely(INVALID_TCP_HDRLEN(pkt->l4_len)))
+		return -1;
+
+	eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
+	ipv6_hdr = (struct rte_ipv6_hdr *)((char *)eth_hdr + pkt->l2_len);
+	tcp_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_tcp_hdr *, pkt->l2_len + pkt->l3_len);
+
+	/*
+	 * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
+	 * or CWR set.
+	 */
+	if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
+		return -1;
+
+	ip_tlen = rte_be_to_cpu_16(ipv6_hdr->payload_len);
+	/*
+	 * Don't process the packet whose payload length is less than or
+	 * equal to 0.
+	 */
+	tcp_dl = ip_tlen - pkt->l4_len;
+	if (tcp_dl <= 0)
+		return -1;
+
+	rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.tcp_flow.eth_saddr));
+	rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.tcp_flow.eth_daddr));
+	memcpy(&key.src_addr[0], &ipv6_hdr->src_addr, sizeof(key.src_addr));
+	memcpy(&key.dst_addr[0], &ipv6_hdr->dst_addr, sizeof(key.dst_addr));
+	key.tcp_flow.src_port = tcp_hdr->src_port;
+	key.tcp_flow.dst_port = tcp_hdr->dst_port;
+	key.tcp_flow.recv_ack = tcp_hdr->recv_ack;
+	key.vtc_flow = ipv6_hdr->vtc_flow;
+
+	/* Search for a matched flow. */
+	max_flow_num = tbl->max_flow_num;
+	remaining_flow_num = tbl->flow_num;
+	find = 0;
+	for (i = 0; i < max_flow_num && remaining_flow_num; i++) {
+		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) {
+			if (is_same_tcp6_flow(&tbl->flows[i].key, &key)) {
+				find = 1;
+				break;
+			}
+			remaining_flow_num--;
+		}
+	}
+
+	if (find == 0) {
+		sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
+		item_idx = insert_new_tcp_item(pkt, tbl->items, &tbl->item_num, tbl->max_item_num, start_time,
+						INVALID_ARRAY_INDEX, sent_seq, 0, true);
+		if (item_idx == INVALID_ARRAY_INDEX)
+			return -1;
+		if (insert_new_flow(tbl, &key, item_idx) ==
+			INVALID_ARRAY_INDEX) {
+			/*
+			 * Fail to insert a new flow, so delete the
+			 * stored packet.
+			 */
+			delete_tcp_item(tbl->items, item_idx, &tbl->item_num, INVALID_ARRAY_INDEX);
+			return -1;
+		}
+		return 0;
+	}
+	item_idx = tbl->flows[i].start_index;
+	return gro_process_tcp_item(pkt, tcp_hdr, tcp_dl, tbl->items, item_idx,
+						&tbl->item_num, tbl->max_item_num,
+						0, true, start_time);
+}
+
+uint16_t
+gro_tcp6_tbl_timeout_flush(struct gro_tcp6_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out)
+{
+	uint16_t k = 0;
+	uint32_t i, j;
+	uint32_t max_flow_num = tbl->max_flow_num;
+
+	for (i = 0; i < max_flow_num; i++) {
+		if (unlikely(tbl->flow_num == 0))
+			return k;
+
+		j = tbl->flows[i].start_index;
+		while (j != INVALID_ARRAY_INDEX) {
+			if (tbl->items[j].start_time <= flush_timestamp) {
+				out[k++] = tbl->items[j].firstseg;
+				if (tbl->items[j].nb_merged > 1)
+					update_header(&(tbl->items[j]));
+				/*
+				 * Delete the packet and get the next
+				 * packet in the flow.
+				 */
+				j = delete_tcp_item(tbl->items, j, &tbl->item_num, INVALID_ARRAY_INDEX);
+				tbl->flows[i].start_index = j;
+				if (j == INVALID_ARRAY_INDEX)
+					tbl->flow_num--;
+
+				if (unlikely(k == nb_out))
+					return k;
+			} else
+				/*
+				 * The left packets in this flow won't be
+				 * timeout. Go to check other flows.
+				 */
+				break;
+		}
+	}
+	return k;
+}
+
+uint32_t
+gro_tcp6_tbl_pkt_count(void *tbl)
+{
+	struct gro_tcp6_tbl *gro_tbl = tbl;
+
+	if (gro_tbl)
+		return gro_tbl->item_num;
+
+	return 0;
+}
diff --git a/lib/gro/gro_tcp6.h b/lib/gro/gro_tcp6.h
new file mode 100644
index 0000000000..9eb6be2dfe
--- /dev/null
+++ b/lib/gro/gro_tcp6.h
@@ -0,0 +1,163 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Intel Corporation
+ */
+
+#ifndef _GRO_TCP6_H_
+#define _GRO_TCP6_H_
+
+#include "gro_tcp.h"
+
+#define INVALID_ARRAY_INDEX 0xffffffffUL
+#define GRO_TCP6_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
+
+/* Header fields representing a TCP/IPv6 flow */
+struct tcp6_flow_key {
+	struct gro_tcp_flow tcp_flow;
+	uint8_t  src_addr[16];
+	uint8_t  dst_addr[16];
+	rte_be32_t vtc_flow;
+};
+
+struct gro_tcp6_flow {
+	struct tcp6_flow_key key;
+	/*
+	 * The index of the first packet in the flow.
+	 * INVALID_ARRAY_INDEX indicates an empty flow.
+	 */
+	uint32_t start_index;
+};
+
+/*
+ * TCP/IPv6 reassembly table structure.
+ */
+struct gro_tcp6_tbl {
+	/* item array */
+	struct gro_tcp_item *items;
+	/* flow array */
+	struct gro_tcp6_flow *flows;
+	/* current item number */
+	uint32_t item_num;
+	/* current flow num */
+	uint32_t flow_num;
+	/* item array size */
+	uint32_t max_item_num;
+	/* flow array size */
+	uint32_t max_flow_num;
+};
+
+/**
+ * This function creates a TCP/IPv6 reassembly table.
+ *
+ * @param socket_id
+ *  Socket index for allocating the TCP/IPv6 reassemble table
+ * @param max_flow_num
+ *  The maximum number of flows in the TCP/IPv6 GRO table
+ * @param max_item_per_flow
+ *  The maximum number of packets per flow
+ *
+ * @return
+ *  - Return the table pointer on success.
+ *  - Return NULL on failure.
+ */
+void *gro_tcp6_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow);
+
+/**
+ * This function destroys a TCP/IPv6 reassembly table.
+ *
+ * @param tbl
+ *  Pointer pointing to the TCP/IPv6 reassembly table.
+ */
+void gro_tcp6_tbl_destroy(void *tbl);
+
+/**
+ * This function merges a TCP/IPv6 packet. It doesn't process the packet,
+ * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or doesn't have
+ * payload.
+ *
+ * This function doesn't check if the packet has correct checksums and
+ * doesn't re-calculate checksums for the merged packet. Additionally,
+ * it assumes the packets are complete (i.e., without the IPv6
+ * fragment extension header). It returns the packet, if the packet
+ * has invalid parameters (e.g. SYN bit is set) or there is no
+ * available space in the table.
+ *
+ * @param pkt
+ *  Packet to reassemble
+ * @param tbl
+ *  Pointer pointing to the TCP/IPv6 reassembly table
+ * @start_time
+ *  The time when the packet is inserted into the table
+ *
+ * @return
+ *  - Return a positive value if the packet is merged.
+ *  - Return zero if the packet isn't merged but stored in the table.
+ *  - Return a negative value for invalid parameters or no available
+ *    space in the table.
+ */
+int32_t gro_tcp6_reassemble(struct rte_mbuf *pkt,
+		struct gro_tcp6_tbl *tbl,
+		uint64_t start_time);
+
+/**
+ * This function flushes timeout packets in a TCP/IPv6 reassembly table,
+ * and without updating checksums.
+ *
+ * @param tbl
+ *  TCP/IPv6 reassembly table pointer
+ * @param flush_timestamp
+ *  Flush packets which are inserted into the table before or at the
+ *  flush_timestamp.
+ * @param out
+ *  Pointer array used to keep flushed packets
+ * @param nb_out
+ *  The element number in 'out'. It also determines the maximum number of
+ *  packets that can be flushed finally.
+ *
+ * @return
+ *  The number of flushed packets
+ */
+uint16_t gro_tcp6_tbl_timeout_flush(struct gro_tcp6_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out);
+
+/**
+ * This function returns the number of the packets in a TCP/IPv6
+ * reassembly table.
+ *
+ * @param tbl
+ *  TCP/IPv6 reassembly table pointer
+ *
+ * @return
+ *  The number of packets in the table
+ */
+uint32_t gro_tcp6_tbl_pkt_count(void *tbl);
+
+/*
+ * Check if two TCP/IPv6 packets belong to the same flow.
+ */
+static inline int
+is_same_tcp6_flow(struct tcp6_flow_key *k1, struct tcp6_flow_key *k2)
+{
+	rte_be32_t vtc_flow_diff;
+
+	if (memcmp(&k1->src_addr, &k2->src_addr, 16)) {
+		return 0;
+	}
+	if (memcmp(&k1->dst_addr, &k2->dst_addr, 16)) {
+		return 0;
+	}
+	/*
+	 * IP version (4) Traffic Class (8) Flow Label (20)
+	 * All fields except Traffic class should be same
+	 */
+	vtc_flow_diff = (k1->vtc_flow ^ k2->vtc_flow);
+	if (vtc_flow_diff & rte_cpu_to_be_32(0xF00FFFFF))
+		return 0;
+
+	return is_same_tcp_flow(&k1->tcp_flow, &k2->tcp_flow);
+}
+
+#endif
diff --git a/lib/gro/gro_vxlan_tcp4.c b/lib/gro/gro_vxlan_tcp4.c
index 3be4deb7c7..16aea7049f 100644
--- a/lib/gro/gro_vxlan_tcp4.c
+++ b/lib/gro/gro_vxlan_tcp4.c
@@ -7,6 +7,7 @@ 
 #include <rte_ethdev.h>
 #include <rte_udp.h>
 
+#include "gro_tcp.h"
 #include "gro_vxlan_tcp4.h"
 
 void *
@@ -163,15 +164,9 @@  insert_new_flow(struct gro_vxlan_tcp4_tbl *tbl,
 
 	dst = &(tbl->flows[flow_idx].key);
 
-	rte_ether_addr_copy(&(src->inner_key.eth_saddr),
-			&(dst->inner_key.eth_saddr));
-	rte_ether_addr_copy(&(src->inner_key.eth_daddr),
-			&(dst->inner_key.eth_daddr));
+	ASSIGN_TCP_FLOW_KEY((&(src->inner_key.tcp_flow)), (&(dst->inner_key.tcp_flow)));
 	dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
 	dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
-	dst->inner_key.recv_ack = src->inner_key.recv_ack;
-	dst->inner_key.src_port = src->inner_key.src_port;
-	dst->inner_key.dst_port = src->inner_key.dst_port;
 
 	dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
 	dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
@@ -248,7 +243,7 @@  merge_two_vxlan_tcp4_packets(struct gro_vxlan_tcp4_item *item,
 		uint16_t outer_ip_id,
 		uint16_t ip_id)
 {
-	if (merge_two_tcp4_packets(&item->inner_item, pkt, cmp, sent_seq,
+	if (merge_two_tcp_packets(&item->inner_item, pkt, cmp, sent_seq,
 				ip_id, pkt->outer_l2_len +
 				pkt->outer_l3_len)) {
 		/* Update the outer IPv4 ID to the large value. */
@@ -357,13 +352,13 @@  gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
 
 	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
 
-	rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.inner_key.eth_saddr));
-	rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.inner_key.eth_daddr));
+	rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.inner_key.tcp_flow.eth_saddr));
+	rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.inner_key.tcp_flow.eth_daddr));
 	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
 	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
-	key.inner_key.recv_ack = tcp_hdr->recv_ack;
-	key.inner_key.src_port = tcp_hdr->src_port;
-	key.inner_key.dst_port = tcp_hdr->dst_port;
+	key.inner_key.tcp_flow.recv_ack = tcp_hdr->recv_ack;
+	key.inner_key.tcp_flow.src_port = tcp_hdr->src_port;
+	key.inner_key.tcp_flow.dst_port = tcp_hdr->dst_port;
 
 	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
 	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;
diff --git a/lib/gro/gro_vxlan_tcp4.h b/lib/gro/gro_vxlan_tcp4.h
index 7832942a68..d68d5fcd5b 100644
--- a/lib/gro/gro_vxlan_tcp4.h
+++ b/lib/gro/gro_vxlan_tcp4.h
@@ -5,6 +5,7 @@ 
 #ifndef _GRO_VXLAN_TCP4_H_
 #define _GRO_VXLAN_TCP4_H_
 
+#include "gro_tcp.h"
 #include "gro_tcp4.h"
 
 #define GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
@@ -36,7 +37,7 @@  struct gro_vxlan_tcp4_flow {
 };
 
 struct gro_vxlan_tcp4_item {
-	struct gro_tcp4_item inner_item;
+	struct gro_tcp_item inner_item;
 	/* IPv4 ID in the outer IPv4 header */
 	uint16_t outer_ip_id;
 	/* Indicate if outer IPv4 ID can be ignored */
diff --git a/lib/gro/meson.build b/lib/gro/meson.build
index e4fa2958bd..a7d73f21ec 100644
--- a/lib/gro/meson.build
+++ b/lib/gro/meson.build
@@ -3,7 +3,9 @@ 
 
 sources = files(
         'rte_gro.c',
+        'gro_tcp.c',
         'gro_tcp4.c',
+        'gro_tcp6.c',
         'gro_udp4.c',
         'gro_vxlan_tcp4.c',
         'gro_vxlan_udp4.c',
diff --git a/lib/gro/rte_gro.c b/lib/gro/rte_gro.c
index e35399fd42..d824eebd93 100644
--- a/lib/gro/rte_gro.c
+++ b/lib/gro/rte_gro.c
@@ -8,6 +8,7 @@ 
 
 #include "rte_gro.h"
 #include "gro_tcp4.h"
+#include "gro_tcp6.h"
 #include "gro_udp4.h"
 #include "gro_vxlan_tcp4.h"
 #include "gro_vxlan_udp4.h"
@@ -20,14 +21,16 @@  typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl);
 
 static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = {
 		gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create,
-		gro_udp4_tbl_create, gro_vxlan_udp4_tbl_create, NULL};
+		gro_udp4_tbl_create, gro_vxlan_udp4_tbl_create, gro_tcp6_tbl_create, NULL};
 static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
 			gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy,
 			gro_udp4_tbl_destroy, gro_vxlan_udp4_tbl_destroy,
+			gro_tcp6_tbl_destroy,
 			NULL};
 static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
 			gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count,
 			gro_udp4_tbl_pkt_count, gro_vxlan_udp4_tbl_pkt_count,
+			gro_tcp6_tbl_pkt_count,
 			NULL};
 
 #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
@@ -35,6 +38,12 @@  static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
 		((ptype & RTE_PTYPE_L4_FRAG) != RTE_PTYPE_L4_FRAG) && \
 		(RTE_ETH_IS_TUNNEL_PKT(ptype) == 0))
 
+/* GRO with extension headers is not supported */
+#define IS_IPV6_TCP_PKT(ptype) (RTE_ETH_IS_IPV6_HDR(ptype) && \
+		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP) && \
+		((ptype & RTE_PTYPE_L4_FRAG) != RTE_PTYPE_L4_FRAG) && \
+		(RTE_ETH_IS_TUNNEL_PKT(ptype) == 0))
+
 #define IS_IPV4_UDP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
 		((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \
 		(RTE_ETH_IS_TUNNEL_PKT(ptype) == 0))
@@ -147,7 +156,11 @@  rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 	/* allocate a reassembly table for TCP/IPv4 GRO */
 	struct gro_tcp4_tbl tcp_tbl;
 	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
-	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
+	struct gro_tcp_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
+
+	struct gro_tcp6_tbl tcp6_tbl;
+	struct gro_tcp6_flow tcp6_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_tcp_item tcp6_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
 
 	/* allocate a reassembly table for UDP/IPv4 GRO */
 	struct gro_udp4_tbl udp_tbl;
@@ -171,10 +184,10 @@  rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 	int32_t ret;
 	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
 	uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
-		do_vxlan_udp_gro = 0;
+		do_vxlan_udp_gro = 0, do_tcp6_gro = 0;
 
 	if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
-					RTE_GRO_TCP_IPV4 |
+					RTE_GRO_TCP_IPV4 | RTE_GRO_TCP_IPV6 |
 					RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
 					RTE_GRO_UDP_IPV4)) == 0))
 		return nb_pkts;
@@ -236,6 +249,18 @@  rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 		do_udp4_gro = 1;
 	}
 
+	if (param->gro_types & RTE_GRO_TCP_IPV6) {
+		for (i = 0; i < item_num; i++)
+			tcp6_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		tcp6_tbl.flows = tcp6_flows;
+		tcp6_tbl.items = tcp6_items;
+		tcp6_tbl.flow_num = 0;
+		tcp6_tbl.item_num = 0;
+		tcp6_tbl.max_flow_num = item_num;
+		tcp6_tbl.max_item_num = item_num;
+		do_tcp6_gro = 1;
+	}
 
 	for (i = 0; i < nb_pkts; i++) {
 		/*
@@ -276,6 +301,14 @@  rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 				nb_after_gro--;
 			else if (ret < 0)
 				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (IS_IPV6_TCP_PKT(pkts[i]->packet_type) &&
+				do_tcp6_gro) {
+			ret = gro_tcp6_reassemble(pkts[i], &tcp6_tbl, 0);
+			if (ret > 0)
+				/* merge successfully */
+				nb_after_gro--;
+			else if (ret < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else
 			unprocess_pkts[unprocess_num++] = pkts[i];
 	}
@@ -283,9 +316,17 @@  rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 	if ((nb_after_gro < nb_pkts)
 		 || (unprocess_num < nb_pkts)) {
 		i = 0;
+		/* Copy unprocessed packets */
+		if (unprocess_num > 0) {
+			memcpy(&pkts[i], unprocess_pkts,
+					sizeof(struct rte_mbuf *) *
+					unprocess_num);
+			i = unprocess_num;
+		}
+
 		/* Flush all packets from the tables */
 		if (do_vxlan_tcp_gro) {
-			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
+			i += gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
 					0, pkts, nb_pkts);
 		}
 
@@ -304,13 +345,11 @@  rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 			i += gro_udp4_tbl_timeout_flush(&udp_tbl, 0,
 					&pkts[i], nb_pkts - i);
 		}
-		/* Copy unprocessed packets */
-		if (unprocess_num > 0) {
-			memcpy(&pkts[i], unprocess_pkts,
-					sizeof(struct rte_mbuf *) *
-					unprocess_num);
+
+		if (do_tcp6_gro) {
+			i += gro_tcp6_tbl_timeout_flush(&tcp6_tbl, 0,
+					&pkts[i], nb_pkts - i);
 		}
-		nb_after_gro = i + unprocess_num;
 	}
 
 	return nb_after_gro;
@@ -323,13 +362,13 @@  rte_gro_reassemble(struct rte_mbuf **pkts,
 {
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	struct gro_ctx *gro_ctx = ctx;
-	void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
+	void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl, *tcp6_tbl;
 	uint64_t current_time;
 	uint16_t i, unprocess_num = 0;
-	uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro;
+	uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro, do_tcp6_gro;
 
 	if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
-					RTE_GRO_TCP_IPV4 |
+					RTE_GRO_TCP_IPV4 | RTE_GRO_TCP_IPV6 |
 					RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
 					RTE_GRO_UDP_IPV4)) == 0))
 		return nb_pkts;
@@ -338,6 +377,7 @@  rte_gro_reassemble(struct rte_mbuf **pkts,
 	vxlan_tcp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
 	udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
 	vxlan_udp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
+	tcp6_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV6_INDEX];
 
 	do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
 		RTE_GRO_TCP_IPV4;
@@ -347,6 +387,7 @@  rte_gro_reassemble(struct rte_mbuf **pkts,
 		RTE_GRO_UDP_IPV4;
 	do_vxlan_udp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
 		RTE_GRO_IPV4_VXLAN_UDP_IPV4;
+	do_tcp6_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV6) == RTE_GRO_TCP_IPV6;
 
 	current_time = rte_rdtsc();
 
@@ -371,6 +412,11 @@  rte_gro_reassemble(struct rte_mbuf **pkts,
 			if (gro_udp4_reassemble(pkts[i], udp_tbl,
 						current_time) < 0)
 				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (IS_IPV6_TCP_PKT(pkts[i]->packet_type) &&
+				do_tcp6_gro) {
+			if (gro_tcp6_reassemble(pkts[i], tcp6_tbl,
+						current_time) < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else
 			unprocess_pkts[unprocess_num++] = pkts[i];
 	}
@@ -426,6 +472,15 @@  rte_gro_timeout_flush(void *ctx,
 				gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX],
 				flush_timestamp,
 				&out[num], left_nb_out);
+		left_nb_out = max_nb_out - num;
+	}
+
+	if ((gro_types & RTE_GRO_TCP_IPV6) && left_nb_out > 0) {
+		num += gro_tcp6_tbl_timeout_flush(
+				gro_ctx->tbls[RTE_GRO_TCP_IPV6_INDEX],
+				flush_timestamp,
+				&out[num], left_nb_out);
+
 	}
 
 	return num;
diff --git a/lib/gro/rte_gro.h b/lib/gro/rte_gro.h
index 9f9ed4935a..c83dfd9ad1 100644
--- a/lib/gro/rte_gro.h
+++ b/lib/gro/rte_gro.h
@@ -38,6 +38,9 @@  extern "C" {
 #define RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX 3
 #define RTE_GRO_IPV4_VXLAN_UDP_IPV4 (1ULL << RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX)
 /**< VxLAN UDP/IPv4 GRO flag. */
+#define RTE_GRO_TCP_IPV6_INDEX 4
+#define RTE_GRO_TCP_IPV6 (1ULL << RTE_GRO_TCP_IPV6_INDEX)
+/**< TCP/IPv6 GRO flag. */
 
 /**
  * Structure used to create GRO context objects or used to pass