[v6,06/10] ipsec: add transmit segmentation offload support

Message ID 20210917091747.1528262-7-radu.nicolau@intel.com (mailing list archive)
State Superseded, archived
Delegated to: akhil goyal
Series: new features for ipsec and security libraries

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Radu Nicolau Sept. 17, 2021, 9:17 a.m. UTC
  Add support for transmit segmentation offload to inline crypto processing
mode. This offload is not supported by other offload modes, as at a
minimum it requires inline crypto for IPsec to be supported on the
network interface.

Signed-off-by: Declan Doherty <declan.doherty@intel.com>
Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
Signed-off-by: Abhijit Sinha <abhijit.sinha@intel.com>
Signed-off-by: Daniel Martin Buckley <daniel.m.buckley@intel.com>
Acked-by: Fan Zhang <roy.fan.zhang@intel.com>
---
 lib/ipsec/esp_inb.c  |   4 +-
 lib/ipsec/esp_outb.c | 115 +++++++++++++++++++++++++++++++++++--------
 lib/ipsec/iph.h      |  10 +++-
 lib/ipsec/sa.c       |   6 +++
 lib/ipsec/sa.h       |   4 ++
 5 files changed, 114 insertions(+), 25 deletions(-)
  

Comments

Ananyev, Konstantin Sept. 23, 2021, 2:09 p.m. UTC | #1
> Add support for transmit segmentation offload to inline crypto processing
> mode. This offload is not supported by other offload modes, as at a
> minimum it requires inline crypto for IPsec to be supported on the
> network interface.
> 
> Signed-off-by: Declan Doherty <declan.doherty@intel.com>
> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
> Signed-off-by: Abhijit Sinha <abhijit.sinha@intel.com>
> Signed-off-by: Daniel Martin Buckley <daniel.m.buckley@intel.com>
> Acked-by: Fan Zhang <roy.fan.zhang@intel.com>
> ---
>  lib/ipsec/esp_inb.c  |   4 +-
>  lib/ipsec/esp_outb.c | 115 +++++++++++++++++++++++++++++++++++--------
>  lib/ipsec/iph.h      |  10 +++-
>  lib/ipsec/sa.c       |   6 +++
>  lib/ipsec/sa.h       |   4 ++
>  5 files changed, 114 insertions(+), 25 deletions(-)
> 
> diff --git a/lib/ipsec/esp_inb.c b/lib/ipsec/esp_inb.c
> index d66c88f05d..a6ab8fbdd5 100644
> --- a/lib/ipsec/esp_inb.c
> +++ b/lib/ipsec/esp_inb.c
> @@ -668,8 +668,8 @@ trs_process(const struct rte_ipsec_sa *sa, struct rte_mbuf *mb[],
>  			/* modify packet's layout */
>  			np = trs_process_step2(mb[i], ml[i], hl[i], cofs,
>  				to[i], tl, sqn + k);
> -			update_trs_l3hdr(sa, np + l2, mb[i]->pkt_len,
> -				l2, hl[i] - l2, espt[i].next_proto);
> +			update_trs_l34hdrs(sa, np + l2, mb[i]->pkt_len,
> +				l2, hl[i] - l2, espt[i].next_proto, 0);
> 
>  			/* update mbuf's metadata */
>  			trs_process_step3(mb[i]);
> diff --git a/lib/ipsec/esp_outb.c b/lib/ipsec/esp_outb.c
> index a3f77469c3..9fc7075796 100644
> --- a/lib/ipsec/esp_outb.c
> +++ b/lib/ipsec/esp_outb.c
> @@ -2,6 +2,8 @@
>   * Copyright(c) 2018-2020 Intel Corporation
>   */
> 
> +#include <math.h>
> +
>  #include <rte_ipsec.h>
>  #include <rte_esp.h>
>  #include <rte_ip.h>
> @@ -156,11 +158,20 @@ outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> 
>  	/* number of bytes to encrypt */
>  	clen = plen + sizeof(*espt);
> -	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> +
> +	/* We don't need to pad/align packet when using TSO offload */
> +	if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
> +		clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> +

Here and everywhere:
It doesn't look nice that we have to pollute generic functions with
checking TSO-specific flags all over the place.
Can we have a specific prepare/process function for the inline+tso case,
as we do for the cpu and inline cases right now?
Or just update the inline version?
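
A small helper would at least keep the flag test in one place; a sketch of
that direction under the patch's current approach (hypothetical name, not
part of this patch):

	#include <rte_mbuf.h>

	/* hypothetical helper: did the caller request TSO for this packet? */
	static inline int
	mbuf_tx_tso(const struct rte_mbuf *mb)
	{
		return (mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG)) != 0;
	}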

> 
>  	/* pad length + esp tail */
>  	pdlen = clen - plen;
> -	tlen = pdlen + sa->icv_len + sqh_len;
> +
> +	/* We don't append ICV length when using TSO offload */
> +	if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
> +		tlen = pdlen + sa->icv_len + sqh_len;
> +	else
> +		tlen = pdlen + sqh_len;
> 
>  	/* do append and prepend */
>  	ml = rte_pktmbuf_lastseg(mb);
> @@ -337,6 +348,7 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>  	char *ph, *pt;
>  	uint64_t *iv;
>  	uint32_t l2len, l3len;
> +	uint8_t tso = mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG) ? 1 : 0;
> 
>  	l2len = mb->l2_len;
>  	l3len = mb->l3_len;
> @@ -349,11 +361,19 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> 
>  	/* number of bytes to encrypt */
>  	clen = plen + sizeof(*espt);
> -	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> +
> +	/* We don't need to pad/align packet when using TSO offload */
> +	if (likely(!tso))
> +		clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> 
>  	/* pad length + esp tail */
>  	pdlen = clen - plen;
> -	tlen = pdlen + sa->icv_len + sqh_len;
> +
> +	/* We don't append ICV length when using TSO offload */
> +	if (likely(!tso))
> +		tlen = pdlen + sa->icv_len + sqh_len;
> +	else
> +		tlen = pdlen + sqh_len;
> 
>  	/* do append and insert */
>  	ml = rte_pktmbuf_lastseg(mb);
> @@ -375,8 +395,8 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>  	insert_esph(ph, ph + hlen, uhlen);
> 
>  	/* update ip  header fields */
> -	np = update_trs_l3hdr(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
> -			l3len, IPPROTO_ESP);
> +	np = update_trs_l34hdrs(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
> +			l3len, IPPROTO_ESP, tso);
> 
>  	/* update spi, seqn and iv */
>  	esph = (struct rte_esp_hdr *)(ph + uhlen);
> @@ -651,6 +671,33 @@ inline_outb_mbuf_prepare(const struct rte_ipsec_session *ss,
>  	}
>  }
> 
> +/* check if packet will exceed MSS and segmentation is required */
> +static inline int
> +esn_outb_nb_segments(const struct rte_ipsec_sa *sa, struct rte_mbuf *m) {
> +	uint16_t segments = 1;
> +	uint16_t pkt_l3len = m->pkt_len - m->l2_len;
> +
> +	/* Only support segmentation for UDP/TCP flows */
> +	if (!(m->packet_type & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)))
> +		return segments;
> +
> +	if (sa->tso.enabled && pkt_l3len > sa->tso.mss) {
> +		segments = ceil((float)pkt_l3len / sa->tso.mss);

Float calculations in the middle of data-path?
Just to calculate roundup?
Doesn't look good to me at all.
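
The same round-up follows from integer arithmetic alone; a minimal sketch of
the suggested replacement (not the code that eventually landed):

	/* integer ceiling division: segments = ceil(pkt_l3len / mss) */
	segments = (pkt_l3len + sa->tso.mss - 1) / sa->tso.mss;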

> +
> +		if  (m->packet_type & RTE_PTYPE_L4_TCP) {
> +			m->ol_flags |= (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM);

That's really strange - why would the ipsec library set PKT_TX_TCP_SEG unconditionally?
That should be the responsibility of the upper layer, I think.
In the lib we should only check whether tso was requested for that packet or not.
Same for UDP.

> +			m->l4_len = sizeof(struct rte_tcp_hdr);

Hmm, how do we know there are no TCP options present for that packet?
Wouldn't it be better to expect the user to provide a proper l4_len for such packets?
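
For illustration, the actual TCP header length (options included) can be read
from the header's data offset field; a sketch, assuming the L4 header sits in
the first mbuf segment:

	/* data_off upper 4 bits = TCP header length in 32-bit words */
	const struct rte_tcp_hdr *th;

	th = rte_pktmbuf_mtod_offset(m, const struct rte_tcp_hdr *,
		m->l2_len + m->l3_len);
	m->l4_len = (th->data_off >> 4) * 4;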

> +		} else {
> +			m->ol_flags |= (PKT_TX_UDP_SEG | PKT_TX_UDP_CKSUM);
> +			m->l4_len = sizeof(struct rte_udp_hdr);
> +		}
> +
> +		m->tso_segsz = sa->tso.mss;
> +	}
> +
> +	return segments;
> +}
> +
>  /*
>   * process group of ESP outbound tunnel packets destined for
>   * INLINE_CRYPTO type of device.
> @@ -660,24 +707,29 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
>  	struct rte_mbuf *mb[], uint16_t num)
>  {
>  	int32_t rc;
> -	uint32_t i, k, n;
> +	uint32_t i, k, nb_sqn = 0, nb_sqn_alloc;
>  	uint64_t sqn;
>  	rte_be64_t sqc;
>  	struct rte_ipsec_sa *sa;
>  	union sym_op_data icv;
>  	uint64_t iv[IPSEC_MAX_IV_QWORD];
>  	uint32_t dr[num];
> +	uint16_t nb_segs[num];
> 
>  	sa = ss->sa;
> 
> -	n = num;
> -	sqn = esn_outb_update_sqn(sa, &n);
> -	if (n != num)
> +	for (i = 0; i != num; i++) {
> +		nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
> +		nb_sqn += nb_segs[i];
> +	}
> +
> +	nb_sqn_alloc = nb_sqn;
> +	sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
> +	if (nb_sqn_alloc != nb_sqn)
>  		rte_errno = EOVERFLOW;
> 
>  	k = 0;
> -	for (i = 0; i != n; i++) {
> -
> +	for (i = 0; i != num; i++) {
>  		sqc = rte_cpu_to_be_64(sqn + i);
>  		gen_iv(iv, sqc);
> 
> @@ -691,11 +743,18 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
>  			dr[i - k] = i;
>  			rte_errno = -rc;
>  		}
> +
> +		/**
> +		 * If packet is using tso, increment sqn by the number of
> +		 * segments for	packet
> +		 */
> +		if  (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
> +			sqn += nb_segs[i] - 1;
>  	}
> 
>  	/* copy not processed mbufs beyond good ones */
> -	if (k != n && k != 0)
> -		move_bad_mbufs(mb, dr, n, n - k);
> +	if (k != num && k != 0)
> +		move_bad_mbufs(mb, dr, num, num - k);
> 
>  	inline_outb_mbuf_prepare(ss, mb, k);
>  	return k;
> @@ -710,23 +769,30 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
>  	struct rte_mbuf *mb[], uint16_t num)
>  {
>  	int32_t rc;
> -	uint32_t i, k, n;
> +	uint32_t i, k, nb_sqn, nb_sqn_alloc;
>  	uint64_t sqn;
>  	rte_be64_t sqc;
>  	struct rte_ipsec_sa *sa;
>  	union sym_op_data icv;
>  	uint64_t iv[IPSEC_MAX_IV_QWORD];
>  	uint32_t dr[num];
> +	uint16_t nb_segs[num];
> 
>  	sa = ss->sa;
> 
> -	n = num;
> -	sqn = esn_outb_update_sqn(sa, &n);
> -	if (n != num)
> +	/* Calculate number of sequence numbers required */
> +	for (i = 0, nb_sqn = 0; i != num; i++) {
> +		nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
> +		nb_sqn += nb_segs[i];
> +	}
> +
> +	nb_sqn_alloc = nb_sqn;
> +	sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
> +	if (nb_sqn_alloc != nb_sqn)
>  		rte_errno = EOVERFLOW;
> 
>  	k = 0;
> -	for (i = 0; i != n; i++) {
> +	for (i = 0; i != num; i++) {
> 
>  		sqc = rte_cpu_to_be_64(sqn + i);
>  		gen_iv(iv, sqc);
> @@ -741,11 +807,18 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
>  			dr[i - k] = i;
>  			rte_errno = -rc;
>  		}
> +
> +		/**
> +		 * If packet is using tso, increment sqn by the number of
> +		 * segments for	packet
> +		 */
> +		if  (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
> +			sqn += nb_segs[i] - 1;
>  	}
> 
>  	/* copy not processed mbufs beyond good ones */
> -	if (k != n && k != 0)
> -		move_bad_mbufs(mb, dr, n, n - k);
> +	if (k != num && k != 0)
> +		move_bad_mbufs(mb, dr, num, num - k);
> 
>  	inline_outb_mbuf_prepare(ss, mb, k);
>  	return k;
> diff --git a/lib/ipsec/iph.h b/lib/ipsec/iph.h
> index 861f16905a..2d223199ac 100644
> --- a/lib/ipsec/iph.h
> +++ b/lib/ipsec/iph.h
> @@ -6,6 +6,8 @@
>  #define _IPH_H_
> 
>  #include <rte_ip.h>
> +#include <rte_udp.h>
> +#include <rte_tcp.h>
> 
>  /**
>   * @file iph.h
> @@ -39,8 +41,8 @@ insert_esph(char *np, char *op, uint32_t hlen)
> 
>  /* update original ip header fields for transport case */
>  static inline int
> -update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> -		uint32_t l2len, uint32_t l3len, uint8_t proto)
> +update_trs_l34hdrs(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> +		uint32_t l2len, uint32_t l3len, uint8_t proto, uint8_t tso)

Hmm... why to change name of the function?

>  {
>  	int32_t rc;
> 
> @@ -51,6 +53,10 @@ update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
>  		v4h = p;
>  		rc = v4h->next_proto_id;
>  		v4h->next_proto_id = proto;
> +		if (tso) {
> +			v4h->hdr_checksum = 0;
> +			v4h->total_length = 0;

total_length will be overwritten unconditionally on the next line below.

Another question - why is it necessary?
Is it a HW-specific requirement or ... ?
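
If only the checksum has to be cleared for TSO, the redundant store could
simply be dropped; a sketch of the simplification being asked for:

	if (tso)
		v4h->hdr_checksum = 0; /* recomputed by the NIC per segment */
	v4h->total_length = rte_cpu_to_be_16(plen - l2len);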


> +		}
>  		v4h->total_length = rte_cpu_to_be_16(plen - l2len);


>  	/* IPv6 */
>  	} else {
> diff --git a/lib/ipsec/sa.c b/lib/ipsec/sa.c
> index 720e0f365b..2ecbbce0a4 100644
> --- a/lib/ipsec/sa.c
> +++ b/lib/ipsec/sa.c
> @@ -565,6 +565,12 @@ rte_ipsec_sa_init(struct rte_ipsec_sa *sa, const struct rte_ipsec_sa_prm *prm,
>  	sa->type = type;
>  	sa->size = sz;
> 
> +
> +	if (prm->ipsec_xform.options.tso == 1) {
> +		sa->tso.enabled = 1;
> +		sa->tso.mss = prm->ipsec_xform.mss;
> +	}
> +
>  	/* check for ESN flag */
>  	sa->sqn_mask = (prm->ipsec_xform.options.esn == 0) ?
>  		UINT32_MAX : UINT64_MAX;
> diff --git a/lib/ipsec/sa.h b/lib/ipsec/sa.h
> index 107ebd1519..5e237f3525 100644
> --- a/lib/ipsec/sa.h
> +++ b/lib/ipsec/sa.h
> @@ -113,6 +113,10 @@ struct rte_ipsec_sa {
>  	uint8_t iv_len;
>  	uint8_t pad_align;
>  	uint8_t tos_mask;
> +	struct {
> +		uint8_t enabled:1;
> +		uint16_t mss;
> +	} tso;

Wouldn't one field be enough?
uint16_t tso_mss;
And if it is zero, then tso is disabled.
In fact, do we need it at all?
Wouldn't it be better to request the user to fill mbuf->tso_segsz properly for us?
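
The one-field variant would collapse the struct to something like:

	uint16_t tso_mss; /* 0 - TSO disabled for this SA */

with the data-path check becoming "sa->tso_mss != 0 && pkt_l3len > sa->tso_mss".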

> 
>  	/* template for tunnel header */
>  	uint8_t hdr[IPSEC_MAX_HDR_SIZE];
> --
> 2.25.1
  
Radu Nicolau Sept. 28, 2021, 3:14 p.m. UTC | #2
On 9/23/2021 3:09 PM, Ananyev, Konstantin wrote:
>
>> Add support for transmit segmentation offload to inline crypto processing
>> mode. This offload is not supported by other offload modes, as at a
>> minimum it requires inline crypto for IPsec to be supported on the
>> network interface.
>>
>> Signed-off-by: Declan Doherty <declan.doherty@intel.com>
>> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
>> Signed-off-by: Abhijit Sinha <abhijit.sinha@intel.com>
>> Signed-off-by: Daniel Martin Buckley <daniel.m.buckley@intel.com>
>> Acked-by: Fan Zhang <roy.fan.zhang@intel.com>
>> ---
>>   lib/ipsec/esp_inb.c  |   4 +-
>>   lib/ipsec/esp_outb.c | 115 +++++++++++++++++++++++++++++++++++--------
>>   lib/ipsec/iph.h      |  10 +++-
>>   lib/ipsec/sa.c       |   6 +++
>>   lib/ipsec/sa.h       |   4 ++
>>   5 files changed, 114 insertions(+), 25 deletions(-)
>>
>> diff --git a/lib/ipsec/esp_inb.c b/lib/ipsec/esp_inb.c
>> index d66c88f05d..a6ab8fbdd5 100644
>> --- a/lib/ipsec/esp_inb.c
>> +++ b/lib/ipsec/esp_inb.c
>> @@ -668,8 +668,8 @@ trs_process(const struct rte_ipsec_sa *sa, struct rte_mbuf *mb[],
>>   			/* modify packet's layout */
>>   			np = trs_process_step2(mb[i], ml[i], hl[i], cofs,
>>   				to[i], tl, sqn + k);
>> -			update_trs_l3hdr(sa, np + l2, mb[i]->pkt_len,
>> -				l2, hl[i] - l2, espt[i].next_proto);
>> +			update_trs_l34hdrs(sa, np + l2, mb[i]->pkt_len,
>> +				l2, hl[i] - l2, espt[i].next_proto, 0);
>>
>>   			/* update mbuf's metadata */
>>   			trs_process_step3(mb[i]);
>> diff --git a/lib/ipsec/esp_outb.c b/lib/ipsec/esp_outb.c
>> index a3f77469c3..9fc7075796 100644
>> --- a/lib/ipsec/esp_outb.c
>> +++ b/lib/ipsec/esp_outb.c
>> @@ -2,6 +2,8 @@
>>    * Copyright(c) 2018-2020 Intel Corporation
>>    */
>>
>> +#include <math.h>
>> +
>>   #include <rte_ipsec.h>
>>   #include <rte_esp.h>
>>   #include <rte_ip.h>
>> @@ -156,11 +158,20 @@ outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>>
>>   	/* number of bytes to encrypt */
>>   	clen = plen + sizeof(*espt);
>> -	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
>> +
>> +	/* We don't need to pad/align packet when using TSO offload */
>> +	if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
>> +		clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
>> +
> Here and everywhere:
> It doesn't look nice that we have to pollute generic functions with
> checking TSO-specific flags all over the place.
> Can we have a specific prepare/process function for the inline+tso case,
> as we do for the cpu and inline cases right now?
> Or just update the inline version?
I looked at doing this, but unless I copy these 2 functions I can't move
this out.
>
>>   	/* pad length + esp tail */
>>   	pdlen = clen - plen;
>> -	tlen = pdlen + sa->icv_len + sqh_len;
>> +
>> +	/* We don't append ICV length when using TSO offload */
>> +	if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
>> +		tlen = pdlen + sa->icv_len + sqh_len;
>> +	else
>> +		tlen = pdlen + sqh_len;
>>
>>   	/* do append and prepend */
>>   	ml = rte_pktmbuf_lastseg(mb);
>> @@ -337,6 +348,7 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>>   	char *ph, *pt;
>>   	uint64_t *iv;
>>   	uint32_t l2len, l3len;
>> +	uint8_t tso = mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG) ? 1 : 0;
>>
>>   	l2len = mb->l2_len;
>>   	l3len = mb->l3_len;
>> @@ -349,11 +361,19 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>>
>>   	/* number of bytes to encrypt */
>>   	clen = plen + sizeof(*espt);
>> -	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
>> +
>> +	/* We don't need to pad/align packet when using TSO offload */
>> +	if (likely(!tso))
>> +		clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
>>
>>   	/* pad length + esp tail */
>>   	pdlen = clen - plen;
>> -	tlen = pdlen + sa->icv_len + sqh_len;
>> +
>> +	/* We don't append ICV length when using TSO offload */
>> +	if (likely(!tso))
>> +		tlen = pdlen + sa->icv_len + sqh_len;
>> +	else
>> +		tlen = pdlen + sqh_len;
>>
>>   	/* do append and insert */
>>   	ml = rte_pktmbuf_lastseg(mb);
>> @@ -375,8 +395,8 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>>   	insert_esph(ph, ph + hlen, uhlen);
>>
>>   	/* update ip  header fields */
>> -	np = update_trs_l3hdr(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
>> -			l3len, IPPROTO_ESP);
>> +	np = update_trs_l34hdrs(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
>> +			l3len, IPPROTO_ESP, tso);
>>
>>   	/* update spi, seqn and iv */
>>   	esph = (struct rte_esp_hdr *)(ph + uhlen);
>> @@ -651,6 +671,33 @@ inline_outb_mbuf_prepare(const struct rte_ipsec_session *ss,
>>   	}
>>   }
>>
>> +/* check if packet will exceed MSS and segmentation is required */
>> +static inline int
>> +esn_outb_nb_segments(const struct rte_ipsec_sa *sa, struct rte_mbuf *m) {
>> +	uint16_t segments = 1;
>> +	uint16_t pkt_l3len = m->pkt_len - m->l2_len;
>> +
>> +	/* Only support segmentation for UDP/TCP flows */
>> +	if (!(m->packet_type & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)))
>> +		return segments;
>> +
>> +	if (sa->tso.enabled && pkt_l3len > sa->tso.mss) {
>> +		segments = ceil((float)pkt_l3len / sa->tso.mss);
> Float calculations in the middle of data-path?
> Just to calculate roundup?
> Doesn't look good to me at all.
It doesn't look good to me either - I will rework it.
>
>> +
>> +		if  (m->packet_type & RTE_PTYPE_L4_TCP) {
>> +			m->ol_flags |= (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM);
> That's really strange - why would the ipsec library set PKT_TX_TCP_SEG unconditionally?
> That should be the responsibility of the upper layer, I think.
> In the lib we should only check whether tso was requested for that packet or not.
> Same for UDP.
These are under an if(TSO) condition.
>
>> +			m->l4_len = sizeof(struct rte_tcp_hdr);
> Hmm, how do we know there are no TCP options present for that packet?
> Wouldn't it be better to expect the user to provide a proper l4_len for such packets?
You're right, I will update it.

>
>> +		} else {
>> +			m->ol_flags |= (PKT_TX_UDP_SEG | PKT_TX_UDP_CKSUM);
>> +			m->l4_len = sizeof(struct rte_udp_hdr);
>> +		}
>> +
>> +		m->tso_segsz = sa->tso.mss;
>> +	}
>> +
>> +	return segments;
>> +}
>> +
>>   /*
>>    * process group of ESP outbound tunnel packets destined for
>>    * INLINE_CRYPTO type of device.
>> @@ -660,24 +707,29 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
>>   	struct rte_mbuf *mb[], uint16_t num)
>>   {
>>   	int32_t rc;
>> -	uint32_t i, k, n;
>> +	uint32_t i, k, nb_sqn = 0, nb_sqn_alloc;
>>   	uint64_t sqn;
>>   	rte_be64_t sqc;
>>   	struct rte_ipsec_sa *sa;
>>   	union sym_op_data icv;
>>   	uint64_t iv[IPSEC_MAX_IV_QWORD];
>>   	uint32_t dr[num];
>> +	uint16_t nb_segs[num];
>>
>>   	sa = ss->sa;
>>
>> -	n = num;
>> -	sqn = esn_outb_update_sqn(sa, &n);
>> -	if (n != num)
>> +	for (i = 0; i != num; i++) {
>> +		nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
>> +		nb_sqn += nb_segs[i];
>> +	}
>> +
>> +	nb_sqn_alloc = nb_sqn;
>> +	sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
>> +	if (nb_sqn_alloc != nb_sqn)
>>   		rte_errno = EOVERFLOW;
>>
>>   	k = 0;
>> -	for (i = 0; i != n; i++) {
>> -
>> +	for (i = 0; i != num; i++) {
>>   		sqc = rte_cpu_to_be_64(sqn + i);
>>   		gen_iv(iv, sqc);
>>
>> @@ -691,11 +743,18 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
>>   			dr[i - k] = i;
>>   			rte_errno = -rc;
>>   		}
>> +
>> +		/**
>> +		 * If packet is using tso, increment sqn by the number of
>> +		 * segments for	packet
>> +		 */
>> +		if  (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
>> +			sqn += nb_segs[i] - 1;
>>   	}
>>
>>   	/* copy not processed mbufs beyond good ones */
>> -	if (k != n && k != 0)
>> -		move_bad_mbufs(mb, dr, n, n - k);
>> +	if (k != num && k != 0)
>> +		move_bad_mbufs(mb, dr, num, num - k);
>>
>>   	inline_outb_mbuf_prepare(ss, mb, k);
>>   	return k;
>> @@ -710,23 +769,30 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
>>   	struct rte_mbuf *mb[], uint16_t num)
>>   {
>>   	int32_t rc;
>> -	uint32_t i, k, n;
>> +	uint32_t i, k, nb_sqn, nb_sqn_alloc;
>>   	uint64_t sqn;
>>   	rte_be64_t sqc;
>>   	struct rte_ipsec_sa *sa;
>>   	union sym_op_data icv;
>>   	uint64_t iv[IPSEC_MAX_IV_QWORD];
>>   	uint32_t dr[num];
>> +	uint16_t nb_segs[num];
>>
>>   	sa = ss->sa;
>>
>> -	n = num;
>> -	sqn = esn_outb_update_sqn(sa, &n);
>> -	if (n != num)
>> +	/* Calculate number of sequence numbers required */
>> +	for (i = 0, nb_sqn = 0; i != num; i++) {
>> +		nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
>> +		nb_sqn += nb_segs[i];
>> +	}
>> +
>> +	nb_sqn_alloc = nb_sqn;
>> +	sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
>> +	if (nb_sqn_alloc != nb_sqn)
>>   		rte_errno = EOVERFLOW;
>>
>>   	k = 0;
>> -	for (i = 0; i != n; i++) {
>> +	for (i = 0; i != num; i++) {
>>
>>   		sqc = rte_cpu_to_be_64(sqn + i);
>>   		gen_iv(iv, sqc);
>> @@ -741,11 +807,18 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
>>   			dr[i - k] = i;
>>   			rte_errno = -rc;
>>   		}
>> +
>> +		/**
>> +		 * If packet is using tso, increment sqn by the number of
>> +		 * segments for	packet
>> +		 */
>> +		if  (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
>> +			sqn += nb_segs[i] - 1;
>>   	}
>>
>>   	/* copy not processed mbufs beyond good ones */
>> -	if (k != n && k != 0)
>> -		move_bad_mbufs(mb, dr, n, n - k);
>> +	if (k != num && k != 0)
>> +		move_bad_mbufs(mb, dr, num, num - k);
>>
>>   	inline_outb_mbuf_prepare(ss, mb, k);
>>   	return k;
>> diff --git a/lib/ipsec/iph.h b/lib/ipsec/iph.h
>> index 861f16905a..2d223199ac 100644
>> --- a/lib/ipsec/iph.h
>> +++ b/lib/ipsec/iph.h
>> @@ -6,6 +6,8 @@
>>   #define _IPH_H_
>>
>>   #include <rte_ip.h>
>> +#include <rte_udp.h>
>> +#include <rte_tcp.h>
>>
>>   /**
>>    * @file iph.h
>> @@ -39,8 +41,8 @@ insert_esph(char *np, char *op, uint32_t hlen)
>>
>>   /* update original ip header fields for transport case */
>>   static inline int
>> -update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
>> -		uint32_t l2len, uint32_t l3len, uint8_t proto)
>> +update_trs_l34hdrs(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
>> +		uint32_t l2len, uint32_t l3len, uint8_t proto, uint8_t tso)
> Hmm... why to change name of the function?
>
>>   {
>>   	int32_t rc;
>>
>> @@ -51,6 +53,10 @@ update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
>>   		v4h = p;
>>   		rc = v4h->next_proto_id;
>>   		v4h->next_proto_id = proto;
>> +		if (tso) {
>> +			v4h->hdr_checksum = 0;
>> +			v4h->total_length = 0;
> total_length will be overwritten unconditionally on the next line below.
>
> Another question - why is it necessary?
> Is it a HW-specific requirement or ... ?
It looks wrong; I will rewrite this.
>
>
>> +		}
>>   		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
>
>>   	/* IPv6 */
>>   	} else {
>> diff --git a/lib/ipsec/sa.c b/lib/ipsec/sa.c
>> index 720e0f365b..2ecbbce0a4 100644
>> --- a/lib/ipsec/sa.c
>> +++ b/lib/ipsec/sa.c
>> @@ -565,6 +565,12 @@ rte_ipsec_sa_init(struct rte_ipsec_sa *sa, const struct rte_ipsec_sa_prm *prm,
>>   	sa->type = type;
>>   	sa->size = sz;
>>
>> +
>> +	if (prm->ipsec_xform.options.tso == 1) {
>> +		sa->tso.enabled = 1;
>> +		sa->tso.mss = prm->ipsec_xform.mss;
>> +	}
>> +
>>   	/* check for ESN flag */
>>   	sa->sqn_mask = (prm->ipsec_xform.options.esn == 0) ?
>>   		UINT32_MAX : UINT64_MAX;
>> diff --git a/lib/ipsec/sa.h b/lib/ipsec/sa.h
>> index 107ebd1519..5e237f3525 100644
>> --- a/lib/ipsec/sa.h
>> +++ b/lib/ipsec/sa.h
>> @@ -113,6 +113,10 @@ struct rte_ipsec_sa {
>>   	uint8_t iv_len;
>>   	uint8_t pad_align;
>>   	uint8_t tos_mask;
>> +	struct {
>> +		uint8_t enabled:1;
>> +		uint16_t mss;
>> +	} tso;
> Wouldn't one field be enough?
> uint16_t tso_mss;
> And if it is zero, then tso is disabled.
> In fact, do we need it at all?
> Wouldn't it be better to request the user to fill mbuf->tso_segsz properly for us?

We added an option to rte_security_ipsec_sa_options to allow the user to
enable TSO per SA and specify the MSS in the session parameters.

We can request the user to fill mbuf->tso_segsz, but with this patch we are
doing it for the user.

>
>>   	/* template for tunnel header */
>>   	uint8_t hdr[IPSEC_MAX_HDR_SIZE];
>> --
>> 2.25.1
  
Ananyev, Konstantin Sept. 28, 2021, 10:24 p.m. UTC | #3
> On 9/23/2021 3:09 PM, Ananyev, Konstantin wrote:
> >
> >> Add support for transmit segmentation offload to inline crypto processing
> >> mode. This offload is not supported by other offload modes, as at a
> >> minimum it requires inline crypto for IPsec to be supported on the
> >> network interface.
> >>
> >> Signed-off-by: Declan Doherty <declan.doherty@intel.com>
> >> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
> >> Signed-off-by: Abhijit Sinha <abhijit.sinha@intel.com>
> >> Signed-off-by: Daniel Martin Buckley <daniel.m.buckley@intel.com>
> >> Acked-by: Fan Zhang <roy.fan.zhang@intel.com>
> >> ---
> >>   lib/ipsec/esp_inb.c  |   4 +-
> >>   lib/ipsec/esp_outb.c | 115 +++++++++++++++++++++++++++++++++++--------
> >>   lib/ipsec/iph.h      |  10 +++-
> >>   lib/ipsec/sa.c       |   6 +++
> >>   lib/ipsec/sa.h       |   4 ++
> >>   5 files changed, 114 insertions(+), 25 deletions(-)
> >>
> >> diff --git a/lib/ipsec/esp_inb.c b/lib/ipsec/esp_inb.c
> >> index d66c88f05d..a6ab8fbdd5 100644
> >> --- a/lib/ipsec/esp_inb.c
> >> +++ b/lib/ipsec/esp_inb.c
> >> @@ -668,8 +668,8 @@ trs_process(const struct rte_ipsec_sa *sa, struct rte_mbuf *mb[],
> >>   			/* modify packet's layout */
> >>   			np = trs_process_step2(mb[i], ml[i], hl[i], cofs,
> >>   				to[i], tl, sqn + k);
> >> -			update_trs_l3hdr(sa, np + l2, mb[i]->pkt_len,
> >> -				l2, hl[i] - l2, espt[i].next_proto);
> >> +			update_trs_l34hdrs(sa, np + l2, mb[i]->pkt_len,
> >> +				l2, hl[i] - l2, espt[i].next_proto, 0);
> >>
> >>   			/* update mbuf's metadata */
> >>   			trs_process_step3(mb[i]);
> >> diff --git a/lib/ipsec/esp_outb.c b/lib/ipsec/esp_outb.c
> >> index a3f77469c3..9fc7075796 100644
> >> --- a/lib/ipsec/esp_outb.c
> >> +++ b/lib/ipsec/esp_outb.c
> >> @@ -2,6 +2,8 @@
> >>    * Copyright(c) 2018-2020 Intel Corporation
> >>    */
> >>
> >> +#include <math.h>
> >> +
> >>   #include <rte_ipsec.h>
> >>   #include <rte_esp.h>
> >>   #include <rte_ip.h>
> >> @@ -156,11 +158,20 @@ outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> >>
> >>   	/* number of bytes to encrypt */
> >>   	clen = plen + sizeof(*espt);
> >> -	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> >> +
> >> +	/* We don't need to pad/align packet when using TSO offload */
> >> +	if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
> >> +		clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> >> +
> > Here and everywhere:
> > It doesn't look nice that we have to pollute generic functions with
> > checking TSO-specific flags all over the place.
> > Can we have a specific prepare/process function for the inline+tso case,
> > as we do for the cpu and inline cases right now?
> > Or just update the inline version?
> I looked at doing this, but unless I copy these 2 functions I can't move
> this out.
> >
> >>   	/* pad length + esp tail */
> >>   	pdlen = clen - plen;
> >> -	tlen = pdlen + sa->icv_len + sqh_len;
> >> +
> >> +	/* We don't append ICV length when using TSO offload */
> >> +	if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
> >> +		tlen = pdlen + sa->icv_len + sqh_len;
> >> +	else
> >> +		tlen = pdlen + sqh_len;
> >>
> >>   	/* do append and prepend */
> >>   	ml = rte_pktmbuf_lastseg(mb);
> >> @@ -337,6 +348,7 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> >>   	char *ph, *pt;
> >>   	uint64_t *iv;
> >>   	uint32_t l2len, l3len;
> >> +	uint8_t tso = mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG) ? 1 : 0;
> >>
> >>   	l2len = mb->l2_len;
> >>   	l3len = mb->l3_len;
> >> @@ -349,11 +361,19 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> >>
> >>   	/* number of bytes to encrypt */
> >>   	clen = plen + sizeof(*espt);
> >> -	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> >> +
> >> +	/* We don't need to pad/align packet when using TSO offload */
> >> +	if (likely(!tso))
> >> +		clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> >>
> >>   	/* pad length + esp tail */
> >>   	pdlen = clen - plen;
> >> -	tlen = pdlen + sa->icv_len + sqh_len;
> >> +
> >> +	/* We don't append ICV length when using TSO offload */
> >> +	if (likely(!tso))
> >> +		tlen = pdlen + sa->icv_len + sqh_len;
> >> +	else
> >> +		tlen = pdlen + sqh_len;
> >>
> >>   	/* do append and insert */
> >>   	ml = rte_pktmbuf_lastseg(mb);
> >> @@ -375,8 +395,8 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> >>   	insert_esph(ph, ph + hlen, uhlen);
> >>
> >>   	/* update ip  header fields */
> >> -	np = update_trs_l3hdr(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
> >> -			l3len, IPPROTO_ESP);
> >> +	np = update_trs_l34hdrs(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
> >> +			l3len, IPPROTO_ESP, tso);
> >>
> >>   	/* update spi, seqn and iv */
> >>   	esph = (struct rte_esp_hdr *)(ph + uhlen);
> >> @@ -651,6 +671,33 @@ inline_outb_mbuf_prepare(const struct rte_ipsec_session *ss,
> >>   	}
> >>   }
> >>
> >> +/* check if packet will exceed MSS and segmentation is required */
> >> +static inline int
> >> +esn_outb_nb_segments(const struct rte_ipsec_sa *sa, struct rte_mbuf *m) {
> >> +	uint16_t segments = 1;
> >> +	uint16_t pkt_l3len = m->pkt_len - m->l2_len;
> >> +
> >> +	/* Only support segmentation for UDP/TCP flows */
> >> +	if (!(m->packet_type & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)))
> >> +		return segments;
> >> +
> >> +	if (sa->tso.enabled && pkt_l3len > sa->tso.mss) {
> >> +		segments = ceil((float)pkt_l3len / sa->tso.mss);
> > Float calculations in the middle of data-path?
> > Just to calculate roundup?
> > Doesn't look good to me at all.
> It doesn't look good to me either - I will rework it.
> >
> >> +
> >> +		if  (m->packet_type & RTE_PTYPE_L4_TCP) {
> >> +			m->ol_flags |= (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM);
> > That's really strange - why would the ipsec library set PKT_TX_TCP_SEG unconditionally?
> > That should be the responsibility of the upper layer, I think.
> > In the lib we should only check whether tso was requested for that packet or not.
> > Same for UDP.
> These are under an if(TSO) condition.
> >
> >> +			m->l4_len = sizeof(struct rte_tcp_hdr);
> > Hmm, how do we know there are no TCP options present for that packet?
> > Wouldn't it be better to expect the user to provide a proper l4_len for such packets?
> You're right, I will update it.
> 
> >
> >> +		} else {
> >> +			m->ol_flags |= (PKT_TX_UDP_SEG | PKT_TX_UDP_CKSUM);
> >> +			m->l4_len = sizeof(struct rte_udp_hdr);
> >> +		}
> >> +
> >> +		m->tso_segsz = sa->tso.mss;
> >> +	}
> >> +
> >> +	return segments;
> >> +}
> >> +
> >>   /*
> >>    * process group of ESP outbound tunnel packets destined for
> >>    * INLINE_CRYPTO type of device.
> >> @@ -660,24 +707,29 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
> >>   	struct rte_mbuf *mb[], uint16_t num)
> >>   {
> >>   	int32_t rc;
> >> -	uint32_t i, k, n;
> >> +	uint32_t i, k, nb_sqn = 0, nb_sqn_alloc;
> >>   	uint64_t sqn;
> >>   	rte_be64_t sqc;
> >>   	struct rte_ipsec_sa *sa;
> >>   	union sym_op_data icv;
> >>   	uint64_t iv[IPSEC_MAX_IV_QWORD];
> >>   	uint32_t dr[num];
> >> +	uint16_t nb_segs[num];
> >>
> >>   	sa = ss->sa;
> >>
> >> -	n = num;
> >> -	sqn = esn_outb_update_sqn(sa, &n);
> >> -	if (n != num)
> >> +	for (i = 0; i != num; i++) {
> >> +		nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
> >> +		nb_sqn += nb_segs[i];
> >> +	}
> >> +
> >> +	nb_sqn_alloc = nb_sqn;
> >> +	sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
> >> +	if (nb_sqn_alloc != nb_sqn)
> >>   		rte_errno = EOVERFLOW;
> >>
> >>   	k = 0;
> >> -	for (i = 0; i != n; i++) {
> >> -
> >> +	for (i = 0; i != num; i++) {
> >>   		sqc = rte_cpu_to_be_64(sqn + i);
> >>   		gen_iv(iv, sqc);
> >>
> >> @@ -691,11 +743,18 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
> >>   			dr[i - k] = i;
> >>   			rte_errno = -rc;
> >>   		}
> >> +
> >> +		/**
> >> +		 * If packet is using tso, increment sqn by the number of
> >> +		 * segments for	packet
> >> +		 */
> >> +		if  (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
> >> +			sqn += nb_segs[i] - 1;
> >>   	}
> >>
> >>   	/* copy not processed mbufs beyond good ones */
> >> -	if (k != n && k != 0)
> >> -		move_bad_mbufs(mb, dr, n, n - k);
> >> +	if (k != num && k != 0)
> >> +		move_bad_mbufs(mb, dr, num, num - k);
> >>
> >>   	inline_outb_mbuf_prepare(ss, mb, k);
> >>   	return k;
> >> @@ -710,23 +769,30 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
> >>   	struct rte_mbuf *mb[], uint16_t num)
> >>   {
> >>   	int32_t rc;
> >> -	uint32_t i, k, n;
> >> +	uint32_t i, k, nb_sqn, nb_sqn_alloc;
> >>   	uint64_t sqn;
> >>   	rte_be64_t sqc;
> >>   	struct rte_ipsec_sa *sa;
> >>   	union sym_op_data icv;
> >>   	uint64_t iv[IPSEC_MAX_IV_QWORD];
> >>   	uint32_t dr[num];
> >> +	uint16_t nb_segs[num];
> >>
> >>   	sa = ss->sa;
> >>
> >> -	n = num;
> >> -	sqn = esn_outb_update_sqn(sa, &n);
> >> -	if (n != num)
> >> +	/* Calculate number of sequence numbers required */
> >> +	for (i = 0, nb_sqn = 0; i != num; i++) {
> >> +		nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
> >> +		nb_sqn += nb_segs[i];
> >> +	}
> >> +
> >> +	nb_sqn_alloc = nb_sqn;
> >> +	sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
> >> +	if (nb_sqn_alloc != nb_sqn)
> >>   		rte_errno = EOVERFLOW;
> >>
> >>   	k = 0;
> >> -	for (i = 0; i != n; i++) {
> >> +	for (i = 0; i != num; i++) {
> >>
> >>   		sqc = rte_cpu_to_be_64(sqn + i);
> >>   		gen_iv(iv, sqc);
> >> @@ -741,11 +807,18 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
> >>   			dr[i - k] = i;
> >>   			rte_errno = -rc;
> >>   		}
> >> +
> >> +		/**
> >> +		 * If packet is using tso, increment sqn by the number of
> >> +		 * segments for	packet
> >> +		 */
> >> +		if  (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
> >> +			sqn += nb_segs[i] - 1;
> >>   	}
> >>
> >>   	/* copy not processed mbufs beyond good ones */
> >> -	if (k != n && k != 0)
> >> -		move_bad_mbufs(mb, dr, n, n - k);
> >> +	if (k != num && k != 0)
> >> +		move_bad_mbufs(mb, dr, num, num - k);
> >>
> >>   	inline_outb_mbuf_prepare(ss, mb, k);
> >>   	return k;
> >> diff --git a/lib/ipsec/iph.h b/lib/ipsec/iph.h
> >> index 861f16905a..2d223199ac 100644
> >> --- a/lib/ipsec/iph.h
> >> +++ b/lib/ipsec/iph.h
> >> @@ -6,6 +6,8 @@
> >>   #define _IPH_H_
> >>
> >>   #include <rte_ip.h>
> >> +#include <rte_udp.h>
> >> +#include <rte_tcp.h>
> >>
> >>   /**
> >>    * @file iph.h
> >> @@ -39,8 +41,8 @@ insert_esph(char *np, char *op, uint32_t hlen)
> >>
> >>   /* update original ip header fields for transport case */
> >>   static inline int
> >> -update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> >> -		uint32_t l2len, uint32_t l3len, uint8_t proto)
> >> +update_trs_l34hdrs(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> >> +		uint32_t l2len, uint32_t l3len, uint8_t proto, uint8_t tso)
> > Hmm... why to change name of the function?
> >
> >>   {
> >>   	int32_t rc;
> >>
> >> @@ -51,6 +53,10 @@ update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> >>   		v4h = p;
> >>   		rc = v4h->next_proto_id;
> >>   		v4h->next_proto_id = proto;
> >> +		if (tso) {
> >> +			v4h->hdr_checksum = 0;
> >> +			v4h->total_length = 0;
> > total_length will be overwritten unconditionally on the next line below.
> >
> > Another question - why is it necessary?
> > Is it a HW-specific requirement or ... ?
> It looks wrong; I will rewrite this.
> >
> >
> >> +		}
> >>   		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
> >
> >>   	/* IPv6 */
> >>   	} else {
> >> diff --git a/lib/ipsec/sa.c b/lib/ipsec/sa.c
> >> index 720e0f365b..2ecbbce0a4 100644
> >> --- a/lib/ipsec/sa.c
> >> +++ b/lib/ipsec/sa.c
> >> @@ -565,6 +565,12 @@ rte_ipsec_sa_init(struct rte_ipsec_sa *sa, const struct rte_ipsec_sa_prm *prm,
> >>   	sa->type = type;
> >>   	sa->size = sz;
> >>
> >> +
> >> +	if (prm->ipsec_xform.options.tso == 1) {
> >> +		sa->tso.enabled = 1;
> >> +		sa->tso.mss = prm->ipsec_xform.mss;
> >> +	}
> >> +
> >>   	/* check for ESN flag */
> >>   	sa->sqn_mask = (prm->ipsec_xform.options.esn == 0) ?
> >>   		UINT32_MAX : UINT64_MAX;
> >> diff --git a/lib/ipsec/sa.h b/lib/ipsec/sa.h
> >> index 107ebd1519..5e237f3525 100644
> >> --- a/lib/ipsec/sa.h
> >> +++ b/lib/ipsec/sa.h
> >> @@ -113,6 +113,10 @@ struct rte_ipsec_sa {
> >>   	uint8_t iv_len;
> >>   	uint8_t pad_align;
> >>   	uint8_t tos_mask;
> >> +	struct {
> >> +		uint8_t enabled:1;
> >> +		uint16_t mss;
> >> +	} tso;
> > Wouldn't one field be enough?
> > uint16_t tso_mss;
> > And if it is zero, then tso is disabled.
> > In fact, do we need it at all?
> > Wouldn't it be better to request the user to fill mbuf->tso_segsz properly for us?
> 
> We added an option to rte_security_ipsec_sa_options to allow the user to
> enable TSO per SA and specify the MSS in the session parameters.

On further thought, it doesn't look like a good approach to me:
on the one hand, the same SA can be used for multiple IP addresses;
on the other hand, the MSS value can differ on a per-connection basis.
So different TCP connections within the same SA can easily have different MSS values.
So I think we shouldn't save the mss in the SA at all.
Instead, we probably need to request the user to fill mbuf->tso_segsz for us.
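
In that model the application marks the packet itself before handing it to
the ipsec library; a caller-side sketch (th and conn_mss are assumed to be
in scope, filled per connection):

	/* application-provided TSO metadata, set before rte_ipsec processing */
	m->ol_flags |= PKT_TX_TCP_SEG;
	m->l4_len = (th->data_off >> 4) * 4; /* real TCP header length, options included */
	m->tso_segsz = conn_mss;             /* per-connection MSS */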

> 
> We can request the user to fill mbuf->tso_segsz, but with this patch we are
> doing it for the user.
> 
> >
> >>   	/* template for tunnel header */
> >>   	uint8_t hdr[IPSEC_MAX_HDR_SIZE];
> >> --
> >> 2.25.1
  

Patch

diff --git a/lib/ipsec/esp_inb.c b/lib/ipsec/esp_inb.c
index d66c88f05d..a6ab8fbdd5 100644
--- a/lib/ipsec/esp_inb.c
+++ b/lib/ipsec/esp_inb.c
@@ -668,8 +668,8 @@  trs_process(const struct rte_ipsec_sa *sa, struct rte_mbuf *mb[],
 			/* modify packet's layout */
 			np = trs_process_step2(mb[i], ml[i], hl[i], cofs,
 				to[i], tl, sqn + k);
-			update_trs_l3hdr(sa, np + l2, mb[i]->pkt_len,
-				l2, hl[i] - l2, espt[i].next_proto);
+			update_trs_l34hdrs(sa, np + l2, mb[i]->pkt_len,
+				l2, hl[i] - l2, espt[i].next_proto, 0);
 
 			/* update mbuf's metadata */
 			trs_process_step3(mb[i]);
diff --git a/lib/ipsec/esp_outb.c b/lib/ipsec/esp_outb.c
index a3f77469c3..9fc7075796 100644
--- a/lib/ipsec/esp_outb.c
+++ b/lib/ipsec/esp_outb.c
@@ -2,6 +2,8 @@ 
  * Copyright(c) 2018-2020 Intel Corporation
  */
 
+#include <math.h>
+
 #include <rte_ipsec.h>
 #include <rte_esp.h>
 #include <rte_ip.h>
@@ -156,11 +158,20 @@  outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
 
 	/* number of bytes to encrypt */
 	clen = plen + sizeof(*espt);
-	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
+
+	/* We don't need to pad/align packet when using TSO offload */
+	if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
+		clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
+
 
 	/* pad length + esp tail */
 	pdlen = clen - plen;
-	tlen = pdlen + sa->icv_len + sqh_len;
+
+	/* We don't append ICV length when using TSO offload */
+	if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
+		tlen = pdlen + sa->icv_len + sqh_len;
+	else
+		tlen = pdlen + sqh_len;
 
 	/* do append and prepend */
 	ml = rte_pktmbuf_lastseg(mb);
@@ -337,6 +348,7 @@  outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
 	char *ph, *pt;
 	uint64_t *iv;
 	uint32_t l2len, l3len;
+	uint8_t tso = mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG) ? 1 : 0;
 
 	l2len = mb->l2_len;
 	l3len = mb->l3_len;
@@ -349,11 +361,19 @@  outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
 
 	/* number of bytes to encrypt */
 	clen = plen + sizeof(*espt);
-	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
+
+	/* We don't need to pad/align packet when using TSO offload */
+	if (likely(!tso))
+		clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
 
 	/* pad length + esp tail */
 	pdlen = clen - plen;
-	tlen = pdlen + sa->icv_len + sqh_len;
+
+	/* We don't append ICV length when using TSO offload */
+	if (likely(!tso))
+		tlen = pdlen + sa->icv_len + sqh_len;
+	else
+		tlen = pdlen + sqh_len;
 
 	/* do append and insert */
 	ml = rte_pktmbuf_lastseg(mb);
@@ -375,8 +395,8 @@  outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
 	insert_esph(ph, ph + hlen, uhlen);
 
 	/* update ip  header fields */
-	np = update_trs_l3hdr(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
-			l3len, IPPROTO_ESP);
+	np = update_trs_l34hdrs(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
+			l3len, IPPROTO_ESP, tso);
 
 	/* update spi, seqn and iv */
 	esph = (struct rte_esp_hdr *)(ph + uhlen);
@@ -651,6 +671,33 @@  inline_outb_mbuf_prepare(const struct rte_ipsec_session *ss,
 	}
 }
 
+/* check if packet will exceed MSS and segmentation is required */
+static inline int
+esn_outb_nb_segments(const struct rte_ipsec_sa *sa, struct rte_mbuf *m) {
+	uint16_t segments = 1;
+	uint16_t pkt_l3len = m->pkt_len - m->l2_len;
+
+	/* Only support segmentation for UDP/TCP flows */
+	if (!(m->packet_type & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)))
+		return segments;
+
+	if (sa->tso.enabled && pkt_l3len > sa->tso.mss) {
+		segments = ceil((float)pkt_l3len / sa->tso.mss);
+
+		if  (m->packet_type & RTE_PTYPE_L4_TCP) {
+			m->ol_flags |= (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM);
+			m->l4_len = sizeof(struct rte_tcp_hdr);
+		} else {
+			m->ol_flags |= (PKT_TX_UDP_SEG | PKT_TX_UDP_CKSUM);
+			m->l4_len = sizeof(struct rte_udp_hdr);
+		}
+
+		m->tso_segsz = sa->tso.mss;
+	}
+
+	return segments;
+}
+
 /*
  * process group of ESP outbound tunnel packets destined for
  * INLINE_CRYPTO type of device.
@@ -660,24 +707,29 @@  inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
 	struct rte_mbuf *mb[], uint16_t num)
 {
 	int32_t rc;
-	uint32_t i, k, n;
+	uint32_t i, k, nb_sqn = 0, nb_sqn_alloc;
 	uint64_t sqn;
 	rte_be64_t sqc;
 	struct rte_ipsec_sa *sa;
 	union sym_op_data icv;
 	uint64_t iv[IPSEC_MAX_IV_QWORD];
 	uint32_t dr[num];
+	uint16_t nb_segs[num];
 
 	sa = ss->sa;
 
-	n = num;
-	sqn = esn_outb_update_sqn(sa, &n);
-	if (n != num)
+	for (i = 0; i != num; i++) {
+		nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
+		nb_sqn += nb_segs[i];
+	}
+
+	nb_sqn_alloc = nb_sqn;
+	sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
+	if (nb_sqn_alloc != nb_sqn)
 		rte_errno = EOVERFLOW;
 
 	k = 0;
-	for (i = 0; i != n; i++) {
-
+	for (i = 0; i != num; i++) {
 		sqc = rte_cpu_to_be_64(sqn + i);
 		gen_iv(iv, sqc);
 
@@ -691,11 +743,18 @@  inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
 			dr[i - k] = i;
 			rte_errno = -rc;
 		}
+
+		/**
+		 * If packet is using tso, increment sqn by the number of
+		 * segments for	packet
+		 */
+		if  (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
+			sqn += nb_segs[i] - 1;
 	}
 
 	/* copy not processed mbufs beyond good ones */
-	if (k != n && k != 0)
-		move_bad_mbufs(mb, dr, n, n - k);
+	if (k != num && k != 0)
+		move_bad_mbufs(mb, dr, num, num - k);
 
 	inline_outb_mbuf_prepare(ss, mb, k);
 	return k;
@@ -710,23 +769,30 @@  inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
 	struct rte_mbuf *mb[], uint16_t num)
 {
 	int32_t rc;
-	uint32_t i, k, n;
+	uint32_t i, k, nb_sqn, nb_sqn_alloc;
 	uint64_t sqn;
 	rte_be64_t sqc;
 	struct rte_ipsec_sa *sa;
 	union sym_op_data icv;
 	uint64_t iv[IPSEC_MAX_IV_QWORD];
 	uint32_t dr[num];
+	uint16_t nb_segs[num];
 
 	sa = ss->sa;
 
-	n = num;
-	sqn = esn_outb_update_sqn(sa, &n);
-	if (n != num)
+	/* Calculate number of sequence numbers required */
+	for (i = 0, nb_sqn = 0; i != num; i++) {
+		nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
+		nb_sqn += nb_segs[i];
+	}
+
+	nb_sqn_alloc = nb_sqn;
+	sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
+	if (nb_sqn_alloc != nb_sqn)
 		rte_errno = EOVERFLOW;
 
 	k = 0;
-	for (i = 0; i != n; i++) {
+	for (i = 0; i != num; i++) {
 
 		sqc = rte_cpu_to_be_64(sqn + i);
 		gen_iv(iv, sqc);
@@ -741,11 +807,18 @@  inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
 			dr[i - k] = i;
 			rte_errno = -rc;
 		}
+
+		/**
+		 * If packet is using tso, increment sqn by the number of
+		 * segments for	packet
+		 */
+		if  (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
+			sqn += nb_segs[i] - 1;
 	}
 
 	/* copy not processed mbufs beyond good ones */
-	if (k != n && k != 0)
-		move_bad_mbufs(mb, dr, n, n - k);
+	if (k != num && k != 0)
+		move_bad_mbufs(mb, dr, num, num - k);
 
 	inline_outb_mbuf_prepare(ss, mb, k);
 	return k;
diff --git a/lib/ipsec/iph.h b/lib/ipsec/iph.h
index 861f16905a..2d223199ac 100644
--- a/lib/ipsec/iph.h
+++ b/lib/ipsec/iph.h
@@ -6,6 +6,8 @@ 
 #define _IPH_H_
 
 #include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_tcp.h>
 
 /**
  * @file iph.h
@@ -39,8 +41,8 @@  insert_esph(char *np, char *op, uint32_t hlen)
 
 /* update original ip header fields for transport case */
 static inline int
-update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
-		uint32_t l2len, uint32_t l3len, uint8_t proto)
+update_trs_l34hdrs(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
+		uint32_t l2len, uint32_t l3len, uint8_t proto, uint8_t tso)
 {
 	int32_t rc;
 
@@ -51,6 +53,10 @@  update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
 		v4h = p;
 		rc = v4h->next_proto_id;
 		v4h->next_proto_id = proto;
+		if (tso) {
+			v4h->hdr_checksum = 0;
+			v4h->total_length = 0;
+		}
 		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
 	/* IPv6 */
 	} else {
diff --git a/lib/ipsec/sa.c b/lib/ipsec/sa.c
index 720e0f365b..2ecbbce0a4 100644
--- a/lib/ipsec/sa.c
+++ b/lib/ipsec/sa.c
@@ -565,6 +565,12 @@  rte_ipsec_sa_init(struct rte_ipsec_sa *sa, const struct rte_ipsec_sa_prm *prm,
 	sa->type = type;
 	sa->size = sz;
 
+
+	if (prm->ipsec_xform.options.tso == 1) {
+		sa->tso.enabled = 1;
+		sa->tso.mss = prm->ipsec_xform.mss;
+	}
+
 	/* check for ESN flag */
 	sa->sqn_mask = (prm->ipsec_xform.options.esn == 0) ?
 		UINT32_MAX : UINT64_MAX;
diff --git a/lib/ipsec/sa.h b/lib/ipsec/sa.h
index 107ebd1519..5e237f3525 100644
--- a/lib/ipsec/sa.h
+++ b/lib/ipsec/sa.h
@@ -113,6 +113,10 @@  struct rte_ipsec_sa {
 	uint8_t iv_len;
 	uint8_t pad_align;
 	uint8_t tos_mask;
+	struct {
+		uint8_t enabled:1;
+		uint16_t mss;
+	} tso;
 
 	/* template for tunnel header */
 	uint8_t hdr[IPSEC_MAX_HDR_SIZE];