[dpdk-dev,v5,7/8] i40e:support VxLAN Tx checksum offload
Commit Message
Support VxLAN Tx checksum offload, which includes:
- outer L3(IP) checksum offload
- inner L3(IP) checksum offload
- inner L4(UDP, TCP and SCTP) checksum offload
Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
Acked-by: Helin Zhang <helin.zhang@intel.com>
Acked-by: Jingjing Wu <jingjing.wu@intel.com>
Acked-by: Jing Chen <jing.d.chen@intel.com>
---
lib/librte_mbuf/rte_mbuf.h | 17 ++++++++++++++
lib/librte_pmd_i40e/i40e_rxtx.c | 46 +++++++++++++++++++++++++++++++++-----
2 files changed, 57 insertions(+), 6 deletions(-)
Comments
2014-10-11 13:55, Jijiang Liu:
> Support VxLAN Tx checksum offload, which include
> - outer L3(IP) checksum offload
> - inner L3(IP) checksum offload
> - inner L4(UDP, TCP and SCTP) checksum offload
[...]
> +
> + /* fields to support tunnelling packet TX offloads */
I know that the previous comment is "fields to support TX offloads",
but I'd prefer "for TX offloading of tunnels".
Maybe "encapsulation" is better than "tunnel".
Just my opinion.
> + union {
> + /**< combined inner l2/l3 lengths as single var */
> + uint16_t inner_l2_l3_len;
> +
> + struct {
> + /**< inner L3 (IP) Header Length. */
> + uint16_t inner_l3_len:9;
> +
> + /**< L2 (MAC) Header Length. */
> + uint16_t inner_l2_len:7;
> + };
> + };
I would like to highlight that you are using 2 bytes in the second cache line
of the mbuf.
It deserves at least a line in the commit log.
Actually I'd prefer a separate patch for mbuf modifications.
Thanks
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Friday, October 17, 2014 4:34 AM
> To: Liu, Jijiang
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v5 7/8]i40e:support VxLAN Tx checksum
> offload
>
> 2014-10-11 13:55, Jijiang Liu:
> > Support VxLAN Tx checksum offload, which include
> > - outer L3(IP) checksum offload
> > - inner L3(IP) checksum offload
> > - inner L4(UDP, TCP and SCTP) checksum offload
> [...]
> > +
> > + /* fields to support tunnelling packet TX offloads */
>
> I know that previous comment is "fields to support TX offloads", but I'd
> prefer "for TX offloading of tunnels".
> Maybe that "encapsulation" is better than "tunnel".
> Just my opinion.
>
> > + union {
> > + /**< combined inner l2/l3 lengths as single var */
> > + uint16_t inner_l2_l3_len;
> > +
> > + struct {
> > + /**< inner L3 (IP) Header Length. */
> > + uint16_t inner_l3_len:9;
> > +
> > + /**< L2 (MAC) Header Length. */
> > + uint16_t inner_l2_len:7;
> > + };
> > + };
>
> I would like to highlight that you are using 2 bytes in the second cache line of
> the mbuf.
> It deserves at least a line in the commit log.
> Actually I'd prefer a separate patch for mbuf modifications.
>
> Thanks
> --
> Thomas
Ok, I will send a separate patch for mbuf modifications.
Thanks
Jijiang Liu
@@ -94,6 +94,7 @@ extern "C" {
#define PKT_TX_VLAN_PKT (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
#define PKT_TX_IP_CKSUM (1ULL << 54) /**< IP cksum of TX pkt. computed by NIC. */
+#define PKT_TX_VXLAN_CKSUM (1ULL << 50) /**< TX checksum of VxLAN computed by NIC */
#define PKT_TX_IPV4_CSUM PKT_TX_IP_CKSUM /**< Alias of PKT_TX_IP_CKSUM. */
#define PKT_TX_IPV4 PKT_RX_IPV4_HDR /**< IPv4 with no IP checksum offload. */
#define PKT_TX_IPV6 PKT_RX_IPV6_HDR /**< IPv6 packet */
@@ -196,6 +197,20 @@ struct rte_mbuf {
uint16_t l2_len:7; /**< L2 (MAC) Header Length. */
};
};
+
+ /*
+  * Fields to support tunnelling packet TX offloads: header lengths of
+  * the inner (encapsulated) packet. The 16-bit inner_l2_l3_len shares
+  * storage with the two bit-fields (9 + 7 bits), so both lengths can be
+  * read or written in a single access.
+  */
+ union {
+ /** combined inner l2/l3 lengths as single var */
+ uint16_t inner_l2_l3_len;
+
+ struct {
+ /** inner L3 (IP) Header Length. */
+ uint16_t inner_l3_len:9;
+
+ /** inner L2 (MAC) Header Length. */
+ uint16_t inner_l2_len:7;
+ };
+ };
} __rte_cache_aligned;
/**
@@ -546,6 +561,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
m->next = NULL;
m->pkt_len = 0;
m->l2_l3_len = 0;
+ m->inner_l2_l3_len = 0;
m->vlan_tci = 0;
m->nb_segs = 1;
m->port = 0xff;
@@ -615,6 +631,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *md)
mi->port = md->port;
mi->vlan_tci = md->vlan_tci;
mi->l2_l3_len = md->l2_l3_len;
+ mi->inner_l2_l3_len = md->inner_l2_l3_len;
mi->hash = md->hash;
mi->next = NULL;
@@ -411,11 +411,14 @@ i40e_rxd_ptype_to_pkt_flags(uint64_t qword)
}
static inline void
-i40e_txd_enable_checksum(uint32_t ol_flags,
+i40e_txd_enable_checksum(uint64_t ol_flags,
uint32_t *td_cmd,
uint32_t *td_offset,
uint8_t l2_len,
- uint8_t l3_len)
+ uint16_t l3_len,
+ uint8_t inner_l2_len,
+ uint16_t inner_l3_len,
+ uint32_t *cd_tunneling)
{
if (!l2_len) {
PMD_DRV_LOG(DEBUG, "L2 length set to 0");
@@ -428,6 +431,27 @@ i40e_txd_enable_checksum(uint32_t ol_flags,
return;
}
+ /* VxLAN packet TX checksum offload */
+ if (unlikely(ol_flags & PKT_TX_VXLAN_CKSUM)) {
+ uint8_t l4tun_len;
+
+ l4tun_len = ETHER_VXLAN_HLEN + inner_l2_len;
+
+ if (ol_flags & PKT_TX_IPV4_CSUM)
+ *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
+ else if (ol_flags & PKT_TX_IPV6)
+ *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
+
+ /* Now set the ctx descriptor fields */
+ *cd_tunneling |= (l3_len >> 2) <<
+ I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
+ I40E_TXD_CTX_UDP_TUNNELING |
+ (l4tun_len >> 1) <<
+ I40E_TXD_CTX_QW0_NATLEN_SHIFT;
+
+ l3_len = inner_l3_len;
+ }
+
/* Enable L3 checksum offloads */
if (ol_flags & PKT_TX_IPV4_CSUM) {
*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
@@ -1077,7 +1101,10 @@ i40e_recv_scattered_pkts(void *rx_queue,
static inline uint16_t
i40e_calc_context_desc(uint64_t flags)
{
- uint16_t mask = 0;
+ uint64_t mask = 0ULL;
+
+ if (flags | PKT_TX_VXLAN_CKSUM)
+ mask |= PKT_TX_VXLAN_CKSUM;
#ifdef RTE_LIBRTE_IEEE1588
mask |= PKT_TX_IEEE1588_TMST;
@@ -1098,6 +1125,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
volatile struct i40e_tx_desc *txr;
struct rte_mbuf *tx_pkt;
struct rte_mbuf *m_seg;
+ uint32_t cd_tunneling_params;
uint16_t tx_id;
uint16_t nb_tx;
uint32_t td_cmd;
@@ -1106,7 +1134,9 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
uint32_t td_tag;
uint64_t ol_flags;
uint8_t l2_len;
- uint8_t l3_len;
+ uint16_t l3_len;
+ uint8_t inner_l2_len;
+ uint16_t inner_l3_len;
uint16_t nb_used;
uint16_t nb_ctx;
uint16_t tx_last;
@@ -1134,7 +1164,9 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
ol_flags = tx_pkt->ol_flags;
l2_len = tx_pkt->l2_len;
+ inner_l2_len = tx_pkt->inner_l2_len;
l3_len = tx_pkt->l3_len;
+ inner_l3_len = tx_pkt->inner_l3_len;
/* Calculate the number of context descriptors needed. */
nb_ctx = i40e_calc_context_desc(ol_flags);
@@ -1182,15 +1214,17 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
td_cmd |= I40E_TX_DESC_CMD_ICRC;
/* Enable checksum offloading */
+ cd_tunneling_params = 0;
i40e_txd_enable_checksum(ol_flags, &td_cmd, &td_offset,
- l2_len, l3_len);
+ l2_len, l3_len, inner_l2_len,
+ inner_l3_len,
+ &cd_tunneling_params);
if (unlikely(nb_ctx)) {
/* Setup TX context descriptor if required */
volatile struct i40e_tx_context_desc *ctx_txd =
(volatile struct i40e_tx_context_desc *)\
&txr[tx_id];
- uint32_t cd_tunneling_params = 0;
uint16_t cd_l2tag2 = 0;
uint64_t cd_type_cmd_tso_mss =
I40E_TX_DESC_DTYPE_CONTEXT;