diff mbox series

[v4] examples/l3fwd: ipv4 and udp/tcp cksum verification through software

Message ID 20211014184322.5148-1-usama.nadeem@emumba.com (mailing list archive)
State New
Delegated to: Thomas Monjalon
Headers show
Series [v4] examples/l3fwd: ipv4 and udp/tcp cksum verification through software | expand

Checks

Context Check Description
ci/iol-aarch64-compile-testing success Testing PASS
ci/intel-Testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/github-robot: build success github build: passed
ci/checkpatch success coding style OK

Commit Message

Usama Nadeem Oct. 14, 2021, 6:43 p.m. UTC
Check whether IPv4 and UDP/TCP cksum offload capabilities are available.
If they are not available, the cksum is verified in software.
If the cksum is corrupt, the packet is dropped; the rest of the packets
are forwarded back.

Bugzilla ID:545
Signed-off-by: Usama Nadeem <usama.nadeem@emumba.com>
---
 examples/l3fwd/l3fwd.h     |  6 ++++
 examples/l3fwd/l3fwd_lpm.c | 72 ++++++++++++++++++++++++++++++++++++--
 examples/l3fwd/main.c      | 33 +++++++++++++++--
 3 files changed, 105 insertions(+), 6 deletions(-)

Comments

Usama Nadeem Nov. 1, 2021, 8:33 a.m. UTC | #1
Hi,
Please have a look at this patch, submitted around 2 weeks ago. Let me know
if any further changes are required in this patch.

Thanks
-usama
Walsh, Conor Nov. 4, 2021, 11:11 a.m. UTC | #2
> From: dev <dev-bounces@dpdk.org> On Behalf Of Usama Nadeem
> Sent: Thursday 14 October 2021 19:43
> To: thomas@monjalon.net
> Cc: dev@dpdk.org; Usama Nadeem <usama.nadeem@emumba.com>
> Subject: [dpdk-dev] [PATCH v4] examples/l3fwd: ipv4 and udp/tcp cksum
> verification through software
> 
> checks if ipv4 and udptcp cksum offload capability available
> If not available, cksum is verified through software
> If cksum is corrupt, packet is dropped, rest of the packets
> are forwarded back.
> 
> Bugzilla ID:545
> Signed-off-by: Usama Nadeem <usama.nadeem@emumba.com>
> ---

Hi Usama,

This should be done in a generic way that allows all the lookup methods to support it, not just LPM.
check_software_cksum should go in a common file and be called from LPM, FIB and possibly EM.

Thanks,
Conor.

>  examples/l3fwd/l3fwd.h     |  6 ++++
>  examples/l3fwd/l3fwd_lpm.c | 72
> ++++++++++++++++++++++++++++++++++++--
>  examples/l3fwd/main.c      | 33 +++++++++++++++--
>  3 files changed, 105 insertions(+), 6 deletions(-)
> 
> diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
> index a808d60247..c2c21a91fb 100644
> --- a/examples/l3fwd/l3fwd.h
> +++ b/examples/l3fwd/l3fwd.h
> @@ -55,6 +55,8 @@
>  #define L3FWD_HASH_ENTRIES		(1024*1024*1)
>  #endif
>  #define HASH_ENTRY_NUMBER_DEFAULT	4
> +extern bool l3_sft_cksum;
> +extern bool l4_sft_cksum;
> 
>  struct mbuf_table {
>  	uint16_t len;
> @@ -210,6 +212,10 @@ em_main_loop(__rte_unused void *dummy);
>  int
>  lpm_main_loop(__rte_unused void *dummy);
> 
> +int
> +check_software_cksum(struct rte_mbuf **pkts_burst,
> +struct rte_mbuf **pkts_burst_to_send, int nb_rx);
> +
>  int
>  fib_main_loop(__rte_unused void *dummy);
> 
> diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
> index 232b606b54..ecaf323943 100644
> --- a/examples/l3fwd/l3fwd_lpm.c
> +++ b/examples/l3fwd/l3fwd_lpm.c
> @@ -26,6 +26,7 @@
>  #include <rte_udp.h>
>  #include <rte_lpm.h>
>  #include <rte_lpm6.h>
> +#include <rte_net.h>
> 
>  #include "l3fwd.h"
>  #include "l3fwd_event.h"
> @@ -139,16 +140,65 @@ lpm_get_dst_port_with_ipv4(const struct
> lcore_conf *qconf, struct rte_mbuf *pkt,
>  #include "l3fwd_lpm.h"
>  #endif
> 
> +
> +int check_software_cksum(struct rte_mbuf **pkts_burst,
> +struct rte_mbuf **pkts_burst_to_send, int nb_rx)
> +{
> +	int j;
> +	int i = 0;
> +	struct rte_net_hdr_lens hdr_lens;
> +	struct rte_ipv4_hdr *ipv4_hdr;
> +	void *l3_hdr;
> +	void *l4_hdr;
> +	rte_be16_t prev_cksum;
> +	int dropped_pkts_udp_tcp = 0;
> +	int dropped_pkts_ipv4 = 0;
> +	bool dropped;
> +	for (j = 0; j < nb_rx; j++) {
> +		dropped = false;
> +		rte_net_get_ptype(pkts_burst[j], &hdr_lens,
> RTE_PTYPE_ALL_MASK);
> +		l3_hdr = rte_pktmbuf_mtod_offset(pkts_burst[j],
> +		void *, hdr_lens.l2_len);
> +		l4_hdr = rte_pktmbuf_mtod_offset(pkts_burst[j],
> +		void *, hdr_lens.l2_len + hdr_lens.l3_len);
> +		ipv4_hdr = l3_hdr;
> +		prev_cksum = ipv4_hdr->hdr_checksum;
> +		ipv4_hdr->hdr_checksum = 0;
> +		ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
> +
> +		if (l3_sft_cksum && prev_cksum != ipv4_hdr-
> >hdr_checksum) {
> +			rte_pktmbuf_free(pkts_burst[j]);
> +			dropped_pkts_ipv4++;
> +			dropped = true;
> +		} else if (l4_sft_cksum &&
> +				rte_ipv4_udptcp_cksum_verify
> +				(l3_hdr, l4_hdr) != 0) {
> +
> +			rte_pktmbuf_free(pkts_burst[j]);
> +			dropped_pkts_udp_tcp++;
> +			dropped = true;
> +		}
> +		if (dropped == false) {
> +			pkts_burst_to_send[i] = pkts_burst[j];
> +			i++;
> +		}
> +
> +	}
> +	return dropped_pkts_udp_tcp+dropped_pkts_ipv4;
> +}
> +
>  /* main processing loop */
>  int
>  lpm_main_loop(__rte_unused void *dummy)
>  {
>  	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> +	struct rte_mbuf *pkts_burst_to_send[MAX_PKT_BURST];
>  	unsigned lcore_id;
>  	uint64_t prev_tsc, diff_tsc, cur_tsc;
>  	int i, nb_rx;
>  	uint16_t portid;
>  	uint8_t queueid;
> +	int dropped;
>  	struct lcore_conf *qconf;
>  	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
>  		US_PER_S * BURST_TX_DRAIN_US;
> @@ -209,19 +259,35 @@ lpm_main_loop(__rte_unused void *dummy)
>  			if (nb_rx == 0)
>  				continue;
> 
> +			if (l3_sft_cksum || l4_sft_cksum) {
> +				dropped =
> check_software_cksum(pkts_burst,
> +				pkts_burst_to_send,	nb_rx);
> +
> +				nb_rx = nb_rx-dropped;
> +			}
> +
> +
>  #if defined RTE_ARCH_X86 || defined __ARM_NEON \
>  			 || defined RTE_ARCH_PPC_64
> +		if (l3_sft_cksum == false && l4_sft_cksum == false)
>  			l3fwd_lpm_send_packets(nb_rx, pkts_burst,
>  						portid, qconf);
> +		else
> +			l3fwd_lpm_send_packets(nb_rx,
> pkts_burst_to_send,
> +						portid, qconf);
> +
>  #else
> -			l3fwd_lpm_no_opt_send_packets(nb_rx,
> pkts_burst,
> +			if (l3_sft_cksum == false && l4_sft_cksum == false)
> +				l3fwd_lpm_no_opt_send_packets(nb_rx,
> pkts_burst,
>  							portid, qconf);
> +			else
> +				l3fwd_lpm_no_opt_send_packets(nb_rx,
> +				pkts_burst_to_send, portid, qconf);
> +
>  #endif /* X86 */
>  		}
> -
>  		cur_tsc = rte_rdtsc();
>  	}
> -
>  	return 0;
>  }
> 
> diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
> index 00ac267af1..a54aca070d 100644
> --- a/examples/l3fwd/main.c
> +++ b/examples/l3fwd/main.c
> @@ -61,6 +61,9 @@ static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
>  /**< Ports set in promiscuous mode off by default. */
>  static int promiscuous_on;
> 
> +bool l3_sft_cksum;
> +bool l4_sft_cksum;
> +
>  /* Select Longest-Prefix, Exact match or Forwarding Information Base. */
>  enum L3FWD_LOOKUP_MODE {
>  	L3FWD_LOOKUP_DEFAULT,
> @@ -123,7 +126,6 @@ static struct rte_eth_conf port_conf = {
>  		.mq_mode = ETH_MQ_RX_RSS,
>  		.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
>  		.split_hdr_size = 0,
> -		.offloads = DEV_RX_OFFLOAD_CHECKSUM,
>  	},
>  	.rx_adv_conf = {
>  		.rss_conf = {
> @@ -981,6 +983,7 @@ prepare_ptype_parser(uint16_t portid, uint16_t
> queueid)
>  	return 0;
>  }
> 
> +
>  static void
>  l3fwd_poll_resource_setup(void)
>  {
> @@ -993,7 +996,8 @@ l3fwd_poll_resource_setup(void)
>  	unsigned int nb_ports;
>  	unsigned int lcore_id;
>  	int ret;
> -
> +	l3_sft_cksum = false;
> +	l4_sft_cksum = false;
>  	if (check_lcore_params() < 0)
>  		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
> 
> @@ -1034,11 +1038,34 @@ l3fwd_poll_resource_setup(void)
>  			rte_exit(EXIT_FAILURE,
>  				"Error during getting device (port %u) info:
> %s\n",
>  				portid, strerror(-ret));
> -
>  		if (dev_info.tx_offload_capa &
> DEV_TX_OFFLOAD_MBUF_FAST_FREE)
>  			local_port_conf.txmode.offloads |=
>  				DEV_TX_OFFLOAD_MBUF_FAST_FREE;
> 
> +		if (dev_info.rx_offload_capa &
> DEV_RX_OFFLOAD_IPV4_CKSUM)
> +			local_port_conf.rxmode.offloads |=
> +			DEV_RX_OFFLOAD_IPV4_CKSUM;
> +		else {
> +			l3_sft_cksum = true;
> +			printf("WARNING: IPV4 checksum offload not
> available.\n");
> +			}
> +
> +		if (dev_info.rx_offload_capa &
> DEV_RX_OFFLOAD_UDP_CKSUM)
> +			local_port_conf.rxmode.offloads |=
> +				DEV_RX_OFFLOAD_UDP_CKSUM;
> +		else {
> +			l4_sft_cksum = true;
> +			printf("WARNING: UDP checksum offload not
> available.\n");
> +		}
> +
> +		if (dev_info.rx_offload_capa &
> DEV_RX_OFFLOAD_TCP_CKSUM)
> +			local_port_conf.rxmode.offloads |=
> +				DEV_RX_OFFLOAD_TCP_CKSUM;
> +		else {
> +			l4_sft_cksum = true;
> +			printf("WARNING: TCP checksum offload not
> available.\n");
> +		}
> +
>  		local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
>  			dev_info.flow_type_rss_offloads;
> 
> --
> 2.25.1
Ananyev, Konstantin Nov. 4, 2021, 1:19 p.m. UTC | #3
> checks if ipv4 and udptcp cksum offload capability available
> If not available, cksum is verified through software
> If cksum is corrupt, packet is dropped, rest of the packets
> are forwarded back.

From what I see right now l3fwd:
   a) enables HW RX cksum offload 
   b) simply ignores HW provided cksum status 
Which came as a real surprise to me. 
Feel free to correct me if I missed something obvious here.

So, I think we first need to add the missing check,
even though it might cause some perf drop.
Then make changes to actually check the status provided by HW and,
when HW doesn't provide such an offload, do the check in SW.

Another alternative would be to remove the request for HW offloads
and document that l3fwd doesn't check checksums at all,
but I don't think that is a good way.

> Bugzilla ID:545
> Signed-off-by: Usama Nadeem <usama.nadeem@emumba.com>
> ---
>  examples/l3fwd/l3fwd.h     |  6 ++++
>  examples/l3fwd/l3fwd_lpm.c | 72 ++++++++++++++++++++++++++++++++++++--
>  examples/l3fwd/main.c      | 33 +++++++++++++++--
>  3 files changed, 105 insertions(+), 6 deletions(-)
> 
> diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
> index a808d60247..c2c21a91fb 100644
> --- a/examples/l3fwd/l3fwd.h
> +++ b/examples/l3fwd/l3fwd.h
> @@ -55,6 +55,8 @@
>  #define L3FWD_HASH_ENTRIES		(1024*1024*1)
>  #endif
>  #define HASH_ENTRY_NUMBER_DEFAULT	4
> +extern bool l3_sft_cksum;
> +extern bool l4_sft_cksum;

About the approach itself.
We have a similar issue with HW PTYPE recognition - some HW doesn't support it.
So we check HW capabilities and, if required, we set up SW RX callbacks to
determine PTYPE in SW. Note that for EM/LPM we have different callbacks.
I think for cksum checks we can do the same:
check HW capabilities, and if they are missing add a new callback that would
calculate/check cksum and set RTE_MBUF_F_RX_*_CKSUM_* flags.
That way HW/SW cksum will be transparent to the rest of the l3fwd code.

About the cksums required: for LPM/FIB mode just IPv4 cksum seems enough.
For EM we probably need L4 cksum too, though I am not sure whether it is really needed.
I wonder what other people think here?

 >  struct mbuf_table {
>  	uint16_t len;
> @@ -210,6 +212,10 @@ em_main_loop(__rte_unused void *dummy);
>  int
>  lpm_main_loop(__rte_unused void *dummy);
> 
> +int
> +check_software_cksum(struct rte_mbuf **pkts_burst,
> +struct rte_mbuf **pkts_burst_to_send, int nb_rx);
> +
>  int
>  fib_main_loop(__rte_unused void *dummy);
> 
> diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
> index 232b606b54..ecaf323943 100644
> --- a/examples/l3fwd/l3fwd_lpm.c
> +++ b/examples/l3fwd/l3fwd_lpm.c
> @@ -26,6 +26,7 @@
>  #include <rte_udp.h>
>  #include <rte_lpm.h>
>  #include <rte_lpm6.h>
> +#include <rte_net.h>
> 
>  #include "l3fwd.h"
>  #include "l3fwd_event.h"
> @@ -139,16 +140,65 @@ lpm_get_dst_port_with_ipv4(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
>  #include "l3fwd_lpm.h"
>  #endif
> 
> +
> +int check_software_cksum(struct rte_mbuf **pkts_burst,
> +struct rte_mbuf **pkts_burst_to_send, int nb_rx)
> +{
> +	int j;
> +	int i = 0;
> +	struct rte_net_hdr_lens hdr_lens;
> +	struct rte_ipv4_hdr *ipv4_hdr;
> +	void *l3_hdr;
> +	void *l4_hdr;
> +	rte_be16_t prev_cksum;
> +	int dropped_pkts_udp_tcp = 0;
> +	int dropped_pkts_ipv4 = 0;
> +	bool dropped;
> +	for (j = 0; j < nb_rx; j++) {
> +		dropped = false;
> +		rte_net_get_ptype(pkts_burst[j], &hdr_lens, RTE_PTYPE_ALL_MASK);
> +		l3_hdr = rte_pktmbuf_mtod_offset(pkts_burst[j],
> +		void *, hdr_lens.l2_len);
> +		l4_hdr = rte_pktmbuf_mtod_offset(pkts_burst[j],
> +		void *, hdr_lens.l2_len + hdr_lens.l3_len);
> +		ipv4_hdr = l3_hdr;
> +		prev_cksum = ipv4_hdr->hdr_checksum;
> +		ipv4_hdr->hdr_checksum = 0;
> +		ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
> +
> +		if (l3_sft_cksum && prev_cksum != ipv4_hdr->hdr_checksum) {
> +			rte_pktmbuf_free(pkts_burst[j]);
> +			dropped_pkts_ipv4++;
> +			dropped = true;
> +		} else if (l4_sft_cksum &&
> +				rte_ipv4_udptcp_cksum_verify
> +				(l3_hdr, l4_hdr) != 0) {
> +
> +			rte_pktmbuf_free(pkts_burst[j]);
> +			dropped_pkts_udp_tcp++;
> +			dropped = true;
> +		}
> +		if (dropped == false) {
> +			pkts_burst_to_send[i] = pkts_burst[j];
> +			i++;
> +		}
> +
> +	}
> +	return dropped_pkts_udp_tcp+dropped_pkts_ipv4;
> +}
> +
>  /* main processing loop */
>  int
>  lpm_main_loop(__rte_unused void *dummy)
>  {
>  	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> +	struct rte_mbuf *pkts_burst_to_send[MAX_PKT_BURST];
>  	unsigned lcore_id;
>  	uint64_t prev_tsc, diff_tsc, cur_tsc;
>  	int i, nb_rx;
>  	uint16_t portid;
>  	uint8_t queueid;
> +	int dropped;
>  	struct lcore_conf *qconf;
>  	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
>  		US_PER_S * BURST_TX_DRAIN_US;
> @@ -209,19 +259,35 @@ lpm_main_loop(__rte_unused void *dummy)
>  			if (nb_rx == 0)
>  				continue;
> 
> +			if (l3_sft_cksum || l4_sft_cksum) {
> +				dropped = check_software_cksum(pkts_burst,
> +				pkts_burst_to_send,	nb_rx);
> +
> +				nb_rx = nb_rx-dropped;
> +			}
> +
> +
>  #if defined RTE_ARCH_X86 || defined __ARM_NEON \
>  			 || defined RTE_ARCH_PPC_64
> +		if (l3_sft_cksum == false && l4_sft_cksum == false)
>  			l3fwd_lpm_send_packets(nb_rx, pkts_burst,
>  						portid, qconf);
> +		else
> +			l3fwd_lpm_send_packets(nb_rx, pkts_burst_to_send,
> +						portid, qconf);
> +
>  #else
> -			l3fwd_lpm_no_opt_send_packets(nb_rx, pkts_burst,
> +			if (l3_sft_cksum == false && l4_sft_cksum == false)
> +				l3fwd_lpm_no_opt_send_packets(nb_rx, pkts_burst,
>  							portid, qconf);
> +			else
> +				l3fwd_lpm_no_opt_send_packets(nb_rx,
> +				pkts_burst_to_send, portid, qconf);
> +
>  #endif /* X86 */
>  		}
> -
>  		cur_tsc = rte_rdtsc();
>  	}
> -
>  	return 0;
>  }
> 
> diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
> index 00ac267af1..a54aca070d 100644
> --- a/examples/l3fwd/main.c
> +++ b/examples/l3fwd/main.c
> @@ -61,6 +61,9 @@ static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
>  /**< Ports set in promiscuous mode off by default. */
>  static int promiscuous_on;
> 
> +bool l3_sft_cksum;
> +bool l4_sft_cksum;
> +
>  /* Select Longest-Prefix, Exact match or Forwarding Information Base. */
>  enum L3FWD_LOOKUP_MODE {
>  	L3FWD_LOOKUP_DEFAULT,
> @@ -123,7 +126,6 @@ static struct rte_eth_conf port_conf = {
>  		.mq_mode = ETH_MQ_RX_RSS,
>  		.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
>  		.split_hdr_size = 0,
> -		.offloads = DEV_RX_OFFLOAD_CHECKSUM,
>  	},
>  	.rx_adv_conf = {
>  		.rss_conf = {
> @@ -981,6 +983,7 @@ prepare_ptype_parser(uint16_t portid, uint16_t queueid)
>  	return 0;
>  }
> 
> +
>  static void
>  l3fwd_poll_resource_setup(void)
>  {
> @@ -993,7 +996,8 @@ l3fwd_poll_resource_setup(void)
>  	unsigned int nb_ports;
>  	unsigned int lcore_id;
>  	int ret;
> -
> +	l3_sft_cksum = false;
> +	l4_sft_cksum = false;
>  	if (check_lcore_params() < 0)
>  		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
> 
> @@ -1034,11 +1038,34 @@ l3fwd_poll_resource_setup(void)
>  			rte_exit(EXIT_FAILURE,
>  				"Error during getting device (port %u) info: %s\n",
>  				portid, strerror(-ret));
> -
>  		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
>  			local_port_conf.txmode.offloads |=
>  				DEV_TX_OFFLOAD_MBUF_FAST_FREE;
> 
> +		if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
> +			local_port_conf.rxmode.offloads |=
> +			DEV_RX_OFFLOAD_IPV4_CKSUM;
> +		else {
> +			l3_sft_cksum = true;
> +			printf("WARNING: IPV4 checksum offload not available.\n");
> +			}
> +
> +		if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
> +			local_port_conf.rxmode.offloads |=
> +				DEV_RX_OFFLOAD_UDP_CKSUM;
> +		else {
> +			l4_sft_cksum = true;
> +			printf("WARNING: UDP checksum offload not available.\n");
> +		}
> +
> +		if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
> +			local_port_conf.rxmode.offloads |=
> +				DEV_RX_OFFLOAD_TCP_CKSUM;
> +		else {
> +			l4_sft_cksum = true;
> +			printf("WARNING: TCP checksum offload not available.\n");
> +		}
> +
>  		local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
>  			dev_info.flow_type_rss_offloads;
> 
> --
> 2.25.1
Vladimir Medvedkin Nov. 4, 2021, 4:19 p.m. UTC | #4
Hi Usama,

On 04/11/2021 12:11, Walsh, Conor wrote:
>> From: dev <dev-bounces@dpdk.org> On Behalf Of Usama Nadeem
>> Sent: Thursday 14 October 2021 19:43
>> To: thomas@monjalon.net
>> Cc: dev@dpdk.org; Usama Nadeem <usama.nadeem@emumba.com>
>> Subject: [dpdk-dev] [PATCH v4] examples/l3fwd: ipv4 and udp/tcp cksum
>> verification through software
>>
>> checks if ipv4 and udptcp cksum offload capability available
>> If not available, cksum is verified through software
>> If cksum is corrupt, packet is dropped, rest of the packets
>> are forwarded back.
>>
>> Bugzilla ID:545
>> Signed-off-by: Usama Nadeem <usama.nadeem@emumba.com>
>> ---
> 
> Hi Usama,
> 
> This should be done in a generic way that allows all the lookup methods to support it not just LPM.
> check_software_cksum should go in a common file and be called from LPM, FIB and possibly EM.
> 
> Thanks,
> Conor.
> 
>>   examples/l3fwd/l3fwd.h     |  6 ++++
>>   examples/l3fwd/l3fwd_lpm.c | 72
>> ++++++++++++++++++++++++++++++++++++--
>>   examples/l3fwd/main.c      | 33 +++++++++++++++--
>>   3 files changed, 105 insertions(+), 6 deletions(-)
>>
>> diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
>> index a808d60247..c2c21a91fb 100644
>> --- a/examples/l3fwd/l3fwd.h
>> +++ b/examples/l3fwd/l3fwd.h
>> @@ -55,6 +55,8 @@
>>   #define L3FWD_HASH_ENTRIES		(1024*1024*1)
>>   #endif
>>   #define HASH_ENTRY_NUMBER_DEFAULT	4
>> +extern bool l3_sft_cksum;
>> +extern bool l4_sft_cksum;
>>
>>   struct mbuf_table {
>>   	uint16_t len;
>> @@ -210,6 +212,10 @@ em_main_loop(__rte_unused void *dummy);
>>   int
>>   lpm_main_loop(__rte_unused void *dummy);
>>
>> +int
>> +check_software_cksum(struct rte_mbuf **pkts_burst,
>> +struct rte_mbuf **pkts_burst_to_send, int nb_rx);
>> +
>>   int
>>   fib_main_loop(__rte_unused void *dummy);
>>
>> diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
>> index 232b606b54..ecaf323943 100644
>> --- a/examples/l3fwd/l3fwd_lpm.c
>> +++ b/examples/l3fwd/l3fwd_lpm.c
>> @@ -26,6 +26,7 @@
>>   #include <rte_udp.h>
>>   #include <rte_lpm.h>
>>   #include <rte_lpm6.h>
>> +#include <rte_net.h>
>>
>>   #include "l3fwd.h"
>>   #include "l3fwd_event.h"
>> @@ -139,16 +140,65 @@ lpm_get_dst_port_with_ipv4(const struct
>> lcore_conf *qconf, struct rte_mbuf *pkt,
>>   #include "l3fwd_lpm.h"
>>   #endif
>>
>> +
>> +int check_software_cksum(struct rte_mbuf **pkts_burst,
>> +struct rte_mbuf **pkts_burst_to_send, int nb_rx)
>> +{
>> +	int j;
>> +	int i = 0;
>> +	struct rte_net_hdr_lens hdr_lens;
>> +	struct rte_ipv4_hdr *ipv4_hdr;
>> +	void *l3_hdr;
>> +	void *l4_hdr;
>> +	rte_be16_t prev_cksum;
>> +	int dropped_pkts_udp_tcp = 0;
>> +	int dropped_pkts_ipv4 = 0;

Why do you need two separate counters if you are eventually summing them up?

>> +	bool dropped;
>> +	for (j = 0; j < nb_rx; j++) {
>> +		dropped = false;
>> +		rte_net_get_ptype(pkts_burst[j], &hdr_lens,
>> RTE_PTYPE_ALL_MASK);
>> +		l3_hdr = rte_pktmbuf_mtod_offset(pkts_burst[j],
>> +		void *, hdr_lens.l2_len);
>> +		l4_hdr = rte_pktmbuf_mtod_offset(pkts_burst[j],
>> +		void *, hdr_lens.l2_len + hdr_lens.l3_len);

Here hdr_lens.l3_len could be uninitialized, for example in the case of an
MPLS packet.

>> +		ipv4_hdr = l3_hdr;
>> +		prev_cksum = ipv4_hdr->hdr_checksum;

It could be a non-IPv4 packet.

>> +		ipv4_hdr->hdr_checksum = 0;
>> +		ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);

same here and below, it can be IPv6 for example.

>> +
>> +		if (l3_sft_cksum && prev_cksum != ipv4_hdr-
>>> hdr_checksum) {
>> +			rte_pktmbuf_free(pkts_burst[j]);
>> +			dropped_pkts_ipv4++;
>> +			dropped = true;

Do you need the "dropped" value + the final if statement at all? Maybe
it's better to just
...
     continue;
}
here...

>> +		} else if (l4_sft_cksum &&
>> +				rte_ipv4_udptcp_cksum_verify
>> +				(l3_hdr, l4_hdr) != 0) {
>> +
>> +			rte_pktmbuf_free(pkts_burst[j]);
>> +			dropped_pkts_udp_tcp++;
>> +			dropped = true;
>> +		}
>> +		if (dropped == false) {
>> +			pkts_burst_to_send[i] = pkts_burst[j];
>> +			i++;

...and execute this code unconditionally?

>> +		}
>> +
>> +	}
>> +	return dropped_pkts_udp_tcp+dropped_pkts_ipv4;
>> +}
>> +
>>   /* main processing loop */
>>   int
>>   lpm_main_loop(__rte_unused void *dummy)
>>   {
>>   	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
>> +	struct rte_mbuf *pkts_burst_to_send[MAX_PKT_BURST];
>>   	unsigned lcore_id;
>>   	uint64_t prev_tsc, diff_tsc, cur_tsc;
>>   	int i, nb_rx;
>>   	uint16_t portid;
>>   	uint8_t queueid;
>> +	int dropped;
>>   	struct lcore_conf *qconf;
>>   	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
>>   		US_PER_S * BURST_TX_DRAIN_US;
>> @@ -209,19 +259,35 @@ lpm_main_loop(__rte_unused void *dummy)
>>   			if (nb_rx == 0)
>>   				continue;
>>
>> +			if (l3_sft_cksum || l4_sft_cksum) {
>> +				dropped =
>> check_software_cksum(pkts_burst,

You are calling this function right after rte_eth_rx_burst(), so
pkts_burst[] can contain packets of any protocol, but the current
check_software_cksum() implementation is purely IPv4.

>> +				pkts_burst_to_send,	nb_rx);
>> +
>> +				nb_rx = nb_rx-dropped;
>> +			}
>> +
>> +
>>   #if defined RTE_ARCH_X86 || defined __ARM_NEON \
>>   			 || defined RTE_ARCH_PPC_64
>> +		if (l3_sft_cksum == false && l4_sft_cksum == false)
>>   			l3fwd_lpm_send_packets(nb_rx, pkts_burst,
>>   						portid, qconf);
>> +		else
>> +			l3fwd_lpm_send_packets(nb_rx,
>> pkts_burst_to_send,
>> +						portid, qconf);
>> +
>>   #else
>> -			l3fwd_lpm_no_opt_send_packets(nb_rx,
>> pkts_burst,
>> +			if (l3_sft_cksum == false && l4_sft_cksum == false)

While those if statements are perfectly predictable, it is still better 
to avoid branching in hot path if possible. You can implement 
check_software_cksum() to work with a single pkts_burst[] modifying it 
in place and throw away pkts_burst_to_send[] and corresponding branches.

>> +				l3fwd_lpm_no_opt_send_packets(nb_rx,
>> pkts_burst,
>>   							portid, qconf);
>> +			else
>> +				l3fwd_lpm_no_opt_send_packets(nb_rx,
>> +				pkts_burst_to_send, portid, qconf);
>> +
>>   #endif /* X86 */
>>   		}
>> -
>>   		cur_tsc = rte_rdtsc();
>>   	}
>> -
>>   	return 0;
>>   }
>>
>> diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
>> index 00ac267af1..a54aca070d 100644
>> --- a/examples/l3fwd/main.c
>> +++ b/examples/l3fwd/main.c
>> @@ -61,6 +61,9 @@ static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
>>   /**< Ports set in promiscuous mode off by default. */
>>   static int promiscuous_on;
>>
>> +bool l3_sft_cksum;
>> +bool l4_sft_cksum;
>> +
>>   /* Select Longest-Prefix, Exact match or Forwarding Information Base. */
>>   enum L3FWD_LOOKUP_MODE {
>>   	L3FWD_LOOKUP_DEFAULT,
>> @@ -123,7 +126,6 @@ static struct rte_eth_conf port_conf = {
>>   		.mq_mode = ETH_MQ_RX_RSS,
>>   		.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
>>   		.split_hdr_size = 0,
>> -		.offloads = DEV_RX_OFFLOAD_CHECKSUM,
>>   	},
>>   	.rx_adv_conf = {
>>   		.rss_conf = {
>> @@ -981,6 +983,7 @@ prepare_ptype_parser(uint16_t portid, uint16_t
>> queueid)
>>   	return 0;
>>   }
>>
>> +
>>   static void
>>   l3fwd_poll_resource_setup(void)
>>   {
>> @@ -993,7 +996,8 @@ l3fwd_poll_resource_setup(void)
>>   	unsigned int nb_ports;
>>   	unsigned int lcore_id;
>>   	int ret;
>> -
>> +	l3_sft_cksum = false;
>> +	l4_sft_cksum = false;
>>   	if (check_lcore_params() < 0)
>>   		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
>>
>> @@ -1034,11 +1038,34 @@ l3fwd_poll_resource_setup(void)
>>   			rte_exit(EXIT_FAILURE,
>>   				"Error during getting device (port %u) info:
>> %s\n",
>>   				portid, strerror(-ret));
>> -
>>   		if (dev_info.tx_offload_capa &
>> DEV_TX_OFFLOAD_MBUF_FAST_FREE)
>>   			local_port_conf.txmode.offloads |=
>>   				DEV_TX_OFFLOAD_MBUF_FAST_FREE;
>>
>> +		if (dev_info.rx_offload_capa &
>> DEV_RX_OFFLOAD_IPV4_CKSUM)
>> +			local_port_conf.rxmode.offloads |=
>> +			DEV_RX_OFFLOAD_IPV4_CKSUM;
>> +		else {
>> +			l3_sft_cksum = true;
>> +			printf("WARNING: IPV4 checksum offload not
>> available.\n");
>> +			}
>> +
>> +		if (dev_info.rx_offload_capa &
>> DEV_RX_OFFLOAD_UDP_CKSUM)
>> +			local_port_conf.rxmode.offloads |=
>> +				DEV_RX_OFFLOAD_UDP_CKSUM;
>> +		else {
>> +			l4_sft_cksum = true;
>> +			printf("WARNING: UDP checksum offload not
>> available.\n");
>> +		}
>> +
>> +		if (dev_info.rx_offload_capa &
>> DEV_RX_OFFLOAD_TCP_CKSUM)
>> +			local_port_conf.rxmode.offloads |=
>> +				DEV_RX_OFFLOAD_TCP_CKSUM;
>> +		else {
>> +			l4_sft_cksum = true;
>> +			printf("WARNING: TCP checksum offload not
>> available.\n");
>> +		}
>> +
>>   		local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
>>   			dev_info.flow_type_rss_offloads;
>>
>> --
>> 2.25.1
> 

generalizing:
1. As Conor said earlier, make it generic, not only LPM and not only for 
run-to-completion mode, it should be done for event mode as well.
2. Function must work not only with IPv4.
3. There should be no performance degradation if NIC supports CSUM offload.
Usama Nadeem Nov. 16, 2021, 5:18 a.m. UTC | #5
Hi Ananyev, Konstantin
<https://patches.dpdk.org/project/dpdk/list/?submitter=33>,

Yes, you understand it right.

Our patch already follows the approach you described.
First, we check the HW offload capabilities.

> + if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)

> + local_port_conf.rxmode.offloads |=

> + DEV_RX_OFFLOAD_IPV4_CKSUM;

> + else {

> + l3_sft_cksum = true;

> + printf("WARNING: IPV4 checksum offload not available.\n");

> + }

If the cksum HW offload capability is missing, we set a bool variable to
true so that the SW cksum function is called. We will look into setting up
SW RX callbacks in the next version of the patch.


On Thu, Nov 4, 2021 at 6:19 PM Ananyev, Konstantin <
konstantin.ananyev@intel.com> wrote:

> > checks if ipv4 and udptcp cksum offload capability available
> > If not available, cksum is verified through software
> > If cksum is corrupt, packet is dropped, rest of the packets
> > are forwarded back.
>
> From what I see right now l3fwd:
>    a) enables HW RX cksum offload
>    b) simply ignores HW provided cksum status
> Which came as a real surprise to me.
> Feel free to correct me if I missed something obvious here.
>
> So, I think first we need to add missing check first,
> even though it might cause some perf drop.
> Then make changes to actually check provided by HW status and
> when HW doesn't provide such offload do check in SW.
>
> Another alternative would be to remove request for HW offloads
> and document l3fwd that it doesn't check checksums at all,
> but I don't think it is a good way.
>
> > Bugzilla ID:545
> > Signed-off-by: Usama Nadeem <usama.nadeem@emumba.com>
> > ---
> >  examples/l3fwd/l3fwd.h     |  6 ++++
> >  examples/l3fwd/l3fwd_lpm.c | 72 ++++++++++++++++++++++++++++++++++++--
> >  examples/l3fwd/main.c      | 33 +++++++++++++++--
> >  3 files changed, 105 insertions(+), 6 deletions(-)
> >
> > diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
> > index a808d60247..c2c21a91fb 100644
> > --- a/examples/l3fwd/l3fwd.h
> > +++ b/examples/l3fwd/l3fwd.h
> > @@ -55,6 +55,8 @@
> >  #define L3FWD_HASH_ENTRIES           (1024*1024*1)
> >  #endif
> >  #define HASH_ENTRY_NUMBER_DEFAULT    4
> > +extern bool l3_sft_cksum;
> > +extern bool l4_sft_cksum;
>
> About the approach itself.
> We have similar issue for HW PTYPE recognition - some HW doesn't support
> it.
> So we check HW capabilities and if required we setup SW RX callbacks to do
> determine PTYPE in SW. Note that for EM/LPM we have different callbacks.
> I think for cksum checks we can do the same:
> check HW capabilities, if they are missing add a new callback that would
> calculate/check cksum and set  RTE_MBUF_F_RX_*_CKSUM_* flags.
> That way it will HW/SW cksum will be transparent for the rest of l3fwd
> code.
>
> About cksums required: for LPM/FIB mode just IPv4 cksum seems enough.
> For EM we probably need L4 cksum too, though not sure is it really needed.
> Wonder what other people think here?
>
>  >  struct mbuf_table {
> >       uint16_t len;
> > @@ -210,6 +212,10 @@ em_main_loop(__rte_unused void *dummy);
> >  int
> >  lpm_main_loop(__rte_unused void *dummy);
> >
> > +int
> > +check_software_cksum(struct rte_mbuf **pkts_burst,
> > +struct rte_mbuf **pkts_burst_to_send, int nb_rx);
> > +
> >  int
> >  fib_main_loop(__rte_unused void *dummy);
> >
> > diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
> > index 232b606b54..ecaf323943 100644
> > --- a/examples/l3fwd/l3fwd_lpm.c
> > +++ b/examples/l3fwd/l3fwd_lpm.c
> > @@ -26,6 +26,7 @@
> >  #include <rte_udp.h>
> >  #include <rte_lpm.h>
> >  #include <rte_lpm6.h>
> > +#include <rte_net.h>
> >
> >  #include "l3fwd.h"
> >  #include "l3fwd_event.h"
> > @@ -139,16 +140,65 @@ lpm_get_dst_port_with_ipv4(const struct lcore_conf
> *qconf, struct rte_mbuf *pkt,
> >  #include "l3fwd_lpm.h"
> >  #endif
> >
> > +
> > +int check_software_cksum(struct rte_mbuf **pkts_burst,
> > +struct rte_mbuf **pkts_burst_to_send, int nb_rx)
> > +{
> > +     int j;
> > +     int i = 0;
> > +     struct rte_net_hdr_lens hdr_lens;
> > +     struct rte_ipv4_hdr *ipv4_hdr;
> > +     void *l3_hdr;
> > +     void *l4_hdr;
> > +     rte_be16_t prev_cksum;
> > +     int dropped_pkts_udp_tcp = 0;
> > +     int dropped_pkts_ipv4 = 0;
> > +     bool dropped;
> > +     for (j = 0; j < nb_rx; j++) {
> > +             dropped = false;
> > +             rte_net_get_ptype(pkts_burst[j], &hdr_lens,
> RTE_PTYPE_ALL_MASK);
> > +             l3_hdr = rte_pktmbuf_mtod_offset(pkts_burst[j],
> > +             void *, hdr_lens.l2_len);
> > +             l4_hdr = rte_pktmbuf_mtod_offset(pkts_burst[j],
> > +             void *, hdr_lens.l2_len + hdr_lens.l3_len);
> > +             ipv4_hdr = l3_hdr;
> > +             prev_cksum = ipv4_hdr->hdr_checksum;
> > +             ipv4_hdr->hdr_checksum = 0;
> > +             ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
> > +
> > +             if (l3_sft_cksum && prev_cksum != ipv4_hdr->hdr_checksum) {
> > +                     rte_pktmbuf_free(pkts_burst[j]);
> > +                     dropped_pkts_ipv4++;
> > +                     dropped = true;
> > +             } else if (l4_sft_cksum &&
> > +                             rte_ipv4_udptcp_cksum_verify
> > +                             (l3_hdr, l4_hdr) != 0) {
> > +
> > +                     rte_pktmbuf_free(pkts_burst[j]);
> > +                     dropped_pkts_udp_tcp++;
> > +                     dropped = true;
> > +             }
> > +             if (dropped == false) {
> > +                     pkts_burst_to_send[i] = pkts_burst[j];
> > +                     i++;
> > +             }
> > +
> > +     }
> > +     return dropped_pkts_udp_tcp+dropped_pkts_ipv4;
> > +}
> > +
> >  /* main processing loop */
> >  int
> >  lpm_main_loop(__rte_unused void *dummy)
> >  {
> >       struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> > +     struct rte_mbuf *pkts_burst_to_send[MAX_PKT_BURST];
> >       unsigned lcore_id;
> >       uint64_t prev_tsc, diff_tsc, cur_tsc;
> >       int i, nb_rx;
> >       uint16_t portid;
> >       uint8_t queueid;
> > +     int dropped;
> >       struct lcore_conf *qconf;
> >       const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
> >               US_PER_S * BURST_TX_DRAIN_US;
> > @@ -209,19 +259,35 @@ lpm_main_loop(__rte_unused void *dummy)
> >                       if (nb_rx == 0)
> >                               continue;
> >
> > +                     if (l3_sft_cksum || l4_sft_cksum) {
> > +                             dropped = check_software_cksum(pkts_burst,
> > +                             pkts_burst_to_send,     nb_rx);
> > +
> > +                             nb_rx = nb_rx-dropped;
> > +                     }
> > +
> > +
> >  #if defined RTE_ARCH_X86 || defined __ARM_NEON \
> >                        || defined RTE_ARCH_PPC_64
> > +             if (l3_sft_cksum == false && l4_sft_cksum == false)
> >                       l3fwd_lpm_send_packets(nb_rx, pkts_burst,
> >                                               portid, qconf);
> > +             else
> > +                     l3fwd_lpm_send_packets(nb_rx, pkts_burst_to_send,
> > +                                             portid, qconf);
> > +
> >  #else
> > -                     l3fwd_lpm_no_opt_send_packets(nb_rx, pkts_burst,
> > +                     if (l3_sft_cksum == false && l4_sft_cksum == false)
> > +                             l3fwd_lpm_no_opt_send_packets(nb_rx,
> pkts_burst,
> >                                                       portid, qconf);
> > +                     else
> > +                             l3fwd_lpm_no_opt_send_packets(nb_rx,
> > +                             pkts_burst_to_send, portid, qconf);
> > +
> >  #endif /* X86 */
> >               }
> > -
> >               cur_tsc = rte_rdtsc();
> >       }
> > -
> >       return 0;
> >  }
> >
> > diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
> > index 00ac267af1..a54aca070d 100644
> > --- a/examples/l3fwd/main.c
> > +++ b/examples/l3fwd/main.c
> > @@ -61,6 +61,9 @@ static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
> >  /**< Ports set in promiscuous mode off by default. */
> >  static int promiscuous_on;
> >
> > +bool l3_sft_cksum;
> > +bool l4_sft_cksum;
> > +
> >  /* Select Longest-Prefix, Exact match or Forwarding Information Base. */
> >  enum L3FWD_LOOKUP_MODE {
> >       L3FWD_LOOKUP_DEFAULT,
> > @@ -123,7 +126,6 @@ static struct rte_eth_conf port_conf = {
> >               .mq_mode = ETH_MQ_RX_RSS,
> >               .max_rx_pkt_len = RTE_ETHER_MAX_LEN,
> >               .split_hdr_size = 0,
> > -             .offloads = DEV_RX_OFFLOAD_CHECKSUM,
> >       },
> >       .rx_adv_conf = {
> >               .rss_conf = {
> > @@ -981,6 +983,7 @@ prepare_ptype_parser(uint16_t portid, uint16_t
> queueid)
> >       return 0;
> >  }
> >
> > +
> >  static void
> >  l3fwd_poll_resource_setup(void)
> >  {
> > @@ -993,7 +996,8 @@ l3fwd_poll_resource_setup(void)
> >       unsigned int nb_ports;
> >       unsigned int lcore_id;
> >       int ret;
> > -
> > +     l3_sft_cksum = false;
> > +     l4_sft_cksum = false;
> >       if (check_lcore_params() < 0)
> >               rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
> >
> > @@ -1034,11 +1038,34 @@ l3fwd_poll_resource_setup(void)
> >                       rte_exit(EXIT_FAILURE,
> >                               "Error during getting device (port %u)
> info: %s\n",
> >                               portid, strerror(-ret));
> > -
> >               if (dev_info.tx_offload_capa &
> DEV_TX_OFFLOAD_MBUF_FAST_FREE)
> >                       local_port_conf.txmode.offloads |=
> >                               DEV_TX_OFFLOAD_MBUF_FAST_FREE;
> >
> > +             if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
> > +                     local_port_conf.rxmode.offloads |=
> > +                     DEV_RX_OFFLOAD_IPV4_CKSUM;
> > +             else {
> > +                     l3_sft_cksum = true;
> > +                     printf("WARNING: IPV4 checksum offload not
> available.\n");
> > +                     }
> > +
> > +             if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
> > +                     local_port_conf.rxmode.offloads |=
> > +                             DEV_RX_OFFLOAD_UDP_CKSUM;
> > +             else {
> > +                     l4_sft_cksum = true;
> > +                     printf("WARNING: UDP checksum offload not
> available.\n");
> > +             }
> > +
> > +             if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
> > +                     local_port_conf.rxmode.offloads |=
> > +                             DEV_RX_OFFLOAD_TCP_CKSUM;
> > +             else {
> > +                     l4_sft_cksum = true;
> > +                     printf("WARNING: TCP checksum offload not
> available.\n");
> > +             }
> > +
> >               local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
> >                       dev_info.flow_type_rss_offloads;
> >
> > --
> > 2.25.1
>
>
Usama Nadeem Nov. 16, 2021, 5:20 a.m. UTC | #6
Hi Medvedkin, Vladimir,


Thank you for your suggestions. Two counters aren't really necessary. We
also don't need the "dropped" variable. "continue" can also be used to
implement logic. We will update the logic in the next patch version.

This patch supports only IPV4 packets in LPM. It does not support other
lookup methods, neither does it support IPV6 packets for now. If the
current patch is satisfactory, we intend to begin work on those as well.

Regarding perf drop, I will submit a new version of the patch, containing
the callback.


On Thu, Nov 4, 2021 at 9:19 PM Medvedkin, Vladimir <
vladimir.medvedkin@intel.com> wrote:

> Hi Usama,
>
> On 04/11/2021 12:11, Walsh, Conor wrote:
> >> From: dev <dev-bounces@dpdk.org> On Behalf Of Usama Nadeem
> >> Sent: Thursday 14 October 2021 19:43
> >> To: thomas@monjalon.net
> >> Cc: dev@dpdk.org; Usama Nadeem <usama.nadeem@emumba.com>
> >> Subject: [dpdk-dev] [PATCH v4] examples/l3fwd: ipv4 and udp/tcp cksum
> >> verification through software
> >>
> >> checks if ipv4 and udptcp cksum offload capability available
> >> If not available, cksum is verified through software
> >> If cksum is corrupt, packet is dropped, rest of the packets
> >> are forwarded back.
> >>
> >> Bugzilla ID:545
> >> Signed-off-by: Usama Nadeem <usama.nadeem@emumba.com>
> >> ---
> >
> > Hi Usama,
> >
> > This should be done in a generic way that allows all the lookup methods
> to support it not just LPM.
> > check_software_cksum should go in a common file and be called from LPM,
> FIB and possibly EM.
> >
> > Thanks,
> > Conor.
> >
> >>   examples/l3fwd/l3fwd.h     |  6 ++++
> >>   examples/l3fwd/l3fwd_lpm.c | 72
> >> ++++++++++++++++++++++++++++++++++++--
> >>   examples/l3fwd/main.c      | 33 +++++++++++++++--
> >>   3 files changed, 105 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
> >> index a808d60247..c2c21a91fb 100644
> >> --- a/examples/l3fwd/l3fwd.h
> >> +++ b/examples/l3fwd/l3fwd.h
> >> @@ -55,6 +55,8 @@
> >>   #define L3FWD_HASH_ENTRIES         (1024*1024*1)
> >>   #endif
> >>   #define HASH_ENTRY_NUMBER_DEFAULT  4
> >> +extern bool l3_sft_cksum;
> >> +extern bool l4_sft_cksum;
> >>
> >>   struct mbuf_table {
> >>      uint16_t len;
> >> @@ -210,6 +212,10 @@ em_main_loop(__rte_unused void *dummy);
> >>   int
> >>   lpm_main_loop(__rte_unused void *dummy);
> >>
> >> +int
> >> +check_software_cksum(struct rte_mbuf **pkts_burst,
> >> +struct rte_mbuf **pkts_burst_to_send, int nb_rx);
> >> +
> >>   int
> >>   fib_main_loop(__rte_unused void *dummy);
> >>
> >> diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
> >> index 232b606b54..ecaf323943 100644
> >> --- a/examples/l3fwd/l3fwd_lpm.c
> >> +++ b/examples/l3fwd/l3fwd_lpm.c
> >> @@ -26,6 +26,7 @@
> >>   #include <rte_udp.h>
> >>   #include <rte_lpm.h>
> >>   #include <rte_lpm6.h>
> >> +#include <rte_net.h>
> >>
> >>   #include "l3fwd.h"
> >>   #include "l3fwd_event.h"
> >> @@ -139,16 +140,65 @@ lpm_get_dst_port_with_ipv4(const struct
> >> lcore_conf *qconf, struct rte_mbuf *pkt,
> >>   #include "l3fwd_lpm.h"
> >>   #endif
> >>
> >> +
> >> +int check_software_cksum(struct rte_mbuf **pkts_burst,
> >> +struct rte_mbuf **pkts_burst_to_send, int nb_rx)
> >> +{
> >> +    int j;
> >> +    int i = 0;
> >> +    struct rte_net_hdr_lens hdr_lens;
> >> +    struct rte_ipv4_hdr *ipv4_hdr;
> >> +    void *l3_hdr;
> >> +    void *l4_hdr;
> >> +    rte_be16_t prev_cksum;
> >> +    int dropped_pkts_udp_tcp = 0;
> >> +    int dropped_pkts_ipv4 = 0;
>
> Why do you need two separate counters if you eventually sum them up?
>
> >> +    bool dropped;
> >> +    for (j = 0; j < nb_rx; j++) {
> >> +            dropped = false;
> >> +            rte_net_get_ptype(pkts_burst[j], &hdr_lens,
> >> RTE_PTYPE_ALL_MASK);
> >> +            l3_hdr = rte_pktmbuf_mtod_offset(pkts_burst[j],
> >> +            void *, hdr_lens.l2_len);
> >> +            l4_hdr = rte_pktmbuf_mtod_offset(pkts_burst[j],
> >> +            void *, hdr_lens.l2_len + hdr_lens.l3_len);
>
> Here hdr_lens.l3_len could be uninitialized, for example in the case of
> an MPLS packet.
>
> >> +            ipv4_hdr = l3_hdr;
> >> +            prev_cksum = ipv4_hdr->hdr_checksum;
>
> it could be non IPv4 packet.
>
> >> +            ipv4_hdr->hdr_checksum = 0;
> >> +            ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
>
> same here and below, it can be IPv6 for example.
>
> >> +
> >> +            if (l3_sft_cksum && prev_cksum != ipv4_hdr-
> >>> hdr_checksum) {
> >> +                    rte_pktmbuf_free(pkts_burst[j]);
> >> +                    dropped_pkts_ipv4++;
> >> +                    dropped = true;
>
> Do you need the "dropped" value + the final if statement at all? Maybe
> it's better to just
> ...
>      continue;
> }
> here...
>
> >> +            } else if (l4_sft_cksum &&
> >> +                            rte_ipv4_udptcp_cksum_verify
> >> +                            (l3_hdr, l4_hdr) != 0) {
> >> +
> >> +                    rte_pktmbuf_free(pkts_burst[j]);
> >> +                    dropped_pkts_udp_tcp++;
> >> +                    dropped = true;
> >> +            }
> >> +            if (dropped == false) {
> >> +                    pkts_burst_to_send[i] = pkts_burst[j];
> >> +                    i++;
>
> ...and execute this code unconditionally?
>
> >> +            }
> >> +
> >> +    }
> >> +    return dropped_pkts_udp_tcp+dropped_pkts_ipv4;
> >> +}
> >> +
> >>   /* main processing loop */
> >>   int
> >>   lpm_main_loop(__rte_unused void *dummy)
> >>   {
> >>      struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> >> +    struct rte_mbuf *pkts_burst_to_send[MAX_PKT_BURST];
> >>      unsigned lcore_id;
> >>      uint64_t prev_tsc, diff_tsc, cur_tsc;
> >>      int i, nb_rx;
> >>      uint16_t portid;
> >>      uint8_t queueid;
> >> +    int dropped;
> >>      struct lcore_conf *qconf;
> >>      const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
> >>              US_PER_S * BURST_TX_DRAIN_US;
> >> @@ -209,19 +259,35 @@ lpm_main_loop(__rte_unused void *dummy)
> >>                      if (nb_rx == 0)
> >>                              continue;
> >>
> >> +                    if (l3_sft_cksum || l4_sft_cksum) {
> >> +                            dropped =
> >> check_software_cksum(pkts_burst,
>
> You are calling this function right after rte_eth_rx_burst(), so
> pkts_burst[] can have any possible packet proto, but the current
> check_software_cksum() implementation is purely IPv4.
>
> >> +                            pkts_burst_to_send,     nb_rx);
> >> +
> >> +                            nb_rx = nb_rx-dropped;
> >> +                    }
> >> +
> >> +
> >>   #if defined RTE_ARCH_X86 || defined __ARM_NEON \
> >>                       || defined RTE_ARCH_PPC_64
> >> +            if (l3_sft_cksum == false && l4_sft_cksum == false)
> >>                      l3fwd_lpm_send_packets(nb_rx, pkts_burst,
> >>                                              portid, qconf);
> >> +            else
> >> +                    l3fwd_lpm_send_packets(nb_rx,
> >> pkts_burst_to_send,
> >> +                                            portid, qconf);
> >> +
> >>   #else
> >> -                    l3fwd_lpm_no_opt_send_packets(nb_rx,
> >> pkts_burst,
> >> +                    if (l3_sft_cksum == false && l4_sft_cksum == false)
>
> While those if statements are perfectly predictable, it is still better
> to avoid branching in hot path if possible. You can implement
> check_software_cksum() to work with a single pkts_burst[] modifying it
> in place and throw away pkts_burst_to_send[] and corresponding branches.
>
> >> +                            l3fwd_lpm_no_opt_send_packets(nb_rx,
> >> pkts_burst,
> >>                                                      portid, qconf);
> >> +                    else
> >> +                            l3fwd_lpm_no_opt_send_packets(nb_rx,
> >> +                            pkts_burst_to_send, portid, qconf);
> >> +
> >>   #endif /* X86 */
> >>              }
> >> -
> >>              cur_tsc = rte_rdtsc();
> >>      }
> >> -
> >>      return 0;
> >>   }
> >>
> >> diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
> >> index 00ac267af1..a54aca070d 100644
> >> --- a/examples/l3fwd/main.c
> >> +++ b/examples/l3fwd/main.c
> >> @@ -61,6 +61,9 @@ static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
> >>   /**< Ports set in promiscuous mode off by default. */
> >>   static int promiscuous_on;
> >>
> >> +bool l3_sft_cksum;
> >> +bool l4_sft_cksum;
> >> +
> >>   /* Select Longest-Prefix, Exact match or Forwarding Information Base.
> */
> >>   enum L3FWD_LOOKUP_MODE {
> >>      L3FWD_LOOKUP_DEFAULT,
> >> @@ -123,7 +126,6 @@ static struct rte_eth_conf port_conf = {
> >>              .mq_mode = ETH_MQ_RX_RSS,
> >>              .max_rx_pkt_len = RTE_ETHER_MAX_LEN,
> >>              .split_hdr_size = 0,
> >> -            .offloads = DEV_RX_OFFLOAD_CHECKSUM,
> >>      },
> >>      .rx_adv_conf = {
> >>              .rss_conf = {
> >> @@ -981,6 +983,7 @@ prepare_ptype_parser(uint16_t portid, uint16_t
> >> queueid)
> >>      return 0;
> >>   }
> >>
> >> +
> >>   static void
> >>   l3fwd_poll_resource_setup(void)
> >>   {
> >> @@ -993,7 +996,8 @@ l3fwd_poll_resource_setup(void)
> >>      unsigned int nb_ports;
> >>      unsigned int lcore_id;
> >>      int ret;
> >> -
> >> +    l3_sft_cksum = false;
> >> +    l4_sft_cksum = false;
> >>      if (check_lcore_params() < 0)
> >>              rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
> >>
> >> @@ -1034,11 +1038,34 @@ l3fwd_poll_resource_setup(void)
> >>                      rte_exit(EXIT_FAILURE,
> >>                              "Error during getting device (port %u)
> info:
> >> %s\n",
> >>                              portid, strerror(-ret));
> >> -
> >>              if (dev_info.tx_offload_capa &
> >> DEV_TX_OFFLOAD_MBUF_FAST_FREE)
> >>                      local_port_conf.txmode.offloads |=
> >>                              DEV_TX_OFFLOAD_MBUF_FAST_FREE;
> >>
> >> +            if (dev_info.rx_offload_capa &
> >> DEV_RX_OFFLOAD_IPV4_CKSUM)
> >> +                    local_port_conf.rxmode.offloads |=
> >> +                    DEV_RX_OFFLOAD_IPV4_CKSUM;
> >> +            else {
> >> +                    l3_sft_cksum = true;
> >> +                    printf("WARNING: IPV4 checksum offload not
> >> available.\n");
> >> +                    }
> >> +
> >> +            if (dev_info.rx_offload_capa &
> >> DEV_RX_OFFLOAD_UDP_CKSUM)
> >> +                    local_port_conf.rxmode.offloads |=
> >> +                            DEV_RX_OFFLOAD_UDP_CKSUM;
> >> +            else {
> >> +                    l4_sft_cksum = true;
> >> +                    printf("WARNING: UDP checksum offload not
> >> available.\n");
> >> +            }
> >> +
> >> +            if (dev_info.rx_offload_capa &
> >> DEV_RX_OFFLOAD_TCP_CKSUM)
> >> +                    local_port_conf.rxmode.offloads |=
> >> +                            DEV_RX_OFFLOAD_TCP_CKSUM;
> >> +            else {
> >> +                    l4_sft_cksum = true;
> >> +                    printf("WARNING: TCP checksum offload not
> >> available.\n");
> >> +            }
> >> +
> >>              local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
> >>                      dev_info.flow_type_rss_offloads;
> >>
> >> --
> >> 2.25.1
> >
>
> generalizing:
> 1. As Conor said earlier, make it generic, not only LPM and not only for
> run-to-completion mode, it should be done for event mode as well.
> 2. Function must work not only with IPv4.
> 3. There should be no performance degradation if NIC supports CSUM offload.
>
> --
> Regards,
> Vladimir
>
Usama Nadeem Nov. 16, 2021, 5:21 a.m. UTC | #7
Hi Walsh, Conor <https://patches.dpdk.org/project/dpdk/list/?submitter=1935>,

This, I agree, should be done for LPM, FIB, and EM. Only LPM is completed
in this patch. For the time being, you can think about this one. I will
look into FIB and EM in separate patches.

Thanks
-usama

On Thu, Nov 4, 2021 at 4:11 PM Walsh, Conor <conor.walsh@intel.com> wrote:

> > From: dev <dev-bounces@dpdk.org> On Behalf Of Usama Nadeem
> > Sent: Thursday 14 October 2021 19:43
> > To: thomas@monjalon.net
> > Cc: dev@dpdk.org; Usama Nadeem <usama.nadeem@emumba.com>
> > Subject: [dpdk-dev] [PATCH v4] examples/l3fwd: ipv4 and udp/tcp cksum
> > verification through software
> >
> > checks if ipv4 and udptcp cksum offload capability available
> > If not available, cksum is verified through software
> > If cksum is corrupt, packet is dropped, rest of the packets
> > are forwarded back.
> >
> > Bugzilla ID:545
> > Signed-off-by: Usama Nadeem <usama.nadeem@emumba.com>
> > ---
>
> Hi Usama,
>
> This should be done in a generic way that allows all the lookup methods to
> support it not just LPM.
> check_software_cksum should go in a common file and be called from LPM,
> FIB and possibly EM.
>
> Thanks,
> Conor.
>
> >  examples/l3fwd/l3fwd.h     |  6 ++++
> >  examples/l3fwd/l3fwd_lpm.c | 72
> > ++++++++++++++++++++++++++++++++++++--
> >  examples/l3fwd/main.c      | 33 +++++++++++++++--
> >  3 files changed, 105 insertions(+), 6 deletions(-)
> >
> > diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
> > index a808d60247..c2c21a91fb 100644
> > --- a/examples/l3fwd/l3fwd.h
> > +++ b/examples/l3fwd/l3fwd.h
> > @@ -55,6 +55,8 @@
> >  #define L3FWD_HASH_ENTRIES           (1024*1024*1)
> >  #endif
> >  #define HASH_ENTRY_NUMBER_DEFAULT    4
> > +extern bool l3_sft_cksum;
> > +extern bool l4_sft_cksum;
> >
> >  struct mbuf_table {
> >       uint16_t len;
> > @@ -210,6 +212,10 @@ em_main_loop(__rte_unused void *dummy);
> >  int
> >  lpm_main_loop(__rte_unused void *dummy);
> >
> > +int
> > +check_software_cksum(struct rte_mbuf **pkts_burst,
> > +struct rte_mbuf **pkts_burst_to_send, int nb_rx);
> > +
> >  int
> >  fib_main_loop(__rte_unused void *dummy);
> >
> > diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
> > index 232b606b54..ecaf323943 100644
> > --- a/examples/l3fwd/l3fwd_lpm.c
> > +++ b/examples/l3fwd/l3fwd_lpm.c
> > @@ -26,6 +26,7 @@
> >  #include <rte_udp.h>
> >  #include <rte_lpm.h>
> >  #include <rte_lpm6.h>
> > +#include <rte_net.h>
> >
> >  #include "l3fwd.h"
> >  #include "l3fwd_event.h"
> > @@ -139,16 +140,65 @@ lpm_get_dst_port_with_ipv4(const struct
> > lcore_conf *qconf, struct rte_mbuf *pkt,
> >  #include "l3fwd_lpm.h"
> >  #endif
> >
> > +
> > +int check_software_cksum(struct rte_mbuf **pkts_burst,
> > +struct rte_mbuf **pkts_burst_to_send, int nb_rx)
> > +{
> > +     int j;
> > +     int i = 0;
> > +     struct rte_net_hdr_lens hdr_lens;
> > +     struct rte_ipv4_hdr *ipv4_hdr;
> > +     void *l3_hdr;
> > +     void *l4_hdr;
> > +     rte_be16_t prev_cksum;
> > +     int dropped_pkts_udp_tcp = 0;
> > +     int dropped_pkts_ipv4 = 0;
> > +     bool dropped;
> > +     for (j = 0; j < nb_rx; j++) {
> > +             dropped = false;
> > +             rte_net_get_ptype(pkts_burst[j], &hdr_lens,
> > RTE_PTYPE_ALL_MASK);
> > +             l3_hdr = rte_pktmbuf_mtod_offset(pkts_burst[j],
> > +             void *, hdr_lens.l2_len);
> > +             l4_hdr = rte_pktmbuf_mtod_offset(pkts_burst[j],
> > +             void *, hdr_lens.l2_len + hdr_lens.l3_len);
> > +             ipv4_hdr = l3_hdr;
> > +             prev_cksum = ipv4_hdr->hdr_checksum;
> > +             ipv4_hdr->hdr_checksum = 0;
> > +             ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
> > +
> > +             if (l3_sft_cksum && prev_cksum != ipv4_hdr-
> > >hdr_checksum) {
> > +                     rte_pktmbuf_free(pkts_burst[j]);
> > +                     dropped_pkts_ipv4++;
> > +                     dropped = true;
> > +             } else if (l4_sft_cksum &&
> > +                             rte_ipv4_udptcp_cksum_verify
> > +                             (l3_hdr, l4_hdr) != 0) {
> > +
> > +                     rte_pktmbuf_free(pkts_burst[j]);
> > +                     dropped_pkts_udp_tcp++;
> > +                     dropped = true;
> > +             }
> > +             if (dropped == false) {
> > +                     pkts_burst_to_send[i] = pkts_burst[j];
> > +                     i++;
> > +             }
> > +
> > +     }
> > +     return dropped_pkts_udp_tcp+dropped_pkts_ipv4;
> > +}
> > +
> >  /* main processing loop */
> >  int
> >  lpm_main_loop(__rte_unused void *dummy)
> >  {
> >       struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> > +     struct rte_mbuf *pkts_burst_to_send[MAX_PKT_BURST];
> >       unsigned lcore_id;
> >       uint64_t prev_tsc, diff_tsc, cur_tsc;
> >       int i, nb_rx;
> >       uint16_t portid;
> >       uint8_t queueid;
> > +     int dropped;
> >       struct lcore_conf *qconf;
> >       const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
> >               US_PER_S * BURST_TX_DRAIN_US;
> > @@ -209,19 +259,35 @@ lpm_main_loop(__rte_unused void *dummy)
> >                       if (nb_rx == 0)
> >                               continue;
> >
> > +                     if (l3_sft_cksum || l4_sft_cksum) {
> > +                             dropped =
> > check_software_cksum(pkts_burst,
> > +                             pkts_burst_to_send,     nb_rx);
> > +
> > +                             nb_rx = nb_rx-dropped;
> > +                     }
> > +
> > +
> >  #if defined RTE_ARCH_X86 || defined __ARM_NEON \
> >                        || defined RTE_ARCH_PPC_64
> > +             if (l3_sft_cksum == false && l4_sft_cksum == false)
> >                       l3fwd_lpm_send_packets(nb_rx, pkts_burst,
> >                                               portid, qconf);
> > +             else
> > +                     l3fwd_lpm_send_packets(nb_rx,
> > pkts_burst_to_send,
> > +                                             portid, qconf);
> > +
> >  #else
> > -                     l3fwd_lpm_no_opt_send_packets(nb_rx,
> > pkts_burst,
> > +                     if (l3_sft_cksum == false && l4_sft_cksum == false)
> > +                             l3fwd_lpm_no_opt_send_packets(nb_rx,
> > pkts_burst,
> >                                                       portid, qconf);
> > +                     else
> > +                             l3fwd_lpm_no_opt_send_packets(nb_rx,
> > +                             pkts_burst_to_send, portid, qconf);
> > +
> >  #endif /* X86 */
> >               }
> > -
> >               cur_tsc = rte_rdtsc();
> >       }
> > -
> >       return 0;
> >  }
> >
> > diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
> > index 00ac267af1..a54aca070d 100644
> > --- a/examples/l3fwd/main.c
> > +++ b/examples/l3fwd/main.c
> > @@ -61,6 +61,9 @@ static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
> >  /**< Ports set in promiscuous mode off by default. */
> >  static int promiscuous_on;
> >
> > +bool l3_sft_cksum;
> > +bool l4_sft_cksum;
> > +
> >  /* Select Longest-Prefix, Exact match or Forwarding Information Base. */
> >  enum L3FWD_LOOKUP_MODE {
> >       L3FWD_LOOKUP_DEFAULT,
> > @@ -123,7 +126,6 @@ static struct rte_eth_conf port_conf = {
> >               .mq_mode = ETH_MQ_RX_RSS,
> >               .max_rx_pkt_len = RTE_ETHER_MAX_LEN,
> >               .split_hdr_size = 0,
> > -             .offloads = DEV_RX_OFFLOAD_CHECKSUM,
> >       },
> >       .rx_adv_conf = {
> >               .rss_conf = {
> > @@ -981,6 +983,7 @@ prepare_ptype_parser(uint16_t portid, uint16_t
> > queueid)
> >       return 0;
> >  }
> >
> > +
> >  static void
> >  l3fwd_poll_resource_setup(void)
> >  {
> > @@ -993,7 +996,8 @@ l3fwd_poll_resource_setup(void)
> >       unsigned int nb_ports;
> >       unsigned int lcore_id;
> >       int ret;
> > -
> > +     l3_sft_cksum = false;
> > +     l4_sft_cksum = false;
> >       if (check_lcore_params() < 0)
> >               rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
> >
> > @@ -1034,11 +1038,34 @@ l3fwd_poll_resource_setup(void)
> >                       rte_exit(EXIT_FAILURE,
> >                               "Error during getting device (port %u)
> info:
> > %s\n",
> >                               portid, strerror(-ret));
> > -
> >               if (dev_info.tx_offload_capa &
> > DEV_TX_OFFLOAD_MBUF_FAST_FREE)
> >                       local_port_conf.txmode.offloads |=
> >                               DEV_TX_OFFLOAD_MBUF_FAST_FREE;
> >
> > +             if (dev_info.rx_offload_capa &
> > DEV_RX_OFFLOAD_IPV4_CKSUM)
> > +                     local_port_conf.rxmode.offloads |=
> > +                     DEV_RX_OFFLOAD_IPV4_CKSUM;
> > +             else {
> > +                     l3_sft_cksum = true;
> > +                     printf("WARNING: IPV4 checksum offload not
> > available.\n");
> > +                     }
> > +
> > +             if (dev_info.rx_offload_capa &
> > DEV_RX_OFFLOAD_UDP_CKSUM)
> > +                     local_port_conf.rxmode.offloads |=
> > +                             DEV_RX_OFFLOAD_UDP_CKSUM;
> > +             else {
> > +                     l4_sft_cksum = true;
> > +                     printf("WARNING: UDP checksum offload not
> > available.\n");
> > +             }
> > +
> > +             if (dev_info.rx_offload_capa &
> > DEV_RX_OFFLOAD_TCP_CKSUM)
> > +                     local_port_conf.rxmode.offloads |=
> > +                             DEV_RX_OFFLOAD_TCP_CKSUM;
> > +             else {
> > +                     l4_sft_cksum = true;
> > +                     printf("WARNING: TCP checksum offload not
> > available.\n");
> > +             }
> > +
> >               local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
> >                       dev_info.flow_type_rss_offloads;
> >
> > --
> > 2.25.1
>
>
diff mbox series

Patch

diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
index a808d60247..c2c21a91fb 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -55,6 +55,8 @@ 
 #define L3FWD_HASH_ENTRIES		(1024*1024*1)
 #endif
 #define HASH_ENTRY_NUMBER_DEFAULT	4
+extern bool l3_sft_cksum;
+extern bool l4_sft_cksum;
 
 struct mbuf_table {
 	uint16_t len;
@@ -210,6 +212,10 @@  em_main_loop(__rte_unused void *dummy);
 int
 lpm_main_loop(__rte_unused void *dummy);
 
+int
+check_software_cksum(struct rte_mbuf **pkts_burst,
+struct rte_mbuf **pkts_burst_to_send, int nb_rx);
+
 int
 fib_main_loop(__rte_unused void *dummy);
 
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index 232b606b54..ecaf323943 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -26,6 +26,7 @@ 
 #include <rte_udp.h>
 #include <rte_lpm.h>
 #include <rte_lpm6.h>
+#include <rte_net.h>
 
 #include "l3fwd.h"
 #include "l3fwd_event.h"
@@ -139,16 +140,65 @@  lpm_get_dst_port_with_ipv4(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
 #include "l3fwd_lpm.h"
 #endif
 
+
+/*
+ * Software fallback for RX checksum offload. For each IPv4 packet, verify
+ * the header checksum (l3_sft_cksum) and, for unfragmented UDP/TCP, the L4
+ * checksum (l4_sft_cksum); other packets are kept unchecked. Bad packets are
+ * freed, good ones copied in order to pkts_burst_to_send. Returns the drop
+ * count. NOTE: assumes headers and L4 data are contiguous in the mbuf.
+ */
+int check_software_cksum(struct rte_mbuf **pkts_burst,
+struct rte_mbuf **pkts_burst_to_send, int nb_rx)
+{
+	struct rte_net_hdr_lens hdr_lens;
+	struct rte_ipv4_hdr *ipv4_hdr;
+	rte_be16_t prev_cksum;
+	uint32_t ptype, l4;
+	int j, kept = 0;
+	bool bad;
+
+	for (j = 0; j < nb_rx; j++) {
+		bad = false;
+		ptype = rte_net_get_ptype(pkts_burst[j], &hdr_lens,
+				RTE_PTYPE_ALL_MASK);
+		/* l3_len is only set when an L3 header was parsed. */
+		if (RTE_ETH_IS_IPV4_HDR(ptype)) {
+			ipv4_hdr = rte_pktmbuf_mtod_offset(pkts_burst[j],
+				struct rte_ipv4_hdr *, hdr_lens.l2_len);
+			l4 = ptype & RTE_PTYPE_L4_MASK;
+			if (l3_sft_cksum) {
+				/* Checksum a zeroed field, then restore it. */
+				prev_cksum = ipv4_hdr->hdr_checksum;
+				ipv4_hdr->hdr_checksum = 0;
+				bad = rte_ipv4_cksum(ipv4_hdr) != prev_cksum;
+				ipv4_hdr->hdr_checksum = prev_cksum;
+			}
+			if (!bad && l4_sft_cksum &&
+			    (l4 == RTE_PTYPE_L4_UDP || l4 == RTE_PTYPE_L4_TCP))
+				bad = rte_ipv4_udptcp_cksum_verify(ipv4_hdr,
+					(char *)ipv4_hdr + hdr_lens.l3_len) != 0;
+		}
+		if (bad)
+			rte_pktmbuf_free(pkts_burst[j]);
+		else
+			pkts_burst_to_send[kept++] = pkts_burst[j];
+	}
+	return nb_rx - kept;
+}
+
 /* main processing loop */
 int
 lpm_main_loop(__rte_unused void *dummy)
 {
 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+	struct rte_mbuf *pkts_burst_to_send[MAX_PKT_BURST];
 	unsigned lcore_id;
 	uint64_t prev_tsc, diff_tsc, cur_tsc;
 	int i, nb_rx;
 	uint16_t portid;
 	uint8_t queueid;
+	int dropped;
 	struct lcore_conf *qconf;
 	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
 		US_PER_S * BURST_TX_DRAIN_US;
@@ -209,19 +259,35 @@  lpm_main_loop(__rte_unused void *dummy)
 			if (nb_rx == 0)
 				continue;
 
+			if (l3_sft_cksum || l4_sft_cksum) {
+				dropped = check_software_cksum(pkts_burst,
+				pkts_burst_to_send,	nb_rx);
+
+				nb_rx = nb_rx-dropped;
+			}
+
+
 #if defined RTE_ARCH_X86 || defined __ARM_NEON \
 			 || defined RTE_ARCH_PPC_64
+		if (l3_sft_cksum == false && l4_sft_cksum == false)
 			l3fwd_lpm_send_packets(nb_rx, pkts_burst,
 						portid, qconf);
+		else
+			l3fwd_lpm_send_packets(nb_rx, pkts_burst_to_send,
+						portid, qconf);
+
 #else
-			l3fwd_lpm_no_opt_send_packets(nb_rx, pkts_burst,
+			if (l3_sft_cksum == false && l4_sft_cksum == false)
+				l3fwd_lpm_no_opt_send_packets(nb_rx, pkts_burst,
 							portid, qconf);
+			else
+				l3fwd_lpm_no_opt_send_packets(nb_rx,
+				pkts_burst_to_send, portid, qconf);
+
 #endif /* X86 */
 		}
-
 		cur_tsc = rte_rdtsc();
 	}
-
 	return 0;
 }
 
diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
index 00ac267af1..a54aca070d 100644
--- a/examples/l3fwd/main.c
+++ b/examples/l3fwd/main.c
@@ -61,6 +61,9 @@  static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
 /**< Ports set in promiscuous mode off by default. */
 static int promiscuous_on;
 
+bool l3_sft_cksum;
+bool l4_sft_cksum;
+
 /* Select Longest-Prefix, Exact match or Forwarding Information Base. */
 enum L3FWD_LOOKUP_MODE {
 	L3FWD_LOOKUP_DEFAULT,
@@ -123,7 +126,6 @@  static struct rte_eth_conf port_conf = {
 		.mq_mode = ETH_MQ_RX_RSS,
 		.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
 		.split_hdr_size = 0,
-		.offloads = DEV_RX_OFFLOAD_CHECKSUM,
 	},
 	.rx_adv_conf = {
 		.rss_conf = {
@@ -981,6 +983,7 @@  prepare_ptype_parser(uint16_t portid, uint16_t queueid)
 	return 0;
 }
 
+
 static void
 l3fwd_poll_resource_setup(void)
 {
@@ -993,7 +996,8 @@  l3fwd_poll_resource_setup(void)
 	unsigned int nb_ports;
 	unsigned int lcore_id;
 	int ret;
-
+	l3_sft_cksum = false;
+	l4_sft_cksum = false;
 	if (check_lcore_params() < 0)
 		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
 
@@ -1034,11 +1038,34 @@  l3fwd_poll_resource_setup(void)
 			rte_exit(EXIT_FAILURE,
 				"Error during getting device (port %u) info: %s\n",
 				portid, strerror(-ret));
-
 		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
 			local_port_conf.txmode.offloads |=
 				DEV_TX_OFFLOAD_MBUF_FAST_FREE;
 
+		if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
+			local_port_conf.rxmode.offloads |=
+			DEV_RX_OFFLOAD_IPV4_CKSUM;
+		else {
+			l3_sft_cksum = true;
+			printf("WARNING: IPV4 checksum offload not available.\n");
+			}
+
+		if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
+			local_port_conf.rxmode.offloads |=
+				DEV_RX_OFFLOAD_UDP_CKSUM;
+		else {
+			l4_sft_cksum = true;
+			printf("WARNING: UDP checksum offload not available.\n");
+		}
+
+		if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
+			local_port_conf.rxmode.offloads |=
+				DEV_RX_OFFLOAD_TCP_CKSUM;
+		else {
+			l4_sft_cksum = true;
+			printf("WARNING: TCP checksum offload not available.\n");
+		}
+
 		local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
 			dev_info.flow_type_rss_offloads;