[15/21] net/cpfl: add AVX512 data path for single queue model

Message ID 20221223015558.3143279-16-mingxia.liu@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series add support for cpfl PMD in DPDK |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Liu, Mingxia Dec. 23, 2022, 1:55 a.m. UTC
  Add support for the AVX512 vector data path in the single queue model.

Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>
Signed-off-by: Mingxia Liu <mingxia.liu@intel.com>
---
 doc/guides/nics/cpfl.rst                | 24 +++++-
 drivers/net/cpfl/cpfl_ethdev.c          |  3 +-
 drivers/net/cpfl/cpfl_rxtx.c            | 85 +++++++++++++++++++++
 drivers/net/cpfl/cpfl_rxtx_vec_common.h | 99 +++++++++++++++++++++++++
 drivers/net/cpfl/meson.build            | 25 ++++++-
 5 files changed, 233 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/cpfl/cpfl_rxtx_vec_common.h
  

Patch

diff --git a/doc/guides/nics/cpfl.rst b/doc/guides/nics/cpfl.rst
index 064c69ba7d..489a2d6153 100644
--- a/doc/guides/nics/cpfl.rst
+++ b/doc/guides/nics/cpfl.rst
@@ -63,4 +63,26 @@  Runtime Config Options
 Driver compilation and testing
 ------------------------------
 
-Refer to the document :doc:`build_and_test` for details.
\ No newline at end of file
+Refer to the document :doc:`build_and_test` for details.
+
+Features
+--------
+
+Vector PMD
+~~~~~~~~~~
+
+Vector paths for Rx and Tx are selected automatically.
+The paths are chosen based on 2 conditions:
+
+- ``CPU``
+
+  On the x86 platform, the driver checks if the CPU supports AVX512.
+  If the CPU supports AVX512 and EAL argument ``--force-max-simd-bitwidth``
+  is set to 512, AVX512 paths will be chosen.
+
+- ``Offload features``
+
+  The supported HW offload features are described in the document cpfl.ini.
+  A value "P" means the offload feature is not supported by the vector path.
+  If any unsupported features are used, the cpfl vector PMD is disabled
+  and the scalar paths are chosen.
diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c
index f684d7cff5..5fe800f27c 100644
--- a/drivers/net/cpfl/cpfl_ethdev.c
+++ b/drivers/net/cpfl/cpfl_ethdev.c
@@ -111,7 +111,8 @@  cpfl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 		RTE_ETH_TX_OFFLOAD_TCP_CKSUM		|
 		RTE_ETH_TX_OFFLOAD_SCTP_CKSUM		|
 		RTE_ETH_TX_OFFLOAD_TCP_TSO		|
-		RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
+		RTE_ETH_TX_OFFLOAD_MULTI_SEGS		|
+		RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
 
 	dev_info->default_txconf = (struct rte_eth_txconf) {
 		.tx_free_thresh = CPFL_DEFAULT_TX_FREE_THRESH,
diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c
index 0e053f4434..63f474a79b 100644
--- a/drivers/net/cpfl/cpfl_rxtx.c
+++ b/drivers/net/cpfl/cpfl_rxtx.c
@@ -8,6 +8,7 @@ 
 
 #include "cpfl_ethdev.h"
 #include "cpfl_rxtx.h"
+#include "cpfl_rxtx_vec_common.h"
 
 static uint64_t
 cpfl_rx_offload_convert(uint64_t offload)
@@ -739,22 +740,106 @@  void
 cpfl_set_rx_function(struct rte_eth_dev *dev)
 {
 	struct idpf_vport *vport = dev->data->dev_private;
+#ifdef RTE_ARCH_X86
+	struct idpf_rx_queue *rxq;
+	int i;
+
+	/* Vector Rx needs every queue to qualify and SIMD width >= 128 bits. */
+	if (cpfl_rx_vec_dev_check_default(dev) == CPFL_VECTOR_PATH &&
+	    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+		vport->rx_vec_allowed = true;
+		/* Flag queries return 1 when set, < 0 on error: compare to 1. */
+		if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
+#ifdef CC_AVX512_SUPPORT
+			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
+			    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1 &&
+			    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512DQ) == 1)
+				vport->rx_use_avx512 = true;
+#else
+		PMD_DRV_LOG(NOTICE, "AVX512 is not supported in build env");
+#endif /* CC_AVX512_SUPPORT */
+	} else {
+		vport->rx_vec_allowed = false;
+	}
+
+	/* Split queue model always takes the scalar split-queue burst function. */
+	if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {
+		dev->rx_pkt_burst = idpf_splitq_recv_pkts;
+	} else {
+		if (vport->rx_vec_allowed) {
+			/* Walks rx_queues[], so the bound must be nb_rx_queues. */
+			for (i = 0; i < dev->data->nb_rx_queues; i++) {
+				rxq = dev->data->rx_queues[i];
+				(void)idpf_singleq_rx_vec_setup(rxq);
+			}
+#ifdef CC_AVX512_SUPPORT
+			if (vport->rx_use_avx512) {
+				dev->rx_pkt_burst = idpf_singleq_recv_pkts_avx512;
+				return;
+			}
+#endif /* CC_AVX512_SUPPORT */
+		}

+		dev->rx_pkt_burst = idpf_singleq_recv_pkts;
+	}
+#else
 	if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT)
 		dev->rx_pkt_burst = idpf_splitq_recv_pkts;
 	else
 		dev->rx_pkt_burst = idpf_singleq_recv_pkts;
+#endif /* RTE_ARCH_X86 */
 }
 
 void
 cpfl_set_tx_function(struct rte_eth_dev *dev)
 {
 	struct idpf_vport *vport = dev->data->dev_private;
+#ifdef RTE_ARCH_X86
+#ifdef CC_AVX512_SUPPORT
+	struct idpf_tx_queue *txq;
+	int i;
+#endif /* CC_AVX512_SUPPORT */
+
+	if (cpfl_tx_vec_dev_check_default(dev) == CPFL_VECTOR_PATH &&
+	    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+		vport->tx_vec_allowed = true;
+		if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
+#ifdef CC_AVX512_SUPPORT
+		{
+			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
+			    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1)
+				vport->tx_use_avx512 = true;
+			if (vport->tx_use_avx512) {
+				for (i = 0; i < dev->data->nb_tx_queues; i++) {
+					txq = dev->data->tx_queues[i];
+					idpf_tx_vec_setup_avx512(txq);
+				}
+			}
+		}
+#else
+		PMD_DRV_LOG(NOTICE,
+			    "AVX512 is not supported in build env");
+#endif /* CC_AVX512_SUPPORT */
+	} else {
+		vport->tx_vec_allowed = false;
+	}
+#endif /* RTE_ARCH_X86 */
 
 	if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {
 		dev->tx_pkt_burst = idpf_splitq_xmit_pkts;
 		dev->tx_pkt_prepare = idpf_prep_pkts;
 	} else {
+#ifdef RTE_ARCH_X86
+		if (vport->tx_vec_allowed) {
+#ifdef CC_AVX512_SUPPORT
+			if (vport->tx_use_avx512) {
+				dev->tx_pkt_burst = idpf_singleq_xmit_pkts_avx512;
+				dev->tx_pkt_prepare = idpf_prep_pkts;
+				return;
+			}
+#endif /* CC_AVX512_SUPPORT */
+		}
+#endif /* RTE_ARCH_X86 */
 		dev->tx_pkt_burst = idpf_singleq_xmit_pkts;
 		dev->tx_pkt_prepare = idpf_prep_pkts;
 	}
diff --git a/drivers/net/cpfl/cpfl_rxtx_vec_common.h b/drivers/net/cpfl/cpfl_rxtx_vec_common.h
new file mode 100644
index 0000000000..a411cf6a32
--- /dev/null
+++ b/drivers/net/cpfl/cpfl_rxtx_vec_common.h
@@ -0,0 +1,99 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _CPFL_RXTX_VEC_COMMON_H_
+#define _CPFL_RXTX_VEC_COMMON_H_
+#include <stdint.h>
+#include <ethdev_driver.h>
+#include <rte_malloc.h>
+
+#include "cpfl_ethdev.h"
+#include "cpfl_rxtx.h"
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+/* Value returned by the vector-path checks below when a queue/device qualifies. */
+#define CPFL_VECTOR_PATH		0
+/* NOTE(review): ICE_ prefix looks carried over from another driver; consider CPFL_. */
+/* Rx offloads that cpfl_rx_vec_queue_default() rejects for the vector path. */
+#define ICE_RX_NO_VECTOR_FLAGS (						\
+		RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM |	\
+		RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM | \
+		RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+/* Tx offloads that cpfl_tx_vec_queue_default() rejects for the vector path. */
+#define ICE_TX_NO_VECTOR_FLAGS (RTE_ETH_TX_OFFLOAD_TCP_TSO | RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
+
+/* Check one Rx queue: CPFL_VECTOR_PATH if it passes every test below, else -1. */
+static inline int
+cpfl_rx_vec_queue_default(struct idpf_rx_queue *rxq)
+{
+	if (rxq == NULL)
+		return -1;
+
+	/* The descriptor count must be a power of two. */
+	if (!rte_is_power_of_2(rxq->nb_rx_desc))
+		return -1;
+
+	/* Free threshold must cover one burst and divide the ring evenly. */
+	/* The burst test runs first; the modulo relies on a non-zero threshold. */
+	if (rxq->rx_free_thresh < IDPF_VPMD_RX_MAX_BURST ||
+	    (rxq->nb_rx_desc % rxq->rx_free_thresh) != 0)
+		return -1;
+
+	/* Any offload listed in ICE_RX_NO_VECTOR_FLAGS rejects the queue. */
+	return (rxq->offloads & ICE_RX_NO_VECTOR_FLAGS) != 0 ? -1 : CPFL_VECTOR_PATH;
+}
+
+/* Check one Tx queue: CPFL_VECTOR_PATH if it passes every test below, else -1. */
+static inline int
+cpfl_tx_vec_queue_default(struct idpf_tx_queue *txq)
+{
+	if (txq == NULL)
+		return -1;
+
+	/* rs_thresh must be at least one burst and a multiple of four. */
+	if (txq->rs_thresh < IDPF_VPMD_TX_MAX_BURST || (txq->rs_thresh & 3) != 0)
+		return -1;
+
+	/* Any offload listed in ICE_TX_NO_VECTOR_FLAGS rejects the queue, */
+	/* otherwise the queue is usable by the vector path. */
+	return (txq->offloads & ICE_TX_NO_VECTOR_FLAGS) != 0 ? -1 : CPFL_VECTOR_PATH;
+}
+
+/* Return CPFL_VECTOR_PATH if all Rx queues pass the per-queue check, else -1. */
+static inline int
+cpfl_rx_vec_dev_check_default(struct rte_eth_dev *dev)
+{
+	int qid;
+
+	for (qid = 0; qid < dev->data->nb_rx_queues; qid++) {
+		struct idpf_rx_queue *rxq = dev->data->rx_queues[qid];
+
+		if (cpfl_rx_vec_queue_default(rxq) < 0)
+			return -1;
+	}
+
+	return CPFL_VECTOR_PATH;
+}
+
+/* Return CPFL_VECTOR_PATH when every Tx queue passes cpfl_tx_vec_queue_default() */
+/* (trivially so with zero queues); return -1 at the first queue that fails. */
+static inline int
+cpfl_tx_vec_dev_check_default(struct rte_eth_dev *dev)
+{
+	int qid;
+
+	for (qid = 0; qid < dev->data->nb_tx_queues; qid++) {
+		struct idpf_tx_queue *txq = dev->data->tx_queues[qid];
+
+		if (cpfl_tx_vec_queue_default(txq) < 0)
+			return -1;
+	}
+
+	return CPFL_VECTOR_PATH;
+}
+
+#endif /*_CPFL_RXTX_VEC_COMMON_H_*/
diff --git a/drivers/net/cpfl/meson.build b/drivers/net/cpfl/meson.build
index 3ccee15703..40ed8dbb7b 100644
--- a/drivers/net/cpfl/meson.build
+++ b/drivers/net/cpfl/meson.build
@@ -7,9 +7,32 @@  if is_windows
     subdir_done()
 endif
 
+if dpdk_conf.get('RTE_IOVA_AS_PA') == 0
+    build = false
+    reason = 'driver does not support disabling IOVA as PA mode'
+    subdir_done()
+endif
+
 deps += ['common_idpf']
 
 sources = files(
         'cpfl_ethdev.c',
         'cpfl_rxtx.c',
-)
\ No newline at end of file
+)
+
+# On x86, define CC_AVX512_SUPPORT when machine_args already enable AVX512F/BW,
+# or when the compiler accepts both flags and AVX512F is not explicitly disabled.
+if arch_subdir == 'x86'
+    cpfl_avx512_cpu_support = (
+        cc.get_define('__AVX512F__', args: machine_args) != '' and
+        cc.get_define('__AVX512BW__', args: machine_args) != ''
+    )
+    cpfl_avx512_cc_support = (
+        not machine_args.contains('-mno-avx512f') and
+        cc.has_argument('-mavx512f') and
+        cc.has_argument('-mavx512bw')
+    )
+    if cpfl_avx512_cpu_support or cpfl_avx512_cc_support
+        cflags += ['-DCC_AVX512_SUPPORT']
+    endif
+endif