[v4] net/i40e: add interface to choose latest vector path
Checks
Commit Message
Right now, the AVX2 vector path is only used on later platforms (those
with AVX512 support). This patch adds a devarg, use-latest-vec, that lets
users select the latest vector path the platform supports. For example,
it makes it possible to use the AVX2 vector path on Broadwell.
Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
---
v4:
* Polish the codes.
v3:
* Polish the doc and commit log.
v2:
* Correct the calling of the wrong function last time.
* Fix seg fault bug.
---
doc/guides/nics/i40e.rst | 8 ++
doc/guides/rel_notes/release_18_11.rst | 4 +
drivers/net/i40e/i40e_ethdev.c | 46 ++++++++++-
drivers/net/i40e/i40e_ethdev.h | 3 +
drivers/net/i40e/i40e_rxtx.c | 103 ++++++++++++++++---------
5 files changed, 128 insertions(+), 36 deletions(-)
Comments
> -----Original Message-----
> From: Li, Xiaoyun
> Sent: Monday, September 10, 2018 6:18 PM
> To: Xing, Beilei <beilei.xing@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>
> Cc: dev@dpdk.org; Yang, Zhiyong <zhiyong.yang@intel.com>; Richardson,
> Bruce <bruce.richardson@intel.com>; Hunt, David <david.hunt@intel.com>; Li,
> Xiaoyun <xiaoyun.li@intel.com>
> Subject: [PATCH v4] net/i40e: add interface to choose latest vector path
>
> Right now, vector path is limited to only use on later platform.
> This patch adds a devarg use-latest-vec to allow the users to use the latest
> vector path that the platform supported. Namely, using AVX2 vector path on
> broadwell is possible.
>
> Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
> ---
> v4:
> * Polish the codes.
> v3:
> * Polish the doc and commit log.
> v2:
> * Correct the calling of the wrong function last time.
> * Fix seg fault bug.
> ---
> doc/guides/nics/i40e.rst | 8 ++
> doc/guides/rel_notes/release_18_11.rst | 4 +
> drivers/net/i40e/i40e_ethdev.c | 46 ++++++++++-
> drivers/net/i40e/i40e_ethdev.h | 3 +
> drivers/net/i40e/i40e_rxtx.c | 103 ++++++++++++++++---------
> 5 files changed, 128 insertions(+), 36 deletions(-)
>
> diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst index
> 65d87f869..643e6a062 100644
> --- a/doc/guides/nics/i40e.rst
> +++ b/doc/guides/nics/i40e.rst
> @@ -163,6 +163,14 @@ Runtime Config Options
> Currently hot-plugging of representor ports is not supported so all required
> representors must be specified on the creation of the PF.
>
> +- ``Use latest vector`` (default ``disable``)
> +
> + Vector path was limited to use only on later platform. But users may
> + want the latest vector path. For example, VPP users may want to use
> + AVX2 vector path on HSW/BDW because it can get better perf. So
> + ``devargs`` parameter ``use-latest-vec`` is introduced, for example::
> + -w 84:00.0,use-latest-vec=1
> +
> Driver compilation and testing
> ------------------------------
>
> diff --git a/doc/guides/rel_notes/release_18_11.rst
> b/doc/guides/rel_notes/release_18_11.rst
> index 3ae6b3f58..34af591a2 100644
> --- a/doc/guides/rel_notes/release_18_11.rst
> +++ b/doc/guides/rel_notes/release_18_11.rst
> @@ -54,6 +54,10 @@ New Features
> Also, make sure to start the actual text at the margin.
> =========================================================
>
> +* **Added a devarg to use the latest vector path.**
> + A new devarg ``use-latest-vec`` was introduced to allow users to
> +choose
> + the latest vector path that the platform supported. For example, VPP
> +users
> + can use AVX2 vector path on BDW/HSW to get better performance.
>
> API Changes
> -----------
> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> index 85a6a867f..72377d0b6 100644
> --- a/drivers/net/i40e/i40e_ethdev.c
> +++ b/drivers/net/i40e/i40e_ethdev.c
> @@ -44,6 +44,7 @@
> #define ETH_I40E_FLOATING_VEB_LIST_ARG "floating_veb_list"
> #define ETH_I40E_SUPPORT_MULTI_DRIVER "support-multi-driver"
> #define ETH_I40E_QUEUE_NUM_PER_VF_ARG "queue-num-per-vf"
> +#define ETH_I40E_USE_LATEST_VEC "use-latest-vec"
>
> #define I40E_CLEAR_PXE_WAIT_MS 200
>
> @@ -408,6 +409,7 @@ static const char *const valid_keys[] = {
> ETH_I40E_FLOATING_VEB_LIST_ARG,
> ETH_I40E_SUPPORT_MULTI_DRIVER,
> ETH_I40E_QUEUE_NUM_PER_VF_ARG,
> + ETH_I40E_USE_LATEST_VEC,
> NULL};
>
> static const struct rte_pci_id pci_id_i40e_map[] = { @@ -1201,6 +1203,46 @@
> i40e_aq_debug_write_global_register(struct i40e_hw *hw,
> return i40e_aq_debug_write_register(hw, reg_addr, reg_val,
> cmd_details); }
>
> +static int
> +i40e_parse_latest_vec(struct rte_eth_dev *dev) {
> + struct i40e_adapter *ad =
> + I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> + int kvargs_count, use_latest_vec;
> + struct rte_kvargs *kvlist;
> +
> + ad->use_latest_vec = false;
> +
> + if (!dev->device->devargs)
> + return 0;
> +
> + kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
> + if (!kvlist)
> + return -EINVAL;
> +
> + kvargs_count = rte_kvargs_count(kvlist, ETH_I40E_USE_LATEST_VEC);
> + if (!kvargs_count) {
> + rte_kvargs_free(kvlist);
> + return 0;
> + }
> +
> + if (kvargs_count > 1)
> + PMD_DRV_LOG(WARNING, "More than one argument \"%s\" and
> only "
> + "the first one is used !",
> + ETH_I40E_USE_LATEST_VEC);
> +
> + use_latest_vec = atoi((&kvlist->pairs[0])->value);
> +
> + rte_kvargs_free(kvlist);
> +
> + if (use_latest_vec != 0 && use_latest_vec != 1)
> + PMD_DRV_LOG(WARNING, "Value should be 0 or 1, set it as 1!");
> +
> + ad->use_latest_vec = (bool)use_latest_vec;
> +
> + return 0;
> +}
> +
> static int
> eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
> { @@ -1263,6 +1305,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void
> *init_params __rte_unused)
>
> /* Check if need to support multi-driver */
> i40e_support_multi_driver(dev);
> + i40e_parse_latest_vec(dev);
>
> /* Make sure all is clean before doing PF reset */
> i40e_clear_hw(hw);
> @@ -12527,4 +12570,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_i40e,
> ETH_I40E_FLOATING_VEB_ARG "=1"
> ETH_I40E_FLOATING_VEB_LIST_ARG "=<string>"
> ETH_I40E_QUEUE_NUM_PER_VF_ARG "=1|2|4|8|16"
> - ETH_I40E_SUPPORT_MULTI_DRIVER "=1");
> + ETH_I40E_SUPPORT_MULTI_DRIVER "=1"
> + ETH_I40E_USE_LATEST_VEC "=1");
> diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
> index 3fffe5a55..140c92b84 100644
> --- a/drivers/net/i40e/i40e_ethdev.h
> +++ b/drivers/net/i40e/i40e_ethdev.h
> @@ -1078,6 +1078,9 @@ struct i40e_adapter {
> uint64_t pctypes_tbl[I40E_FLOW_TYPE_MAX] __rte_cache_min_aligned;
> uint64_t flow_types_mask;
> uint64_t pctypes_mask;
> +
> + /* For devargs */
> + bool use_latest_vec;
> };
>
> /**
> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index
> 2a28ee348..e9fa7ed90 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -2909,6 +2909,34 @@ i40e_txq_info_get(struct rte_eth_dev *dev,
> uint16_t queue_id,
> qinfo->conf.offloads = txq->offloads;
> }
>
> +static eth_rx_burst_t
> +i40e_get_latest_rx_vec(bool scatter)
> +{
> +#ifdef RTE_ARCH_X86
> + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> + return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
> + i40e_recv_pkts_vec_avx2;
> +#endif
> + return scatter ? i40e_recv_scattered_pkts_vec :
> + i40e_recv_pkts_vec;
> +}
> +
> +static eth_rx_burst_t
> +i40e_get_recommend_rx_vec(bool scatter) { #ifdef RTE_ARCH_X86
> + /*
> + * since AVX frequency can be different to base frequency, limit
> + * use of AVX2 version to later plaforms, not all those that could
> + * theoretically run it.
> + */
> + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> + return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
> + i40e_recv_pkts_vec_avx2;
> +#endif
> + return scatter ? i40e_recv_scattered_pkts_vec :
> + i40e_recv_pkts_vec;
> +}
> void __attribute__((cold))
> i40e_set_rx_function(struct rte_eth_dev *dev) { @@ -2948,19 +2976,12
> @@ i40e_set_rx_function(struct rte_eth_dev *dev)
> PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
> "callback (port=%d).",
> dev->data->port_id);
> -
> - dev->rx_pkt_burst = i40e_recv_scattered_pkts_vec;
> -#ifdef RTE_ARCH_X86
> - /*
> - * since AVX frequency can be different to base
> - * frequency, limit use of AVX2 version to later
> - * plaforms, not all those that could theoretically
> - * run it.
> - */
> - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> + if (ad->use_latest_vec)
> dev->rx_pkt_burst =
> - i40e_recv_scattered_pkts_vec_avx2;
> -#endif
> + i40e_get_latest_rx_vec(true);
> + else
> + dev->rx_pkt_burst =
> + i40e_get_recommend_rx_vec(true);
> } else {
> PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
> "allocation callback (port=%d).", @@ -2978,18
> +2999,10 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
> "burst size no less than %d (port=%d).",
> RTE_I40E_DESCS_PER_LOOP,
> dev->data->port_id);
> -
> - dev->rx_pkt_burst = i40e_recv_pkts_vec;
> -#ifdef RTE_ARCH_X86
> - /*
> - * since AVX frequency can be different to base
> - * frequency, limit use of AVX2 version to later
> - * plaforms, not all those that could theoretically
> - * run it.
> - */
> - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> - dev->rx_pkt_burst = i40e_recv_pkts_vec_avx2;
> -#endif
> + if (ad->use_latest_vec)
> + dev->rx_pkt_burst = i40e_get_latest_rx_vec(false);
> + else
> + dev->rx_pkt_burst = i40e_get_recommend_rx_vec(false);
How about simplifying the code as below?
/* default */
dev->rx_pkt_burst = dev->data->scattered_rx ?
i40e_recv_scattered_pkts : i40e_recv_pkts;
if (ad->rx_vec_allowed) {
/* overwrite by vec path*/
if (ad->use_latest_vec)
dev->rx_pkt_burst = i40e_get_latest_rx_vec(dev->data->scattered_rx);
else
dev->rx_pkt_burst = i40e_get_recommend_rx_vec(dev->data->scattered_rx);
} else if (ad->rx_bulk_alloc_allowed) {
/* or overwrite by bulk alloc */
dev->rx_pkt_burst = i40e_recv_pkts_bulk_alloc;
}
> } else if (ad->rx_bulk_alloc_allowed) {
> PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
> "satisfied. Rx Burst Bulk Alloc function "
> @@ -3049,6 +3062,31 @@ i40e_set_tx_function_flag(struct rte_eth_dev *dev,
> struct i40e_tx_queue *txq)
> txq->queue_id);
> }
>
> +static eth_tx_burst_t
> +i40e_get_latest_tx_vec(void)
> +{
> +#ifdef RTE_ARCH_X86
> + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> + return i40e_xmit_pkts_vec_avx2;
> +#endif
> + return i40e_xmit_pkts_vec;
> +}
> +
> +static eth_tx_burst_t
> +i40e_get_recommend_tx_vec(void)
> +{
> +#ifdef RTE_ARCH_X86
> + /*
> + * since AVX frequency can be different to base frequency, limit
> + * use of AVX2 version to later plaforms, not all those that could
> + * theoretically run it.
> + */
> + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> + return i40e_xmit_pkts_vec_avx2;
> +#endif
> + return i40e_xmit_pkts_vec;
> +}
> +
> void __attribute__((cold))
> i40e_set_tx_function(struct rte_eth_dev *dev) { @@ -3073,17 +3111,12
> @@ i40e_set_tx_function(struct rte_eth_dev *dev)
> if (ad->tx_simple_allowed) {
> if (ad->tx_vec_allowed) {
> PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
> - dev->tx_pkt_burst = i40e_xmit_pkts_vec;
> -#ifdef RTE_ARCH_X86
> - /*
> - * since AVX frequency can be different to base
> - * frequency, limit use of AVX2 version to later
> - * plaforms, not all those that could theoretically
> - * run it.
> - */
> - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> - dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx2;
> -#endif
> + if (ad->use_latest_vec)
> + dev->tx_pkt_burst =
> + i40e_get_latest_tx_vec();
> + else
> + dev->tx_pkt_burst =
> + i40e_get_recommend_tx_vec();
> } else {
> PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
> dev->tx_pkt_burst = i40e_xmit_pkts_simple;
> --
> 2.17.1
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Zhang, Qi Z
> Sent: Wednesday, September 12, 2018 3:45 PM
> To: Li, Xiaoyun <xiaoyun.li@intel.com>; Xing, Beilei <beilei.xing@intel.com>
> Cc: dev@dpdk.org; Yang, Zhiyong <zhiyong.yang@intel.com>; Richardson,
> Bruce <bruce.richardson@intel.com>; Hunt, David <david.hunt@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v4] net/i40e: add interface to choose latest
> vector path
>
>
>
> > -----Original Message-----
> > From: Li, Xiaoyun
> > Sent: Monday, September 10, 2018 6:18 PM
> > To: Xing, Beilei <beilei.xing@intel.com>; Zhang, Qi Z
> > <qi.z.zhang@intel.com>
> > Cc: dev@dpdk.org; Yang, Zhiyong <zhiyong.yang@intel.com>; Richardson,
> > Bruce <bruce.richardson@intel.com>; Hunt, David
> > <david.hunt@intel.com>; Li, Xiaoyun <xiaoyun.li@intel.com>
> > Subject: [PATCH v4] net/i40e: add interface to choose latest vector
> > path
> >
> > Right now, vector path is limited to only use on later platform.
> > This patch adds a devarg use-latest-vec to allow the users to use the
> > latest vector path that the platform supported. Namely, using AVX2
> > vector path on broadwell is possible.
> >
> > Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
> > ---
> > v4:
> > * Polish the codes.
> > v3:
> > * Polish the doc and commit log.
> > v2:
> > * Correct the calling of the wrong function last time.
> > * Fix seg fault bug.
> > ---
> > doc/guides/nics/i40e.rst | 8 ++
> > doc/guides/rel_notes/release_18_11.rst | 4 +
> > drivers/net/i40e/i40e_ethdev.c | 46 ++++++++++-
> > drivers/net/i40e/i40e_ethdev.h | 3 +
> > drivers/net/i40e/i40e_rxtx.c | 103 ++++++++++++++++---------
> > 5 files changed, 128 insertions(+), 36 deletions(-)
> >
> > diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst index
> > 65d87f869..643e6a062 100644
> > --- a/doc/guides/nics/i40e.rst
> > +++ b/doc/guides/nics/i40e.rst
> > @@ -163,6 +163,14 @@ Runtime Config Options
> > Currently hot-plugging of representor ports is not supported so all
> required
> > representors must be specified on the creation of the PF.
> >
> > +- ``Use latest vector`` (default ``disable``)
> > +
> > + Vector path was limited to use only on later platform. But users
> > + may want the latest vector path. For example, VPP users may want to
> > + use
> > + AVX2 vector path on HSW/BDW because it can get better perf. So
> > + ``devargs`` parameter ``use-latest-vec`` is introduced, for example::
> > + -w 84:00.0,use-latest-vec=1
> > +
> > Driver compilation and testing
> > ------------------------------
> >
> > diff --git a/doc/guides/rel_notes/release_18_11.rst
> > b/doc/guides/rel_notes/release_18_11.rst
> > index 3ae6b3f58..34af591a2 100644
> > --- a/doc/guides/rel_notes/release_18_11.rst
> > +++ b/doc/guides/rel_notes/release_18_11.rst
> > @@ -54,6 +54,10 @@ New Features
> > Also, make sure to start the actual text at the margin.
> > =========================================================
> >
> > +* **Added a devarg to use the latest vector path.**
> > + A new devarg ``use-latest-vec`` was introduced to allow users to
> > +choose
> > + the latest vector path that the platform supported. For example,
> > +VPP users
> > + can use AVX2 vector path on BDW/HSW to get better performance.
> >
> > API Changes
> > -----------
> > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > b/drivers/net/i40e/i40e_ethdev.c index 85a6a867f..72377d0b6 100644
> > --- a/drivers/net/i40e/i40e_ethdev.c
> > +++ b/drivers/net/i40e/i40e_ethdev.c
> > @@ -44,6 +44,7 @@
> > #define ETH_I40E_FLOATING_VEB_LIST_ARG "floating_veb_list"
> > #define ETH_I40E_SUPPORT_MULTI_DRIVER "support-multi-driver"
> > #define ETH_I40E_QUEUE_NUM_PER_VF_ARG "queue-num-per-vf"
> > +#define ETH_I40E_USE_LATEST_VEC "use-latest-vec"
> >
> > #define I40E_CLEAR_PXE_WAIT_MS 200
> >
> > @@ -408,6 +409,7 @@ static const char *const valid_keys[] = {
> > ETH_I40E_FLOATING_VEB_LIST_ARG,
> > ETH_I40E_SUPPORT_MULTI_DRIVER,
> > ETH_I40E_QUEUE_NUM_PER_VF_ARG,
> > + ETH_I40E_USE_LATEST_VEC,
> > NULL};
> >
> > static const struct rte_pci_id pci_id_i40e_map[] = { @@ -1201,6
> > +1203,46 @@ i40e_aq_debug_write_global_register(struct i40e_hw *hw,
> > return i40e_aq_debug_write_register(hw, reg_addr, reg_val,
> > cmd_details); }
> >
> > +static int
> > +i40e_parse_latest_vec(struct rte_eth_dev *dev) {
> > + struct i40e_adapter *ad =
> > + I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> > + int kvargs_count, use_latest_vec;
> > + struct rte_kvargs *kvlist;
> > +
> > + ad->use_latest_vec = false;
> > +
> > + if (!dev->device->devargs)
> > + return 0;
> > +
> > + kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
> > + if (!kvlist)
> > + return -EINVAL;
> > +
> > + kvargs_count = rte_kvargs_count(kvlist, ETH_I40E_USE_LATEST_VEC);
> > + if (!kvargs_count) {
> > + rte_kvargs_free(kvlist);
> > + return 0;
> > + }
> > +
> > + if (kvargs_count > 1)
> > + PMD_DRV_LOG(WARNING, "More than one argument \"%s\" and
> > only "
> > + "the first one is used !",
> > + ETH_I40E_USE_LATEST_VEC);
> > +
> > + use_latest_vec = atoi((&kvlist->pairs[0])->value);
> > +
> > + rte_kvargs_free(kvlist);
> > +
> > + if (use_latest_vec != 0 && use_latest_vec != 1)
> > + PMD_DRV_LOG(WARNING, "Value should be 0 or 1, set it as 1!");
> > +
> > + ad->use_latest_vec = (bool)use_latest_vec;
> > +
> > + return 0;
> > +}
> > +
> > static int
> > eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params
> > __rte_unused) { @@ -1263,6 +1305,7 @@ eth_i40e_dev_init(struct
> > rte_eth_dev *dev, void *init_params __rte_unused)
> >
> > /* Check if need to support multi-driver */
> > i40e_support_multi_driver(dev);
> > + i40e_parse_latest_vec(dev);
> >
> > /* Make sure all is clean before doing PF reset */
> > i40e_clear_hw(hw);
> > @@ -12527,4 +12570,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_i40e,
> > ETH_I40E_FLOATING_VEB_ARG "=1"
> > ETH_I40E_FLOATING_VEB_LIST_ARG "=<string>"
> > ETH_I40E_QUEUE_NUM_PER_VF_ARG "=1|2|4|8|16"
> > - ETH_I40E_SUPPORT_MULTI_DRIVER "=1");
> > + ETH_I40E_SUPPORT_MULTI_DRIVER "=1"
> > + ETH_I40E_USE_LATEST_VEC "=1");
> > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > b/drivers/net/i40e/i40e_ethdev.h index 3fffe5a55..140c92b84 100644
> > --- a/drivers/net/i40e/i40e_ethdev.h
> > +++ b/drivers/net/i40e/i40e_ethdev.h
> > @@ -1078,6 +1078,9 @@ struct i40e_adapter {
> > uint64_t pctypes_tbl[I40E_FLOW_TYPE_MAX] __rte_cache_min_aligned;
> > uint64_t flow_types_mask;
> > uint64_t pctypes_mask;
> > +
> > + /* For devargs */
> > + bool use_latest_vec;
> > };
> >
> > /**
> > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > b/drivers/net/i40e/i40e_rxtx.c index
> > 2a28ee348..e9fa7ed90 100644
> > --- a/drivers/net/i40e/i40e_rxtx.c
> > +++ b/drivers/net/i40e/i40e_rxtx.c
> > @@ -2909,6 +2909,34 @@ i40e_txq_info_get(struct rte_eth_dev *dev,
> > uint16_t queue_id,
> > qinfo->conf.offloads = txq->offloads; }
> >
> > +static eth_rx_burst_t
> > +i40e_get_latest_rx_vec(bool scatter)
> > +{
> > +#ifdef RTE_ARCH_X86
> > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> > + return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
> > + i40e_recv_pkts_vec_avx2;
> > +#endif
> > + return scatter ? i40e_recv_scattered_pkts_vec :
> > + i40e_recv_pkts_vec;
> > +}
> > +
> > +static eth_rx_burst_t
> > +i40e_get_recommend_rx_vec(bool scatter) { #ifdef RTE_ARCH_X86
> > + /*
> > + * since AVX frequency can be different to base frequency, limit
> > + * use of AVX2 version to later plaforms, not all those that could
> > + * theoretically run it.
> > + */
> > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> > + return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
> > + i40e_recv_pkts_vec_avx2;
> > +#endif
> > + return scatter ? i40e_recv_scattered_pkts_vec :
> > + i40e_recv_pkts_vec;
> > +}
> > void __attribute__((cold))
> > i40e_set_rx_function(struct rte_eth_dev *dev) { @@ -2948,19 +2976,12
> > @@ i40e_set_rx_function(struct rte_eth_dev *dev)
> > PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
> > "callback (port=%d).",
> > dev->data->port_id);
> > -
> > - dev->rx_pkt_burst = i40e_recv_scattered_pkts_vec;
> > -#ifdef RTE_ARCH_X86
> > - /*
> > - * since AVX frequency can be different to base
> > - * frequency, limit use of AVX2 version to later
> > - * plaforms, not all those that could theoretically
> > - * run it.
> > - */
> > - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> > + if (ad->use_latest_vec)
> > dev->rx_pkt_burst =
> > - i40e_recv_scattered_pkts_vec_avx2;
> > -#endif
> > + i40e_get_latest_rx_vec(true);
> > + else
> > + dev->rx_pkt_burst =
> > + i40e_get_recommend_rx_vec(true);
> > } else {
> > PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
> > "allocation callback (port=%d).", @@ -2978,18
> > +2999,10 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
> > "burst size no less than %d (port=%d).",
> > RTE_I40E_DESCS_PER_LOOP,
> > dev->data->port_id);
> > -
> > - dev->rx_pkt_burst = i40e_recv_pkts_vec;
> > -#ifdef RTE_ARCH_X86
> > - /*
> > - * since AVX frequency can be different to base
> > - * frequency, limit use of AVX2 version to later
> > - * plaforms, not all those that could theoretically
> > - * run it.
> > - */
> > - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> > - dev->rx_pkt_burst = i40e_recv_pkts_vec_avx2;
> > -#endif
> > + if (ad->use_latest_vec)
> > + dev->rx_pkt_burst = i40e_get_latest_rx_vec(false);
> > + else
> > + dev->rx_pkt_burst = i40e_get_recommend_rx_vec(false);
>
>
> How about simplify the code as below?
>
> /* default */
> dev->rx_pkt_burst = dev->data->scattered_rx ?
> i40e_recv_scattered_pkts : i40e_recv_pkts;
>
> if (ad->rx_vec_allowed) {
> /* overwrite by vec path*/
> if (ad->use_latest_vec)
> dev->rx_pkt_burst =
> i40e_get_latest_rx_vec(dev->data->scattered_rx);
> else
> dev->rx_pkt_burst =
> i40e_get_recommend_rx_vec(dev->data->scattered_rx);
> } else if (ad->rx_bulk_alloc_allowed) {
Sorry, I think it should be:
else if (ad->rx_bulk_alloc_allowed && ! dev->data->scattered_rx) {
> /* or overwrite by bulk alloc */
> dev->rx_pkt_burst = i40e_recv_pkts_bulk_alloc; }
>
>
> > } else if (ad->rx_bulk_alloc_allowed) {
> > PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
> > "satisfied. Rx Burst Bulk Alloc function "
> > @@ -3049,6 +3062,31 @@ i40e_set_tx_function_flag(struct rte_eth_dev
> > *dev, struct i40e_tx_queue *txq)
> > txq->queue_id);
> > }
> >
> > +static eth_tx_burst_t
> > +i40e_get_latest_tx_vec(void)
> > +{
> > +#ifdef RTE_ARCH_X86
> > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> > + return i40e_xmit_pkts_vec_avx2;
> > +#endif
> > + return i40e_xmit_pkts_vec;
> > +}
> > +
> > +static eth_tx_burst_t
> > +i40e_get_recommend_tx_vec(void)
> > +{
> > +#ifdef RTE_ARCH_X86
> > + /*
> > + * since AVX frequency can be different to base frequency, limit
> > + * use of AVX2 version to later plaforms, not all those that could
> > + * theoretically run it.
> > + */
> > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> > + return i40e_xmit_pkts_vec_avx2;
> > +#endif
> > + return i40e_xmit_pkts_vec;
> > +}
> > +
> > void __attribute__((cold))
> > i40e_set_tx_function(struct rte_eth_dev *dev) { @@ -3073,17 +3111,12
> > @@ i40e_set_tx_function(struct rte_eth_dev *dev)
> > if (ad->tx_simple_allowed) {
> > if (ad->tx_vec_allowed) {
> > PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
> > - dev->tx_pkt_burst = i40e_xmit_pkts_vec;
> > -#ifdef RTE_ARCH_X86
> > - /*
> > - * since AVX frequency can be different to base
> > - * frequency, limit use of AVX2 version to later
> > - * plaforms, not all those that could theoretically
> > - * run it.
> > - */
> > - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> > - dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx2;
> > -#endif
> > + if (ad->use_latest_vec)
> > + dev->tx_pkt_burst =
> > + i40e_get_latest_tx_vec();
> > + else
> > + dev->tx_pkt_burst =
> > + i40e_get_recommend_tx_vec();
> > } else {
> > PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
> > dev->tx_pkt_burst = i40e_xmit_pkts_simple;
> > --
> > 2.17.1
@@ -163,6 +163,14 @@ Runtime Config Options
Currently hot-plugging of representor ports is not supported so all required
representors must be specified on the creation of the PF.
+- ``Use latest vector`` (default ``disable``)
+
+ By default, the AVX2 vector path is only selected on later platforms (those with
+ AVX512 support). Users may still want the latest vector path their CPU supports; for
+ example, VPP users may want the AVX2 vector path on HSW/BDW for better performance.
+ The ``devargs`` parameter ``use-latest-vec`` enables this, for example::
+ -w 84:00.0,use-latest-vec=1
+
Driver compilation and testing
------------------------------
@@ -54,6 +54,10 @@ New Features
Also, make sure to start the actual text at the margin.
=========================================================
+* **Added a devarg to use the latest vector path.**
+ A new devarg ``use-latest-vec`` was introduced to allow users to choose
+ the latest vector path that the platform supports. For example, VPP users
+ can use the AVX2 vector path on BDW/HSW to get better performance.
API Changes
-----------
@@ -44,6 +44,7 @@
#define ETH_I40E_FLOATING_VEB_LIST_ARG "floating_veb_list"
#define ETH_I40E_SUPPORT_MULTI_DRIVER "support-multi-driver"
#define ETH_I40E_QUEUE_NUM_PER_VF_ARG "queue-num-per-vf"
+#define ETH_I40E_USE_LATEST_VEC "use-latest-vec"
#define I40E_CLEAR_PXE_WAIT_MS 200
@@ -408,6 +409,7 @@ static const char *const valid_keys[] = {
ETH_I40E_FLOATING_VEB_LIST_ARG,
ETH_I40E_SUPPORT_MULTI_DRIVER,
ETH_I40E_QUEUE_NUM_PER_VF_ARG,
+ ETH_I40E_USE_LATEST_VEC,
NULL};
static const struct rte_pci_id pci_id_i40e_map[] = {
@@ -1201,6 +1203,46 @@ i40e_aq_debug_write_global_register(struct i40e_hw *hw,
return i40e_aq_debug_write_register(hw, reg_addr, reg_val, cmd_details);
}
+static int
+i40e_parse_latest_vec(struct rte_eth_dev *dev)
+{
+ struct i40e_adapter *ad =
+ I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+ int kvargs_count, use_latest_vec;
+ struct rte_kvargs *kvlist;
+
+ ad->use_latest_vec = false;
+
+ if (!dev->device->devargs)
+ return 0;
+
+ kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
+ if (!kvlist)
+ return -EINVAL;
+
+ kvargs_count = rte_kvargs_count(kvlist, ETH_I40E_USE_LATEST_VEC);
+ if (!kvargs_count) {
+ rte_kvargs_free(kvlist);
+ return 0;
+ }
+
+ if (kvargs_count > 1)
+ PMD_DRV_LOG(WARNING, "More than one argument \"%s\" and only "
+ "the first one is used !",
+ ETH_I40E_USE_LATEST_VEC);
+
+ use_latest_vec = atoi((&kvlist->pairs[0])->value);
+
+ rte_kvargs_free(kvlist);
+
+ if (use_latest_vec != 0 && use_latest_vec != 1)
+ PMD_DRV_LOG(WARNING, "Value should be 0 or 1, set it as 1!");
+
+ ad->use_latest_vec = (bool)use_latest_vec;
+
+ return 0;
+}
+
static int
eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
{
@@ -1263,6 +1305,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
/* Check if need to support multi-driver */
i40e_support_multi_driver(dev);
+ i40e_parse_latest_vec(dev);
/* Make sure all is clean before doing PF reset */
i40e_clear_hw(hw);
@@ -12527,4 +12570,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_i40e,
ETH_I40E_FLOATING_VEB_ARG "=1"
ETH_I40E_FLOATING_VEB_LIST_ARG "=<string>"
ETH_I40E_QUEUE_NUM_PER_VF_ARG "=1|2|4|8|16"
- ETH_I40E_SUPPORT_MULTI_DRIVER "=1");
+ ETH_I40E_SUPPORT_MULTI_DRIVER "=1"
+ ETH_I40E_USE_LATEST_VEC "=1");
@@ -1078,6 +1078,9 @@ struct i40e_adapter {
uint64_t pctypes_tbl[I40E_FLOW_TYPE_MAX] __rte_cache_min_aligned;
uint64_t flow_types_mask;
uint64_t pctypes_mask;
+
+ /* For devargs */
+ bool use_latest_vec;
};
/**
@@ -2909,6 +2909,34 @@ i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
qinfo->conf.offloads = txq->offloads;
}
+static eth_rx_burst_t
+i40e_get_latest_rx_vec(bool scatter)
+{
+#ifdef RTE_ARCH_X86
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+ return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
+ i40e_recv_pkts_vec_avx2;
+#endif
+ return scatter ? i40e_recv_scattered_pkts_vec :
+ i40e_recv_pkts_vec;
+}
+
+static eth_rx_burst_t
+i40e_get_recommend_rx_vec(bool scatter)
+{
+#ifdef RTE_ARCH_X86
+ /*
+ * since AVX frequency can be different to base frequency, limit
+ * use of AVX2 version to later platforms, not all those that could
+ * theoretically run it.
+ */
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+ return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
+ i40e_recv_pkts_vec_avx2;
+#endif
+ return scatter ? i40e_recv_scattered_pkts_vec :
+ i40e_recv_pkts_vec;
+}
void __attribute__((cold))
i40e_set_rx_function(struct rte_eth_dev *dev)
{
@@ -2948,19 +2976,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
"callback (port=%d).",
dev->data->port_id);
-
- dev->rx_pkt_burst = i40e_recv_scattered_pkts_vec;
-#ifdef RTE_ARCH_X86
- /*
- * since AVX frequency can be different to base
- * frequency, limit use of AVX2 version to later
- * plaforms, not all those that could theoretically
- * run it.
- */
- if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+ if (ad->use_latest_vec)
dev->rx_pkt_burst =
- i40e_recv_scattered_pkts_vec_avx2;
-#endif
+ i40e_get_latest_rx_vec(true);
+ else
+ dev->rx_pkt_burst =
+ i40e_get_recommend_rx_vec(true);
} else {
PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
"allocation callback (port=%d).",
@@ -2978,18 +2999,10 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
"burst size no less than %d (port=%d).",
RTE_I40E_DESCS_PER_LOOP,
dev->data->port_id);
-
- dev->rx_pkt_burst = i40e_recv_pkts_vec;
-#ifdef RTE_ARCH_X86
- /*
- * since AVX frequency can be different to base
- * frequency, limit use of AVX2 version to later
- * plaforms, not all those that could theoretically
- * run it.
- */
- if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
- dev->rx_pkt_burst = i40e_recv_pkts_vec_avx2;
-#endif
+ if (ad->use_latest_vec)
+ dev->rx_pkt_burst = i40e_get_latest_rx_vec(false);
+ else
+ dev->rx_pkt_burst = i40e_get_recommend_rx_vec(false);
} else if (ad->rx_bulk_alloc_allowed) {
PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
"satisfied. Rx Burst Bulk Alloc function "
@@ -3049,6 +3062,31 @@ i40e_set_tx_function_flag(struct rte_eth_dev *dev, struct i40e_tx_queue *txq)
txq->queue_id);
}
+static eth_tx_burst_t
+i40e_get_latest_tx_vec(void)
+{
+#ifdef RTE_ARCH_X86
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+ return i40e_xmit_pkts_vec_avx2;
+#endif
+ return i40e_xmit_pkts_vec;
+}
+
+static eth_tx_burst_t
+i40e_get_recommend_tx_vec(void)
+{
+#ifdef RTE_ARCH_X86
+ /*
+ * since AVX frequency can be different to base frequency, limit
+ * use of AVX2 version to later platforms, not all those that could
+ * theoretically run it.
+ */
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+ return i40e_xmit_pkts_vec_avx2;
+#endif
+ return i40e_xmit_pkts_vec;
+}
+
void __attribute__((cold))
i40e_set_tx_function(struct rte_eth_dev *dev)
{
@@ -3073,17 +3111,12 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
if (ad->tx_simple_allowed) {
if (ad->tx_vec_allowed) {
PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
- dev->tx_pkt_burst = i40e_xmit_pkts_vec;
-#ifdef RTE_ARCH_X86
- /*
- * since AVX frequency can be different to base
- * frequency, limit use of AVX2 version to later
- * plaforms, not all those that could theoretically
- * run it.
- */
- if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
- dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx2;
-#endif
+ if (ad->use_latest_vec)
+ dev->tx_pkt_burst =
+ i40e_get_latest_tx_vec();
+ else
+ dev->tx_pkt_burst =
+ i40e_get_recommend_tx_vec();
} else {
PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts_simple;