[dpdk-dev] ether: add support for vtune task tracing

Message ID 1497892689-27494-1-git-send-email-ilia.kurakin@intel.com (mailing list archive)
State Superseded, archived
Headers

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

ilia.kurakin@intel.com June 19, 2017, 5:18 p.m. UTC
From: Ilia Kurakin <ilia.kurakin@intel.com>

The patch adds tracing of loop iterations that yielded no packets in a DPDK
application. It is using ITT task API:
    https://software.intel.com/en-us/node/544206

We suppose the flow of using this tracing would assume the user has ITT lib
and header on his machine and re-build DPDK with additional make parameters:

    make EXTRA_CFLAGS=-I<path to ittnotify.h>
         EXTRA_LDLIBS="-L<path to libittnotify.a> -littnotify"

Signed-off-by: Ilia Kurakin <ilia.kurakin@intel.com>
---
 config/common_base             |  1 +
 lib/librte_ether/Makefile      |  1 +
 lib/librte_ether/rte_eth_itt.h | 69 ++++++++++++++++++++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.c  |  7 +++++
 lib/librte_ether/rte_ethdev.h  | 26 ++++++++++++++++
 5 files changed, 104 insertions(+)
 create mode 100644 lib/librte_ether/rte_eth_itt.h
  

Comments

Ananyev, Konstantin June 22, 2017, 9:42 a.m. UTC | #1
Hi Ilia,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of ilia.kurakin@intel.com
> Sent: Monday, June 19, 2017 6:18 PM
> To: dev@dpdk.org
> Cc: Kurakin, Ilia <ilia.kurakin@intel.com>
> Subject: [dpdk-dev] [PATCH] ether: add support for vtune task tracing
> 
> From: Ilia Kurakin <ilia.kurakin@intel.com>
> 
> The patch adds tracing of loop iterations that yielded no packets in a DPDK
> application. It is using ITT task API:
>     https://software.intel.com/en-us/node/544206
> 
> We suppose the flow of using this tracing would assume the user has ITT lib
> and header on his machine and re-build DPDK with additional make parameters:
> 
>     make EXTRA_CFLAGS=-I<path to ittnotify.h>
>          EXTRA_LDLIBS="-L<path to libittnotify.a> -littnotify"

There are few things that worry me with that patch:
1. We add new config variable and add extra dependency here.
    Usually we try not to do that without really compelling reason.
2. We pollute rte_ethdev with the code that has nothing to do with
     it major functionality.

That makes me wonder why this vtune data collection has to be done
inside rx_burst() function?
 Why it can't be done on the application layer, i.e. straight after rx_burst()
is finished?
Something like:
n = rte_eth_rx_burst(port, queue, ....);
itt_rx_collect_data(port, queue, n, ....);
?

Or as alternative, user can install this vtune collection routine as
rx callback function.

Konstantin

> 
> Signed-off-by: Ilia Kurakin <ilia.kurakin@intel.com>
> ---
>  config/common_base             |  1 +
>  lib/librte_ether/Makefile      |  1 +
>  lib/librte_ether/rte_eth_itt.h | 69 ++++++++++++++++++++++++++++++++++++++++++
>  lib/librte_ether/rte_ethdev.c  |  7 +++++
>  lib/librte_ether/rte_ethdev.h  | 26 ++++++++++++++++
>  5 files changed, 104 insertions(+)
>  create mode 100644 lib/librte_ether/rte_eth_itt.h
> 
> diff --git a/config/common_base b/config/common_base
> index f6aafd1..60d8b63 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -135,6 +135,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
>  CONFIG_RTE_LIBRTE_IEEE1588=n
>  CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
>  CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
> +CONFIG_RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS=n
> 
>  #
>  # Turn off Tx preparation stage
> diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile
> index 93fdde1..c10153a 100644
> --- a/lib/librte_ether/Makefile
> +++ b/lib/librte_ether/Makefile
> @@ -56,5 +56,6 @@ SYMLINK-y-include += rte_eth_ctrl.h
>  SYMLINK-y-include += rte_dev_info.h
>  SYMLINK-y-include += rte_flow.h
>  SYMLINK-y-include += rte_flow_driver.h
> +SYMLINK-${CONFIG_RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS}-include += rte_eth_itt.h
> 
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_ether/rte_eth_itt.h b/lib/librte_ether/rte_eth_itt.h
> new file mode 100644
> index 0000000..e7984fb
> --- /dev/null
> +++ b/lib/librte_ether/rte_eth_itt.h
> @@ -0,0 +1,69 @@
> +#ifndef _RTE_ETH_ITT_H_
> +#define _RTE_ETH_ITT_H_
> +
> +#include <ittnotify.h>
> +#include <rte_config.h>
> +
> +#define ITT_MAX_NAME_LEN (100)
> +
> +/**
> + * Auxiliary ITT structure belonging to port and using to:
> + *   -  track queue state to determine whether it is wasting loop iterations
> + *   -  begin or end ITT task using task domain and name
> + */
> +struct rte_eth_itt_aux_data {
> +	/**
> +	 * ITT domains for each queue.
> +	 */
> +	__itt_domain *wasted_iteration_itt_domains[RTE_MAX_QUEUES_PER_PORT];
> +	/**
> +	 * ITT task names for each queue.
> +	 */
> +	__itt_string_handle *wasted_iteration_itt_handles[RTE_MAX_QUEUES_PER_PORT];
> +	/**
> +	 * Flags indicating the queues state. Possible values:
> +	 * 1 - queue is wasting iterations, 0 - otherwise.
> +	 */
> +	uint8_t queue_is_wasting_iterations[RTE_MAX_QUEUES_PER_PORT];
> +};
> +
> +/**
> + * The pool of *rte_eth_itt_aux_data* structures.
> + */
> +struct rte_eth_itt_aux_data itt_aux_data[RTE_MAX_ETHPORTS];
> +
> +/**
> + * Initialization of rte_eth_itt_aux_data for a given port.
> + * This function must be invoked when ethernet device is being configured.
> + * Result will be stored in the global array *itt_aux_data*.
> + *
> + * @param port_id
> + *  The port identifier of the Ethernet device.
> + * @param port_name
> + *  The name of the Ethernet device.
> + * @param queue_num
> + *  The number of queues on specified port.
> + */
> +static inline void
> +rte_eth_init_itt(uint8_t port_id, char *port_name, uint8_t queue_num) {
> +	uint16_t q_id;
> +	for (q_id = 0; q_id < queue_num; ++q_id) {
> +		char domain_name[ITT_MAX_NAME_LEN];
> +		snprintf(domain_name, sizeof(domain_name),
> +			"RXBurst.WastedIterations.Port_%s.Queue_%d",
> +			port_name, q_id);
> +		itt_aux_data[port_id].wasted_iteration_itt_domains[q_id]
> +			= __itt_domain_create(domain_name);
> +
> +		char task_name[ITT_MAX_NAME_LEN];
> +		snprintf(task_name, sizeof(task_name),
> +			"port id: %d; queue id: %d",
> +			port_id, q_id);
> +		itt_aux_data[port_id].wasted_iteration_itt_handles[q_id]
> +			= __itt_string_handle_create(task_name);
> +
> +		itt_aux_data[port_id].queue_is_wasting_iterations[q_id] = 0;
> +	}
> +}
> +
> +#endif
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index 81a45c0..9e5ac01 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -818,6 +818,13 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>  		return diag;
>  	}
> 
> +#ifdef RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS
> +	/**
> +	* See rte_eth_itt.h to find comments on code below.
> +	*/
> +	rte_eth_init_itt(port_id, dev->data->name, nb_rx_q);
> +#endif
> +
>  	return 0;
>  }
> 
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index f6e6c74..4ba90d2 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -186,6 +186,10 @@ extern "C" {
>  #include "rte_eth_ctrl.h"
>  #include "rte_dev_info.h"
> 
> +#ifdef RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS
> +#include "rte_eth_itt.h"
> +#endif
> +
>  struct rte_mbuf;
> 
>  /**
> @@ -2710,6 +2714,28 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
>  	int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
>  			rx_pkts, nb_pkts);
> 
> +#ifdef RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS
> +	/**
> +	* See rte_eth_itt.h to find comments on code below.
> +	*/
> +	if (unlikely(nb_rx == 0)) {
> +		if (!itt_aux_data[port_id].queue_is_wasting_iterations[queue_id]) {
> +			__itt_task_begin(
> +				itt_aux_data[port_id].wasted_iteration_itt_domains[queue_id],
> +				__itt_null, __itt_null,
> +				itt_aux_data[port_id].wasted_iteration_itt_handles[queue_id]);
> +			itt_aux_data[port_id].queue_is_wasting_iterations[queue_id] = 1;
> +		}
> +	}
> +	else {
> +		if (unlikely(itt_aux_data[port_id].queue_is_wasting_iterations[queue_id])) {
> +			__itt_task_end(
> +				itt_aux_data[port_id].wasted_iteration_itt_domains[queue_id]);
> +			itt_aux_data[port_id].queue_is_wasting_iterations[queue_id] = 0;
> +		}
> +	}
> +#endif
> +
>  #ifdef RTE_ETHDEV_RXTX_CALLBACKS
>  	struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
> 
> --
> 2.7.4
> 
> 
> --------------------------------------------------------------------
> Joint Stock Company Intel A/O
> Registered legal address: Krylatsky Hills Business Park,
> 17 Krylatskaya Str., Bldg 4, Moscow 121614,
> Russian Federation
> 
> This e-mail and any attachments may contain confidential material for
> the sole use of the intended recipient(s). Any review or distribution
> by others is strictly prohibited. If you are not the intended
> recipient, please contact the sender and delete all copies.
  
ilia.kurakin@intel.com June 22, 2017, 12:12 p.m. UTC | #2
Hi Konstantin,

Adding Dmitry to this thread

Ilia

-----Original Message-----
From: Ananyev, Konstantin 
Sent: Thursday, June 22, 2017 12:42 PM
To: Kurakin, Ilia <ilia.kurakin@intel.com>; dev@dpdk.org
Cc: Kurakin, Ilia <ilia.kurakin@intel.com>
Subject: RE: [dpdk-dev] [PATCH] ether: add support for vtune task tracing

Hi Ilia,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of 
> ilia.kurakin@intel.com
> Sent: Monday, June 19, 2017 6:18 PM
> To: dev@dpdk.org
> Cc: Kurakin, Ilia <ilia.kurakin@intel.com>
> Subject: [dpdk-dev] [PATCH] ether: add support for vtune task tracing
> 
> From: Ilia Kurakin <ilia.kurakin@intel.com>
> 
> The patch adds tracing of loop iterations that yielded no packets in a 
> DPDK application. It is using ITT task API:
>     https://software.intel.com/en-us/node/544206
> 
> We suppose the flow of using this tracing would assume the user has 
> ITT lib and header on his machine and re-build DPDK with additional make parameters:
> 
>     make EXTRA_CFLAGS=-I<path to ittnotify.h>
>          EXTRA_LDLIBS="-L<path to libittnotify.a> -littnotify"

There are few things that worry me with that patch:
1. We add new config variable and add extra dependency here.
    Usually we try not to do that without really compelling reason.
2. We pollute rte_ethdev with the code that has nothing to do with
     it major functionality.

That makes me wonder why this vtune data collection has to be done inside rx_burst() function?
 Why it can't be done on the application layer, i.e. straight after rx_burst() is finished?
Something like:
n = rte_eth_rx_burst(port, queue, ....); itt_rx_collect_data(port, queue, n, ....); ?

Or as alternative, user can install this vtune collection routine as rx callback function.

Konstantin

> 
> Signed-off-by: Ilia Kurakin <ilia.kurakin@intel.com>
> ---
>  config/common_base             |  1 +
>  lib/librte_ether/Makefile      |  1 +
>  lib/librte_ether/rte_eth_itt.h | 69 
> ++++++++++++++++++++++++++++++++++++++++++
>  lib/librte_ether/rte_ethdev.c  |  7 +++++  
> lib/librte_ether/rte_ethdev.h  | 26 ++++++++++++++++
>  5 files changed, 104 insertions(+)
>  create mode 100644 lib/librte_ether/rte_eth_itt.h
> 
> diff --git a/config/common_base b/config/common_base index 
> f6aafd1..60d8b63 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -135,6 +135,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
>  CONFIG_RTE_LIBRTE_IEEE1588=n
>  CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
>  CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
> +CONFIG_RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS=n
> 
>  #
>  # Turn off Tx preparation stage
> diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile 
> index 93fdde1..c10153a 100644
> --- a/lib/librte_ether/Makefile
> +++ b/lib/librte_ether/Makefile
> @@ -56,5 +56,6 @@ SYMLINK-y-include += rte_eth_ctrl.h  
> SYMLINK-y-include += rte_dev_info.h  SYMLINK-y-include += rte_flow.h  
> SYMLINK-y-include += rte_flow_driver.h
> +SYMLINK-${CONFIG_RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS}-include += 
> +rte_eth_itt.h
> 
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_ether/rte_eth_itt.h 
> b/lib/librte_ether/rte_eth_itt.h new file mode 100644 index 
> 0000000..e7984fb
> --- /dev/null
> +++ b/lib/librte_ether/rte_eth_itt.h
> @@ -0,0 +1,69 @@
> +#ifndef _RTE_ETH_ITT_H_
> +#define _RTE_ETH_ITT_H_
> +
> +#include <ittnotify.h>
> +#include <rte_config.h>
> +
> +#define ITT_MAX_NAME_LEN (100)
> +
> +/**
> + * Auxiliary ITT structure belonging to port and using to:
> + *   -  track queue state to determine whether it is wasting loop iterations
> + *   -  begin or end ITT task using task domain and name
> + */
> +struct rte_eth_itt_aux_data {
> +	/**
> +	 * ITT domains for each queue.
> +	 */
> +	__itt_domain *wasted_iteration_itt_domains[RTE_MAX_QUEUES_PER_PORT];
> +	/**
> +	 * ITT task names for each queue.
> +	 */
> +	__itt_string_handle *wasted_iteration_itt_handles[RTE_MAX_QUEUES_PER_PORT];
> +	/**
> +	 * Flags indicating the queues state. Possible values:
> +	 * 1 - queue is wasting iterations, 0 - otherwise.
> +	 */
> +	uint8_t queue_is_wasting_iterations[RTE_MAX_QUEUES_PER_PORT];
> +};
> +
> +/**
> + * The pool of *rte_eth_itt_aux_data* structures.
> + */
> +struct rte_eth_itt_aux_data itt_aux_data[RTE_MAX_ETHPORTS];
> +
> +/**
> + * Initialization of rte_eth_itt_aux_data for a given port.
> + * This function must be invoked when ethernet device is being configured.
> + * Result will be stored in the global array *itt_aux_data*.
> + *
> + * @param port_id
> + *  The port identifier of the Ethernet device.
> + * @param port_name
> + *  The name of the Ethernet device.
> + * @param queue_num
> + *  The number of queues on specified port.
> + */
> +static inline void
> +rte_eth_init_itt(uint8_t port_id, char *port_name, uint8_t queue_num) {
> +	uint16_t q_id;
> +	for (q_id = 0; q_id < queue_num; ++q_id) {
> +		char domain_name[ITT_MAX_NAME_LEN];
> +		snprintf(domain_name, sizeof(domain_name),
> +			"RXBurst.WastedIterations.Port_%s.Queue_%d",
> +			port_name, q_id);
> +		itt_aux_data[port_id].wasted_iteration_itt_domains[q_id]
> +			= __itt_domain_create(domain_name);
> +
> +		char task_name[ITT_MAX_NAME_LEN];
> +		snprintf(task_name, sizeof(task_name),
> +			"port id: %d; queue id: %d",
> +			port_id, q_id);
> +		itt_aux_data[port_id].wasted_iteration_itt_handles[q_id]
> +			= __itt_string_handle_create(task_name);
> +
> +		itt_aux_data[port_id].queue_is_wasting_iterations[q_id] = 0;
> +	}
> +}
> +
> +#endif
> diff --git a/lib/librte_ether/rte_ethdev.c 
> b/lib/librte_ether/rte_ethdev.c index 81a45c0..9e5ac01 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -818,6 +818,13 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>  		return diag;
>  	}
> 
> +#ifdef RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS
> +	/**
> +	* See rte_eth_itt.h to find comments on code below.
> +	*/
> +	rte_eth_init_itt(port_id, dev->data->name, nb_rx_q); #endif
> +
>  	return 0;
>  }
> 
> diff --git a/lib/librte_ether/rte_ethdev.h 
> b/lib/librte_ether/rte_ethdev.h index f6e6c74..4ba90d2 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -186,6 +186,10 @@ extern "C" {
>  #include "rte_eth_ctrl.h"
>  #include "rte_dev_info.h"
> 
> +#ifdef RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS
> +#include "rte_eth_itt.h"
> +#endif
> +
>  struct rte_mbuf;
> 
>  /**
> @@ -2710,6 +2714,28 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
>  	int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
>  			rx_pkts, nb_pkts);
> 
> +#ifdef RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS
> +	/**
> +	* See rte_eth_itt.h to find comments on code below.
> +	*/
> +	if (unlikely(nb_rx == 0)) {
> +		if (!itt_aux_data[port_id].queue_is_wasting_iterations[queue_id]) {
> +			__itt_task_begin(
> +				itt_aux_data[port_id].wasted_iteration_itt_domains[queue_id],
> +				__itt_null, __itt_null,
> +				itt_aux_data[port_id].wasted_iteration_itt_handles[queue_id]);
> +			itt_aux_data[port_id].queue_is_wasting_iterations[queue_id] = 1;
> +		}
> +	}
> +	else {
> +		if (unlikely(itt_aux_data[port_id].queue_is_wasting_iterations[queue_id])) {
> +			__itt_task_end(
> +				itt_aux_data[port_id].wasted_iteration_itt_domains[queue_id]);
> +			itt_aux_data[port_id].queue_is_wasting_iterations[queue_id] = 0;
> +		}
> +	}
> +#endif
> +
>  #ifdef RTE_ETHDEV_RXTX_CALLBACKS
>  	struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
> 
> --
> 2.7.4
> 
> 
> --------------------------------------------------------------------
> Joint Stock Company Intel A/O
> Registered legal address: Krylatsky Hills Business Park,
> 17 Krylatskaya Str., Bldg 4, Moscow 121614, Russian Federation
> 
> This e-mail and any attachments may contain confidential material for 
> the sole use of the intended recipient(s). Any review or distribution 
> by others is strictly prohibited. If you are not the intended 
> recipient, please contact the sender and delete all copies.


--------------------------------------------------------------------
Joint Stock Company Intel A/O
Registered legal address: Krylatsky Hills Business Park,
17 Krylatskaya Str., Bldg 4, Moscow 121614,
Russian Federation

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
  
Galanov, Dmitry June 22, 2017, 4:46 p.m. UTC | #3
Hi Konstantin,
We are planning to use Intel ITT https://software.intel.com/en-us/node/544195  for tracing wasted cycle iterations.
The dependency on that library will only be required if user decides to enable that tracing by setting that in CONFIG_RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS=y. We currently assume in this case user has itt libraries on his machine, otherwise we should somehow include it with DPDK in the form of sources or prebuilt binaries.
We are using ITT because it gives better than simple txt trace performance, compression and is already used in various Intel's open-sourced products (TBB, OpenCL)
Asking a user to modify code of application is inconvenient because we should somehow educate them to do it that is decreasing chances that users will try this functionality. It's much easier if a user gets wasted cycle tracing just by recompiling the source code with an option.

Thanks, Dmitry

-----Original Message-----
From: Kurakin, Ilia 
Sent: Thursday, June 22, 2017 3:13 PM
To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Galanov, Dmitry <dmitry.galanov@intel.com>
Cc: dev@dpdk.org
Subject: RE: [dpdk-dev] [PATCH] ether: add support for vtune task tracing

Hi Konstantin,

Adding Dmitry to this thread

Ilia

-----Original Message-----
From: Ananyev, Konstantin
Sent: Thursday, June 22, 2017 12:42 PM
To: Kurakin, Ilia <ilia.kurakin@intel.com>; dev@dpdk.org\
Cc: Kurakin, Ilia <ilia.kurakin@intel.com>
Subject: RE: [dpdk-dev] [PATCH] ether: add support for vtune task tracing

Hi Ilia,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of 
> ilia.kurakin@intel.com
> Sent: Monday, June 19, 2017 6:18 PM
> To: dev@dpdk.org
> Cc: Kurakin, Ilia <ilia.kurakin@intel.com>
> Subject: [dpdk-dev] [PATCH] ether: add support for vtune task tracing
> 
> From: Ilia Kurakin <ilia.kurakin@intel.com>
> 
> The patch adds tracing of loop iterations that yielded no packets in a 
> DPDK application. It is using ITT task API:
>     https://software.intel.com/en-us/node/544206
> 
> We suppose the flow of using this tracing would assume the user has 
> ITT lib and header on his machine and re-build DPDK with additional make parameters:
> 
>     make EXTRA_CFLAGS=-I<path to ittnotify.h>
>          EXTRA_LDLIBS="-L<path to libittnotify.a> -littnotify"

There are few things that worry me with that patch:
1. We add new config variable and add extra dependency here.
    Usually we try not to do that without really compelling reason.
2. We pollute rte_ethdev with the code that has nothing to do with
     it major functionality.

That makes me wonder why this vtune data collection has to be done inside rx_burst() function?
 Why it can't be done on the application layer, i.e. straight after rx_burst() is finished?
Something like:
n = rte_eth_rx_burst(port, queue, ....); itt_rx_collect_data(port, queue, n, ....); ?

Or as alternative, user can install this vtune collection routine as rx callback function.

Konstantin

> 
> Signed-off-by: Ilia Kurakin <ilia.kurakin@intel.com>
> ---
>  config/common_base             |  1 +
>  lib/librte_ether/Makefile      |  1 +
>  lib/librte_ether/rte_eth_itt.h | 69
> ++++++++++++++++++++++++++++++++++++++++++
>  lib/librte_ether/rte_ethdev.c  |  7 +++++ 
> lib/librte_ether/rte_ethdev.h  | 26 ++++++++++++++++
>  5 files changed, 104 insertions(+)
>  create mode 100644 lib/librte_ether/rte_eth_itt.h
> 
> diff --git a/config/common_base b/config/common_base index
> f6aafd1..60d8b63 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -135,6 +135,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
>  CONFIG_RTE_LIBRTE_IEEE1588=n
>  CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
>  CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
> +CONFIG_RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS=n
> 
>  #
>  # Turn off Tx preparation stage
> diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile 
> index 93fdde1..c10153a 100644
> --- a/lib/librte_ether/Makefile
> +++ b/lib/librte_ether/Makefile
> @@ -56,5 +56,6 @@ SYMLINK-y-include += rte_eth_ctrl.h 
> SYMLINK-y-include += rte_dev_info.h  SYMLINK-y-include += rte_flow.h 
> SYMLINK-y-include += rte_flow_driver.h
> +SYMLINK-${CONFIG_RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS}-include += 
> +rte_eth_itt.h
> 
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_ether/rte_eth_itt.h 
> b/lib/librte_ether/rte_eth_itt.h new file mode 100644 index 
> 0000000..e7984fb
> --- /dev/null
> +++ b/lib/librte_ether/rte_eth_itt.h
> @@ -0,0 +1,69 @@
> +#ifndef _RTE_ETH_ITT_H_
> +#define _RTE_ETH_ITT_H_
> +
> +#include <ittnotify.h>
> +#include <rte_config.h>
> +
> +#define ITT_MAX_NAME_LEN (100)
> +
> +/**
> + * Auxiliary ITT structure belonging to port and using to:
> + *   -  track queue state to determine whether it is wasting loop iterations
> + *   -  begin or end ITT task using task domain and name
> + */
> +struct rte_eth_itt_aux_data {
> +	/**
> +	 * ITT domains for each queue.
> +	 */
> +	__itt_domain *wasted_iteration_itt_domains[RTE_MAX_QUEUES_PER_PORT];
> +	/**
> +	 * ITT task names for each queue.
> +	 */
> +	__itt_string_handle *wasted_iteration_itt_handles[RTE_MAX_QUEUES_PER_PORT];
> +	/**
> +	 * Flags indicating the queues state. Possible values:
> +	 * 1 - queue is wasting iterations, 0 - otherwise.
> +	 */
> +	uint8_t queue_is_wasting_iterations[RTE_MAX_QUEUES_PER_PORT];
> +};
> +
> +/**
> + * The pool of *rte_eth_itt_aux_data* structures.
> + */
> +struct rte_eth_itt_aux_data itt_aux_data[RTE_MAX_ETHPORTS];
> +
> +/**
> + * Initialization of rte_eth_itt_aux_data for a given port.
> + * This function must be invoked when ethernet device is being configured.
> + * Result will be stored in the global array *itt_aux_data*.
> + *
> + * @param port_id
> + *  The port identifier of the Ethernet device.
> + * @param port_name
> + *  The name of the Ethernet device.
> + * @param queue_num
> + *  The number of queues on specified port.
> + */
> +static inline void
> +rte_eth_init_itt(uint8_t port_id, char *port_name, uint8_t queue_num) {
> +	uint16_t q_id;
> +	for (q_id = 0; q_id < queue_num; ++q_id) {
> +		char domain_name[ITT_MAX_NAME_LEN];
> +		snprintf(domain_name, sizeof(domain_name),
> +			"RXBurst.WastedIterations.Port_%s.Queue_%d",
> +			port_name, q_id);
> +		itt_aux_data[port_id].wasted_iteration_itt_domains[q_id]
> +			= __itt_domain_create(domain_name);
> +
> +		char task_name[ITT_MAX_NAME_LEN];
> +		snprintf(task_name, sizeof(task_name),
> +			"port id: %d; queue id: %d",
> +			port_id, q_id);
> +		itt_aux_data[port_id].wasted_iteration_itt_handles[q_id]
> +			= __itt_string_handle_create(task_name);
> +
> +		itt_aux_data[port_id].queue_is_wasting_iterations[q_id] = 0;
> +	}
> +}
> +
> +#endif
> diff --git a/lib/librte_ether/rte_ethdev.c 
> b/lib/librte_ether/rte_ethdev.c index 81a45c0..9e5ac01 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -818,6 +818,13 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>  		return diag;
>  	}
> 
> +#ifdef RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS
> +	/**
> +	* See rte_eth_itt.h to find comments on code below.
> +	*/
> +	rte_eth_init_itt(port_id, dev->data->name, nb_rx_q); #endif
> +
>  	return 0;
>  }
> 
> diff --git a/lib/librte_ether/rte_ethdev.h 
> b/lib/librte_ether/rte_ethdev.h index f6e6c74..4ba90d2 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -186,6 +186,10 @@ extern "C" {
>  #include "rte_eth_ctrl.h"
>  #include "rte_dev_info.h"
> 
> +#ifdef RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS
> +#include "rte_eth_itt.h"
> +#endif
> +
>  struct rte_mbuf;
> 
>  /**
> @@ -2710,6 +2714,28 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
>  	int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
>  			rx_pkts, nb_pkts);
> 
> +#ifdef RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS
> +	/**
> +	* See rte_eth_itt.h to find comments on code below.
> +	*/
> +	if (unlikely(nb_rx == 0)) {
> +		if (!itt_aux_data[port_id].queue_is_wasting_iterations[queue_id]) {
> +			__itt_task_begin(
> +				itt_aux_data[port_id].wasted_iteration_itt_domains[queue_id],
> +				__itt_null, __itt_null,
> +				itt_aux_data[port_id].wasted_iteration_itt_handles[queue_id]);
> +			itt_aux_data[port_id].queue_is_wasting_iterations[queue_id] = 1;
> +		}
> +	}
> +	else {
> +		if (unlikely(itt_aux_data[port_id].queue_is_wasting_iterations[queue_id])) {
> +			__itt_task_end(
> +				itt_aux_data[port_id].wasted_iteration_itt_domains[queue_id]);
> +			itt_aux_data[port_id].queue_is_wasting_iterations[queue_id] = 0;
> +		}
> +	}
> +#endif
> +
>  #ifdef RTE_ETHDEV_RXTX_CALLBACKS
>  	struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
> 
> --
> 2.7.4
> 
> 
> --------------------------------------------------------------------
> Joint Stock Company Intel A/O
> Registered legal address: Krylatsky Hills Business Park,
> 17 Krylatskaya Str., Bldg 4, Moscow 121614, Russian Federation
> 
> This e-mail and any attachments may contain confidential material for 
> the sole use of the intended recipient(s). Any review or distribution 
> by others is strictly prohibited. If you are not the intended 
> recipient, please contact the sender and delete all copies.


--------------------------------------------------------------------
Joint Stock Company Intel A/O
Registered legal address: Krylatsky Hills Business Park,
17 Krylatskaya Str., Bldg 4, Moscow 121614,
Russian Federation

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
  

Patch

diff --git a/config/common_base b/config/common_base
index f6aafd1..60d8b63 100644
--- a/config/common_base
+++ b/config/common_base
@@ -135,6 +135,7 @@  CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS=n
 
 #
 # Turn off Tx preparation stage
diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile
index 93fdde1..c10153a 100644
--- a/lib/librte_ether/Makefile
+++ b/lib/librte_ether/Makefile
@@ -56,5 +56,6 @@  SYMLINK-y-include += rte_eth_ctrl.h
 SYMLINK-y-include += rte_dev_info.h
 SYMLINK-y-include += rte_flow.h
 SYMLINK-y-include += rte_flow_driver.h
+SYMLINK-${CONFIG_RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS}-include += rte_eth_itt.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ether/rte_eth_itt.h b/lib/librte_ether/rte_eth_itt.h
new file mode 100644
index 0000000..e7984fb
--- /dev/null
+++ b/lib/librte_ether/rte_eth_itt.h
@@ -0,0 +1,69 @@ 
+#ifndef _RTE_ETH_ITT_H_
+#define _RTE_ETH_ITT_H_
+
+#include <ittnotify.h>
+#include <rte_config.h>
+
+#define ITT_MAX_NAME_LEN (100)
+
+/**
+ * Auxiliary ITT structure belonging to port and using to:
+ *   -  track queue state to determine whether it is wasting loop iterations
+ *   -  begin or end ITT task using task domain and name
+ */
+struct rte_eth_itt_aux_data {
+	/**
+	 * ITT domains for each queue.
+	 */
+	__itt_domain *wasted_iteration_itt_domains[RTE_MAX_QUEUES_PER_PORT];
+	/**
+	 * ITT task names for each queue.
+	 */
+	__itt_string_handle *wasted_iteration_itt_handles[RTE_MAX_QUEUES_PER_PORT];
+	/**
+	 * Flags indicating the queues state. Possible values:
+	 * 1 - queue is wasting iterations, 0 - otherwise.
+	 */
+	uint8_t queue_is_wasting_iterations[RTE_MAX_QUEUES_PER_PORT];
+};
+
+/**
+ * The pool of *rte_eth_itt_aux_data* structures.
+ */
+struct rte_eth_itt_aux_data itt_aux_data[RTE_MAX_ETHPORTS];
+
+/**
+ * Initialization of rte_eth_itt_aux_data for a given port.
+ * This function must be invoked when ethernet device is being configured.
+ * Result will be stored in the global array *itt_aux_data*.
+ *
+ * @param port_id
+ *  The port identifier of the Ethernet device.
+ * @param port_name
+ *  The name of the Ethernet device.
+ * @param queue_num
+ *  The number of queues on specified port.
+ */
+static inline void
+rte_eth_init_itt(uint8_t port_id, char *port_name, uint8_t queue_num) {
+	uint16_t q_id;
+	for (q_id = 0; q_id < queue_num; ++q_id) {
+		char domain_name[ITT_MAX_NAME_LEN];
+		snprintf(domain_name, sizeof(domain_name),
+			"RXBurst.WastedIterations.Port_%s.Queue_%d",
+			port_name, q_id);
+		itt_aux_data[port_id].wasted_iteration_itt_domains[q_id]
+			= __itt_domain_create(domain_name);
+
+		char task_name[ITT_MAX_NAME_LEN];
+		snprintf(task_name, sizeof(task_name),
+			"port id: %d; queue id: %d",
+			port_id, q_id);
+		itt_aux_data[port_id].wasted_iteration_itt_handles[q_id]
+			= __itt_string_handle_create(task_name);
+
+		itt_aux_data[port_id].queue_is_wasting_iterations[q_id] = 0;
+	}
+}
+
+#endif
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 81a45c0..9e5ac01 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -818,6 +818,13 @@  rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		return diag;
 	}
 
+#ifdef RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS
+	/**
+	* See rte_eth_itt.h to find comments on code below.
+	*/
+	rte_eth_init_itt(port_id, dev->data->name, nb_rx_q);
+#endif
+
 	return 0;
 }
 
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index f6e6c74..4ba90d2 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -186,6 +186,10 @@  extern "C" {
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
 
+#ifdef RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS
+#include "rte_eth_itt.h"
+#endif
+
 struct rte_mbuf;
 
 /**
@@ -2710,6 +2714,28 @@  rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
 	int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
 			rx_pkts, nb_pkts);
 
+#ifdef RTE_ETHDEV_TRACE_WASTED_RX_ITERATIONS
+	/**
+	* See rte_eth_itt.h to find comments on code below.
+	*/
+	if (unlikely(nb_rx == 0)) {
+		if (!itt_aux_data[port_id].queue_is_wasting_iterations[queue_id]) {
+			__itt_task_begin(
+				itt_aux_data[port_id].wasted_iteration_itt_domains[queue_id],
+				__itt_null, __itt_null,
+				itt_aux_data[port_id].wasted_iteration_itt_handles[queue_id]);
+			itt_aux_data[port_id].queue_is_wasting_iterations[queue_id] = 1;
+		}
+	}
+	else {
+		if (unlikely(itt_aux_data[port_id].queue_is_wasting_iterations[queue_id])) {
+			__itt_task_end(
+				itt_aux_data[port_id].wasted_iteration_itt_domains[queue_id]);
+			itt_aux_data[port_id].queue_is_wasting_iterations[queue_id] = 0;
+		}
+	}
+#endif
+
 #ifdef RTE_ETHDEV_RXTX_CALLBACKS
 	struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];