[dpdk-dev,v2] pipeline: add statistics for librte_pipeline ports and tables

Message ID 1430396143-10936-1-git-send-email-michalx.k.jastrzebski@intel.com (mailing list archive)
State Changes Requested, archived
Headers

Commit Message

Michal Jastrzebski April 30, 2015, 12:15 p.m. UTC
From: Pawel Wodkowski <pawelx.wdkowski@intel.com>

This patch adds statistics collection for librte_pipeline.
Those statistics ale disabled by default during build time.

Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
---
 config/common_bsdapp               |    1 +
 config/common_linuxapp             |    1 +
 lib/librte_pipeline/rte_pipeline.c |  185 +++++++++++++++++++++++++++++++++---
 lib/librte_pipeline/rte_pipeline.h |   99 +++++++++++++++++++
 4 files changed, 275 insertions(+), 11 deletions(-)
  

Comments

Cristian Dumitrescu May 5, 2015, 3:11 p.m. UTC | #1
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Michal Jastrzebski
> Sent: Thursday, April 30, 2015 1:16 PM
> To: dev@dpdk.org
> Cc: Wodkowski, PawelX
> Subject: [dpdk-dev] [PATCH v2] pipeline: add statistics for librte_pipeline
> ports and tables
> 
> From: Pawel Wodkowski <pawelx.wdkowski@intel.com>
> 
> This patch adds statistics collection for librte_pipeline.
> Those statistics ale disabled by default during build time.
> 
> Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> ---
>  config/common_bsdapp               |    1 +
>  config/common_linuxapp             |    1 +
>  lib/librte_pipeline/rte_pipeline.c |  185
> +++++++++++++++++++++++++++++++++---
>  lib/librte_pipeline/rte_pipeline.h |   99 +++++++++++++++++++
>  4 files changed, 275 insertions(+), 11 deletions(-)
> 
> diff --git a/config/common_bsdapp b/config/common_bsdapp
> index 1d0f5b2..e4f0bf5 100644
> --- a/config/common_bsdapp
> +++ b/config/common_bsdapp
> @@ -414,6 +414,7 @@ CONFIG_RTE_TABLE_LPM_STATS_COLLECT=n
>  # Compile librte_pipeline
>  #
>  CONFIG_RTE_LIBRTE_PIPELINE=y
> +CONFIG_RTE_PIPELINE_STATS_COLLECT=n
> 
>  #
>  # Compile librte_kni
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 8b01ca9..05553d9 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -421,6 +421,7 @@ CONFIG_RTE_TABLE_LPM_STATS_COLLECT=n
>  # Compile librte_pipeline
>  #
>  CONFIG_RTE_LIBRTE_PIPELINE=y
> +CONFIG_RTE_PIPELINE_STATS_COLLECT=n
> 
>  #
>  # Compile librte_kni
> diff --git a/lib/librte_pipeline/rte_pipeline.c
> b/lib/librte_pipeline/rte_pipeline.c
> index 36d92c9..69bf003 100644
> --- a/lib/librte_pipeline/rte_pipeline.c
> +++ b/lib/librte_pipeline/rte_pipeline.c
> @@ -48,6 +48,17 @@
> 
>  #define RTE_TABLE_INVALID                                 UINT32_MAX
> 
> +#ifdef RTE_PIPELINE_STATS_COLLECT
> +#define RTE_PIPELINE_STATS_ADD(counter, val) \
> +	({ (counter) += (val); })
> +
> +#define RTE_PIPELINE_STATS_ADD_M(counter, mask) \
> +	({ (counter) += __builtin_popcountll(mask); })
> +#else
> +#define RTE_PIPELINE_STATS_ADD(counter, val)
> +#define RTE_PIPELINE_STATS_ADD_M(counter, mask)
> +#endif
> +
>  struct rte_port_in {
>  	/* Input parameters */
>  	struct rte_port_in_ops ops;
> @@ -63,6 +74,8 @@ struct rte_port_in {
> 
>  	/* List of enabled ports */
>  	struct rte_port_in *next;
> +
> +	uint64_t n_pkts_dropped_by_ah;
>  };
> 
>  struct rte_port_out {
> @@ -74,6 +87,8 @@ struct rte_port_out {
> 
>  	/* Handle to low-level port */
>  	void *h_port;
> +
> +	uint64_t n_pkts_dropped_by_ah;
>  };
> 
>  struct rte_table {
> @@ -90,6 +105,12 @@ struct rte_table {
> 
>  	/* Handle to the low-level table object */
>  	void *h_table;
> +
> +	/* Stats for this table. */
> +	uint64_t n_pkts_dropped_by_lkp_hit_ah;
> +	uint64_t n_pkts_dropped_by_lkp_miss_ah;
> +	uint64_t n_pkts_dropped_lkp_hit;
> +	uint64_t n_pkts_dropped_lkp_miss;
>  };
> 
>  #define RTE_PIPELINE_MAX_NAME_SZ                           124
> @@ -1040,6 +1061,8 @@ rte_pipeline_action_handler_port_bulk(struct
> rte_pipeline *p,
> 
>  		port_out->f_action_bulk(p->pkts, &pkts_mask, port_out-
> >arg_ah);
>  		p->action_mask0[RTE_PIPELINE_ACTION_DROP] |=
> pkts_mask ^  mask;
> +		RTE_PIPELINE_STATS_ADD_M(port_out-
> >n_pkts_dropped_by_ah,
> +				pkts_mask ^  mask);
>  	}
> 
>  	/* Output port TX */
> @@ -1071,6 +1094,9 @@ rte_pipeline_action_handler_port(struct
> rte_pipeline *p, uint64_t pkts_mask)
>  				p-
> >action_mask0[RTE_PIPELINE_ACTION_DROP] |=
>  					(pkt_mask ^ 1LLU) << i;
> 
> +				RTE_PIPELINE_STATS_ADD(port_out-
> >n_pkts_dropped_by_ah,
> +						pkt_mask ^ 1LLU);
> +
>  				/* Output port TX */
>  				if (pkt_mask != 0)
>  					port_out->ops.f_tx(port_out-
> >h_port,
> @@ -1104,6 +1130,9 @@ rte_pipeline_action_handler_port(struct
> rte_pipeline *p, uint64_t pkts_mask)
>  				p-
> >action_mask0[RTE_PIPELINE_ACTION_DROP] |=
>  					(pkt_mask ^ 1LLU) << i;
> 
> +				RTE_PIPELINE_STATS_ADD(port_out-
> >n_pkts_dropped_by_ah,
> +						pkt_mask ^ 1LLU);
> +
>  				/* Output port TX */
>  				if (pkt_mask != 0)
>  					port_out->ops.f_tx(port_out-
> >h_port,
> @@ -1140,6 +1169,9 @@ rte_pipeline_action_handler_port_meta(struct
> rte_pipeline *p,
>  				p-
> >action_mask0[RTE_PIPELINE_ACTION_DROP] |=
>  					(pkt_mask ^ 1LLU) << i;
> 
> +				RTE_PIPELINE_STATS_ADD(port_out-
> >n_pkts_dropped_by_ah,
> +						pkt_mask ^ 1ULL);
> +
>  				/* Output port TX */
>  				if (pkt_mask != 0)
>  					port_out->ops.f_tx(port_out-
> >h_port,
> @@ -1174,6 +1206,9 @@ rte_pipeline_action_handler_port_meta(struct
> rte_pipeline *p,
>  				p-
> >action_mask0[RTE_PIPELINE_ACTION_DROP] |=
>  					(pkt_mask ^ 1LLU) << i;
> 
> +				RTE_PIPELINE_STATS_ADD(port_out-
> >n_pkts_dropped_by_ah,
> +						pkt_mask ^ 1ULL);
> +
>  				/* Output port TX */
>  				if (pkt_mask != 0)
>  					port_out->ops.f_tx(port_out-
> >h_port,
> @@ -1232,10 +1267,10 @@ rte_pipeline_run(struct rte_pipeline *p)
>  		if (port_in->f_action != NULL) {
>  			uint64_t mask = pkts_mask;
> 
> -			port_in->f_action(p->pkts, n_pkts, &pkts_mask,
> -				port_in->arg_ah);
> -			p->action_mask0[RTE_PIPELINE_ACTION_DROP] |=
> -				pkts_mask ^ mask;
> +			port_in->f_action(p->pkts, n_pkts, &pkts_mask,
> port_in->arg_ah);
> +			mask ^= pkts_mask;
> +			p->action_mask0[RTE_PIPELINE_ACTION_DROP] |=
> mask;
> +			RTE_PIPELINE_STATS_ADD_M(port_in-
> >n_pkts_dropped_by_ah, mask);
>  		}
> 
>  		/* Table */
> @@ -1261,9 +1296,10 @@ rte_pipeline_run(struct rte_pipeline *p)
>  					table->f_action_miss(p->pkts,
>  						&lookup_miss_mask,
>  						default_entry, table-
> >arg_ah);
> -					p->action_mask0[
> -
> 	RTE_PIPELINE_ACTION_DROP] |=
> -						lookup_miss_mask ^ mask;
> +					mask ^= lookup_miss_mask;
> +					p-
> >action_mask0[RTE_PIPELINE_ACTION_DROP] |= mask;
> +					RTE_PIPELINE_STATS_ADD_M(
> +						table-
> >n_pkts_dropped_by_lkp_miss_ah, mask);
>  				}
> 
>  				/* Table reserved actions */
> @@ -1277,6 +1313,10 @@ rte_pipeline_run(struct rte_pipeline *p)
>  					uint32_t pos = default_entry->action;
> 
>  					p->action_mask0[pos] =
> lookup_miss_mask;
> +					if (pos ==
> RTE_PIPELINE_ACTION_DROP) {
> +
> 	RTE_PIPELINE_STATS_ADD_M(table->n_pkts_dropped_lkp_miss,
> +							lookup_miss_mask);
> +					}
>  				}
>  			}
> 
> @@ -1289,9 +1329,10 @@ rte_pipeline_run(struct rte_pipeline *p)
>  					table->f_action_hit(p->pkts,
>  						&lookup_hit_mask,
>  						p->entries, table->arg_ah);
> -					p->action_mask0[
> -
> 	RTE_PIPELINE_ACTION_DROP] |=
> -						lookup_hit_mask ^ mask;
> +					mask ^= lookup_hit_mask;
> +					p-
> >action_mask0[RTE_PIPELINE_ACTION_DROP] |= mask;
> +					RTE_PIPELINE_STATS_ADD_M(
> +						table-
> >n_pkts_dropped_by_lkp_hit_ah, mask);
>  				}
> 
>  				/* Table reserved actions */
> @@ -1308,6 +1349,9 @@ rte_pipeline_run(struct rte_pipeline *p)
>  				p-
> >action_mask0[RTE_PIPELINE_ACTION_TABLE] |=
>  					p->action_mask1[
> 
> 	RTE_PIPELINE_ACTION_TABLE];
> +
> +				RTE_PIPELINE_STATS_ADD_M(table-
> >n_pkts_dropped_lkp_hit,
> +						p-
> >action_mask1[RTE_PIPELINE_ACTION_DROP]);
>  			}
> 
>  			/* Prepare for next iteration */
> @@ -1370,9 +1414,128 @@ rte_pipeline_port_out_packet_insert(struct
> rte_pipeline *p,
> 
>  		if (pkt_mask != 0) /* Output port TX */
>  			port_out->ops.f_tx(port_out->h_port, pkt);
> -		else
> +		else {
>  			rte_pktmbuf_free(pkt);
> +			RTE_PIPELINE_STATS_ADD(port_out-
> >n_pkts_dropped_by_ah, 1);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +int rte_pipeline_port_in_stats_read(struct rte_pipeline *p, uint32_t
> port_id,
> +	struct rte_pipeline_port_in_stats *stats, int clear)
> +{
> +	struct rte_port_in *port;
> +	int retval;
> +
> +	if (p == NULL) {
> +		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
> +			__func__);
> +		return -EINVAL;
> +	}
> +
> +	if (port_id >= p->num_ports_in) {
> +		RTE_LOG(ERR, PIPELINE,
> +			"%s: port IN ID %u is out of range\n",
> +			__func__, port_id);
> +		return -EINVAL;
> +	}
> +
> +	port = &p->ports_in[port_id];
> +
> +	if (port->ops.f_stats != NULL) {
> +		retval = port->ops.f_stats(port->h_port, &stats->stats,
> clear);
> +		if (retval)
> +			return retval;
> +	} else if (stats != NULL)
> +		memset(&stats->stats, 0, sizeof(stats->stats));
> +
> +	if (stats != NULL)
> +		stats->n_pkts_dropped_by_ah = port-
> >n_pkts_dropped_by_ah;
> +
> +	if (clear != 0)
> +		port->n_pkts_dropped_by_ah = 0;
> +
> +	return 0;
> +}
> +
> +int rte_pipeline_port_out_stats_read(struct rte_pipeline *p, uint32_t
> port_id,
> +	struct rte_pipeline_port_out_stats *stats, int clear)
> +{
> +	struct rte_port_out *port;
> +	int retval;
> +
> +	if (p == NULL) {
> +		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
> __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (port_id >= p->num_ports_out) {
> +		RTE_LOG(ERR, PIPELINE,
> +			"%s: port OUT ID %u is out of range\n", __func__,
> port_id);
> +		return -EINVAL;
> +	}
> +
> +	port = &p->ports_out[port_id];
> +	if (port->ops.f_stats != NULL) {
> +		retval = port->ops.f_stats(port->h_port, &stats->stats,
> clear);
> +		if (retval != 0)
> +			return retval;
> +	} else if (stats != NULL)
> +		memset(&stats->stats, 0, sizeof(stats->stats));
> +
> +	if (stats != NULL)
> +		stats->n_pkts_dropped_by_ah = port-
> >n_pkts_dropped_by_ah;
> +
> +	if (clear != 0)
> +		port->n_pkts_dropped_by_ah = 0;
> +
> +	return 0;
> +}
> +
> +int rte_pipeline_table_stats_read(struct rte_pipeline *p, uint32_t table_id,
> +	struct rte_pipeline_table_stats *stats, int clear)
> +{
> +	struct rte_table *table;
> +	int retval;
> +
> +	if (p == NULL) {
> +		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
> +			__func__);
> +		return -EINVAL;
> +	}
> +
> +	if (table_id >= p->num_tables) {
> +		RTE_LOG(ERR, PIPELINE,
> +				"%s: table %u is out of range\n", __func__,
> table_id);
> +		return -EINVAL;
> +	}
> +
> +	table = &p->tables[table_id];
> +	if (table->ops.f_stats != NULL) {
> +		retval = table->ops.f_stats(table->h_table, &stats->stats,
> clear);
> +		if (retval != 0)
> +			return retval;
> +	} else if (stats != NULL)
> +		memset(&stats->stats, 0, sizeof(stats->stats));
> +
> +	if (stats != NULL) {
> +		stats->n_pkts_dropped_by_lkp_hit_ah =
> +			table->n_pkts_dropped_by_lkp_hit_ah;
> +		stats->n_pkts_dropped_by_lkp_miss_ah =
> +			table->n_pkts_dropped_by_lkp_miss_ah;
> +		stats->n_pkts_dropped_lkp_hit = table-
> >n_pkts_dropped_lkp_hit;
> +		stats->n_pkts_dropped_lkp_miss = table-
> >n_pkts_dropped_lkp_miss;
> +	}
> +
> +	if (clear != 0) {
> +		table->n_pkts_dropped_by_lkp_hit_ah = 0;
> +		table->n_pkts_dropped_by_lkp_miss_ah = 0;
> +		table->n_pkts_dropped_lkp_hit = 0;
> +		table->n_pkts_dropped_lkp_miss = 0;
>  	}
> 
>  	return 0;
>  }
> +
> diff --git a/lib/librte_pipeline/rte_pipeline.h
> b/lib/librte_pipeline/rte_pipeline.h
> index fb1014a..a7757c6 100644
> --- a/lib/librte_pipeline/rte_pipeline.h
> +++ b/lib/librte_pipeline/rte_pipeline.h
> @@ -111,6 +111,45 @@ struct rte_pipeline_params {
>  	uint32_t offset_port_id;
>  };
> 
> +/** Pipeline port in stats. */
> +struct rte_pipeline_port_in_stats {
> +	/** Port in stats. */
> +	struct rte_port_in_stats stats;
> +
> +	/** Number of packets dropped by action handler. */
> +	uint64_t n_pkts_dropped_by_ah;
> +
> +};
> +
> +/** Pipeline port out stats. */
> +struct rte_pipeline_port_out_stats {
> +	/** Port out stats. */
> +	struct rte_port_out_stats stats;
> +
> +	/** Number of packets dropped by action handler. */
> +	uint64_t n_pkts_dropped_by_ah;
> +};
> +
> +/** Pipeline table stats. */
> +struct rte_pipeline_table_stats {
> +	/** Table stats. */
> +	struct rte_table_stats stats;
> +
> +	/** Number of packets dropped by lookup hit action handler. */
> +	uint64_t n_pkts_dropped_by_lkp_hit_ah;
> +
> +	/** Number of packets dropped by lookup miss action handler. */
> +	uint64_t n_pkts_dropped_by_lkp_miss_ah;
> +
> +	/** Number of packets dropped by pipeline in behalf of this table
> based on
> +	 * on action specified in table entry. */
> +	uint64_t n_pkts_dropped_lkp_hit;
> +
> +	/** Number of packets dropped by pipeline in behalf of this table
> based on
> +	 * on action specified in table entry. */
> +	uint64_t n_pkts_dropped_lkp_miss;
> +};
> +
>  /**
>   * Pipeline create
>   *
> @@ -425,6 +464,26 @@ int rte_pipeline_table_entry_delete(struct
> rte_pipeline *p,
>  	int *key_found,
>  	struct rte_pipeline_table_entry *entry);
> 
> +/**
> + * Read pipeline table stats.
> + *
> + * This function reads table statistics identified by *table_id* of given
> + * pipeline *p*.
> + *
> + * @param p
> + *   Handle to pipeline instance.
> + * @param table_id
> + *   Port ID what stats will be returned.
> + * @param stats
> + *   Statistics buffer.
> + * @param clear
> + *   If not 0 clear stats after reading.
> + * @return
> + *   0 on success, error code otherwise
> + */
> +int rte_pipeline_table_stats_read(struct rte_pipeline *p, uint32_t table_id,
> +	struct rte_pipeline_table_stats *stats, int clear);
> +
>  /*
>   * Port IN
>   *
> @@ -539,6 +598,26 @@ int rte_pipeline_port_in_enable(struct rte_pipeline
> *p,
>  int rte_pipeline_port_in_disable(struct rte_pipeline *p,
>  	uint32_t port_id);
> 
> +/**
> + * Read pipeline port in stats.
> + *
> + * This function reads port in statistics identified by *port_id* of given
> + * pipeline *p*.
> + *
> + * @param p
> + *   Handle to pipeline instance.
> + * @param port_id
> + *   Port ID what stats will be returned.
> + * @param stats
> + *   Statistics buffer.
> + * @param clear
> + *   If not 0 clear stats after reading.
> + * @return
> + *   0 on success, error code otherwise
> + */
> +int rte_pipeline_port_in_stats_read(struct rte_pipeline *p, uint32_t
> port_id,
> +	struct rte_pipeline_port_in_stats *stats, int clear);
> +
>  /*
>   * Port OUT
>   *
> @@ -657,6 +736,26 @@ int rte_pipeline_port_out_packet_insert(struct
> rte_pipeline *p,
>  	uint32_t port_id,
>  	struct rte_mbuf *pkt);
> 
> +/**
> + * Read pipeline port out stats.
> + *
> + * This function reads port out statistics identified by *port_id* of given
> + * pipeline *p*.
> + *
> + * @param p
> + *   Handle to pipeline instance.
> + * @param port_id
> + *   Port ID what stats will be returned.
> + * @param stats
> + *   Statistics buffer.
> + * @param clear
> + *   If not 0 clear stats after reading.
> + * @return
> + *   0 on success, error code otherwise
> + */
> +int rte_pipeline_port_out_stats_read(struct rte_pipeline *p, uint32_t
> port_id,
> +	struct rte_pipeline_port_out_stats *stats, int clear);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> --
> 1.7.9.5

Acked by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
  
Thomas Monjalon May 18, 2015, 11:01 a.m. UTC | #2
2015-05-05 15:11, Dumitrescu, Cristian:
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Michal Jastrzebski
> > From: Pawel Wodkowski <pawelx.wdkowski@intel.com>
> > 
> > This patch adds statistics collection for librte_pipeline.
> > Those statistics ale disabled by default during build time.
> > 
> > Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> > ---
> >  config/common_bsdapp               |    1 +
> >  config/common_linuxapp             |    1 +
[...]
> >  # Compile librte_pipeline
> >  #
> >  CONFIG_RTE_LIBRTE_PIPELINE=y
> > +CONFIG_RTE_PIPELINE_STATS_COLLECT=n
[...]
> 
> Acked by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>

Nack because of new config option.
The same problem appear for all series related to packet framework.
  
Cristian Dumitrescu May 19, 2015, 10:41 p.m. UTC | #3
Hi Thomas,

I am asking for your help to identify a technical solution for moving forward. Enabling the stats is a very important part of our Packet Framework enhancements, as it is a dependency for a set of patches we plan to submit next.

1. What is the technical solution to avoid performance loss due to stats support?
Generally, I would agree with you that config options should be avoided, especially those that alter the API (function prototypes, data structure layouts, etc). Please note this is not the case for any of our patches, as only the stats collection is enabled/disabled, while the data structures and functions are not changed by the build time option.

But what can you do for statistics? We all know that collecting the stats sucks up CPU cycles, especially due to memory accesses, so stats always have a run-time cost. Traditionally, stats are typically enabled for debugging purposes and then turned off for the release version when performance is required. How can this be done if build time flags are not used?

Enabling stats conditionally at run-time has a significant cost potentially, as this involves adding branches to the code; this is particularly bad due to branches being added in the library code, which is not aware of the overhead of the application running on top of it on the same CPU core. We are battling cycles very hard in the Packet Framework, trying to avoid any waste, and the cost of branch misprediction of ~15 cycles is prohibitive when your packet budget (for the overall app, not just the library) is less than ~200 cycles. Sometimes, branches are predicted correctly by the CPU, some other times not, for example when the application code adds a lot of additional branches (which is the typical case for an app). This is outside of the control of the library.

I would not recommend the run-time conditional option for stats support in DPDK, where performance is key. The only option that looks feasible to me to avoid performance impact is well-behaved build time option (i.e. those that does not change the API functions and data structures).


 2. You already agreed that having build time options for performance reasons is fine, why change now?
We had a previous email thread on QoS stats (http://www.dpdk.org/ml/archives/dev/2015-February/013820.html ), and you stated:

>We must remove and avoid build-time options.
>The only ones which might be acceptable are the ones which allow more
>performance by disabling some features.

I think stats support is the perfect example where this is applicable. We took this as the guidance from you, and we worked out these stats exactly as you recommended. Let me also mention that the format of the API functions and data structures for stats were also designed to match exactly the requirements from this discussion (e.g. clear on read approach, etc). Our functions match exactly the strategy that Stephen and myself agreed on at that time. Our work was done under this agreement.


3. There are other libraries that already apply the build time option for stats support
Other DPDK libraries provide stats based on build time configuration: Ethdev, QoS scheduler, IP fragmentation. Are you suggesting we need to remove the build time stats option for these libraries, too? If yes, what about the performance impact, which would be prohibitive (I can definitely testify this for QOS scheduler, where every cycle counts). This would result in choosing between functionality (stats support) vs. performance, and it might be that for some of these libraries the decision will be to remove the stats support altogether in order to keep the performance unchanged.

I think we should encourage people to add stats to more libraries, and if we forbid build-time option for stats support we send a strong signal to discourage people from adding stats support to the applicable DPDK libraries.


4. Coding guidelines
Is there a rule on "build time options are no longer allowed in DPDK" published somewhere in our documentation? Is it part of the coding standard? Am I wrong to say this is not the case at this point?
If we intend to have such a rule, then I think it needs to be discussed on this list  and published, to avoid any misinterpretation (leading to rework and debates). It should be published together with all the acceptable exceptions that are part of any rule definition. I personally think the stats support should be such an exception.

Thanks for your help!

Regards,
Cristian


> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Monday, May 18, 2015 12:02 PM
> To: Wodkowski, PawelX
> Cc: dev@dpdk.org; Dumitrescu, Cristian; Jastrzebski, MichalX K
> Subject: Re: [dpdk-dev] [PATCH v2] pipeline: add statistics for librte_pipeline
> ports and tables
> 
> 2015-05-05 15:11, Dumitrescu, Cristian:
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Michal Jastrzebski
> > > From: Pawel Wodkowski <pawelx.wdkowski@intel.com>
> > >
> > > This patch adds statistics collection for librte_pipeline.
> > > Those statistics ale disabled by default during build time.
> > >
> > > Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> > > ---
> > >  config/common_bsdapp               |    1 +
> > >  config/common_linuxapp             |    1 +
> [...]
> > >  # Compile librte_pipeline
> > >  #
> > >  CONFIG_RTE_LIBRTE_PIPELINE=y
> > > +CONFIG_RTE_PIPELINE_STATS_COLLECT=n
> [...]
> >
> > Acked by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> 
> Nack because of new config option.
> The same problem appear for all series related to packet framework.
  
Thomas Monjalon May 20, 2015, 12:06 a.m. UTC | #4
Hi Cristian,

Thanks for the detailed explanation.

You are raising a trade-off problem about feature/maintenance/performance.
I think we must choose among 4 solutions:
	1/ always enabled
	2/ build-time log level
	3/ run-time option
	4/ build-time option

It's good to have this discussion and let other contributors giving their
opinion.

2015-05-19 22:41, Dumitrescu, Cristian:
> Hi Thomas,
> 
> I am asking for your help to identify a technical solution for moving
> forward. Enabling the stats is a very important part of our Packet
> Framework enhancements, as it is a dependency for a set of patches
> we plan to submit next.
> 
> 1. What is the technical solution to avoid performance loss due to stats
> support?
> Generally, I would agree with you that config options should be avoided,
> especially those that alter the API (function prototypes, data structure
> layouts, etc). Please note this is not the case for any of our patches,
> as only the stats collection is enabled/disabled, while the data
> structures and functions are not changed by the build time option.
> 
> But what can you do for statistics? We all know that collecting the stats
> sucks up CPU cycles, especially due to memory accesses, so stats always
> have a run-time cost. Traditionally, stats are typically enabled for
> debugging purposes and then turned off for the release version when
> performance is required. How can this be done if build time flags are not
> used?

Statistics in drivers are always enabled (first solution).
If those statistics are only used for debugging, why not using the build-time
option CONFIG_RTE_LOG_LEVEL? (second solution)

> Enabling stats conditionally at run-time has a significant cost
> potentially, as this involves adding branches to the code; this is

Yes, a run-time option (similar to run-time log level) will cost only
an "unlikely if" branching. (third solution)
 
> particularly bad due to branches being added in the library code,
> which is not aware of the overhead of the application running on top of
> it on the same CPU core. We are battling cycles very hard in the
> Packet Framework, trying to avoid any waste, and the cost of branch
> misprediction of ~15 cycles is prohibitive when your packet budget
> (for the overall app, not just the library) is less than ~200 cycles.

This kind of issue applies to many areas of DPDK.

> Sometimes, branches are predicted correctly by the CPU, some other
> times not, for example when the application code adds a lot of additional
> branches (which is the typical case for an app). This is outside of the
> control of the library.
> 
> I would not recommend the run-time conditional option for stats support
> in DPDK, where performance is key. The only option that looks feasible
> to me to avoid performance impact is well-behaved build time option
> (i.e. those that does not change the API functions and data structures).

The compile-time option makes testing coverage really complex.
(fourth solution)

>  2. You already agreed that having build time options for performance
>  reasons is fine, why change now?

Right, build time options are tolerated in DPDK because it is a performance
oriented project. We also have to avoid 2 issues:
- increasing number of compile-time options hide some bugs
- binary distributions of DPDK cannot use these compile-time "features"

> We had a previous email thread on QoS stats (http://www.dpdk.org/ml/archives/dev/2015-February/013820.html),
> and you stated:
> 
> >We must remove and avoid build-time options.
> >The only ones which might be acceptable are the ones which allow more
> >performance by disabling some features.
> 
> I think stats support is the perfect example where this is applicable.
> We took this as the guidance from you, and we worked out these stats
> exactly as you recommended. Let me also mention that the format of the
> API functions and data structures for stats were also designed to match
> exactly the requirements from this discussion (e.g. clear on read approach,
> etc). Our functions match exactly the strategy that Stephen and myself
> agreed on at that time. Our work was done under this agreement.
> 
> 
> 3. There are other libraries that already apply the build time option for
> stats support

That's why I suggested to remove this kind of option in the other libraries.

> Other DPDK libraries provide stats based on build time configuration:
> Ethdev, QoS scheduler, IP fragmentation.

ethdev? Are you referring to RTE_ETHDEV_QUEUE_STAT_CNTRS which is a max?

> Are you suggesting we need to remove the build time stats option for
> these libraries, too?

Yes for CONFIG_RTE_LIBRTE_IP_FRAG_TBL_STAT and CONFIG_RTE_SCHED_COLLECT_STATS,
but it's open to comments from others.

> If yes, what about the performance impact, which would be prohibitive
> (I can definitely testify this for QOS scheduler, where every cycle counts).
> This would result in choosing between functionality (stats support) vs.
> performance, and it might be that for some of these libraries the decision
> will be to remove the stats support altogether in order to keep the
> performance unchanged.

Note that distributions must take this decision when packaging DPDK:
stats or extra performance?
The decision is easier if those options are clearly documented as debug
facilities. But in this case, using only 1 option (log level) would make
maintenance easier (counterpart is enabling all or none stats).

> I think we should encourage people to add stats to more libraries, and
> if we forbid build-time option for stats support we send a strong signal
> to discourage people from adding stats support to the applicable DPDK
> libraries.

That's a simple fifth solution: removing statistics from these libraries.
But I feel it's a bad idea.

> 4. Coding guidelines
> Is there a rule on "build time options are no longer allowed in DPDK"
> published somewhere in our documentation? Is it part of the coding standard?
> Am I wrong to say this is not the case at this point?

No, no and no you are not wrrong;)

> If we intend to have such a rule, then I think it needs to be discussed
> on this list  and published, to avoid any misinterpretation (leading to
> rework and debates). It should be published together with all the
> acceptable exceptions that are part of any rule definition.

You are totally right. We must start writing this kind of document.

> I personally think the stats support should be such an exception.

That's your opinion and we have to carefully read opinion from others.

> Thanks for your help!

Thanks for the contribution, Cristian.


> > -----Original Message-----
> > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > Sent: Monday, May 18, 2015 12:02 PM
> > To: Wodkowski, PawelX
> > Cc: dev@dpdk.org; Dumitrescu, Cristian; Jastrzebski, MichalX K
> > Subject: Re: [dpdk-dev] [PATCH v2] pipeline: add statistics for librte_pipeline
> > ports and tables
> > 
> > 2015-05-05 15:11, Dumitrescu, Cristian:
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Michal Jastrzebski
> > > > From: Pawel Wodkowski <pawelx.wdkowski@intel.com>
> > > >
> > > > This patch adds statistics collection for librte_pipeline.
> > > > Those statistics ale disabled by default during build time.
> > > >
> > > > Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> > > > ---
> > > >  config/common_bsdapp               |    1 +
> > > >  config/common_linuxapp             |    1 +
> > [...]
> > > >  # Compile librte_pipeline
> > > >  #
> > > >  CONFIG_RTE_LIBRTE_PIPELINE=y
> > > > +CONFIG_RTE_PIPELINE_STATS_COLLECT=n
> > [...]
> > >
> > > Acked by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> > 
> > Nack because of new config option.
> > The same problem appear for all series related to packet framework.
  
Cristian Dumitrescu May 20, 2015, 1:57 p.m. UTC | #5
Hi Thomas,

Can you please describe what solution 2 on your list (build-time log level) consists of?

I see log level useful for printing messages when an event takes place, but this is not what these stats patches are about. We want to poll for those counters on demand: if the build-time flag is off, then the value of those counters is 0; when the build-time is on, then the stats counters have the real value. Basically, the build-time flag only enables/disables the update of the counters at run-time, which is where CPU cycles are consumed. I am not sure how the log levels can help here?

Regards,
Cristian

> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Wednesday, May 20, 2015 1:06 AM
> To: Dumitrescu, Cristian
> Cc: Wodkowski, PawelX; dev@dpdk.org; Jastrzebski, MichalX K
> Subject: Re: [dpdk-dev] [PATCH v2] pipeline: add statistics for librte_pipeline
> ports and tables
> 
> Hi Cristian,
> 
> Thanks for the detailed explanation.
> 
> You are raising a trade-off problem about
> feature/maintenance/performance.
> I think we must choose among 4 solutions:
> 	1/ always enabled
> 	2/ build-time log level
> 	3/ run-time option
> 	4/ build-time option
> 
> It's good to have this discussion and let other contributors giving their
> opinion.
> 
> 2015-05-19 22:41, Dumitrescu, Cristian:
> > Hi Thomas,
> >
> > I am asking for your help to identify a technical solution for moving
> > forward. Enabling the stats is a very important part of our Packet
> > Framework enhancements, as it is a dependency for a set of patches
> > we plan to submit next.
> >
> > 1. What is the technical solution to avoid performance loss due to stats
> > support?
> > Generally, I would agree with you that config options should be avoided,
> > especially those that alter the API (function prototypes, data structure
> > layouts, etc). Please note this is not the case for any of our patches,
> > as only the stats collection is enabled/disabled, while the data
> > structures and functions are not changed by the build time option.
> >
> > But what can you do for statistics? We all know that collecting the stats
> > sucks up CPU cycles, especially due to memory accesses, so stats always
> > have a run-time cost. Traditionally, stats are typically enabled for
> > debugging purposes and then turned off for the release version when
> > performance is required. How can this be done if build time flags are not
> > used?
> 
> Statistics in drivers are always enabled (first solution).
> If those statistics are only used for debugging, why not using the build-time
> option CONFIG_RTE_LOG_LEVEL? (second solution)
> 
> > Enabling stats conditionally at run-time has a significant cost
> > potentially, as this involves adding branches to the code; this is
> 
> Yes, a run-time option (similar to run-time log level) will cost only
> an "unlikely if" branching. (third solution)
> 
> > particularly bad due to branches being added in the library code,
> > which is not aware of the overhead of the application running on top of
> > it on the same CPU core. We are battling cycles very hard in the
> > Packet Framework, trying to avoid any waste, and the cost of branch
> > misprediction of ~15 cycles is prohibitive when your packet budget
> > (for the overall app, not just the library) is less than ~200 cycles.
> 
> This kind of issue applies to many areas of DPDK.
> 
> > Sometimes, branches are predicted correctly by the CPU, some other
> > times not, for example when the application code adds a lot of additional
> > branches (which is the typical case for an app). This is outside of the
> > control of the library.
> >
> > I would not recommend the run-time conditional option for stats support
> > in DPDK, where performance is key. The only option that looks feasible
> > to me to avoid performance impact is well-behaved build time option
> > (i.e. those that does not change the API functions and data structures).
> 
> The compile-time option makes testing coverage really complex.
> (fourth solution)
> 
> >  2. You already agreed that having build time options for performance
> >  reasons is fine, why change now?
> 
> Right, build time options are tolerated in DPDK because it is a performance
> oriented project. We also have to avoid 2 issues:
> - increasing number of compile-time options hide some bugs
> - binary distributions of DPDK cannot use these compile-time "features"
> 
> > We had a previous email thread on QoS stats
> (http://www.dpdk.org/ml/archives/dev/2015-February/013820.html),
> > and you stated:
> >
> > >We must remove and avoid build-time options.
> > >The only ones which might be acceptable are the ones which allow more
> > >performance by disabling some features.
> >
> > I think stats support is the perfect example where this is applicable.
> > We took this as the guidance from you, and we worked out these stats
> > exactly as you recommended. Let me also mention that the format of the
> > API functions and data structures for stats were also designed to match
> > exactly the requirements from this discussion (e.g. clear on read approach,
> > etc). Our functions match exactly the strategy that Stephen and myself
> > agreed on at that time. Our work was done under this agreement.
> >
> >
> > 3. There are other libraries that already apply the build time option for
> > stats support
> 
> That's why I suggested to remove this kind of option in the other libraries.
> 
> > Other DPDK libraries provide stats based on build time configuration:
> > Ethdev, QoS scheduler, IP fragmentation.
> 
> ethdev? Are you referring to RTE_ETHDEV_QUEUE_STAT_CNTRS which is a
> max?
> 
> > Are you suggesting we need to remove the build time stats option for
> > these libraries, too?
> 
> Yes for CONFIG_RTE_LIBRTE_IP_FRAG_TBL_STAT and
> CONFIG_RTE_SCHED_COLLECT_STATS,
> but it's open to comments from others.
> 
> > If yes, what about the performance impact, which would be prohibitive
> > (I can definitely testify this for QOS scheduler, where every cycle counts).
> > This would result in choosing between functionality (stats support) vs.
> > performance, and it might be that for some of these libraries the decision
> > will be to remove the stats support altogether in order to keep the
> > performance unchanged.
> 
> Note that distributions must take this decision when packaging DPDK:
> stats or extra performance?
> The decision is easier if those options are clearly documented as debug
> facilities. But in this case, using only 1 option (log level) would make
> maintenance easier (counterpart is enabling all or none stats).
> 
> > I think we should encourage people to add stats to more libraries, and
> > if we forbid build-time option for stats support we send a strong signal
> > to discourage people from adding stats support to the applicable DPDK
> > libraries.
> 
> That's a simple fifth solution: removing statistics from these libraries.
> But I feel it's a bad idea.
> 
> > 4. Coding guidelines
> > Is there a rule on "build time options are no longer allowed in DPDK"
> > published somewhere in our documentation? Is it part of the coding
> standard?
> > Am I wrong to say this is not the case at this point?
> 
> No, no and no you are not wrrong;)
> 
> > If we intend to have such a rule, then I think it needs to be discussed
> > on this list  and published, to avoid any misinterpretation (leading to
> > rework and debates). It should be published together with all the
> > acceptable exceptions that are part of any rule definition.
> 
> You are totally right. We must start writing this kind of document.
> 
> > I personally think the stats support should be such an exception.
> 
> That's your opinion and we have to carefully read opinion from others.
> 
> > Thanks for your help!
> 
> Thanks for the contribution, Cristian.
> 
> 
> > > -----Original Message-----
> > > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > Sent: Monday, May 18, 2015 12:02 PM
> > > To: Wodkowski, PawelX
> > > Cc: dev@dpdk.org; Dumitrescu, Cristian; Jastrzebski, MichalX K
> > > Subject: Re: [dpdk-dev] [PATCH v2] pipeline: add statistics for
> librte_pipeline
> > > ports and tables
> > >
> > > 2015-05-05 15:11, Dumitrescu, Cristian:
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Michal
> Jastrzebski
> > > > > From: Pawel Wodkowski <pawelx.wdkowski@intel.com>
> > > > >
> > > > > This patch adds statistics collection for librte_pipeline.
> > > > > Those statistics ale disabled by default during build time.
> > > > >
> > > > > Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> > > > > ---
> > > > >  config/common_bsdapp               |    1 +
> > > > >  config/common_linuxapp             |    1 +
> > > [...]
> > > > >  # Compile librte_pipeline
> > > > >  #
> > > > >  CONFIG_RTE_LIBRTE_PIPELINE=y
> > > > > +CONFIG_RTE_PIPELINE_STATS_COLLECT=n
> > > [...]
> > > >
> > > > Acked by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> > >
> > > Nack because of new config option.
> > > The same problem appear for all series related to packet framework.
>
  
Thomas Monjalon May 20, 2015, 2:44 p.m. UTC | #6
Please Cristian, do not top post.
I'm replacing your comment in the right context.

2015-05-20 13:57, Dumitrescu, Cristian:
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > Thanks for the detailed explanation.
> > 
> > You are raising a trade-off problem about
> > feature/maintenance/performance.
> > I think we must choose among 4 solutions:
> > 	1/ always enabled
> > 	2/ build-time log level
> > 	3/ run-time option
> > 	4/ build-time option
> > 
> > It's good to have this discussion and let other contributors giving their
> > opinion.
> > 
> > 2015-05-19 22:41, Dumitrescu, Cristian:
> > > 1. What is the technical solution to avoid performance loss due to stats
> > > support?
> > > Generally, I would agree with you that config options should be avoided,
> > > especially those that alter the API (function prototypes, data structure
> > > layouts, etc). Please note this is not the case for any of our patches,
> > > as only the stats collection is enabled/disabled, while the data
> > > structures and functions are not changed by the build time option.
> > >
> > > But what can you do for statistics? We all know that collecting the stats
> > > sucks up CPU cycles, especially due to memory accesses, so stats always
> > > have a run-time cost. Traditionally, stats are typically enabled for
> > > debugging purposes and then turned off for the release version when
> > > performance is required. How can this be done if build time flags are not
> > > used?
> > 
> > Statistics in drivers are always enabled (first solution).
> > If those statistics are only used for debugging, why not using the build-time
> > option CONFIG_RTE_LOG_LEVEL? (second solution)
> 
> Can you please describe what solution 2 on your list (build-time log
> level) consists of?
> 
> I see log level useful for printing messages when an event takes place,
> but this is not what these stats patches are about. We want to poll
> for those counters on demand: if the build-time flag is off, then the
> value of those counters is 0; when the build-time is on, then the stats
> counters have the real value. Basically, the build-time flag only
> enables/disables the update of the counters at run-time, which is where
> CPU cycles are consumed. I am not sure how the log levels can help here?

I think that counting stats is a kind of logging.
Some stats are always counted (see drivers) and you want to use these ones
only for debugging (after rebuilding DPDK with some debug options).
So I suggest, as second solution, to check CONFIG_RTE_LOG_LEVEL is at debug
level instead of having one option per module.
It would be implemented with "#if RTE_LOG_LEVEL == RTE_LOG_DEBUG" in
RTE_PIPELINE_STATS_ADD.
  
Stephen Hemminger May 20, 2015, 5:59 p.m. UTC | #7
On Wed, 20 May 2015 16:44:35 +0200
Thomas Monjalon <thomas.monjalon@6wind.com> wrote:

> Please Cristian, do not top post.
> I'm replacing your comment in the right context.
> 
> 2015-05-20 13:57, Dumitrescu, Cristian:
> > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > Thanks for the detailed explanation.
> > > 
> > > You are raising a trade-off problem about
> > > feature/maintenance/performance.
> > > I think we must choose among 4 solutions:
> > > 	1/ always enabled
> > > 	2/ build-time log level
> > > 	3/ run-time option
> > > 	4/ build-time option
> > > 
> > > It's good to have this discussion and let other contributors giving their
> > > opinion.
> > > 
> > > 2015-05-19 22:41, Dumitrescu, Cristian:
> > > > 1. What is the technical solution to avoid performance loss due to stats
> > > > support?
> > > > Generally, I would agree with you that config options should be avoided,
> > > > especially those that alter the API (function prototypes, data structure
> > > > layouts, etc). Please note this is not the case for any of our patches,
> > > > as only the stats collection is enabled/disabled, while the data
> > > > structures and functions are not changed by the build time option.
> > > >
> > > > But what can you do for statistics? We all know that collecting the stats
> > > > sucks up CPU cycles, especially due to memory accesses, so stats always
> > > > have a run-time cost. Traditionally, stats are typically enabled for
> > > > debugging purposes and then turned off for the release version when
> > > > performance is required. How can this be done if build time flags are not
> > > > used?
> > > 
> > > Statistics in drivers are always enabled (first solution).
> > > If those statistics are only used for debugging, why not using the build-time
> > > option CONFIG_RTE_LOG_LEVEL? (second solution)
> > 
> > Can you please describe what solution 2 on your list (build-time log
> > level) consists of?
> > 
> > I see log level useful for printing messages when an event takes place,
> > but this is not what these stats patches are about. We want to poll
> > for those counters on demand: if the build-time flag is off, then the
> > value of those counters is 0; when the build-time is on, then the stats
> > counters have the real value. Basically, the build-time flag only
> > enables/disables the update of the counters at run-time, which is where
> > CPU cycles are consumed. I am not sure how the log levels can help here?
> 
> I think that counting stats is a kind of logging.
> Some stats are always counted (see drivers) and you want to use these ones
> only for debugging (after rebuilding DPDK with some debug options).
> So I suggest, as second solution, to check CONFIG_RTE_LOG_LEVEL is at debug
> level instead of having one option per module.
> It would be implemented with "#if RTE_LOG_LEVEL == RTE_LOG_DEBUG" in
> RTE_PIPELINE_STATS_ADD.
> 

In my experience, per-queue or per-cpu statistics have no visible performance
impact. And every real world deployment wants statistics
  
Thomas Monjalon May 20, 2015, 10:01 p.m. UTC | #8
2015-05-20 10:59, Stephen Hemminger:
> On Wed, 20 May 2015 16:44:35 +0200
> Thomas Monjalon <thomas.monjalon@6wind.com> wrote:
> 
> > Please Cristian, do not top post.
> > I'm replacing your comment in the right context.
> > 
> > 2015-05-20 13:57, Dumitrescu, Cristian:
> > > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > > Thanks for the detailed explanation.
> > > > 
> > > > You are raising a trade-off problem about
> > > > feature/maintenance/performance.
> > > > I think we must choose among 4 solutions:
> > > > 	1/ always enabled
> > > > 	2/ build-time log level
> > > > 	3/ run-time option
> > > > 	4/ build-time option
> > > > 
> > > > It's good to have this discussion and let other contributors giving their
> > > > opinion.
> > > > 
> > > > 2015-05-19 22:41, Dumitrescu, Cristian:
> > > > > 1. What is the technical solution to avoid performance loss due to stats
> > > > > support?
> > > > > Generally, I would agree with you that config options should be avoided,
> > > > > especially those that alter the API (function prototypes, data structure
> > > > > layouts, etc). Please note this is not the case for any of our patches,
> > > > > as only the stats collection is enabled/disabled, while the data
> > > > > structures and functions are not changed by the build time option.
> > > > >
> > > > > But what can you do for statistics? We all know that collecting the stats
> > > > > sucks up CPU cycles, especially due to memory accesses, so stats always
> > > > > have a run-time cost. Traditionally, stats are typically enabled for
> > > > > debugging purposes and then turned off for the release version when
> > > > > performance is required. How can this be done if build time flags are not
> > > > > used?
> > > > 
> > > > Statistics in drivers are always enabled (first solution).
> > > > If those statistics are only used for debugging, why not using the build-time
> > > > option CONFIG_RTE_LOG_LEVEL? (second solution)
> > > 
> > > Can you please describe what solution 2 on your list (build-time log
> > > level) consists of?
> > > 
> > > I see log level useful for printing messages when an event takes place,
> > > but this is not what these stats patches are about. We want to poll
> > > for those counters on demand: if the build-time flag is off, then the
> > > value of those counters is 0; when the build-time is on, then the stats
> > > counters have the real value. Basically, the build-time flag only
> > > enables/disables the update of the counters at run-time, which is where
> > > CPU cycles are consumed. I am not sure how the log levels can help here?
> > 
> > I think that counting stats is a kind of logging.
> > Some stats are always counted (see drivers) and you want to use these ones
> > only for debugging (after rebuilding DPDK with some debug options).
> > So I suggest, as second solution, to check CONFIG_RTE_LOG_LEVEL is at debug
> > level instead of having one option per module.
> > It would be implemented with "#if RTE_LOG_LEVEL == RTE_LOG_DEBUG" in
> > RTE_PIPELINE_STATS_ADD.
> > 
> 
> In my experience, per-queue or per-cpu statistics have no visible performance
> impact. And every real world deployment wants statistics

Yes. Maybe that having a benchmark with per-cpu stats would help to discuss
the first solution (stats always enabled).
  
Cristian Dumitrescu May 20, 2015, 11:41 p.m. UTC | #9
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Wednesday, May 20, 2015 3:45 PM
> To: Dumitrescu, Cristian
> Cc: Wodkowski, PawelX; dev@dpdk.org; Jastrzebski, MichalX K
> Subject: Re: [dpdk-dev] [PATCH v2] pipeline: add statistics for librte_pipeline
> ports and tables
> 
> Please Cristian, do not top post.
> I'm replacing your comment in the right context.
> 
> 2015-05-20 13:57, Dumitrescu, Cristian:
> > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > Thanks for the detailed explanation.
> > >
> > > You are raising a trade-off problem about
> > > feature/maintenance/performance.
> > > I think we must choose among 4 solutions:
> > > 	1/ always enabled
> > > 	2/ build-time log level
> > > 	3/ run-time option
> > > 	4/ build-time option
> > >
> > > It's good to have this discussion and let other contributors giving their
> > > opinion.
> > >
> > > 2015-05-19 22:41, Dumitrescu, Cristian:
> > > > 1. What is the technical solution to avoid performance loss due to stats
> > > > support?
> > > > Generally, I would agree with you that config options should be
> avoided,
> > > > especially those that alter the API (function prototypes, data structure
> > > > layouts, etc). Please note this is not the case for any of our patches,
> > > > as only the stats collection is enabled/disabled, while the data
> > > > structures and functions are not changed by the build time option.
> > > >
> > > > But what can you do for statistics? We all know that collecting the stats
> > > > sucks up CPU cycles, especially due to memory accesses, so stats always
> > > > have a run-time cost. Traditionally, stats are typically enabled for
> > > > debugging purposes and then turned off for the release version when
> > > > performance is required. How can this be done if build time flags are
> not
> > > > used?
> > >
> > > Statistics in drivers are always enabled (first solution).
> > > If those statistics are only used for debugging, why not using the build-
> time
> > > option CONFIG_RTE_LOG_LEVEL? (second solution)
> >
> > Can you please describe what solution 2 on your list (build-time log
> > level) consists of?
> >
> > I see log level useful for printing messages when an event takes place,
> > but this is not what these stats patches are about. We want to poll
> > for those counters on demand: if the build-time flag is off, then the
> > value of those counters is 0; when the build-time is on, then the stats
> > counters have the real value. Basically, the build-time flag only
> > enables/disables the update of the counters at run-time, which is where
> > CPU cycles are consumed. I am not sure how the log levels can help here?
> 
> I think that counting stats is a kind of logging.
> Some stats are always counted (see drivers) and you want to use these ones
> only for debugging (after rebuilding DPDK with some debug options).
> So I suggest, as second solution, to check CONFIG_RTE_LOG_LEVEL is at
> debug
> level instead of having one option per module.
> It would be implemented with "#if RTE_LOG_LEVEL == RTE_LOG_DEBUG" in
> RTE_PIPELINE_STATS_ADD.

The problem I see with this approach is that you cannot turn off debug messages while still having the statistics collected.  We should allow people to collects the stats without getting the debug messages. How do we do this?

Although most people use the stats for debugging and then turn them off, some other people are OK with the stats related performance drop for the production code (e.g. they might want to log some stats periodically), but they want to get rid of the debug messages. 

Potential idea:
Would it be acceptable to create a new log level that is dedicated to stats? This would have to be higher priority than all the present ones, so it needs to be: #define RTE_LOG_STATS    1U. This way, we can get rid of all the debug messages while still logging stats:

#if ((RTE_LOG_STATS <= RTE_LOG_LEVEL)
#define STATS_COUNTER_ADD(level, type, counter, val) counter += val
#else
#define STATS_COUNTER_ADD(level, type, counter, val)
#endif

However, this won't allow getting the debug messages with stats collection being suppressed. But probably this case is not relevant.
  
Cristian Dumitrescu May 20, 2015, 11:56 p.m. UTC | #10
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Wednesday, May 20, 2015 11:02 PM
> To: Stephen Hemminger; Dumitrescu, Cristian
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v2] pipeline: add statistics for librte_pipeline
> ports and tables
> 
> 2015-05-20 10:59, Stephen Hemminger:
> > On Wed, 20 May 2015 16:44:35 +0200
> > Thomas Monjalon <thomas.monjalon@6wind.com> wrote:
> >
> > > Please Cristian, do not top post.
> > > I'm replacing your comment in the right context.
> > >
> > > 2015-05-20 13:57, Dumitrescu, Cristian:
> > > > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > > > Thanks for the detailed explanation.
> > > > >
> > > > > You are raising a trade-off problem about
> > > > > feature/maintenance/performance.
> > > > > I think we must choose among 4 solutions:
> > > > > 	1/ always enabled
> > > > > 	2/ build-time log level
> > > > > 	3/ run-time option
> > > > > 	4/ build-time option
> > > > >
> > > > > It's good to have this discussion and let other contributors giving their
> > > > > opinion.
> > > > >
> > > > > 2015-05-19 22:41, Dumitrescu, Cristian:
> > > > > > 1. What is the technical solution to avoid performance loss due to
> stats
> > > > > > support?
> > > > > > Generally, I would agree with you that config options should be
> avoided,
> > > > > > especially those that alter the API (function prototypes, data
> structure
> > > > > > layouts, etc). Please note this is not the case for any of our patches,
> > > > > > as only the stats collection is enabled/disabled, while the data
> > > > > > structures and functions are not changed by the build time option.
> > > > > >
> > > > > > But what can you do for statistics? We all know that collecting the
> stats
> > > > > > sucks up CPU cycles, especially due to memory accesses, so stats
> always
> > > > > > have a run-time cost. Traditionally, stats are typically enabled for
> > > > > > debugging purposes and then turned off for the release version
> when
> > > > > > performance is required. How can this be done if build time flags are
> not
> > > > > > used?
> > > > >
> > > > > Statistics in drivers are always enabled (first solution).
> > > > > If those statistics are only used for debugging, why not using the
> build-time
> > > > > option CONFIG_RTE_LOG_LEVEL? (second solution)
> > > >
> > > > Can you please describe what solution 2 on your list (build-time log
> > > > level) consists of?
> > > >
> > > > I see log level useful for printing messages when an event takes place,
> > > > but this is not what these stats patches are about. We want to poll
> > > > for those counters on demand: if the build-time flag is off, then the
> > > > value of those counters is 0; when the build-time is on, then the stats
> > > > counters have the real value. Basically, the build-time flag only
> > > > enables/disables the update of the counters at run-time, which is
> where
> > > > CPU cycles are consumed. I am not sure how the log levels can help
> here?
> > >
> > > I think that counting stats is a kind of logging.
> > > Some stats are always counted (see drivers) and you want to use these
> ones
> > > only for debugging (after rebuilding DPDK with some debug options).
> > > So I suggest, as second solution, to check CONFIG_RTE_LOG_LEVEL is at
> debug
> > > level instead of having one option per module.
> > > It would be implemented with "#if RTE_LOG_LEVEL == RTE_LOG_DEBUG"
> in
> > > RTE_PIPELINE_STATS_ADD.
> > >
> >
> > In my experience, per-queue or per-cpu statistics have no visible
> performance
> > impact. And every real world deployment wants statistics
> 
> Yes. Maybe that having a benchmark with per-cpu stats would help to discuss
> the first solution (stats always enabled).

What are per-cpu stats? Are you referring to the false sharing problem when each cpu core maintains its own stats, but stats from several cores end up being stores in the same cache line? We don't have this problem here.
  
Thomas Monjalon May 21, 2015, 7:29 a.m. UTC | #11
2015-05-20 23:41, Dumitrescu, Cristian:
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > 2015-05-20 13:57, Dumitrescu, Cristian:
> > > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > > Thanks for the detailed explanation.
> > > >
> > > > You are raising a trade-off problem about
> > > > feature/maintenance/performance.
> > > > I think we must choose among 4 solutions:
> > > > 	1/ always enabled
> > > > 	2/ build-time log level
> > > > 	3/ run-time option
> > > > 	4/ build-time option
> > > >
> > > > It's good to have this discussion and let other contributors giving their
> > > > opinion.
> > > >
> > > > 2015-05-19 22:41, Dumitrescu, Cristian:
> > > > > 1. What is the technical solution to avoid performance loss due to stats
> > > > > support?
> > > > > Generally, I would agree with you that config options should be avoided,
> > > > > especially those that alter the API (function prototypes, data structure
> > > > > layouts, etc). Please note this is not the case for any of our patches,
> > > > > as only the stats collection is enabled/disabled, while the data
> > > > > structures and functions are not changed by the build time option.
> > > > >
> > > > > But what can you do for statistics? We all know that collecting the stats
> > > > > sucks up CPU cycles, especially due to memory accesses, so stats always
> > > > > have a run-time cost. Traditionally, stats are typically enabled for
> > > > > debugging purposes and then turned off for the release version when
> > > > > performance is required. How can this be done if build time flags are
> > > > > not used?
> > > >
> > > > Statistics in drivers are always enabled (first solution).
> > > > If those statistics are only used for debugging, why not using the build-
> > > > time option CONFIG_RTE_LOG_LEVEL? (second solution)
> > >
> > > Can you please describe what solution 2 on your list (build-time log
> > > level) consists of?
> > >
> > > I see log level useful for printing messages when an event takes place,
> > > but this is not what these stats patches are about. We want to poll
> > > for those counters on demand: if the build-time flag is off, then the
> > > value of those counters is 0; when the build-time is on, then the stats
> > > counters have the real value. Basically, the build-time flag only
> > > enables/disables the update of the counters at run-time, which is where
> > > CPU cycles are consumed. I am not sure how the log levels can help here?
> > 
> > I think that counting stats is a kind of logging.
> > Some stats are always counted (see drivers) and you want to use these ones
> > only for debugging (after rebuilding DPDK with some debug options).
> > So I suggest, as second solution, to check CONFIG_RTE_LOG_LEVEL is at
> > debug
> > level instead of having one option per module.
> > It would be implemented with "#if RTE_LOG_LEVEL == RTE_LOG_DEBUG" in
> > RTE_PIPELINE_STATS_ADD.
> 
> The problem I see with this approach is that you cannot turn off debug
> messages while still having the statistics collected.  We should allow
> people to collects the stats without getting the debug messages. How do
> we do this?

By setting build-time log level to debug, you would enable stats and debug
messages. Then you can adjust the log messages level with the run-time
option --log-level.
  
Cristian Dumitrescu May 21, 2015, 1:33 p.m. UTC | #12
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Thursday, May 21, 2015 8:30 AM
> To: Dumitrescu, Cristian
> Cc: Wodkowski, PawelX; dev@dpdk.org; Jastrzebski, MichalX K
> Subject: Re: [dpdk-dev] [PATCH v2] pipeline: add statistics for librte_pipeline
> ports and tables
> 
> 2015-05-20 23:41, Dumitrescu, Cristian:
> > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > 2015-05-20 13:57, Dumitrescu, Cristian:
> > > > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > > > Thanks for the detailed explanation.
> > > > >
> > > > > You are raising a trade-off problem about
> > > > > feature/maintenance/performance.
> > > > > I think we must choose among 4 solutions:
> > > > > 	1/ always enabled
> > > > > 	2/ build-time log level
> > > > > 	3/ run-time option
> > > > > 	4/ build-time option
> > > > >
> > > > > It's good to have this discussion and let other contributors giving their
> > > > > opinion.
> > > > >
> > > > > 2015-05-19 22:41, Dumitrescu, Cristian:
> > > > > > 1. What is the technical solution to avoid performance loss due to
> stats
> > > > > > support?
> > > > > > Generally, I would agree with you that config options should be
> avoided,
> > > > > > especially those that alter the API (function prototypes, data
> structure
> > > > > > layouts, etc). Please note this is not the case for any of our patches,
> > > > > > as only the stats collection is enabled/disabled, while the data
> > > > > > structures and functions are not changed by the build time option.
> > > > > >
> > > > > > But what can you do for statistics? We all know that collecting the
> stats
> > > > > > sucks up CPU cycles, especially due to memory accesses, so stats
> always
> > > > > > have a run-time cost. Traditionally, stats are typically enabled for
> > > > > > debugging purposes and then turned off for the release version
> when
> > > > > > performance is required. How can this be done if build time flags are
> > > > > > not used?
> > > > >
> > > > > Statistics in drivers are always enabled (first solution).
> > > > > If those statistics are only used for debugging, why not using the
> build-
> > > > > time option CONFIG_RTE_LOG_LEVEL? (second solution)
> > > >
> > > > Can you please describe what solution 2 on your list (build-time log
> > > > level) consists of?
> > > >
> > > > I see log level useful for printing messages when an event takes place,
> > > > but this is not what these stats patches are about. We want to poll
> > > > for those counters on demand: if the build-time flag is off, then the
> > > > value of those counters is 0; when the build-time is on, then the stats
> > > > counters have the real value. Basically, the build-time flag only
> > > > enables/disables the update of the counters at run-time, which is
> where
> > > > CPU cycles are consumed. I am not sure how the log levels can help
> here?
> > >
> > > I think that counting stats is a kind of logging.
> > > Some stats are always counted (see drivers) and you want to use these
> ones
> > > only for debugging (after rebuilding DPDK with some debug options).
> > > So I suggest, as second solution, to check CONFIG_RTE_LOG_LEVEL is at
> > > debug
> > > level instead of having one option per module.
> > > It would be implemented with "#if RTE_LOG_LEVEL == RTE_LOG_DEBUG"
> in
> > > RTE_PIPELINE_STATS_ADD.
> >
> > The problem I see with this approach is that you cannot turn off debug
> > messages while still having the statistics collected.  We should allow
> > people to collects the stats without getting the debug messages. How do
> > we do this?
> 
> By setting build-time log level to debug, you would enable stats and debug
> messages. Then you can adjust the log messages level with the run-time
> option --log-level.

This is a really clever trick! I have to say it took me some time to make sure I got it right :)
So when RTE_LOG_LEVEL (build time option) is set to DEBUG (i.e. 8), then we get both the stats and the debug messages, but then we can adjust the level of debug messages at run-time through the --log-level EAL option.
I can work with this option, thank you!

There is one more simple proposal that came to my mind late last night: how about single config file option RTE_COLLECT_STATS=y/n that should be used by all the libs across the board to indicate whether stats should be enabled or not?
This is logically very similar to the solution above, as they both look at a single option in the config file, but maybe it is also more intuitive for people.

I will go with your choice. Which one do you pick?
  
Jay Rolette May 21, 2015, 2:59 p.m. UTC | #13
On Thu, May 21, 2015 at 8:33 AM, Dumitrescu, Cristian <
cristian.dumitrescu@intel.com> wrote:

> > > The problem I see with this approach is that you cannot turn off debug
> > > messages while still having the statistics collected.  We should allow
> > > people to collects the stats without getting the debug messages. How do
> > > we do this?
> >
> > By setting build-time log level to debug, you would enable stats and
> debug
> > messages. Then you can adjust the log messages level with the run-time
> > option --log-level.
>
> This is a really clever trick! I have to say it took me some time to make
> sure I got it right :)
> So when RTE_LOG_LEVEL (build time option) is set to DEBUG (i.e. 8), then
> we get both the stats and the debug messages, but then we can adjust the
> level of debug messages at run-time through the --log-level EAL option.
> I can work with this option, thank you!
>
> There is one more simple proposal that came to my mind late last night:
> how about single config file option RTE_COLLECT_STATS=y/n that should be
> used by all the libs across the board to indicate whether stats should be
> enabled or not?
> This is logically very similar to the solution above, as they both look at
> a single option in the config file, but maybe it is also more intuitive for
> people.
>
> I will go with your choice. Which one do you pick?
>

As Stephen said previously, any real DPDK app needs stats. You can't
support network products operationally without them. What's the point of
making them optional other than trying to juice benchmarks?

Jay
  
Cristian Dumitrescu May 21, 2015, 4:15 p.m. UTC | #14
From: Jay Rolette [mailto:rolette@infiniteio.com]

Sent: Thursday, May 21, 2015 4:00 PM
To: Dumitrescu, Cristian
Cc: Thomas Monjalon; dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH v2] pipeline: add statistics for librte_pipeline ports and tables


On Thu, May 21, 2015 at 8:33 AM, Dumitrescu, Cristian <cristian.dumitrescu@intel.com<mailto:cristian.dumitrescu@intel.com>> wrote:
> > The problem I see with this approach is that you cannot turn off debug

> > messages while still having the statistics collected.  We should allow

> > people to collects the stats without getting the debug messages. How do

> > we do this?

>

> By setting build-time log level to debug, you would enable stats and debug

> messages. Then you can adjust the log messages level with the run-time

> option --log-level.


This is a really clever trick! I have to say it took me some time to make sure I got it right :)
So when RTE_LOG_LEVEL (build time option) is set to DEBUG (i.e. 8), then we get both the stats and the debug messages, but then we can adjust the level of debug messages at run-time through the --log-level EAL option.
I can work with this option, thank you!

There is one more simple proposal that came to my mind late last night: how about single config file option RTE_COLLECT_STATS=y/n that should be used by all the libs across the board to indicate whether stats should be enabled or not?
This is logically very similar to the solution above, as they both look at a single option in the config file, but maybe it is also more intuitive for people.

I will go with your choice. Which one do you pick?

As Stephen said previously, any real DPDK app needs stats. You can't support network products operationally without them. What's the point of making them optional other than trying to juice benchmarks?

Jay

Hi Jay,

As explained in this thread, our strategy as a library is to keep all options open for the application: the application should be the one to decide whether the statistics collection should be enabled or not.

The library should not take application level decisions:

a)      For application A, these stats are relevant, so they should be collected;

b)      For application B, these counters are not relevant, so they should not be collected, so we allow the application to spend the CPU cycles doing some other useful work;

c)      For application C, these counters might only be relevant during debugging phase, so they should be collected during that time, while later on, when debugging is completed, their collection should be disabled.

I do not see why we should force the application to collect the stats if/when it does not need them. The library should allow the application to decide what the library should do, not the other way around.

Regards,
Cristian
  
Thomas Monjalon May 25, 2015, 10:48 a.m. UTC | #15
2015-05-21 13:33, Dumitrescu, Cristian:
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > 2015-05-20 23:41, Dumitrescu, Cristian:
> > > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > > I think that counting stats is a kind of logging.
> > > > Some stats are always counted (see drivers) and you want to use these
> > > > ones only for debugging (after rebuilding DPDK with some debug options).
> > > > So I suggest, as second solution, to check CONFIG_RTE_LOG_LEVEL is at
> > > > debug level instead of having one option per module.
> > > > It would be implemented with "#if RTE_LOG_LEVEL == RTE_LOG_DEBUG"
> > > > in RTE_PIPELINE_STATS_ADD.
> > >
> > > The problem I see with this approach is that you cannot turn off debug
> > > messages while still having the statistics collected.  We should allow
> > > people to collects the stats without getting the debug messages. How do
> > > we do this?
> > 
> > By setting build-time log level to debug, you would enable stats and debug
> > messages. Then you can adjust the log messages level with the run-time
> > option --log-level.
> 
> This is a really clever trick! I have to say it took me some time to
> make sure I got it right :)
> So when RTE_LOG_LEVEL (build time option) is set to DEBUG (i.e. 8),
> then we get both the stats and the debug messages, but then we can
> adjust the level of debug messages at run-time through the --log-level
> EAL option.
> I can work with this option, thank you!
> 
> There is one more simple proposal that came to my mind late last night:
> how about single config file option RTE_COLLECT_STATS=y/n that should
> be used by all the libs across the board to indicate whether stats
> should be enabled or not?

Stats in drivers must be always collected because they are not considered
as debug information (until now).
So I think a global option RTE_COLLECT_STATS would be confusing.

> This is logically very similar to the solution above, as they both look
> at a single option in the config file, but maybe it is also more
> intuitive for people.
> 
> I will go with your choice. Which one do you pick?

I prefer we choose on a per-lib basis which information are considered
for debug or not. But it's only my opinion.
So if nobody disagrees, I'd prefer going with RTE_LOG_LEVEL.
  

Patch

diff --git a/config/common_bsdapp b/config/common_bsdapp
index 1d0f5b2..e4f0bf5 100644
--- a/config/common_bsdapp
+++ b/config/common_bsdapp
@@ -414,6 +414,7 @@  CONFIG_RTE_TABLE_LPM_STATS_COLLECT=n
 # Compile librte_pipeline
 #
 CONFIG_RTE_LIBRTE_PIPELINE=y
+CONFIG_RTE_PIPELINE_STATS_COLLECT=n
 
 #
 # Compile librte_kni
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 8b01ca9..05553d9 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -421,6 +421,7 @@  CONFIG_RTE_TABLE_LPM_STATS_COLLECT=n
 # Compile librte_pipeline
 #
 CONFIG_RTE_LIBRTE_PIPELINE=y
+CONFIG_RTE_PIPELINE_STATS_COLLECT=n
 
 #
 # Compile librte_kni
diff --git a/lib/librte_pipeline/rte_pipeline.c b/lib/librte_pipeline/rte_pipeline.c
index 36d92c9..69bf003 100644
--- a/lib/librte_pipeline/rte_pipeline.c
+++ b/lib/librte_pipeline/rte_pipeline.c
@@ -48,6 +48,17 @@ 
 
 #define RTE_TABLE_INVALID                                 UINT32_MAX
 
+#ifdef RTE_PIPELINE_STATS_COLLECT
+#define RTE_PIPELINE_STATS_ADD(counter, val) \
+	({ (counter) += (val); })
+
+#define RTE_PIPELINE_STATS_ADD_M(counter, mask) \
+	({ (counter) += __builtin_popcountll(mask); })
+#else
+#define RTE_PIPELINE_STATS_ADD(counter, val)
+#define RTE_PIPELINE_STATS_ADD_M(counter, mask)
+#endif
+
 struct rte_port_in {
 	/* Input parameters */
 	struct rte_port_in_ops ops;
@@ -63,6 +74,8 @@  struct rte_port_in {
 
 	/* List of enabled ports */
 	struct rte_port_in *next;
+
+	uint64_t n_pkts_dropped_by_ah;
 };
 
 struct rte_port_out {
@@ -74,6 +87,8 @@  struct rte_port_out {
 
 	/* Handle to low-level port */
 	void *h_port;
+
+	uint64_t n_pkts_dropped_by_ah;
 };
 
 struct rte_table {
@@ -90,6 +105,12 @@  struct rte_table {
 
 	/* Handle to the low-level table object */
 	void *h_table;
+
+	/* Stats for this table. */
+	uint64_t n_pkts_dropped_by_lkp_hit_ah;
+	uint64_t n_pkts_dropped_by_lkp_miss_ah;
+	uint64_t n_pkts_dropped_lkp_hit;
+	uint64_t n_pkts_dropped_lkp_miss;
 };
 
 #define RTE_PIPELINE_MAX_NAME_SZ                           124
@@ -1040,6 +1061,8 @@  rte_pipeline_action_handler_port_bulk(struct rte_pipeline *p,
 
 		port_out->f_action_bulk(p->pkts, &pkts_mask, port_out->arg_ah);
 		p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= pkts_mask ^  mask;
+		RTE_PIPELINE_STATS_ADD_M(port_out->n_pkts_dropped_by_ah,
+				pkts_mask ^  mask);
 	}
 
 	/* Output port TX */
@@ -1071,6 +1094,9 @@  rte_pipeline_action_handler_port(struct rte_pipeline *p, uint64_t pkts_mask)
 				p->action_mask0[RTE_PIPELINE_ACTION_DROP] |=
 					(pkt_mask ^ 1LLU) << i;
 
+				RTE_PIPELINE_STATS_ADD(port_out->n_pkts_dropped_by_ah,
+						pkt_mask ^ 1LLU);
+
 				/* Output port TX */
 				if (pkt_mask != 0)
 					port_out->ops.f_tx(port_out->h_port,
@@ -1104,6 +1130,9 @@  rte_pipeline_action_handler_port(struct rte_pipeline *p, uint64_t pkts_mask)
 				p->action_mask0[RTE_PIPELINE_ACTION_DROP] |=
 					(pkt_mask ^ 1LLU) << i;
 
+				RTE_PIPELINE_STATS_ADD(port_out->n_pkts_dropped_by_ah,
+						pkt_mask ^ 1LLU);
+
 				/* Output port TX */
 				if (pkt_mask != 0)
 					port_out->ops.f_tx(port_out->h_port,
@@ -1140,6 +1169,9 @@  rte_pipeline_action_handler_port_meta(struct rte_pipeline *p,
 				p->action_mask0[RTE_PIPELINE_ACTION_DROP] |=
 					(pkt_mask ^ 1LLU) << i;
 
+				RTE_PIPELINE_STATS_ADD(port_out->n_pkts_dropped_by_ah,
+						pkt_mask ^ 1ULL);
+
 				/* Output port TX */
 				if (pkt_mask != 0)
 					port_out->ops.f_tx(port_out->h_port,
@@ -1174,6 +1206,9 @@  rte_pipeline_action_handler_port_meta(struct rte_pipeline *p,
 				p->action_mask0[RTE_PIPELINE_ACTION_DROP] |=
 					(pkt_mask ^ 1LLU) << i;
 
+				RTE_PIPELINE_STATS_ADD(port_out->n_pkts_dropped_by_ah,
+						pkt_mask ^ 1ULL);
+
 				/* Output port TX */
 				if (pkt_mask != 0)
 					port_out->ops.f_tx(port_out->h_port,
@@ -1232,10 +1267,10 @@  rte_pipeline_run(struct rte_pipeline *p)
 		if (port_in->f_action != NULL) {
 			uint64_t mask = pkts_mask;
 
-			port_in->f_action(p->pkts, n_pkts, &pkts_mask,
-				port_in->arg_ah);
-			p->action_mask0[RTE_PIPELINE_ACTION_DROP] |=
-				pkts_mask ^ mask;
+			port_in->f_action(p->pkts, n_pkts, &pkts_mask, port_in->arg_ah);
+			mask ^= pkts_mask;
+			p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= mask;
+			RTE_PIPELINE_STATS_ADD_M(port_in->n_pkts_dropped_by_ah, mask);
 		}
 
 		/* Table */
@@ -1261,9 +1296,10 @@  rte_pipeline_run(struct rte_pipeline *p)
 					table->f_action_miss(p->pkts,
 						&lookup_miss_mask,
 						default_entry, table->arg_ah);
-					p->action_mask0[
-						RTE_PIPELINE_ACTION_DROP] |=
-						lookup_miss_mask ^ mask;
+					mask ^= lookup_miss_mask;
+					p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= mask;
+					RTE_PIPELINE_STATS_ADD_M(
+						table->n_pkts_dropped_by_lkp_miss_ah, mask);
 				}
 
 				/* Table reserved actions */
@@ -1277,6 +1313,10 @@  rte_pipeline_run(struct rte_pipeline *p)
 					uint32_t pos = default_entry->action;
 
 					p->action_mask0[pos] = lookup_miss_mask;
+					if (pos == RTE_PIPELINE_ACTION_DROP) {
+						RTE_PIPELINE_STATS_ADD_M(table->n_pkts_dropped_lkp_miss,
+							lookup_miss_mask);
+					}
 				}
 			}
 
@@ -1289,9 +1329,10 @@  rte_pipeline_run(struct rte_pipeline *p)
 					table->f_action_hit(p->pkts,
 						&lookup_hit_mask,
 						p->entries, table->arg_ah);
-					p->action_mask0[
-						RTE_PIPELINE_ACTION_DROP] |=
-						lookup_hit_mask ^ mask;
+					mask ^= lookup_hit_mask;
+					p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= mask;
+					RTE_PIPELINE_STATS_ADD_M(
+						table->n_pkts_dropped_by_lkp_hit_ah, mask);
 				}
 
 				/* Table reserved actions */
@@ -1308,6 +1349,9 @@  rte_pipeline_run(struct rte_pipeline *p)
 				p->action_mask0[RTE_PIPELINE_ACTION_TABLE] |=
 					p->action_mask1[
 						RTE_PIPELINE_ACTION_TABLE];
+
+				RTE_PIPELINE_STATS_ADD_M(table->n_pkts_dropped_lkp_hit,
+						p->action_mask1[RTE_PIPELINE_ACTION_DROP]);
 			}
 
 			/* Prepare for next iteration */
@@ -1370,9 +1414,128 @@  rte_pipeline_port_out_packet_insert(struct rte_pipeline *p,
 
 		if (pkt_mask != 0) /* Output port TX */
 			port_out->ops.f_tx(port_out->h_port, pkt);
-		else
+		else {
 			rte_pktmbuf_free(pkt);
+			RTE_PIPELINE_STATS_ADD(port_out->n_pkts_dropped_by_ah, 1);
+		}
+	}
+
+	return 0;
+}
+
+int rte_pipeline_port_in_stats_read(struct rte_pipeline *p, uint32_t port_id,
+	struct rte_pipeline_port_in_stats *stats, int clear)
+{
+	struct rte_port_in *port;
+	int retval;
+
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (port_id >= p->num_ports_in) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: port IN ID %u is out of range\n",
+			__func__, port_id);
+		return -EINVAL;
+	}
+
+	port = &p->ports_in[port_id];
+
+	if (port->ops.f_stats != NULL) {
+		retval = port->ops.f_stats(port->h_port, &stats->stats, clear);
+		if (retval)
+			return retval;
+	} else if (stats != NULL)
+		memset(&stats->stats, 0, sizeof(stats->stats));
+
+	if (stats != NULL)
+		stats->n_pkts_dropped_by_ah = port->n_pkts_dropped_by_ah;
+
+	if (clear != 0)
+		port->n_pkts_dropped_by_ah = 0;
+
+	return 0;
+}
+
+int rte_pipeline_port_out_stats_read(struct rte_pipeline *p, uint32_t port_id,
+	struct rte_pipeline_port_out_stats *stats, int clear)
+{
+	struct rte_port_out *port;
+	int retval;
+
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (port_id >= p->num_ports_out) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: port OUT ID %u is out of range\n", __func__, port_id);
+		return -EINVAL;
+	}
+
+	port = &p->ports_out[port_id];
+	if (port->ops.f_stats != NULL) {
+		retval = port->ops.f_stats(port->h_port, &stats->stats, clear);
+		if (retval != 0)
+			return retval;
+	} else if (stats != NULL)
+		memset(&stats->stats, 0, sizeof(stats->stats));
+
+	if (stats != NULL)
+		stats->n_pkts_dropped_by_ah = port->n_pkts_dropped_by_ah;
+
+	if (clear != 0)
+		port->n_pkts_dropped_by_ah = 0;
+
+	return 0;
+}
+
+int rte_pipeline_table_stats_read(struct rte_pipeline *p, uint32_t table_id,
+	struct rte_pipeline_table_stats *stats, int clear)
+{
+	struct rte_table *table;
+	int retval;
+
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (table_id >= p->num_tables) {
+		RTE_LOG(ERR, PIPELINE,
+				"%s: table %u is out of range\n", __func__, table_id);
+		return -EINVAL;
+	}
+
+	table = &p->tables[table_id];
+	if (table->ops.f_stats != NULL) {
+		retval = table->ops.f_stats(table->h_table, &stats->stats, clear);
+		if (retval != 0)
+			return retval;
+	} else if (stats != NULL)
+		memset(&stats->stats, 0, sizeof(stats->stats));
+
+	if (stats != NULL) {
+		stats->n_pkts_dropped_by_lkp_hit_ah =
+			table->n_pkts_dropped_by_lkp_hit_ah;
+		stats->n_pkts_dropped_by_lkp_miss_ah =
+			table->n_pkts_dropped_by_lkp_miss_ah;
+		stats->n_pkts_dropped_lkp_hit = table->n_pkts_dropped_lkp_hit;
+		stats->n_pkts_dropped_lkp_miss = table->n_pkts_dropped_lkp_miss;
+	}
+
+	if (clear != 0) {
+		table->n_pkts_dropped_by_lkp_hit_ah = 0;
+		table->n_pkts_dropped_by_lkp_miss_ah = 0;
+		table->n_pkts_dropped_lkp_hit = 0;
+		table->n_pkts_dropped_lkp_miss = 0;
 	}
 
 	return 0;
 }
+
diff --git a/lib/librte_pipeline/rte_pipeline.h b/lib/librte_pipeline/rte_pipeline.h
index fb1014a..a7757c6 100644
--- a/lib/librte_pipeline/rte_pipeline.h
+++ b/lib/librte_pipeline/rte_pipeline.h
@@ -111,6 +111,45 @@  struct rte_pipeline_params {
 	uint32_t offset_port_id;
 };
 
+/** Pipeline port in stats. */
+struct rte_pipeline_port_in_stats {
+	/** Port in stats. */
+	struct rte_port_in_stats stats;
+
+	/** Number of packets dropped by action handler. */
+	uint64_t n_pkts_dropped_by_ah;
+
+};
+
+/** Pipeline port out stats. */
+struct rte_pipeline_port_out_stats {
+	/** Port out stats. */
+	struct rte_port_out_stats stats;
+
+	/** Number of packets dropped by action handler. */
+	uint64_t n_pkts_dropped_by_ah;
+};
+
+/** Pipeline table stats. */
+struct rte_pipeline_table_stats {
+	/** Table stats. */
+	struct rte_table_stats stats;
+
+	/** Number of packets dropped by lookup hit action handler. */
+	uint64_t n_pkts_dropped_by_lkp_hit_ah;
+
+	/** Number of packets dropped by lookup miss action handler. */
+	uint64_t n_pkts_dropped_by_lkp_miss_ah;
+
+	/** Number of packets dropped by pipeline in behalf of this table based on
+	 * on action specified in table entry. */
+	uint64_t n_pkts_dropped_lkp_hit;
+
+	/** Number of packets dropped by pipeline in behalf of this table based on
+	 * on action specified in table entry. */
+	uint64_t n_pkts_dropped_lkp_miss;
+};
+
 /**
  * Pipeline create
  *
@@ -425,6 +464,26 @@  int rte_pipeline_table_entry_delete(struct rte_pipeline *p,
 	int *key_found,
 	struct rte_pipeline_table_entry *entry);
 
+/**
+ * Read pipeline table stats.
+ *
+ * This function reads table statistics identified by *table_id* of given
+ * pipeline *p*.
+ *
+ * @param p
+ *   Handle to pipeline instance.
+ * @param table_id
+ *   Port ID what stats will be returned.
+ * @param stats
+ *   Statistics buffer.
+ * @param clear
+ *   If not 0 clear stats after reading.
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_table_stats_read(struct rte_pipeline *p, uint32_t table_id,
+	struct rte_pipeline_table_stats *stats, int clear);
+
 /*
  * Port IN
  *
@@ -539,6 +598,26 @@  int rte_pipeline_port_in_enable(struct rte_pipeline *p,
 int rte_pipeline_port_in_disable(struct rte_pipeline *p,
 	uint32_t port_id);
 
+/**
+ * Read pipeline port in stats.
+ *
+ * This function reads port in statistics identified by *port_id* of given
+ * pipeline *p*.
+ *
+ * @param p
+ *   Handle to pipeline instance.
+ * @param port_id
+ *   Port ID what stats will be returned.
+ * @param stats
+ *   Statistics buffer.
+ * @param clear
+ *   If not 0 clear stats after reading.
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_port_in_stats_read(struct rte_pipeline *p, uint32_t port_id,
+	struct rte_pipeline_port_in_stats *stats, int clear);
+
 /*
  * Port OUT
  *
@@ -657,6 +736,26 @@  int rte_pipeline_port_out_packet_insert(struct rte_pipeline *p,
 	uint32_t port_id,
 	struct rte_mbuf *pkt);
 
+/**
+ * Read pipeline port out stats.
+ *
+ * This function reads port out statistics identified by *port_id* of given
+ * pipeline *p*.
+ *
+ * @param p
+ *   Handle to pipeline instance.
+ * @param port_id
+ *   Port ID what stats will be returned.
+ * @param stats
+ *   Statistics buffer.
+ * @param clear
+ *   If not 0 clear stats after reading.
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_port_out_stats_read(struct rte_pipeline *p, uint32_t port_id,
+	struct rte_pipeline_port_out_stats *stats, int clear);
+
 #ifdef __cplusplus
 }
 #endif