[dpdk-dev,v2,5/6] rte_sched: allow reading statistics without clearing

Message ID 1426004018-25948-6-git-send-email-stephen@networkplumber.org (mailing list archive)
State Superseded, archived
Headers

Commit Message

Stephen Hemminger March 10, 2015, 4:13 p.m. UTC
  From: Stephen Hemminger <shemming@brocade.com>

The rte_sched statistics API should allow reading statistics without
clearing. Make auto-clear optional.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
v2 -- change to flag for clear from separate read/clear

 app/test/test_sched.c        |  4 ++--
 examples/qos_sched/stats.c   | 16 +++++++++++-----
 lib/librte_sched/rte_sched.c | 44 ++++++++++++++++++++++----------------------
 lib/librte_sched/rte_sched.h | 18 ++++++++++--------
 4 files changed, 45 insertions(+), 37 deletions(-)
  

Comments

Cristian Dumitrescu March 12, 2015, 7:28 p.m. UTC | #1
> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Tuesday, March 10, 2015 4:14 PM
> To: Dumitrescu, Cristian
> Cc: dev@dpdk.org; Stephen Hemminger; Stephen Hemminger
> Subject: [PATCH v2 5/6] rte_sched: allow reading statistics without clearing
> 
> From: Stephen Hemminger <shemming@brocade.com>
> 
> The rte_sched statistics API should allow reading statistics without
> clearing. Make auto-clear optional.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
> v2 -- change to flag for clear from separate read/clear
> 
>  app/test/test_sched.c        |  4 ++--
>  examples/qos_sched/stats.c   | 16 +++++++++++-----
>  lib/librte_sched/rte_sched.c | 44 ++++++++++++++++++++++-----------------
> -----
>  lib/librte_sched/rte_sched.h | 18 ++++++++++--------
>  4 files changed, 45 insertions(+), 37 deletions(-)
> 
> diff --git a/app/test/test_sched.c b/app/test/test_sched.c
> index 60c62de..3c9c9e3 100644
> --- a/app/test/test_sched.c
> +++ b/app/test/test_sched.c
> @@ -208,13 +208,13 @@ test_sched(void)
> 
>  	struct rte_sched_subport_stats subport_stats;
>  	uint32_t tc_ov;
> -	rte_sched_subport_read_stats(port, SUBPORT, &subport_stats,
> &tc_ov);
> +	rte_sched_subport_read_stats(port, SUBPORT, &subport_stats,
> &tc_ov, 1);
>  #if 0
>  	TEST_ASSERT_EQUAL(subport_stats.n_pkts_tc[TC-1], 10, "Wrong
> subport stats\n");
>  #endif
>  	struct rte_sched_queue_stats queue_stats;
>  	uint16_t qlen;
> -	rte_sched_queue_read_stats(port, QUEUE, &queue_stats, &qlen);
> +	rte_sched_queue_read_stats(port, QUEUE, &queue_stats, &qlen,
> 1);
>  #if 0
>  	TEST_ASSERT_EQUAL(queue_stats.n_pkts, 10, "Wrong queue
> stats\n");
>  #endif
> diff --git a/examples/qos_sched/stats.c b/examples/qos_sched/stats.c
> index b4db7b5..88caa54 100644
> --- a/examples/qos_sched/stats.c
> +++ b/examples/qos_sched/stats.c
> @@ -61,7 +61,7 @@ qavg_q(uint8_t port_id, uint32_t subport_id, uint32_t
> pipe_id, uint8_t tc, uint8
>          average = 0;
> 
>          for (count = 0; count < qavg_ntimes; count++) {
> -                rte_sched_queue_read_stats(port, queue_id, &stats, &qlen);
> +                rte_sched_queue_read_stats(port, queue_id, &stats, &qlen, 1);
>                  average += qlen;
>                  usleep(qavg_period);
>          }
> @@ -99,7 +99,9 @@ qavg_tcpipe(uint8_t port_id, uint32_t subport_id,
> uint32_t pipe_id, uint8_t tc)
>          for (count = 0; count < qavg_ntimes; count++) {
>                  part_average = 0;
>                  for (i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
> -                        rte_sched_queue_read_stats(port, queue_id + (tc *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + i), &stats, &qlen);
> +                        rte_sched_queue_read_stats(port,
> +				   queue_id + (tc *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + i),
> +				   &stats, &qlen, 1);
>                          part_average += qlen;
>                  }
>                  average += part_average /
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
> @@ -138,7 +140,8 @@ qavg_pipe(uint8_t port_id, uint32_t subport_id,
> uint32_t pipe_id)
>          for (count = 0; count < qavg_ntimes; count++) {
>                  part_average = 0;
>                  for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
> -                        rte_sched_queue_read_stats(port, queue_id + i, &stats,
> &qlen);
> +                        rte_sched_queue_read_stats(port, queue_id + i,
> +						   &stats, &qlen, 1);
>                          part_average += qlen;
>                  }
>                  average += part_average /
> (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS);
> @@ -178,7 +181,9 @@ qavg_tcsubport(uint8_t port_id, uint32_t subport_id,
> uint8_t tc)
>                          queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id *
> port_params.n_pipes_per_subport + i);
> 
>                          for (j = 0; j < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; j++) {
> -                                rte_sched_queue_read_stats(port, queue_id + (tc *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + j), &stats, &qlen);
> +                                rte_sched_queue_read_stats(port,
> +							   queue_id + (tc *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + j),
> +							   &stats, &qlen, 1);
>                                  part_average += qlen;
>                          }
>                  }
> @@ -220,7 +225,8 @@ qavg_subport(uint8_t port_id, uint32_t subport_id)
>                          queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id *
> port_params.n_pipes_per_subport + i);
> 
>                          for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; j++) {
> -                                rte_sched_queue_read_stats(port, queue_id + j, &stats,
> &qlen);
> +                                rte_sched_queue_read_stats(port, queue_id + j,
> +							   &stats, &qlen, 1);
>                                  part_average += qlen;
>                          }
>                  }
> diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
> index 2071414..74d0e0a 100644
> --- a/lib/librte_sched/rte_sched.c
> +++ b/lib/librte_sched/rte_sched.c
> @@ -909,56 +909,56 @@ rte_sched_pipe_config(struct rte_sched_port
> *port,
> 
>  int
>  rte_sched_subport_read_stats(struct rte_sched_port *port,
> -	uint32_t subport_id,
> -	struct rte_sched_subport_stats *stats,
> -	uint32_t *tc_ov)
> +			     uint32_t subport_id,
> +			     struct rte_sched_subport_stats *stats,
> +			     uint32_t *tc_ov, int clear)
>  {
>  	struct rte_sched_subport *s;
> 
>  	/* Check user parameters */
> -	if ((port == NULL) ||
> -	    (subport_id >= port->n_subports_per_port) ||
> -		(stats == NULL) ||
> -		(tc_ov == NULL)) {
> +	if (port == NULL || subport_id >= port->n_subports_per_port)
>  		return -1;
> -	}
> +
>  	s = port->subport + subport_id;
> 
>  	/* Copy subport stats and clear */
> -	memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
> -	memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
> +	if (stats)
> +		*stats = s->stats;
> +	if (clear)
> +		memset(&s->stats, 0, sizeof(struct
> rte_sched_subport_stats));
> 
>  	/* Subport TC ovesubscription status */
> -	*tc_ov = s->tc_ov;
> +	if (tc_ov)
> +		*tc_ov = s->tc_ov;
> 
>  	return 0;
>  }
> 
>  int
>  rte_sched_queue_read_stats(struct rte_sched_port *port,
> -	uint32_t queue_id,
> -	struct rte_sched_queue_stats *stats,
> -	uint16_t *qlen)
> +			   uint32_t queue_id,
> +			   struct rte_sched_queue_stats *stats,
> +			   uint16_t *qlen, int clear)
>  {
>  	struct rte_sched_queue *q;
>  	struct rte_sched_queue_extra *qe;
> 
>  	/* Check user parameters */
> -	if ((port == NULL) ||
> -	    (queue_id >= rte_sched_port_queues_per_port(port)) ||
> -		(stats == NULL) ||
> -		(qlen == NULL)) {
> +	if (port == NULL || queue_id >=
> rte_sched_port_queues_per_port(port))
>  		return -1;
> -	}
> +
>  	q = port->queue + queue_id;
>  	qe = port->queue_extra + queue_id;
> 
>  	/* Copy queue stats and clear */
> -	memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
> -	memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
> +	if (stats)
> +		*stats = qe->stats;
> +	if (clear)
> +		memset(&qe->stats, 0, sizeof(struct
> rte_sched_queue_stats));
> 
>  	/* Queue length */
> -	*qlen = q->qw - q->qr;
> +	if (qlen)
> +		*qlen = q->qw - q->qr;
> 
>  	return 0;
>  }
> diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
> index e9bf18a..f0bf088 100644
> --- a/lib/librte_sched/rte_sched.h
> +++ b/lib/librte_sched/rte_sched.h
> @@ -308,14 +308,15 @@ rte_sched_port_get_memory_footprint(struct
> rte_sched_port_params *params);
>   * @param tc_ov
>   *   Pointer to pre-allocated 4-entry array where the oversubscription status
> for
>   *   each of the 4 subport traffic classes should be stored.
> + * @parm clear
> + *   Reset statistics after read
>   * @return
>   *   0 upon success, error code otherwise
>   */
>  int
> -rte_sched_subport_read_stats(struct rte_sched_port *port,
> -	uint32_t subport_id,
> -	struct rte_sched_subport_stats *stats,
> -	uint32_t *tc_ov);
> +rte_sched_subport_read_stats(struct rte_sched_port *port, uint32_t
> subport_id,
> +			     struct rte_sched_subport_stats *stats,
> +			     uint32_t *tc_ov, int clear);
> 
>  /**
>   * Hierarchical scheduler queue statistics read
> @@ -329,14 +330,15 @@ rte_sched_subport_read_stats(struct
> rte_sched_port *port,
>   *   counters should be stored
>   * @param qlen
>   *   Pointer to pre-allocated variable where the current queue length should
> be stored.
> + * @parm clear
> + *   Reset statistics after read
>   * @return
>   *   0 upon success, error code otherwise
>   */
>  int
> -rte_sched_queue_read_stats(struct rte_sched_port *port,
> -	uint32_t queue_id,
> -	struct rte_sched_queue_stats *stats,
> -	uint16_t *qlen);
> +rte_sched_queue_read_stats(struct rte_sched_port *port, uint32_t
> queue_id,
> +			   struct rte_sched_queue_stats *stats,
> +			   uint16_t *qlen, int clear);
> 
>  /*
>   * Run-time
> --
> 2.1.4

Hi Stephen,

Thank you for adding flexibility over clearing the stats or not.

I have one concern though: why change the stats read API to add a clear parameter rather than keep prototype for the stats functions unchanged and add the flag as part of the port creation parameters in struct rte_sched_port_params? This parameter could be saved into the internal struct rte_sched_port, which is passed (as a pointer) to the stats read functions. In my opinion, this approach is slightly more elegant and it keeps the changes to a minimum.

int
rte_sched_queue_read_stats(struct rte_sched_port *port,
	uint32_t queue_id,
	struct rte_sched_queue_stats *stats,
	uint16_t *qlen)
{
	...
	if (port->clear_stats_on_read)
		memset(...);
}

I made this suggestion during the previous round, but I did not get any opinion from you on it yet.

Regards,
Cristian
  
Stephen Hemminger March 12, 2015, 11:06 p.m. UTC | #2
On Thu, 12 Mar 2015 19:28:11 +0000
"Dumitrescu, Cristian" <cristian.dumitrescu@intel.com> wrote:

> Hi Stephen,
> 
> Thank you for adding flexibility over clearing the stats or not.
> 
> I have one concern though: why change the stats read API to add a clear parameter rather than keep prototype for the stats functions unchanged and add the flag as part of the port creation parameters in struct rte_sched_port_params? This parameter could be saved into the internal struct rte_sched_port, which is passed (as a pointer) to the stats read functions. In my opinion, this approach is slightly more elegant and it keeps the changes to a minimum.
> 
> int
> rte_sched_queue_read_stats(struct rte_sched_port *port,
> 	uint32_t queue_id,
> 	struct rte_sched_queue_stats *stats,
> 	uint16_t *qlen)
> {
> 	...
> 	if (port->clear_stats_on_read)
> 		memset(...);
> }
> 
> I made this suggestion during the previous round, but I did not get any opinion from you on it yet.
> 
> Regards,
> Cristian

I rejected the config parameter idea because I don't like it is inconsistent
with other statistics in DPDK and in related software. There is not a
config parameter that changes what BSD or Linux kernel API does.

The only reason for keeping the read and clear in one operation is
because you like it, and there are somebody might have built code
that expects it.

Changing the function signature is a nice red flag so that people will
notice at change.
  
Cristian Dumitrescu March 16, 2015, 7:58 p.m. UTC | #3
> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Thursday, March 12, 2015 11:06 PM
> To: Dumitrescu, Cristian
> Cc: dev@dpdk.org; Stephen Hemminger
> Subject: Re: [PATCH v2 5/6] rte_sched: allow reading statistics without
> clearing
> 
> On Thu, 12 Mar 2015 19:28:11 +0000
> "Dumitrescu, Cristian" <cristian.dumitrescu@intel.com> wrote:
> 
> > Hi Stephen,
> >
> > Thank you for adding flexibility over clearing the stats or not.
> >
> > I have one concern though: why change the stats read API to add a clear
> parameter rather than keep prototype for the stats functions unchanged and
> add the flag as part of the port creation parameters in struct
> rte_sched_port_params? This parameter could be saved into the internal
> struct rte_sched_port, which is passed (as a pointer) to the stats read
> functions. In my opinion, this approach is slightly more elegant and it keeps
> the changes to a minimum.
> >
> > int
> > rte_sched_queue_read_stats(struct rte_sched_port *port,
> > 	uint32_t queue_id,
> > 	struct rte_sched_queue_stats *stats,
> > 	uint16_t *qlen)
> > {
> > 	...
> > 	if (port->clear_stats_on_read)
> > 		memset(...);
> > }
> >
> > I made this suggestion during the previous round, but I did not get any
> opinion from you on it yet.
> >
> > Regards,
> > Cristian
> 
> I rejected the config parameter idea because I don't like it is inconsistent
> with other statistics in DPDK and in related software. There is not a
> config parameter that changes what BSD or Linux kernel API does.

Your approach has the advantage of being able to clear/not clear the stats per each read operation rather than configuring the behavior globally. I think this approach allows for the ultimate flexibility, so I am OK to go with it.

> 
> The only reason for keeping the read and clear in one operation is
> because you like it, and there are somebody might have built code
> that expects it.
>

Clearing the stats with a delay after the stats have been read is prone to a race condition, as during this time more packets could be processed, and these packets will not show up in the counters that the user read.
I think it depends on the need of each particular application whether this race condition is important or not: if the counters are read rarely (e.g. once per day) and only course accuracy is required, the error is probably irrelevant; if the app is looking for fine great accuracy (e.g. rate measurement, debugging, etc), then the error is not allowed. You seem to favour the former and ignore the later case.


> Changing the function signature is a nice red flag so that people will
> notice at change.

There is a small API change here. I am OK with it for the above reasons, provided that there are no objections on this from other contributors.
  
Cristian Dumitrescu March 16, 2015, 8 p.m. UTC | #4
> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Tuesday, March 10, 2015 4:14 PM
> To: Dumitrescu, Cristian
> Cc: dev@dpdk.org; Stephen Hemminger; Stephen Hemminger
> Subject: [PATCH v2 5/6] rte_sched: allow reading statistics without clearing
> 
> From: Stephen Hemminger <shemming@brocade.com>
> 
> The rte_sched statistics API should allow reading statistics without
> clearing. Make auto-clear optional.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
> v2 -- change to flag for clear from separate read/clear
> 
>  app/test/test_sched.c        |  4 ++--
>  examples/qos_sched/stats.c   | 16 +++++++++++-----
>  lib/librte_sched/rte_sched.c | 44 ++++++++++++++++++++++-----------------
> -----
>  lib/librte_sched/rte_sched.h | 18 ++++++++++--------
>  4 files changed, 45 insertions(+), 37 deletions(-)
> 
> diff --git a/app/test/test_sched.c b/app/test/test_sched.c
> index 60c62de..3c9c9e3 100644
> --- a/app/test/test_sched.c
> +++ b/app/test/test_sched.c
> @@ -208,13 +208,13 @@ test_sched(void)
> 
>  	struct rte_sched_subport_stats subport_stats;
>  	uint32_t tc_ov;
> -	rte_sched_subport_read_stats(port, SUBPORT, &subport_stats,
> &tc_ov);
> +	rte_sched_subport_read_stats(port, SUBPORT, &subport_stats,
> &tc_ov, 1);
>  #if 0
>  	TEST_ASSERT_EQUAL(subport_stats.n_pkts_tc[TC-1], 10, "Wrong
> subport stats\n");
>  #endif
>  	struct rte_sched_queue_stats queue_stats;
>  	uint16_t qlen;
> -	rte_sched_queue_read_stats(port, QUEUE, &queue_stats, &qlen);
> +	rte_sched_queue_read_stats(port, QUEUE, &queue_stats, &qlen,
> 1);
>  #if 0
>  	TEST_ASSERT_EQUAL(queue_stats.n_pkts, 10, "Wrong queue
> stats\n");
>  #endif
> diff --git a/examples/qos_sched/stats.c b/examples/qos_sched/stats.c
> index b4db7b5..88caa54 100644
> --- a/examples/qos_sched/stats.c
> +++ b/examples/qos_sched/stats.c
> @@ -61,7 +61,7 @@ qavg_q(uint8_t port_id, uint32_t subport_id, uint32_t
> pipe_id, uint8_t tc, uint8
>          average = 0;
> 
>          for (count = 0; count < qavg_ntimes; count++) {
> -                rte_sched_queue_read_stats(port, queue_id, &stats, &qlen);
> +                rte_sched_queue_read_stats(port, queue_id, &stats, &qlen, 1);
>                  average += qlen;
>                  usleep(qavg_period);
>          }
> @@ -99,7 +99,9 @@ qavg_tcpipe(uint8_t port_id, uint32_t subport_id,
> uint32_t pipe_id, uint8_t tc)
>          for (count = 0; count < qavg_ntimes; count++) {
>                  part_average = 0;
>                  for (i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
> -                        rte_sched_queue_read_stats(port, queue_id + (tc *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + i), &stats, &qlen);
> +                        rte_sched_queue_read_stats(port,
> +				   queue_id + (tc *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + i),
> +				   &stats, &qlen, 1);
>                          part_average += qlen;
>                  }
>                  average += part_average /
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
> @@ -138,7 +140,8 @@ qavg_pipe(uint8_t port_id, uint32_t subport_id,
> uint32_t pipe_id)
>          for (count = 0; count < qavg_ntimes; count++) {
>                  part_average = 0;
>                  for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
> -                        rte_sched_queue_read_stats(port, queue_id + i, &stats,
> &qlen);
> +                        rte_sched_queue_read_stats(port, queue_id + i,
> +						   &stats, &qlen, 1);
>                          part_average += qlen;
>                  }
>                  average += part_average /
> (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS);
> @@ -178,7 +181,9 @@ qavg_tcsubport(uint8_t port_id, uint32_t subport_id,
> uint8_t tc)
>                          queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id *
> port_params.n_pipes_per_subport + i);
> 
>                          for (j = 0; j < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; j++) {
> -                                rte_sched_queue_read_stats(port, queue_id + (tc *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + j), &stats, &qlen);
> +                                rte_sched_queue_read_stats(port,
> +							   queue_id + (tc *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + j),
> +							   &stats, &qlen, 1);
>                                  part_average += qlen;
>                          }
>                  }
> @@ -220,7 +225,8 @@ qavg_subport(uint8_t port_id, uint32_t subport_id)
>                          queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id *
> port_params.n_pipes_per_subport + i);
> 
>                          for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; j++) {
> -                                rte_sched_queue_read_stats(port, queue_id + j, &stats,
> &qlen);
> +                                rte_sched_queue_read_stats(port, queue_id + j,
> +							   &stats, &qlen, 1);
>                                  part_average += qlen;
>                          }
>                  }
> diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
> index 2071414..74d0e0a 100644
> --- a/lib/librte_sched/rte_sched.c
> +++ b/lib/librte_sched/rte_sched.c
> @@ -909,56 +909,56 @@ rte_sched_pipe_config(struct rte_sched_port
> *port,
> 
>  int
>  rte_sched_subport_read_stats(struct rte_sched_port *port,
> -	uint32_t subport_id,
> -	struct rte_sched_subport_stats *stats,
> -	uint32_t *tc_ov)
> +			     uint32_t subport_id,
> +			     struct rte_sched_subport_stats *stats,
> +			     uint32_t *tc_ov, int clear)
>  {
>  	struct rte_sched_subport *s;
> 
>  	/* Check user parameters */
> -	if ((port == NULL) ||
> -	    (subport_id >= port->n_subports_per_port) ||
> -		(stats == NULL) ||
> -		(tc_ov == NULL)) {
> +	if (port == NULL || subport_id >= port->n_subports_per_port)
>  		return -1;
> -	}
> +
>  	s = port->subport + subport_id;
> 
>  	/* Copy subport stats and clear */
> -	memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
> -	memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
> +	if (stats)
> +		*stats = s->stats;
> +	if (clear)
> +		memset(&s->stats, 0, sizeof(struct
> rte_sched_subport_stats));
> 
>  	/* Subport TC ovesubscription status */
> -	*tc_ov = s->tc_ov;
> +	if (tc_ov)
> +		*tc_ov = s->tc_ov;
> 
>  	return 0;
>  }
> 
>  int
>  rte_sched_queue_read_stats(struct rte_sched_port *port,
> -	uint32_t queue_id,
> -	struct rte_sched_queue_stats *stats,
> -	uint16_t *qlen)
> +			   uint32_t queue_id,
> +			   struct rte_sched_queue_stats *stats,
> +			   uint16_t *qlen, int clear)
>  {
>  	struct rte_sched_queue *q;
>  	struct rte_sched_queue_extra *qe;
> 
>  	/* Check user parameters */
> -	if ((port == NULL) ||
> -	    (queue_id >= rte_sched_port_queues_per_port(port)) ||
> -		(stats == NULL) ||
> -		(qlen == NULL)) {
> +	if (port == NULL || queue_id >=
> rte_sched_port_queues_per_port(port))
>  		return -1;
> -	}
> +
>  	q = port->queue + queue_id;
>  	qe = port->queue_extra + queue_id;
> 
>  	/* Copy queue stats and clear */
> -	memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
> -	memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
> +	if (stats)
> +		*stats = qe->stats;
> +	if (clear)
> +		memset(&qe->stats, 0, sizeof(struct
> rte_sched_queue_stats));
> 
>  	/* Queue length */
> -	*qlen = q->qw - q->qr;
> +	if (qlen)
> +		*qlen = q->qw - q->qr;
> 
>  	return 0;
>  }
> diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
> index e9bf18a..f0bf088 100644
> --- a/lib/librte_sched/rte_sched.h
> +++ b/lib/librte_sched/rte_sched.h
> @@ -308,14 +308,15 @@ rte_sched_port_get_memory_footprint(struct
> rte_sched_port_params *params);
>   * @param tc_ov
>   *   Pointer to pre-allocated 4-entry array where the oversubscription status
> for
>   *   each of the 4 subport traffic classes should be stored.
> + * @parm clear
> + *   Reset statistics after read
>   * @return
>   *   0 upon success, error code otherwise
>   */
>  int
> -rte_sched_subport_read_stats(struct rte_sched_port *port,
> -	uint32_t subport_id,
> -	struct rte_sched_subport_stats *stats,
> -	uint32_t *tc_ov);
> +rte_sched_subport_read_stats(struct rte_sched_port *port, uint32_t
> subport_id,
> +			     struct rte_sched_subport_stats *stats,
> +			     uint32_t *tc_ov, int clear);
> 
>  /**
>   * Hierarchical scheduler queue statistics read
> @@ -329,14 +330,15 @@ rte_sched_subport_read_stats(struct
> rte_sched_port *port,
>   *   counters should be stored
>   * @param qlen
>   *   Pointer to pre-allocated variable where the current queue length should
> be stored.
> + * @parm clear
> + *   Reset statistics after read
>   * @return
>   *   0 upon success, error code otherwise
>   */
>  int
> -rte_sched_queue_read_stats(struct rte_sched_port *port,
> -	uint32_t queue_id,
> -	struct rte_sched_queue_stats *stats,
> -	uint16_t *qlen);
> +rte_sched_queue_read_stats(struct rte_sched_port *port, uint32_t
> queue_id,
> +			   struct rte_sched_queue_stats *stats,
> +			   uint16_t *qlen, int clear);
> 
>  /*
>   * Run-time
> --
> 2.1.4

Acked by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
  
Stephen Hemminger March 16, 2015, 8:11 p.m. UTC | #5
On Mon, 16 Mar 2015 19:58:51 +0000
"Dumitrescu, Cristian" <cristian.dumitrescu@intel.com> wrote:

> 
> 
> > -----Original Message-----
> > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > Sent: Thursday, March 12, 2015 11:06 PM
> > To: Dumitrescu, Cristian
> > Cc: dev@dpdk.org; Stephen Hemminger
> > Subject: Re: [PATCH v2 5/6] rte_sched: allow reading statistics without
> > clearing
> > 
> > On Thu, 12 Mar 2015 19:28:11 +0000
> > "Dumitrescu, Cristian" <cristian.dumitrescu@intel.com> wrote:
> > 
> > > Hi Stephen,
> > >
> > > Thank you for adding flexibility over clearing the stats or not.
> > >
> > > I have one concern though: why change the stats read API to add a clear
> > parameter rather than keep prototype for the stats functions unchanged and
> > add the flag as part of the port creation parameters in struct
> > rte_sched_port_params? This parameter could be saved into the internal
> > struct rte_sched_port, which is passed (as a pointer) to the stats read
> > functions. In my opinion, this approach is slightly more elegant and it keeps
> > the changes to a minimum.
> > >
> > > int
> > > rte_sched_queue_read_stats(struct rte_sched_port *port,
> > > 	uint32_t queue_id,
> > > 	struct rte_sched_queue_stats *stats,
> > > 	uint16_t *qlen)
> > > {
> > > 	...
> > > 	if (port->clear_stats_on_read)
> > > 		memset(...);
> > > }
> > >
> > > I made this suggestion during the previous round, but I did not get any
> > opinion from you on it yet.
> > >
> > > Regards,
> > > Cristian
> > 
> > I rejected the config parameter idea because I don't like it is inconsistent
> > with other statistics in DPDK and in related software. There is not a
> > config parameter that changes what BSD or Linux kernel API does.
> 
> Your approach has the advantage of being able to clear/not clear the stats per each read operation rather than configuring the behavior globally. I think this approach allows for the ultimate flexibility, so I am OK to go with it.
> 
> > 
> > The only reason for keeping the read and clear in one operation is
> > because you like it, and there are somebody might have built code
> > that expects it.
> >
> 
> Clearing the stats with a delay after the stats have been read is prone to a race condition, as during this time more packets could be processed, and these packets will not show up in the counters that the user read.
> I think it depends on the need of each particular application whether this race condition is important or not: if the counters are read rarely (e.g. once per day) and only course accuracy is required, the error is probably irrelevant; if the app is looking for fine great accuracy (e.g. rate measurement, debugging, etc), then the error is not allowed. You seem to favour the former and ignore the later case.
> 
> 
> > Changing the function signature is a nice red flag so that people will
> > notice at change.
> 
> There is a small API change here. I am OK with it for the above reasons, provided that there are no objections on this from other contributors.
> 

If you really wanted a fast/safe read and clear operation, how about using
exchange instruction to exchange in a zero?
  
Cristian Dumitrescu March 16, 2015, 8:21 p.m. UTC | #6
> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Monday, March 16, 2015 8:11 PM
> To: Dumitrescu, Cristian
> Cc: dev@dpdk.org; Stephen Hemminger
> Subject: Re: [PATCH v2 5/6] rte_sched: allow reading statistics without
> clearing
> 
> On Mon, 16 Mar 2015 19:58:51 +0000
> "Dumitrescu, Cristian" <cristian.dumitrescu@intel.com> wrote:
> 
> >
> >
> > > -----Original Message-----
> > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > Sent: Thursday, March 12, 2015 11:06 PM
> > > To: Dumitrescu, Cristian
> > > Cc: dev@dpdk.org; Stephen Hemminger
> > > Subject: Re: [PATCH v2 5/6] rte_sched: allow reading statistics without
> > > clearing
> > >
> > > On Thu, 12 Mar 2015 19:28:11 +0000
> > > "Dumitrescu, Cristian" <cristian.dumitrescu@intel.com> wrote:
> > >
> > > > Hi Stephen,
> > > >
> > > > Thank you for adding flexibility over clearing the stats or not.
> > > >
> > > > I have one concern though: why change the stats read API to add a
> clear
> > > parameter rather than keep prototype for the stats functions unchanged
> and
> > > add the flag as part of the port creation parameters in struct
> > > rte_sched_port_params? This parameter could be saved into the internal
> > > struct rte_sched_port, which is passed (as a pointer) to the stats read
> > > functions. In my opinion, this approach is slightly more elegant and it
> keeps
> > > the changes to a minimum.
> > > >
> > > > int
> > > > rte_sched_queue_read_stats(struct rte_sched_port *port,
> > > > 	uint32_t queue_id,
> > > > 	struct rte_sched_queue_stats *stats,
> > > > 	uint16_t *qlen)
> > > > {
> > > > 	...
> > > > 	if (port->clear_stats_on_read)
> > > > 		memset(...);
> > > > }
> > > >
> > > > I made this suggestion during the previous round, but I did not get any
> > > opinion from you on it yet.
> > > >
> > > > Regards,
> > > > Cristian
> > >
> > > I rejected the config parameter idea because I don't like it is inconsistent
> > > with other statistics in DPDK and in related software. There is not a
> > > config parameter that changes what BSD or Linux kernel API does.
> >
> > Your approach has the advantage of being able to clear/not clear the stats
> per each read operation rather than configuring the behavior globally. I think
> this approach allows for the ultimate flexibility, so I am OK to go with it.
> >
> > >
> > > The only reason for keeping the read and clear in one operation is
> > > because you like it, and there are somebody might have built code
> > > that expects it.
> > >
> >
> > Clearing the stats with a delay after the stats have been read is prone to a
> race condition, as during this time more packets could be processed, and
> these packets will not show up in the counters that the user read.
> > I think it depends on the need of each particular application whether this
> race condition is important or not: if the counters are read rarely (e.g. once
> per day) and only course accuracy is required, the error is probably irrelevant;
> if the app is looking for fine great accuracy (e.g. rate measurement,
> debugging, etc), then the error is not allowed. You seem to favour the
> former and ignore the later case.
> >
> >
> > > Changing the function signature is a nice red flag so that people will
> > > notice at change.
> >
> > There is a small API change here. I am OK with it for the above reasons,
> provided that there are no objections on this from other contributors.
> >
> 
> If you really wanted a fast/safe read and clear operation, how about using
> exchange instruction to exchange in a zero?

When stats read is decoupled from stats clear, the exchange operation cannot fix this.

To your point: when stats are cleared on read, would it make sense to use the exchange operation? In my opinion, it does not make sense, as the rte_sched API is not thread safe (for performance reasons), which is explicitly  stated. As clearly documented, the scheduler enqueue and dequeue operations need to run on the same CPU core. Therefore, IMHO having a thread safe stats read function while the main functional API is not thread safe is not going to add value.
  
Stephen Hemminger March 16, 2015, 8:33 p.m. UTC | #7
On Mon, 16 Mar 2015 20:21:39 +0000
"Dumitrescu, Cristian" <cristian.dumitrescu@intel.com> wrote:

> When stats read is decoupled from stats clear, the exchange operation cannot fix this.
> 
> To your point: when stats are cleared on read, would it make sense to use the exchange operation? In my opinion, it does not make sense, as the rte_sched API is not thread safe (for performance reasons), which is explicitly  stated. As clearly documented, the scheduler enqueue and dequeue operations need to run on the same CPU core. Therefore, IMHO having a thread safe stats read function while the main functional API is not thread safe is not going to add value.

Agreed but for a different reason.
It is safe but not consistent to read statistics from another management thread.
As long as no other thread is making changes to underlying configuration only
the values might change while packets in flight. Making all the statistics
fully atomic_t would be a performance killer.
  

Patch

diff --git a/app/test/test_sched.c b/app/test/test_sched.c
index 60c62de..3c9c9e3 100644
--- a/app/test/test_sched.c
+++ b/app/test/test_sched.c
@@ -208,13 +208,13 @@  test_sched(void)
 
 	struct rte_sched_subport_stats subport_stats;
 	uint32_t tc_ov;
-	rte_sched_subport_read_stats(port, SUBPORT, &subport_stats, &tc_ov);
+	rte_sched_subport_read_stats(port, SUBPORT, &subport_stats, &tc_ov, 1);
 #if 0
 	TEST_ASSERT_EQUAL(subport_stats.n_pkts_tc[TC-1], 10, "Wrong subport stats\n");
 #endif
 	struct rte_sched_queue_stats queue_stats;
 	uint16_t qlen;
-	rte_sched_queue_read_stats(port, QUEUE, &queue_stats, &qlen);
+	rte_sched_queue_read_stats(port, QUEUE, &queue_stats, &qlen, 1);
 #if 0
 	TEST_ASSERT_EQUAL(queue_stats.n_pkts, 10, "Wrong queue stats\n");
 #endif
diff --git a/examples/qos_sched/stats.c b/examples/qos_sched/stats.c
index b4db7b5..88caa54 100644
--- a/examples/qos_sched/stats.c
+++ b/examples/qos_sched/stats.c
@@ -61,7 +61,7 @@  qavg_q(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id, uint8_t tc, uint8
         average = 0;
 
         for (count = 0; count < qavg_ntimes; count++) {
-                rte_sched_queue_read_stats(port, queue_id, &stats, &qlen);
+                rte_sched_queue_read_stats(port, queue_id, &stats, &qlen, 1);
                 average += qlen;
                 usleep(qavg_period);
         }
@@ -99,7 +99,9 @@  qavg_tcpipe(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id, uint8_t tc)
         for (count = 0; count < qavg_ntimes; count++) {
                 part_average = 0;
                 for (i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
-                        rte_sched_queue_read_stats(port, queue_id + (tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + i), &stats, &qlen);
+                        rte_sched_queue_read_stats(port,
+				   queue_id + (tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + i),
+				   &stats, &qlen, 1);
                         part_average += qlen;
                 }
                 average += part_average / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
@@ -138,7 +140,8 @@  qavg_pipe(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id)
         for (count = 0; count < qavg_ntimes; count++) {
                 part_average = 0;
                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
-                        rte_sched_queue_read_stats(port, queue_id + i, &stats, &qlen);
+                        rte_sched_queue_read_stats(port, queue_id + i,
+						   &stats, &qlen, 1);
                         part_average += qlen;
                 }
                 average += part_average / (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS);
@@ -178,7 +181,9 @@  qavg_tcsubport(uint8_t port_id, uint32_t subport_id, uint8_t tc)
                         queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id * port_params.n_pipes_per_subport + i);
 
                         for (j = 0; j < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; j++) {
-                                rte_sched_queue_read_stats(port, queue_id + (tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + j), &stats, &qlen);
+                                rte_sched_queue_read_stats(port,
+							   queue_id + (tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + j),
+							   &stats, &qlen, 1);
                                 part_average += qlen;
                         }
                 }
@@ -220,7 +225,8 @@  qavg_subport(uint8_t port_id, uint32_t subport_id)
                         queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id * port_params.n_pipes_per_subport + i);
 
                         for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; j++) {
-                                rte_sched_queue_read_stats(port, queue_id + j, &stats, &qlen);
+                                rte_sched_queue_read_stats(port, queue_id + j,
+							   &stats, &qlen, 1);
                                 part_average += qlen;
                         }
                 }
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index 2071414..74d0e0a 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -909,56 +909,56 @@  rte_sched_pipe_config(struct rte_sched_port *port,
 
 int
 rte_sched_subport_read_stats(struct rte_sched_port *port,
-	uint32_t subport_id,
-	struct rte_sched_subport_stats *stats,
-	uint32_t *tc_ov)
+			     uint32_t subport_id,
+			     struct rte_sched_subport_stats *stats,
+			     uint32_t *tc_ov, int clear)
 {
 	struct rte_sched_subport *s;
 
 	/* Check user parameters */
-	if ((port == NULL) ||
-	    (subport_id >= port->n_subports_per_port) ||
-		(stats == NULL) ||
-		(tc_ov == NULL)) {
+	if (port == NULL || subport_id >= port->n_subports_per_port) 
 		return -1;
-	}
+
 	s = port->subport + subport_id;
 
 	/* Copy subport stats and clear */
-	memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
-	memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
+	if (stats)
+		*stats = s->stats;
+	if (clear)
+		memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
 
 	/* Subport TC ovesubscription status */
-	*tc_ov = s->tc_ov;
+	if (tc_ov)
+		*tc_ov = s->tc_ov;
 
 	return 0;
 }
 
 int
 rte_sched_queue_read_stats(struct rte_sched_port *port,
-	uint32_t queue_id,
-	struct rte_sched_queue_stats *stats,
-	uint16_t *qlen)
+			   uint32_t queue_id,
+			   struct rte_sched_queue_stats *stats,
+			   uint16_t *qlen, int clear)
 {
 	struct rte_sched_queue *q;
 	struct rte_sched_queue_extra *qe;
 
 	/* Check user parameters */
-	if ((port == NULL) ||
-	    (queue_id >= rte_sched_port_queues_per_port(port)) ||
-		(stats == NULL) ||
-		(qlen == NULL)) {
+	if (port == NULL || queue_id >= rte_sched_port_queues_per_port(port))
 		return -1;
-	}
+
 	q = port->queue + queue_id;
 	qe = port->queue_extra + queue_id;
 
 	/* Copy queue stats and clear */
-	memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
-	memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
+	if (stats)
+		*stats = qe->stats;
+	if (clear)
+		memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
 
 	/* Queue length */
-	*qlen = q->qw - q->qr;
+	if (qlen)
+		*qlen = q->qw - q->qr;
 
 	return 0;
 }
diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
index e9bf18a..f0bf088 100644
--- a/lib/librte_sched/rte_sched.h
+++ b/lib/librte_sched/rte_sched.h
@@ -308,14 +308,15 @@  rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params);
  * @param tc_ov
  *   Pointer to pre-allocated 4-entry array where the oversubscription status for
  *   each of the 4 subport traffic classes should be stored.
+ * @parm clear
+ *   Reset statistics after read
  * @return
  *   0 upon success, error code otherwise
  */
 int
-rte_sched_subport_read_stats(struct rte_sched_port *port,
-	uint32_t subport_id,
-	struct rte_sched_subport_stats *stats,
-	uint32_t *tc_ov);
+rte_sched_subport_read_stats(struct rte_sched_port *port, uint32_t subport_id,
+			     struct rte_sched_subport_stats *stats,
+			     uint32_t *tc_ov, int clear);
 
 /**
  * Hierarchical scheduler queue statistics read
@@ -329,14 +330,15 @@  rte_sched_subport_read_stats(struct rte_sched_port *port,
  *   counters should be stored
  * @param qlen
  *   Pointer to pre-allocated variable where the current queue length should be stored.
+ * @parm clear
+ *   Reset statistics after read
  * @return
  *   0 upon success, error code otherwise
  */
 int
-rte_sched_queue_read_stats(struct rte_sched_port *port,
-	uint32_t queue_id,
-	struct rte_sched_queue_stats *stats,
-	uint16_t *qlen);
+rte_sched_queue_read_stats(struct rte_sched_port *port, uint32_t queue_id,
+			   struct rte_sched_queue_stats *stats,
+			   uint16_t *qlen, int clear);
 
 /*
  * Run-time