[RFC,1/3] sched: add pie based congestion management

Message ID 20210524105822.63171-2-wojciechx.liguzinski@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series Add PIE support for HQoS library |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Liguzinski, WojciechX May 24, 2021, 10:58 a.m. UTC
  Implement pie based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   4 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_sched.c                    | 220 +++++++++++++++++------
 lib/sched/rte_sched.h                    |  53 ++++--
 4 files changed, 210 insertions(+), 77 deletions(-)
  

Comments

Morten Brørup May 25, 2021, 9:16 a.m. UTC | #1
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Liguzinski,
> WojciechX
> Sent: Monday, 24 May 2021 12.58
> 
> Implement pie based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  drivers/net/softnic/rte_eth_softnic_tm.c |   4 +-
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_sched.c                    | 220 +++++++++++++++++------
>  lib/sched/rte_sched.h                    |  53 ++++--
>  4 files changed, 210 insertions(+), 77 deletions(-)

Please use the abbreviation AQM instead of CMAN in the source code. This applies to the RTE_SCHED_CMAN definition, as well as functions, enums and variable names.

> +#ifdef RTE_SCHED_CMAN
> +
> +static int
> +rte_sched_red_config (struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +
> +		uint32_t j;
> +
> +		for (j = 0; j < RTE_COLORS; j++) {
> +			/* if min/max are both zero, then RED is disabled */
> +			if ((params->red_params[i][j].min_th |
> +				 params->red_params[i][j].max_th) == 0) {
> +				continue;
> +			}
> +
> +			if (rte_red_config_init(&s->red_config[i][j],
> +				params->red_params[i][j].wq_log2,
> +				params->red_params[i][j].min_th,
> +				params->red_params[i][j].max_th,
> +				params->red_params[i][j].maxp_inv) != 0) {
> +				rte_sched_free_memory(port, n_subports);
> +
> +				RTE_LOG(NOTICE, SCHED,
> +				"%s: RED configuration init fails\n",
> __func__);
> +				return -EINVAL;
> +			}
> +		}
> +	}
> +	s->cman = RTE_SCHED_CMAN_WRED;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_pie_config (struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +		if (params->pie_params[i].tailq_th > params->qsize[i]) {
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE tailq threshold incorrect \n", __func__);
> +			return -EINVAL;
> +		}
> +
> +		if (rte_pie_config_init(&s->pie_config[i],
> +			params->pie_params[i].qdelay_ref,
> +			params->pie_params[i].dp_update_interval,
> +			params->pie_params[i].max_burst,
> +			params->pie_params[i].tailq_th) != 0) {
> +			rte_sched_free_memory(port, n_subports);
> +
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE configuration init fails\n", __func__);
> +			return -EINVAL;
> +			}
> +	}
> +	s->cman = RTE_SCHED_CMAN_PIE;
> +	return 0;
> +}

I suggest moving the two above functions from rte_sched.c to respectively rte_red.c and rte_pie.c.

> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_CMAN
>  static inline void
>  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> *port,
>  	struct rte_sched_subport *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	uint32_t red)
> +	uint32_t cman)
>  #else
>  static inline void
>  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> *port,
>  	struct rte_sched_subport *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> +	__rte_unused uint32_t cman)
>  #endif

Two comments:
1. __rte_unused indicates that the variable might be unused, not that it is never used. So you do not need the first variant of this function declaration.
2. I suggest using "drops" as the variable name instead of "red" or "aqm".

> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_CMAN
>  static inline void
>  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	uint32_t red)
> +	uint32_t cman)
>  #else
>  static inline void
>  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> +	__rte_unused uint32_t cman)
>  #endif

The above two comments also apply here.

> +static inline void
> +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
> +uint32_t qindex, uint32_t pkt_len, uint64_t time) {
> +	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
> +	struct rte_pie *pie = &qe->pie;
> +
> +	/* Update queue length */
> +	pie->qlen -= 1;
> +	pie->qlen_bytes -= pkt_len;
> +
> +	rte_pie_dequeue (pie, pkt_len, time);
>  }

Can the RED/PIE specific functions somehow move to rte_red.c and rte_pie.c without degrading performance? Perhaps function pointers are required. This prevents rte_sched.c from growing too much.

> diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
> 
> +/**
> + * Congestion management (CMAN) mode

"Active Queue Management (AQM) mode", please.

> + *
> + * This is used for controlling the admission of packets into a packet
> queue or
> + * group of packet queues on congestion.
> + *
> + * The *Random Early Detection (RED)* algorithm works by proactively
> dropping
> + * more and more input packets as the queue occupancy builds up. When
> the queue
> + * is full or almost full, RED effectively works as *tail drop*. The
> *Weighted
> + * RED* algorithm uses a separate set of RED thresholds for each
> packet color.
> + *
> + * Similar to RED, Proportional Integral Controller Enhanced (PIE)
> randomly
> + * drops a packet at the onset of the congestion and tries to control
> the
> + * latency around the target value. The congestion detection, however,
> is based
> + * on the queueing latency instead of the queue length like RED. For
> more
> + * information, refer RFC8033.
> + */
> +enum rte_sched_cman_mode {
> +	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection (WRED)
> */

Please stick with either the name RED or WRED, for consistency.

> +	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller
> Enhanced (PIE) */
> +};
> +


> --------------------------------------------------------------
> Intel Research and Development Ireland Limited
> Registered in Ireland
> Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
> Registered Number: 308263
> 
> 
> This e-mail and any attachments may contain confidential material for
> the sole
> use of the intended recipient(s). Any review or distribution by others
> is
> strictly prohibited. If you are not the intended recipient, please
> contact the
> sender and delete all copies.
> 

Please don't use this footer when sending to the DPDK mailing list.
  
Liguzinski, WojciechX June 9, 2021, 8:36 a.m. UTC | #2
> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com> 
> Sent: Tuesday, May 25, 2021 11:17 AM
> To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>; dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Dharmappa, Savinay <savinay.dharmappa@intel.com>
> Subject: RE: [dpdk-dev] [RFC PATCH 1/3] sched: add pie based congestion management
>
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Liguzinski, 
> > WojciechX
> > Sent: Monday, 24 May 2021 12.58
> > 
> > Implement pie based congestion management based on rfc8033
> > 
> > Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> > ---
> >  drivers/net/softnic/rte_eth_softnic_tm.c |   4 +-
> >  lib/sched/meson.build                    |  10 +-
> >  lib/sched/rte_sched.c                    | 220 +++++++++++++++++------
> >  lib/sched/rte_sched.h                    |  53 ++++--
> >  4 files changed, 210 insertions(+), 77 deletions(-)
>
> Please use the abbreviation AQM instead of CMAN in the source code. This applies to the RTE_SCHED_CMAN definition, as well as functions, enums and variable names.

Ok, sure, I'm going to change that where applicable.

>
> > +#ifdef RTE_SCHED_CMAN
> > +
> > +static int
> > +rte_sched_red_config (struct rte_sched_port *port,
> > +	struct rte_sched_subport *s,
> > +	struct rte_sched_subport_params *params,
> > +	uint32_t n_subports)
> > +{
> > +	uint32_t i;
> > +
> > +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> > +
> > +		uint32_t j;
> > +
> > +		for (j = 0; j < RTE_COLORS; j++) {
> > +			/* if min/max are both zero, then RED is disabled */
> > +			if ((params->red_params[i][j].min_th |
> > +				 params->red_params[i][j].max_th) == 0) {
> > +				continue;
> > +			}
> > +
> > +			if (rte_red_config_init(&s->red_config[i][j],
> > +				params->red_params[i][j].wq_log2,
> > +				params->red_params[i][j].min_th,
> > +				params->red_params[i][j].max_th,
> > +				params->red_params[i][j].maxp_inv) != 0) {
> > +				rte_sched_free_memory(port, n_subports);
> > +
> > +				RTE_LOG(NOTICE, SCHED,
> > +				"%s: RED configuration init fails\n",
> > __func__);
> > +				return -EINVAL;
> > +			}
> > +		}
> > +	}
> > +	s->cman = RTE_SCHED_CMAN_WRED;
> > +	return 0;
> > +}
> > +
> > +static int
> > +rte_sched_pie_config (struct rte_sched_port *port,
> > +	struct rte_sched_subport *s,
> > +	struct rte_sched_subport_params *params,
> > +	uint32_t n_subports)
> > +{
> > +	uint32_t i;
> > +
> > +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> > +		if (params->pie_params[i].tailq_th > params->qsize[i]) {
> > +			RTE_LOG(NOTICE, SCHED,
> > +			"%s: PIE tailq threshold incorrect \n", __func__);
> > +			return -EINVAL;
> > +		}
> > +
> > +		if (rte_pie_config_init(&s->pie_config[i],
> > +			params->pie_params[i].qdelay_ref,
> > +			params->pie_params[i].dp_update_interval,
> > +			params->pie_params[i].max_burst,
> > +			params->pie_params[i].tailq_th) != 0) {
> > +			rte_sched_free_memory(port, n_subports);
> > +
> > +			RTE_LOG(NOTICE, SCHED,
> > +			"%s: PIE configuration init fails\n", __func__);
> > +			return -EINVAL;
> > +			}
> > +	}
> > +	s->cman = RTE_SCHED_CMAN_PIE;
> > +	return 0;
> > +}
>
> I suggest moving the two above functions from rte_sched.c to respectively rte_red.c and rte_pie.c.

rte_red.c and rte_pie.c hold functions implementing those algorithms and they don't know anything about ports and subports. That part refers to scheduler implementation. Putting those methods respectively to those files would in my opinion break the 'functional isolation'.

>
> > -#ifdef RTE_SCHED_RED
> > +#ifdef RTE_SCHED_CMAN
> >  static inline void
> >  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port 
> > *port,
> >  	struct rte_sched_subport *subport,
> >  	uint32_t qindex,
> >  	struct rte_mbuf *pkt,
> > -	uint32_t red)
> > +	uint32_t cman)
> >  #else
> >  static inline void
> >  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port 
> > *port,
> >  	struct rte_sched_subport *subport,
> >  	uint32_t qindex,
> >  	struct rte_mbuf *pkt,
> > -	__rte_unused uint32_t red)
> > +	__rte_unused uint32_t cman)
> >  #endif
>
> Two comments:
> 1. __rte_unused indicates that the variable might be unused, not that it is never used. So you do not need the first variant of this function declaration.

Thanks, it's going to be fixed.

> 2. I suggest using "drops" as the variable name instead of "red" or "aqm".

Ok, I will change that.

>
> > -#ifdef RTE_SCHED_RED
> > +#ifdef RTE_SCHED_CMAN
> >  static inline void
> >  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport 
> > *subport,
> >  	uint32_t qindex,
> >  	struct rte_mbuf *pkt,
> > -	uint32_t red)
> > +	uint32_t cman)
> >  #else
> >  static inline void
> >  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport 
> > *subport,
> >  	uint32_t qindex,
> >  	struct rte_mbuf *pkt,
> > -	__rte_unused uint32_t red)
> > +	__rte_unused uint32_t cman)
> >  #endif
>
> The above two comments also apply here.

Ok, it's going to be changed.

>
> > +static inline void
> > +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport, 
> > +uint32_t qindex, uint32_t pkt_len, uint64_t time) {
> > +	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
> > +	struct rte_pie *pie = &qe->pie;
> > +
> > +	/* Update queue length */
> > +	pie->qlen -= 1;
> > +	pie->qlen_bytes -= pkt_len;
> > +
> > +	rte_pie_dequeue (pie, pkt_len, time);
> >  }
>
> Can the RED/PIE specific functions somehow move to rte_red.c and rte_pie.c without degrading performance? Perhaps function pointers are required. This prevents rte_sched.c from growing too much.

Like I mentioned above, those functions use data structures known to the scheduler and not directly to those algorithms which are implemented in those definition files. I will try to think of a solution that could be suitable here.

>
> > diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
> > 
> > +/**
> > + * Congestion management (CMAN) mode
>
> "Active Queue Management (AQM) mode", please.

Sure. ;-)

>
> > + *
> > + * This is used for controlling the admission of packets into a 
> > + packet
> > queue or
> > + * group of packet queues on congestion.
> > + *
> > + * The *Random Early Detection (RED)* algorithm works by proactively
> > dropping
> > + * more and more input packets as the queue occupancy builds up. When
> > the queue
> > + * is full or almost full, RED effectively works as *tail drop*. The
> > *Weighted
> > + * RED* algorithm uses a separate set of RED thresholds for each
> > packet color.
> > + *
> > + * Similar to RED, Proportional Integral Controller Enhanced (PIE)
> > randomly
> > + * drops a packet at the onset of the congestion and tries to control
> > the
> > + * latency around the target value. The congestion detection, 
> > + however,
> > is based
> > + * on the queueing latency instead of the queue length like RED. For
> > more
> > + * information, refer RFC8033.
> > + */
> > +enum rte_sched_cman_mode {
> > +	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection (WRED)
> > */
>
> Please stick with either the name RED or WRED, for consistency.

WRED is just an extension of RED so in places where I found that it is suitable I have used such naming, otherwise RED. I think it shouldn't be changed in all places as it may be confusing.

>
> > +	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller
> > Enhanced (PIE) */
> > +};
> > +
>
>
> > --------------------------------------------------------------
> > Intel Research and Development Ireland Limited Registered in Ireland 
> > Registered Office: Collinstown Industrial Park, Leixlip, County 
> > Kildare Registered Number: 308263
> > 
> > 
> > This e-mail and any attachments may contain confidential material for 
> > the sole use of the intended recipient(s). Any review or distribution 
> > by others is strictly prohibited. If you are not the intended 
> > recipient, please contact the sender and delete all copies.
> > 
>
> Please don't use this footer when sending to the DPDK mailing list.

Footer issue has been handled.

Thanks,
Wojtek
  
Morten Brørup June 9, 2021, 12:35 p.m. UTC | #3
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Liguzinski,
> WojciechX
> Sent: Wednesday, 9 June 2021 10.37
> 
> > From: Morten Brørup <mb@smartsharesystems.com>
> > Sent: Tuesday, May 25, 2021 11:17 AM
> >
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Liguzinski,
> > > WojciechX
> > > Sent: Monday, 24 May 2021 12.58
> > >
> > > Implement pie based congestion management based on rfc8033
> > >
> > > Signed-off-by: Liguzinski, WojciechX
> <wojciechx.liguzinski@intel.com>
> > > ---
> > >  drivers/net/softnic/rte_eth_softnic_tm.c |   4 +-
> > >  lib/sched/meson.build                    |  10 +-
> > >  lib/sched/rte_sched.c                    | 220 +++++++++++++++++--
> ----
> > >  lib/sched/rte_sched.h                    |  53 ++++--
> > >  4 files changed, 210 insertions(+), 77 deletions(-)
> >
> > Please use the abbreviation AQM instead of CMAN in the source code.
> This applies to the RTE_SCHED_CMAN definition, as well as functions,
> enums and variable names.
> 
> Ok, sure, I'm going to change that where applicable.
> 
> >
> > > +#ifdef RTE_SCHED_CMAN
> > > +
> > > +static int
> > > +rte_sched_red_config (struct rte_sched_port *port,
> > > +	struct rte_sched_subport *s,
> > > +	struct rte_sched_subport_params *params,
> > > +	uint32_t n_subports)
> > > +{
> > > +	uint32_t i;
> > > +
> > > +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> > > +
> > > +		uint32_t j;
> > > +
> > > +		for (j = 0; j < RTE_COLORS; j++) {
> > > +			/* if min/max are both zero, then RED is disabled */
> > > +			if ((params->red_params[i][j].min_th |
> > > +				 params->red_params[i][j].max_th) == 0) {
> > > +				continue;
> > > +			}
> > > +
> > > +			if (rte_red_config_init(&s->red_config[i][j],
> > > +				params->red_params[i][j].wq_log2,
> > > +				params->red_params[i][j].min_th,
> > > +				params->red_params[i][j].max_th,
> > > +				params->red_params[i][j].maxp_inv) != 0) {
> > > +				rte_sched_free_memory(port, n_subports);
> > > +
> > > +				RTE_LOG(NOTICE, SCHED,
> > > +				"%s: RED configuration init fails\n",
> > > __func__);
> > > +				return -EINVAL;
> > > +			}
> > > +		}
> > > +	}
> > > +	s->cman = RTE_SCHED_CMAN_WRED;
> > > +	return 0;
> > > +}
> > > +
> > > +static int
> > > +rte_sched_pie_config (struct rte_sched_port *port,
> > > +	struct rte_sched_subport *s,
> > > +	struct rte_sched_subport_params *params,
> > > +	uint32_t n_subports)
> > > +{
> > > +	uint32_t i;
> > > +
> > > +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> > > +		if (params->pie_params[i].tailq_th > params->qsize[i]) {
> > > +			RTE_LOG(NOTICE, SCHED,
> > > +			"%s: PIE tailq threshold incorrect \n", __func__);
> > > +			return -EINVAL;
> > > +		}
> > > +
> > > +		if (rte_pie_config_init(&s->pie_config[i],
> > > +			params->pie_params[i].qdelay_ref,
> > > +			params->pie_params[i].dp_update_interval,
> > > +			params->pie_params[i].max_burst,
> > > +			params->pie_params[i].tailq_th) != 0) {
> > > +			rte_sched_free_memory(port, n_subports);
> > > +
> > > +			RTE_LOG(NOTICE, SCHED,
> > > +			"%s: PIE configuration init fails\n", __func__);
> > > +			return -EINVAL;
> > > +			}
> > > +	}
> > > +	s->cman = RTE_SCHED_CMAN_PIE;
> > > +	return 0;
> > > +}
> >
> > I suggest moving the two above functions from rte_sched.c to
> respectively rte_red.c and rte_pie.c.
> 
> rte_red.c and rte_pie.c hold functions implementing those algorithms
> and they don't know anything about ports and subports. That part refers
> to scheduler implementation. Putting those methods respectively to
> those files would in my opinion break the 'functional isolation'.
> 

Then it makes sense keeping them here. You can ignore my suggestion.

> >
> > > -#ifdef RTE_SCHED_RED
> > > +#ifdef RTE_SCHED_CMAN
> > >  static inline void
> > >  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> > > *port,
> > >  	struct rte_sched_subport *subport,
> > >  	uint32_t qindex,
> > >  	struct rte_mbuf *pkt,
> > > -	uint32_t red)
> > > +	uint32_t cman)
> > >  #else
> > >  static inline void
> > >  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> > > *port,
> > >  	struct rte_sched_subport *subport,
> > >  	uint32_t qindex,
> > >  	struct rte_mbuf *pkt,
> > > -	__rte_unused uint32_t red)
> > > +	__rte_unused uint32_t cman)
> > >  #endif
> >
> > Two comments:
> > 1. __rte_unused indicates that the variable might be unused, not that
> it is never used. So you do not need the first variant of this function
> declaration.
> 
> Thanks, it's going to be fixed.
> 
> > 2. I suggest using "drops" as the variable name instead of "red" or
> "aqm".
> 
> Ok, I will change that.
> 
> >
> > > -#ifdef RTE_SCHED_RED
> > > +#ifdef RTE_SCHED_CMAN
> > >  static inline void
> > >  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> > > *subport,
> > >  	uint32_t qindex,
> > >  	struct rte_mbuf *pkt,
> > > -	uint32_t red)
> > > +	uint32_t cman)
> > >  #else
> > >  static inline void
> > >  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> > > *subport,
> > >  	uint32_t qindex,
> > >  	struct rte_mbuf *pkt,
> > > -	__rte_unused uint32_t red)
> > > +	__rte_unused uint32_t cman)
> > >  #endif
> >
> > The above two comments also apply here.
> 
> Ok, it's going to be changed.
> 
> >
> > > +static inline void
> > > +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
> > > +uint32_t qindex, uint32_t pkt_len, uint64_t time) {
> > > +	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
> > > +	struct rte_pie *pie = &qe->pie;
> > > +
> > > +	/* Update queue length */
> > > +	pie->qlen -= 1;
> > > +	pie->qlen_bytes -= pkt_len;
> > > +
> > > +	rte_pie_dequeue (pie, pkt_len, time);
> > >  }
> >
> > Can the RED/PIE specific functions somehow move to rte_red.c and
> rte_pie.c without degrading performance? Perhaps function pointers are
> required. This prevents rte_sched.c from growing too much.
> 
> Like I mentioned above, those functions use data structures known to
> scheduler and not directly to those algorithms which are implemented in
> those definition files. I will try to think of a solution that could be
> suitable here.

Now that I understand your line of thinking, I agree with you. You can ignore my comment here too.

> 
> >
> > > diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
> > >
> > > +/**
> > > + * Congestion management (CMAN) mode
> >
> > "Active Queue Management (AQM) mode", please.
> 
> Sure. ;-)
> 
> >
> > > + *
> > > + * This is used for controlling the admission of packets into a
> > > + packet
> > > queue or
> > > + * group of packet queues on congestion.
> > > + *
> > > + * The *Random Early Detection (RED)* algorithm works by
> proactively
> > > dropping
> > > + * more and more input packets as the queue occupancy builds up.
> When
> > > the queue
> > > + * is full or almost full, RED effectively works as *tail drop*.
> The
> > > *Weighted
> > > + * RED* algorithm uses a separate set of RED thresholds for each
> > > packet color.
> > > + *
> > > + * Similar to RED, Proportional Integral Controller Enhanced (PIE)
> > > randomly
> > > + * drops a packet at the onset of the congestion and tries to
> control
> > > the
> > > + * latency around the target value. The congestion detection,
> > > + however,
> > > is based
> > > + * on the queueing latency instead of the queue length like RED.
> For
> > > more
> > > + * information, refer RFC8033.
> > > + */
> > > +enum rte_sched_cman_mode {
> > > +	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection (WRED)
> > > */
> >
> > Please stick with either the name RED or WRED, for consistency.
> 
> WRED is just an extension of RED so in places where I found that it is
> suitable I have used such naming, otherwise RED. I think it shouldn't
> be changed in all places as it may be confusing.
> 

I don't have a strong opinion about this, and you are putting some thoughts into it, so I'm happy with that.

> >
> > > +	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller
> > > Enhanced (PIE) */
> > > +};
> > > +
> >

[snip]

> Footer issue has been handled.
> 
> Thanks,
> Wojtek

:-)
  

Patch

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..bdcd05b0e6 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@  pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@  tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@ 
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index cd87e688e4..a5fa8fadc8 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@  struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,13 @@  struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+	enum rte_sched_cman_mode cman;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1087,91 @@  rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+
+static int
+rte_sched_red_config (struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->red_params[i][j].min_th |
+				 params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->red_config[i][j],
+				params->red_params[i][j].wq_log2,
+				params->red_params[i][j].min_th,
+				params->red_params[i][j].max_th,
+				params->red_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_WRED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config (struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect \n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->pie_params[i].qdelay_ref,
+			params->pie_params[i].dp_update_interval,
+			params->pie_params[i].max_burst,
+			params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if(params->cman == RTE_SCHED_CMAN_WRED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->cman == RTE_SCHED_CMAN_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1169,30 +1263,11 @@  rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					rte_sched_free_memory(port, n_subports);
-
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					return -EINVAL;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+		status = rte_sched_cman_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: CMAN configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1714,20 +1789,20 @@  rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
+	uint32_t cman)
 #else
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
+	__rte_unused uint32_t cman)
 #endif
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
@@ -1735,8 +1810,8 @@  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
+#ifdef RTE_SCHED_CMAN
+	subport->stats.n_pkts_cman_dropped[tc_index] += cman;
 #endif
 }
 
@@ -1752,18 +1827,18 @@  rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
+	uint32_t cman)
 #else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
+	__rte_unused uint32_t cman)
 #endif
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
@@ -1771,39 +1846,50 @@  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	qe->stats.n_pkts_cman_dropped += cman;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->cman == RTE_SCHED_CMAN_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
 }
 
 static inline void
@@ -1811,14 +1897,29 @@  rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman == RTE_SCHED_CMAN_WRED) {
+		struct rte_red *red = &qe->red;
 
-	rte_red_mark_queue_empty(red, port->time);
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+	uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+	struct rte_pie *pie = &qe->pie;
+
+	/* Update queue length */
+	pie->qlen -= 1;
+	pie->qlen_bytes -= pkt_len;
+
+	rte_pie_dequeue(pie, pkt_len, time);
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1829,7 +1930,7 @@  static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 
 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1925,7 +2026,7 @@  rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2398,6 +2499,7 @@  grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2417,15 +2519,19 @@  grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
+
 		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+#ifdef RTE_SCHED_CMAN
+	if (subport->cman == RTE_SCHED_CMAN_PIE)
+		rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+#endif
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..692aba9442 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@  extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion management */
+#ifdef RTE_SCHED_CMAN
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@  extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length as in RED. For more
+ * information, refer to RFC 8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +197,17 @@  struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	/** Congestion management mode */
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		/** RED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 };
 
@@ -208,9 +239,9 @@  struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+#ifdef RTE_SCHED_CMAN
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 #endif
 };
 
@@ -222,9 +253,9 @@  struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
+#ifdef RTE_SCHED_CMAN
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 #endif
 
 	/** Bytes successfully written */