[v9,1/5] sched: add PIE based congestion management

Message ID 20211011075541.1182775-2-wojciechx.liguzinski@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series Add PIE support for HQoS library |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Liguzinski, WojciechX Oct. 11, 2021, 7:55 a.m. UTC
  Implement PIE based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 228 +++++++++----
 lib/sched/rte_sched.h                    |  53 ++-
 lib/sched/version.map                    |   3 +
 7 files changed, 685 insertions(+), 90 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h
  

Comments

Cristian Dumitrescu Oct. 12, 2021, 3:59 p.m. UTC | #1
Hi Wojciech,

> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Monday, October 11, 2021 8:56 AM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v9 1/5] sched: add PIE based congestion management
> 
> Implement PIE based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_pie.c                      |  82 +++++
>  lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
>  lib/sched/rte_sched.c                    | 228 +++++++++----
>  lib/sched/rte_sched.h                    |  53 ++-
>  lib/sched/version.map                    |   3 +
>  7 files changed, 685 insertions(+), 90 deletions(-)
>  create mode 100644 lib/sched/rte_pie.c
>  create mode 100644 lib/sched/rte_pie.h
> 
> diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c
> b/drivers/net/softnic/rte_eth_softnic_tm.c
> index 90baba15ce..5b6c4e6d4b 100644
> --- a/drivers/net/softnic/rte_eth_softnic_tm.c
> +++ b/drivers/net/softnic/rte_eth_softnic_tm.c
> @@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
>  	return 0;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
>  #define WRED_SUPPORTED						1
>  #else
>  #define WRED_SUPPORTED						0
> @@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev,
> uint32_t tc_id)
>  	return NULL;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static void
>  wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
> @@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev,
> uint32_t subport_id)
>  	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE;
> tc_id++)
>  		for (color = RTE_COLOR_GREEN; color < RTE_COLORS;
> color++) {
>  			struct rte_red_params *dst =
> -				&pp->red_params[tc_id][color];
> +				&pp->wred_params[tc_id][color];
>  			struct tm_wred_profile *src_wp =
>  				tm_tc_wred_profile_get(dev, tc_id);
>  			struct rte_tm_red_params *src =
> diff --git a/lib/sched/meson.build b/lib/sched/meson.build
> index b24f7b8775..e7ae9bcf19 100644
> --- a/lib/sched/meson.build
> +++ b/lib/sched/meson.build
> @@ -1,11 +1,7 @@
>  # SPDX-License-Identifier: BSD-3-Clause
>  # Copyright(c) 2017 Intel Corporation
> 
> -sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
> -headers = files(
> -        'rte_approx.h',
> -        'rte_red.h',
> -        'rte_sched.h',
> -        'rte_sched_common.h',
> -)
> +sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
> +headers = files('rte_sched.h', 'rte_sched_common.h',
> +		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
>  deps += ['mbuf', 'meter']
> diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
> new file mode 100644
> index 0000000000..2fcecb2db4
> --- /dev/null
> +++ b/lib/sched/rte_pie.c
> @@ -0,0 +1,82 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include <stdlib.h>
> +
> +#include "rte_pie.h"
> +#include <rte_common.h>
> +#include <rte_cycles.h>
> +#include <rte_malloc.h>
> +
> +#ifdef __INTEL_COMPILER
> +#pragma warning(disable:2259) /* conversion may lose significant bits */
> +#endif
> +
> +void
> +rte_pie_rt_data_init(struct rte_pie *pie)
> +{
> +	if (pie == NULL) {
> +		/* Allocate memory to use the PIE data structure */
> +		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
> +
> +		if (pie == NULL)
> +			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
> +	}
> +
> +	pie->active = 0;
> +	pie->in_measurement = 0;
> +	pie->departed_bytes_count = 0;
> +	pie->start_measurement = 0;
> +	pie->last_measurement = 0;
> +	pie->qlen = 0;
> +	pie->avg_dq_time = 0;
> +	pie->burst_allowance = 0;
> +	pie->qdelay_old = 0;
> +	pie->drop_prob = 0;
> +	pie->accu_prob = 0;
> +}
> +
> +int
> +rte_pie_config_init(struct rte_pie_config *pie_cfg,
> +	const uint16_t qdelay_ref,
> +	const uint16_t dp_update_interval,
> +	const uint16_t max_burst,
> +	const uint16_t tailq_th)
> +{
> +	uint64_t tsc_hz = rte_get_tsc_hz();
> +
> +	if (pie_cfg == NULL)
> +		return -1;
> +
> +	if (qdelay_ref <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for qdelay_ref\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (dp_update_interval <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for dp_update_interval\n",
> __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (max_burst <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for max_burst\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (tailq_th <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for tailq_th\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
> +	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) /
> 1000;
> +	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
> +	pie_cfg->tailq_th = tailq_th;
> +
> +	return 0;
> +}
> diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
> new file mode 100644
> index 0000000000..f83c95664f
> --- /dev/null
> +++ b/lib/sched/rte_pie.h
> @@ -0,0 +1,393 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#ifndef __RTE_PIE_H_INCLUDED__
> +#define __RTE_PIE_H_INCLUDED__
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/**
> + * @file
> + * RTE Proportional Integral controller Enhanced (PIE)
> + *
> + *
> + ***/
> +
> +#include <stdint.h>
> +
> +#include <rte_random.h>
> +#include <rte_debug.h>
> +
> +#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
> +				     * to start measurement cycle (bytes)
> +				     */
> +#define RTE_DQ_WEIGHT      0.25    /**< Weight
> (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
> +#define RTE_ALPHA          0.125   /**< Weights in drop probability
> calculations */
> +#define RTE_BETA           1.25    /**< Weights in drop probability calculations
> */
> +#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number
> */
> +
> +
> +/**
> + * PIE configuration parameters passed by user
> + *
> + */
> +struct rte_pie_params {
> +	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
> +	uint16_t dp_update_interval;   /**< Update interval for drop
> probability (milliseconds) */
> +	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds)
> */
> +	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
> +};
> +
> +/**
> + * PIE configuration parameters
> + *
> + */
> +struct rte_pie_config {
> +	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
> +	uint64_t dp_update_interval;   /**< Update interval for drop
> probability (in CPU cycles) */
> +	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.)
> */
> +	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
> +};
> +
> +/**
> + * PIE run-time data
> + */
> +struct rte_pie {
> +	uint16_t active;               /**< Flag for activating/deactivating pie */
> +	uint16_t in_measurement;       /**< Flag for activation of
> measurement cycle */
> +	uint32_t departed_bytes_count; /**< Number of bytes departed in
> current measurement cycle */
> +	uint64_t start_measurement;    /**< Time to start to measurement
> cycle (in cpu cycles) */
> +	uint64_t last_measurement;     /**< Time of last measurement (in
> cpu cycles) */
> +	uint64_t qlen;                 /**< Queue length (packets count) */
> +	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
> +	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in
> cpu cycles) */
> +	uint32_t burst_allowance;      /**< Current burst allowance (in cpu cycles) */
> +	uint64_t qdelay_old;           /**< Old queue delay (in cpu cycles) */
> +	double drop_prob;              /**< Current packet drop probability */
> +	double accu_prob;              /**< Accumulated packet drop probability
> */
> +};
> +
> +/**
> + * @brief Initialises run-time data
> + *
> + * @param pie [in,out] data pointer to PIE runtime data
> + */
> +void
> +__rte_experimental
> +rte_pie_rt_data_init(struct rte_pie *pie);
> +
> +/**
> + * @brief Configures a single PIE configuration parameter structure.
> + *
> + * @param pie_cfg [in,out] config pointer to a PIE configuration parameter
> structure
> + * @param qdelay_ref [in]  latency target(milliseconds)
> + * @param dp_update_interval [in] update interval for drop probability
> (milliseconds)
> + * @param max_burst [in] maximum burst allowance (milliseconds)
> + * @param tailq_th [in] tail drop threshold for the queue (number of
> packets)
> + *
> + * @return Operation status
> + * @retval 0 success
> + * @retval !0 error
> + */
> +int
> +__rte_experimental
> +rte_pie_config_init(struct rte_pie_config *pie_cfg,
> +	const uint16_t qdelay_ref,
> +	const uint16_t dp_update_interval,
> +	const uint16_t max_burst,
> +	const uint16_t tailq_th);
> +
> +/**
> + * @brief Decides packet enqueue when queue is empty
> + *
> + * Note: packet is never dropped in this particular case.
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval !0 drop the packet
> + */
> +static inline int
> +__rte_experimental
> +rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	uint32_t pkt_len)
> +{
> +	RTE_ASSERT(pkt_len != NULL);
> +
> +	/* Update the PIE qlen parameter */
> +	pie->qlen++;
> +	pie->qlen_bytes += pkt_len;
> +
> +	/**
> +	 * If the queue has been idle for a while, turn off PIE and Reset
> counters
> +	 */
> +	if ((pie->active == 1) &&
> +		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
> +		pie->active =  0;
> +		pie->in_measurement = 0;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * @brief Update the drop probability from the current and previous queue
> + *        delay estimates
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param time [in] current time (measured in cpu cycles)
> + */
> +static inline void
> +__rte_experimental
> +_calc_drop_probability(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie, uint64_t time)
> +{
> +	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
> +
> +	/* Note: can be implemented using integer multiply.
> +	 * DQ_THRESHOLD is power of 2 value.
> +	 */
> +	double current_qdelay = pie->qlen * (pie->avg_dq_time /
> RTE_DQ_THRESHOLD);
> +
> +	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
> +		RTE_BETA * (current_qdelay - pie->qdelay_old);
> +
> +	if (pie->drop_prob < 0.000001)
> +		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
> +	else if (pie->drop_prob < 0.00001)
> +		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
> +	else if (pie->drop_prob < 0.0001)
> +		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
> +	else if (pie->drop_prob < 0.001)
> +		p = p * 0.03125;                    /* (1/32) = 0.03125   */
> +	else if (pie->drop_prob < 0.01)
> +		p = p * 0.125;                      /* (1/8) = 0.125    */
> +	else if (pie->drop_prob < 0.1)
> +		p = p * 0.5;                        /* (1/2) = 0.5    */
> +
> +	if (pie->drop_prob >= 0.1 && p > 0.02)
> +		p = 0.02;
> +
> +	pie->drop_prob += p;
> +
> +	double qdelay = qdelay_ref * 0.5;
> +
> +	/*  Exponentially decay drop prob when congestion goes away  */
> +	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
> +		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
> +
> +	/* Bound drop probability */
> +	if (pie->drop_prob < 0)
> +		pie->drop_prob = 0;
> +	if (pie->drop_prob > 1)
> +		pie->drop_prob = 1;
> +
> +	pie->qdelay_old = current_qdelay;
> +	pie->last_measurement = time;
> +
> +	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
> +
> +	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
> +}
> +
> +/**
> + * @brief make a decision to drop or enqueue a packet based on probability
> + *        criteria
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + *
> + * @return operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet
> + */
> +static inline int
> +__rte_experimental
> +_rte_pie_drop(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie)
> +{
> +	uint64_t rand_value;
> +	double qdelay = pie_cfg->qdelay_ref * 0.5;
> +
> +	/* PIE is active but the queue is not congested: return 0 */
> +	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
> +		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
> +		return 0;
> +
> +	if (pie->drop_prob == 0)
> +		pie->accu_prob = 0;
> +
> +	/* For practical reasons, drop probability can be further scaled
> according
> +	 * to packet size, but one needs to set a bound to avoid unnecessary
> bias
> +	 * Random drop
> +	 */
> +	pie->accu_prob += pie->drop_prob;
> +
> +	if (pie->accu_prob < 0.85)
> +		return 0;
> +
> +	if (pie->accu_prob >= 8.5)
> +		return 1;
> +
> +	rand_value = rte_rand()/RTE_RAND_MAX;
> +
> +	if ((double)rand_value < pie->drop_prob) {
> +		pie->accu_prob = 0;
> +		return 1;
> +	}
> +
> +	/* No drop */
> +	return 0;
> +}
> +
> +/**
> + * @brief Decides if a new packet should be enqueued or dropped for a non-empty queue
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration parameter
> structure
> + * @param pie [in,out] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time (measured in cpu cycles)
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet based on max threshold criterion
> + * @retval 2 drop the packet based on mark probability criterion
> + */
> +static inline int
> +__rte_experimental
> +rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	uint32_t pkt_len,
> +	const uint64_t time)
> +{
> +	/* Check queue space against the tail drop threshold */
> +	if (pie->qlen >= pie_cfg->tailq_th) {
> +
> +		pie->accu_prob = 0;
> +		return 1;
> +	}
> +
> +	if (pie->active) {
> +		/* Update drop probability after certain interval */
> +		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
> +			_calc_drop_probability(pie_cfg, pie, time);
> +
> +		/* Decide whether packet to be dropped or enqueued */
> +		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance ==
> 0)
> +			return 2;
> +	}
> +
> +	/* When queue occupancy is over a certain threshold, turn on PIE */
> +	if ((pie->active == 0) &&
> +		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
> +		pie->active = 1;
> +		pie->qdelay_old = 0;
> +		pie->drop_prob = 0;
> +		pie->in_measurement = 1;
> +		pie->departed_bytes_count = 0;
> +		pie->avg_dq_time = 0;
> +		pie->last_measurement = time;
> +		pie->burst_allowance = pie_cfg->max_burst;
> +		pie->accu_prob = 0;
> +		pie->start_measurement = time;
> +	}
> +
> +	/* when queue has been idle for a while, turn off PIE and Reset
> counters */
> +	if (pie->active == 1 &&
> +		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
> +		pie->active =  0;
> +		pie->in_measurement = 0;
> +	}
> +
> +	/* Update PIE qlen parameter */
> +	pie->qlen++;
> +	pie->qlen_bytes += pkt_len;
> +
> +	/* No drop */
> +	return 0;
> +}
> +
> +/**
> + * @brief Decides if new packet should be enqueued or dropped
> + * Updates run time data and gives verdict whether to enqueue or drop the
> packet.
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration parameter
> structure
> + * @param pie [in,out] data pointer to PIE runtime data
> + * @param qlen [in] queue length
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time stamp (measured in cpu cycles)
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet based on max threshold criterion
> + * @retval 2 drop the packet based on drop probability criterion
> + */
> +static inline int
> +__rte_experimental
> +rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	const unsigned int qlen,
> +	uint32_t pkt_len,
> +	const uint64_t time)
> +{
> +	RTE_ASSERT(pie_cfg != NULL);
> +	RTE_ASSERT(pie != NULL);
> +
> +	if (qlen != 0)
> +		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len,
> time);
> +	else
> +		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
> +}
> +
> +/**
> + * @brief PIE rate estimation method
> + * Called on each packet departure.
> + *
> + * @param pie [in] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time stamp in cpu cycles
> + */
> +static inline void
> +__rte_experimental
> +rte_pie_dequeue(struct rte_pie *pie,
> +	uint32_t pkt_len,
> +	uint64_t time)
> +{
> +	/* Dequeue rate estimation */
> +	if (pie->in_measurement) {
> +		pie->departed_bytes_count += pkt_len;
> +
> +		/* Start a new measurement cycle when enough packets */
> +		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
> +			uint64_t dq_time = time - pie->start_measurement;
> +
> +			if (pie->avg_dq_time == 0)
> +				pie->avg_dq_time = dq_time;
> +			else
> +				pie->avg_dq_time = dq_time *
> RTE_DQ_WEIGHT + pie->avg_dq_time
> +					* (1 - RTE_DQ_WEIGHT);
> +
> +			pie->in_measurement = 0;
> +		}
> +	}
> +
> +	/* Start measurement cycle when enough data in the queue */
> +	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
> +		pie->in_measurement = 1;
> +		pie->start_measurement = time;
> +		pie->departed_bytes_count = 0;
> +	}
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* __RTE_PIE_H_INCLUDED__ */
> diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
> index a858f61f95..320435ed91 100644
> --- a/lib/sched/rte_sched.c
> +++ b/lib/sched/rte_sched.c
> @@ -89,8 +89,12 @@ struct rte_sched_queue {
> 
>  struct rte_sched_queue_extra {
>  	struct rte_sched_queue_stats stats;
> -#ifdef RTE_SCHED_RED
> -	struct rte_red red;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red red;
> +		struct rte_pie pie;
> +	};
>  #endif
>  };
> 
> @@ -183,8 +187,13 @@ struct rte_sched_subport {
>  	/* Pipe queues size */
>  	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	struct rte_red_config
> red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +	enum rte_sched_aqm_mode aqm;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red_config
> wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		struct rte_pie_config
> pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif
> 
>  	/* Scheduling loop detection */
> @@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port
> *port, uint32_t n_subports)
>  	rte_free(port);
>  }
> 
> +#ifdef RTE_SCHED_AQM
> +
> +static int
> +rte_sched_red_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +
> +		uint32_t j;
> +
> +		for (j = 0; j < RTE_COLORS; j++) {
> +			/* if min/max are both zero, then RED is disabled */
> +			if ((params->wred_params[i][j].min_th |
> +				 params->wred_params[i][j].max_th) == 0) {
> +				continue;
> +			}
> +
> +			if (rte_red_config_init(&s->wred_config[i][j],
> +				params->wred_params[i][j].wq_log2,
> +				params->wred_params[i][j].min_th,
> +				params->wred_params[i][j].max_th,
> +				params->wred_params[i][j].maxp_inv) != 0) {
> +				rte_sched_free_memory(port, n_subports);
> +
> +				RTE_LOG(NOTICE, SCHED,
> +				"%s: RED configuration init fails\n",
> __func__);
> +				return -EINVAL;
> +			}
> +		}
> +	}
> +	s->aqm = RTE_SCHED_AQM_WRED;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_pie_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +		if (params->pie_params[i].tailq_th > params->qsize[i]) {
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE tailq threshold incorrect\n", __func__);
> +			return -EINVAL;
> +		}
> +
> +		if (rte_pie_config_init(&s->pie_config[i],
> +			params->pie_params[i].qdelay_ref,
> +			params->pie_params[i].dp_update_interval,
> +			params->pie_params[i].max_burst,
> +			params->pie_params[i].tailq_th) != 0) {
> +			rte_sched_free_memory(port, n_subports);
> +
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE configuration init fails\n", __func__);
> +			return -EINVAL;
> +			}
> +	}
> +	s->aqm = RTE_SCHED_AQM_PIE;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_aqm_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	if (params->aqm == RTE_SCHED_AQM_WRED)
> +		return rte_sched_red_config(port, s, params, n_subports);
> +
> +	else if (params->aqm == RTE_SCHED_AQM_PIE)
> +		return rte_sched_pie_config(port, s, params, n_subports);
> +
> +	return -EINVAL;
> +}
> +#endif
> +
>  int
>  rte_sched_subport_config(struct rte_sched_port *port,
>  	uint32_t subport_id,
> @@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct
> rte_sched_port *port,
>  		s->n_pipe_profiles = params->n_pipe_profiles;
>  		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -			uint32_t j;
> -
> -			for (j = 0; j < RTE_COLORS; j++) {
> -			/* if min/max are both zero, then RED is disabled */
> -				if ((params->red_params[i][j].min_th |
> -				     params->red_params[i][j].max_th) == 0) {
> -					continue;
> -				}
> -
> -				if (rte_red_config_init(&s->red_config[i][j],
> -				    params->red_params[i][j].wq_log2,
> -				    params->red_params[i][j].min_th,
> -				    params->red_params[i][j].max_th,
> -				    params->red_params[i][j].maxp_inv) != 0)
> {
> -					RTE_LOG(NOTICE, SCHED,
> -					"%s: RED configuration init fails\n",
> -					__func__);
> -					ret = -EINVAL;
> -					goto out;
> -				}
> -			}
> +#ifdef RTE_SCHED_AQM
> +		status = rte_sched_aqm_config(port, s, params,
> n_subports);
> +		if (status) {
> +			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration
> fails\n", __func__);
> +			return status;
>  		}
>  #endif
> 
> @@ -1718,29 +1794,20 @@ rte_sched_port_update_subport_stats(struct
> rte_sched_port *port,
>  	subport->stats.n_bytes_tc[tc_index] += pkt_len;
>  }
> 
> -#ifdef RTE_SCHED_RED
> -static inline void
> -rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> *port,
> -	struct rte_sched_subport *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
>  static inline void
>  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> *port,
>  	struct rte_sched_subport *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)
>  {
>  	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
>  	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
> -#ifdef RTE_SCHED_RED
> -	subport->stats.n_pkts_red_dropped[tc_index] += red;
> +#ifdef RTE_SCHED_AQM
> +	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
>  #endif
>  }
> 
> @@ -1756,58 +1823,61 @@ rte_sched_port_update_queue_stats(struct
> rte_sched_subport *subport,
>  	qe->stats.n_bytes += pkt_len;
>  }
> 
> -#ifdef RTE_SCHED_RED
> -static inline void
> -rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
>  static inline void
>  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)
>  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	qe->stats.n_pkts_dropped += 1;
>  	qe->stats.n_bytes_dropped += pkt_len;
> -#ifdef RTE_SCHED_RED
> -	qe->stats.n_pkts_red_dropped += red;
> +#ifdef RTE_SCHED_AQM
> +	qe->stats.n_pkts_aqm_dropped += drops;
>  #endif
>  }
> 
>  #endif /* RTE_SCHED_COLLECT_STATS */
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static inline int
> -rte_sched_port_red_drop(struct rte_sched_port *port,
> +rte_sched_port_aqm_drop(struct rte_sched_port *port,
>  	struct rte_sched_subport *subport,
>  	struct rte_mbuf *pkt,
>  	uint32_t qindex,
>  	uint16_t qlen)
>  {
>  	struct rte_sched_queue_extra *qe;
> -	struct rte_red_config *red_cfg;
> -	struct rte_red *red;
>  	uint32_t tc_index;
> -	enum rte_color color;
> 
>  	tc_index = rte_sched_port_pipe_tc(port, qindex);
> -	color = rte_sched_port_pkt_read_color(pkt);
> -	red_cfg = &subport->red_config[tc_index][color];
> +	qe = subport->queue_extra + qindex;
> 
> -	if ((red_cfg->min_th | red_cfg->max_th) == 0)
> -		return 0;
> +	/* WRED */
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red_config *red_cfg;
> +		struct rte_red *red;
> +		enum rte_color color;
> 
> -	qe = subport->queue_extra + qindex;
> -	red = &qe->red;
> +		color = rte_sched_port_pkt_read_color(pkt);
> +		red_cfg = &subport->wred_config[tc_index][color];
> +
> +		if ((red_cfg->min_th | red_cfg->max_th) == 0)
> +			return 0;
> 
> -	return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +		red = &qe->red;
> +
> +		return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +	}
> +
> +	/* PIE */
> +	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
> +	struct rte_pie *pie = &qe->pie;
> +
> +	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
>  }
> 
>  static inline void
> @@ -1815,14 +1885,29 @@
> rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port
> *port,
>  	struct rte_sched_subport *subport, uint32_t qindex)
>  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
> -	struct rte_red *red = &qe->red;
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red *red = &qe->red;
> +
> +		rte_red_mark_queue_empty(red, port->time);
> +	}
> +}
> +
> +static inline void
> +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
> +uint32_t qindex, uint32_t pkt_len, uint64_t time) {
> +	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
> +	struct rte_pie *pie = &qe->pie;
> 
> -	rte_red_mark_queue_empty(red, port->time);
> +	/* Update queue length */
> +	pie->qlen -= 1;
> +	pie->qlen_bytes -= pkt_len;
> +
> +	rte_pie_dequeue(pie, pkt_len, time);
>  }
> 
>  #else
> 
> -static inline int rte_sched_port_red_drop(struct rte_sched_port *port
> __rte_unused,
> +static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port
> __rte_unused,
>  	struct rte_sched_subport *subport __rte_unused,
>  	struct rte_mbuf *pkt __rte_unused,
>  	uint32_t qindex __rte_unused,
> @@ -1833,7 +1918,7 @@ static inline int rte_sched_port_red_drop(struct
> rte_sched_port *port __rte_unus
> 
>  #define rte_sched_port_set_queue_empty_timestamp(port, subport,
> qindex)
> 
> -#endif /* RTE_SCHED_RED */
> +#endif /* RTE_SCHED_AQM */
> 
>  #ifdef RTE_SCHED_DEBUG
> 
> @@ -1929,7 +2014,7 @@ rte_sched_port_enqueue_qwa(struct
> rte_sched_port *port,
>  	qlen = q->qw - q->qr;
> 
>  	/* Drop the packet (and update drop stats) when queue is full */
> -	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex,
> qlen) ||
> +	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex,
> qlen) ||
>  		     (qlen >= qsize))) {
>  		rte_pktmbuf_free(pkt);
>  #ifdef RTE_SCHED_COLLECT_STATS
> @@ -2402,6 +2487,7 @@ grinder_schedule(struct rte_sched_port *port,
>  {
>  	struct rte_sched_grinder *grinder = subport->grinder + pos;
>  	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
> +	uint32_t qindex = grinder->qindex[grinder->qpos];
>  	struct rte_mbuf *pkt = grinder->pkt;
>  	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
>  	uint32_t be_tc_active;
> @@ -2421,15 +2507,19 @@ grinder_schedule(struct rte_sched_port *port,
>  		(pkt_len * grinder->wrr_cost[grinder->qpos]) &
> be_tc_active;
> 
>  	if (queue->qr == queue->qw) {
> -		uint32_t qindex = grinder->qindex[grinder->qpos];
> -
>  		rte_bitmap_clear(subport->bmp, qindex);
>  		grinder->qmask &= ~(1 << grinder->qpos);
>  		if (be_tc_active)
>  			grinder->wrr_mask[grinder->qpos] = 0;
> +
>  		rte_sched_port_set_queue_empty_timestamp(port,
> subport, qindex);
>  	}
> 
> +#ifdef RTE_SCHED_AQM
> +	if (subport->aqm == RTE_SCHED_AQM_PIE)
> +		rte_sched_port_pie_dequeue(subport, qindex, pkt_len,
> port->time_cpu_cycles);
> +#endif
> +
>  	/* Reset pipe loop detection */
>  	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
>  	grinder->productive = 1;
> diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
> index c1a772b70c..a5fe6266cd 100644
> --- a/lib/sched/rte_sched.h
> +++ b/lib/sched/rte_sched.h
> @@ -61,9 +61,10 @@ extern "C" {
>  #include <rte_mbuf.h>
>  #include <rte_meter.h>
> 
> -/** Random Early Detection (RED) */
> -#ifdef RTE_SCHED_RED
> +/** Active Queue Management */
> +#ifdef RTE_SCHED_AQM
>  #include "rte_red.h"
> +#include "rte_pie.h"
>  #endif
> 
>  /** Maximum number of queues per pipe.
> @@ -110,6 +111,28 @@ extern "C" {
>  #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
>  #endif
> 
> +/**
> + * Active Queue Management (AQM) mode
> + *
> + * This is used for controlling the admission of packets into a packet queue
> or
> + * group of packet queues on congestion.
> + *
> + * The *Random Early Detection (RED)* algorithm works by proactively
> dropping
> + * more and more input packets as the queue occupancy builds up. When
> the queue
> + * is full or almost full, RED effectively works as *tail drop*. The *Weighted
> + * RED* algorithm uses a separate set of RED thresholds for each packet
> color.
> + *
> + * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
> + * drops a packet at the onset of the congestion and tries to control the
> + * latency around the target value. The congestion detection, however, is
> based
> + * on the queueing latency instead of the queue length like RED. For more
> + * information, refer to RFC 8033.
> + */
> +enum rte_sched_aqm_mode {
> +	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection
> (WRED) */
> +	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller
> Enhanced (PIE) */
> +};
> +
>  /*
>   * Pipe configuration parameters. The period and credits_per_period
>   * parameters are measured in bytes, with one byte meaning the time
> @@ -174,9 +197,17 @@ struct rte_sched_subport_params {
>  	/** Max allowed profiles in the pipe profile table */
>  	uint32_t n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -	/** RED parameters */
> -	struct rte_red_params
> red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +#ifdef RTE_SCHED_AQM
> +	/** Active Queue Management mode */
> +	enum rte_sched_aqm_mode aqm;
> +
> +	RTE_STD_C11
> +	union {
> +		/** WRED parameters */
> +		struct rte_red_params
> wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		/** PIE parameters */
> +		struct rte_pie_params
> pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif
>  };
> 
> @@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
>  	/** Number of bytes dropped for each traffic class */
>  	uint64_t
> n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	/** Number of packets dropped by red */
> -	uint64_t
> n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +#ifdef RTE_SCHED_AQM
> +	/** Number of packets dropped by active queue management
> scheme */
> +	uint64_t
> n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
>  #endif
>  };
> 
> @@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
>  	/** Packets dropped */
>  	uint64_t n_pkts_dropped;
> 
> -#ifdef RTE_SCHED_RED
> -	/** Packets dropped by RED */
> -	uint64_t n_pkts_red_dropped;
> +#ifdef RTE_SCHED_AQM
> +	/** Packets dropped by active queue management scheme */
> +	uint64_t n_pkts_aqm_dropped;
>  #endif
> 
>  	/** Bytes successfully written */
> diff --git a/lib/sched/version.map b/lib/sched/version.map
> index ace284b7de..3422821ac8 100644
> --- a/lib/sched/version.map
> +++ b/lib/sched/version.map
> @@ -30,4 +30,7 @@ EXPERIMENTAL {
>  	rte_sched_subport_pipe_profile_add;
>  	# added in 20.11
>  	rte_sched_port_subport_profile_add;
> +
> +	rte_pie_rt_data_init;
> +	rte_pie_config_init;
>  };
> --
> 2.25.1

NACK

I see that none of my previous comments from the V4 review got implemented. Is there any reason to silently discard all of them?

https://patches.dpdk.org/project/dpdk/patch/20210705080421.18736-2-wojciechx.liguzinski@intel.com/

I did not see any reply from you on my comments, so I assumed that you accepted and implemented most of them, but I see that none of them were picked up.

Also, I don't see any revision history; only the version counter gets incremented, so reviewing a new version of your patch requires re-reading every line of code, which is time-consuming. Could you please add a revision history?

Thanks,
Cristian
  
Liguzinski, WojciechX Oct. 12, 2021, 6:34 p.m. UTC | #2
Hi Cristian,

-----Original Message-----
From: Dumitrescu, Cristian <cristian.dumitrescu@intel.com> 
Sent: Tuesday, October 12, 2021 6:00 PM
To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>; dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>
Cc: Ajmera, Megha <megha.ajmera@intel.com>
Subject: RE: [PATCH v9 1/5] sched: add PIE based congestion management

Hi Wojchech,

> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Monday, October 11, 2021 8:56 AM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; 
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v9 1/5] sched: add PIE based congestion management
> 
> Implement PIE based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_pie.c                      |  82 +++++
>  lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
>  lib/sched/rte_sched.c                    | 228 +++++++++----
>  lib/sched/rte_sched.h                    |  53 ++-
>  lib/sched/version.map                    |   3 +
>  7 files changed, 685 insertions(+), 90 deletions(-)  create mode 
> 100644 lib/sched/rte_pie.c  create mode 100644 lib/sched/rte_pie.h
> 
> diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c
> b/drivers/net/softnic/rte_eth_softnic_tm.c
> index 90baba15ce..5b6c4e6d4b 100644
> --- a/drivers/net/softnic/rte_eth_softnic_tm.c
> +++ b/drivers/net/softnic/rte_eth_softnic_tm.c
> @@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
>  	return 0;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
>  #define WRED_SUPPORTED						1
>  #else
>  #define WRED_SUPPORTED						0
> @@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, 
> uint32_t tc_id)
>  	return NULL;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static void
>  wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id) @@ 
> -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t 
> subport_id)
>  	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE;
> tc_id++)
>  		for (color = RTE_COLOR_GREEN; color < RTE_COLORS;
> color++) {
>  			struct rte_red_params *dst =
> -				&pp->red_params[tc_id][color];
> +				&pp->wred_params[tc_id][color];
>  			struct tm_wred_profile *src_wp =
>  				tm_tc_wred_profile_get(dev, tc_id);
>  			struct rte_tm_red_params *src =
> diff --git a/lib/sched/meson.build b/lib/sched/meson.build index 
> b24f7b8775..e7ae9bcf19 100644
> --- a/lib/sched/meson.build
> +++ b/lib/sched/meson.build
> @@ -1,11 +1,7 @@
>  # SPDX-License-Identifier: BSD-3-Clause  # Copyright(c) 2017 Intel 
> Corporation
> 
> -sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c') -headers 
> = files(
> -        'rte_approx.h',
> -        'rte_red.h',
> -        'rte_sched.h',
> -        'rte_sched_common.h',
> -)
> +sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 
> +'rte_pie.c') headers = files('rte_sched.h', 'rte_sched_common.h',
> +		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
>  deps += ['mbuf', 'meter']
> diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c new file mode 
> 100644 index 0000000000..2fcecb2db4
> --- /dev/null
> +++ b/lib/sched/rte_pie.c
> @@ -0,0 +1,82 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation  */
> +
> +#include <stdlib.h>
> +
> +#include "rte_pie.h"
> +#include <rte_common.h>
> +#include <rte_cycles.h>
> +#include <rte_malloc.h>
> +
> +#ifdef __INTEL_COMPILER
> +#pragma warning(disable:2259) /* conversion may lose significant bits 
> +*/ #endif
> +
> +void
> +rte_pie_rt_data_init(struct rte_pie *pie) {
> +	if (pie == NULL) {
> +		/* Allocate memory to use the PIE data structure */
> +		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
> +
> +		if (pie == NULL)
> +			RTE_LOG(ERR, SCHED, "%s: Memory allocation
> fails\n", __func__);
> +	}
> +
> +	pie->active = 0;
> +	pie->in_measurement = 0;
> +	pie->departed_bytes_count = 0;
> +	pie->start_measurement = 0;
> +	pie->last_measurement = 0;
> +	pie->qlen = 0;
> +	pie->avg_dq_time = 0;
> +	pie->burst_allowance = 0;
> +	pie->qdelay_old = 0;
> +	pie->drop_prob = 0;
> +	pie->accu_prob = 0;
> +}
> +
> +int
> +rte_pie_config_init(struct rte_pie_config *pie_cfg,
> +	const uint16_t qdelay_ref,
> +	const uint16_t dp_update_interval,
> +	const uint16_t max_burst,
> +	const uint16_t tailq_th)
> +{
> +	uint64_t tsc_hz = rte_get_tsc_hz();
> +
> +	if (pie_cfg == NULL)
> +		return -1;
> +
> +	if (qdelay_ref <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for qdelay_ref\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (dp_update_interval <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for dp_update_interval\n",
> __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (max_burst <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for max_burst\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (tailq_th <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for tailq_th\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
> +	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) /
> 1000;
> +	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
> +	pie_cfg->tailq_th = tailq_th;
> +
> +	return 0;
> +}
> diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h new file mode 
> 100644 index 0000000000..f83c95664f
> --- /dev/null
> +++ b/lib/sched/rte_pie.h
> @@ -0,0 +1,393 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation  */
> +
> +#ifndef __RTE_PIE_H_INCLUDED__
> +#define __RTE_PIE_H_INCLUDED__
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/**
> + * @file
> + * RTE Proportional Integral controller Enhanced (PIE)
> + *
> + *
> + ***/
> +
> +#include <stdint.h>
> +
> +#include <rte_random.h>
> +#include <rte_debug.h>
> +
> +#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
> +				     * to start measurement cycle (bytes)
> +				     */
> +#define RTE_DQ_WEIGHT      0.25    /**< Weight
> (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
> +#define RTE_ALPHA          0.125   /**< Weights in drop probability
> calculations */
> +#define RTE_BETA           1.25    /**< Weights in drop probability calculations
> */
> +#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number
> */
> +
> +
> +/**
> + * PIE configuration parameters passed by user
> + *
> + */
> +struct rte_pie_params {
> +	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
> +	uint16_t dp_update_interval;   /**< Update interval for drop
> probability (milliseconds) */
> +	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds)
> */
> +	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
> +};
> +
> +/**
> + * PIE configuration parameters
> + *
> + */
> +struct rte_pie_config {
> +	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
> +	uint64_t dp_update_interval;   /**< Update interval for drop
> probability (in CPU cycles) */
> +	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.)
> */
> +	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
> +};
> +
> +/**
> + * PIE run-time data
> + */
> +struct rte_pie {
> +	uint16_t active;               /**< Flag for activating/deactivating pie */
> +	uint16_t in_measurement;       /**< Flag for activation of
> measurement cycle */
> +	uint32_t departed_bytes_count; /**< Number of bytes departed in
> current measurement cycle */
> +	uint64_t start_measurement;    /**< Time to start to measurement
> cycle (in cpu cycles) */
> +	uint64_t last_measurement;     /**< Time of last measurement (in
> cpu cycles) */
> +	uint64_t qlen;                 /**< Queue length (packets count) */
> +	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
> +	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in
> cpu cycles) */
> +	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
> +	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
> +	double drop_prob;              /**< Current packet drop probability */
> +	double accu_prob;              /**< Accumulated packet drop probability
> */
> +};
> +
> +/**
> + * @brief Initialises run-time data
> + *
> + * @param pie [in,out] data pointer to PIE runtime data  */ void 
> +__rte_experimental rte_pie_rt_data_init(struct rte_pie *pie);
> +
> +/**
> + * @brief Configures a single PIE configuration parameter structure.
> + *
> + * @param pie_cfg [in,out] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param qdelay_ref [in]  latency target(milliseconds)
> + * @param dp_update_interval [in] update interval for drop 
> + probability
> (milliseconds)
> + * @param max_burst [in] maximum burst allowance (milliseconds)
> + * @param tailq_th [in] tail drop threshold for the queue (number of
> packets)
> + *
> + * @return Operation status
> + * @retval 0 success
> + * @retval !0 error
> + */
> +int
> +__rte_experimental
> +rte_pie_config_init(struct rte_pie_config *pie_cfg,
> +	const uint16_t qdelay_ref,
> +	const uint16_t dp_update_interval,
> +	const uint16_t max_burst,
> +	const uint16_t tailq_th);
> +
> +/**
> + * @brief Decides packet enqueue when queue is empty
> + *
> + * Note: packet is never dropped in this particular case.
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval !0 drop the packet
> + */
> +static inline int
> +__rte_experimental
> +rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	uint32_t pkt_len)
> +{
> +	RTE_ASSERT(pkt_len != NULL);
> +
> +	/* Update the PIE qlen parameter */
> +	pie->qlen++;
> +	pie->qlen_bytes += pkt_len;
> +
> +	/**
> +	 * If the queue has been idle for a while, turn off PIE and Reset
> counters
> +	 */
> +	if ((pie->active == 1) &&
> +		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
> +		pie->active =  0;
> +		pie->in_measurement = 0;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * @brief Recalculates the packet drop probability from the current and
> + *        previous queue delay estimates
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param time [in] current time (measured in cpu cycles)  */ static 
> +inline void __rte_experimental _calc_drop_probability(const struct 
> +rte_pie_config *pie_cfg,
> +	struct rte_pie *pie, uint64_t time)
> +{
> +	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
> +
> +	/* Note: can be implemented using integer multiply.
> +	 * DQ_THRESHOLD is power of 2 value.
> +	 */
> +	double current_qdelay = pie->qlen * (pie->avg_dq_time /
> RTE_DQ_THRESHOLD);
> +
> +	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
> +		RTE_BETA * (current_qdelay - pie->qdelay_old);
> +
> +	if (pie->drop_prob < 0.000001)
> +		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
> +	else if (pie->drop_prob < 0.00001)
> +		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
> +	else if (pie->drop_prob < 0.0001)
> +		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
> +	else if (pie->drop_prob < 0.001)
> +		p = p * 0.03125;                    /* (1/32) = 0.03125   */
> +	else if (pie->drop_prob < 0.01)
> +		p = p * 0.125;                      /* (1/8) = 0.125    */
> +	else if (pie->drop_prob < 0.1)
> +		p = p * 0.5;                        /* (1/2) = 0.5    */
> +
> +	if (pie->drop_prob >= 0.1 && p > 0.02)
> +		p = 0.02;
> +
> +	pie->drop_prob += p;
> +
> +	double qdelay = qdelay_ref * 0.5;
> +
> +	/*  Exponentially decay drop prob when congestion goes away  */
> +	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
> +		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
> +
> +	/* Bound drop probability */
> +	if (pie->drop_prob < 0)
> +		pie->drop_prob = 0;
> +	if (pie->drop_prob > 1)
> +		pie->drop_prob = 1;
> +
> +	pie->qdelay_old = current_qdelay;
> +	pie->last_measurement = time;
> +
> +	uint64_t burst_allowance = pie->burst_allowance - pie_cfg-
> >dp_update_interval;
> +
> +	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0; 
> +}
> +
> +/**
> + * @brief make a decision to drop or enqueue a packet based on probability
> + *        criteria
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + *
> + * @return operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet
> + */
> +static inline int
> +__rte_experimental
> +_rte_pie_drop(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie)
> +{
> +	uint64_t rand_value;
> +	double qdelay = pie_cfg->qdelay_ref * 0.5;
> +
> +	/* PIE is active but the queue is not congested: return 0 */
> +	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
> +		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
> +		return 0;
> +
> +	if (pie->drop_prob == 0)
> +		pie->accu_prob = 0;
> +
> +	/* For practical reasons, drop probability can be further scaled
> according
> +	 * to packet size, but one needs to set a bound to avoid unnecessary
> bias
> +	 * Random drop
> +	 */
> +	pie->accu_prob += pie->drop_prob;
> +
> +	if (pie->accu_prob < 0.85)
> +		return 0;
> +
> +	if (pie->accu_prob >= 8.5)
> +		return 1;
> +
> +	rand_value = rte_rand()/RTE_RAND_MAX;
> +
> +	if ((double)rand_value < pie->drop_prob) {
> +		pie->accu_prob = 0;
> +		return 1;
> +	}
> +
> +	/* No drop */
> +	return 0;
> +}
> +
> +/**
> + * @brief Decides if new packet should be enqueued or dropped for non-
> empty queue
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> + parameter
> structure
> + * @param pie [in,out] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time (measured in cpu cycles)
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet based on max threshold criterion
> + * @retval 2 drop the packet based on mark probability criterion  */ 
> +static inline int __rte_experimental rte_pie_enqueue_nonempty(const 
> +struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	uint32_t pkt_len,
> +	const uint64_t time)
> +{
> +	/* Check queue space against the tail drop threshold */
> +	if (pie->qlen >= pie_cfg->tailq_th) {
> +
> +		pie->accu_prob = 0;
> +		return 1;
> +	}
> +
> +	if (pie->active) {
> +		/* Update drop probability after certain interval */
> +		if ((time - pie->last_measurement) >= pie_cfg-
> >dp_update_interval)
> +			_calc_drop_probability(pie_cfg, pie, time);
> +
> +		/* Decide whether packet to be dropped or enqueued */
> +		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance ==
> 0)
> +			return 2;
> +	}
> +
> +	/* When queue occupancy is over a certain threshold, turn on PIE */
> +	if ((pie->active == 0) &&
> +		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
> +		pie->active = 1;
> +		pie->qdelay_old = 0;
> +		pie->drop_prob = 0;
> +		pie->in_measurement = 1;
> +		pie->departed_bytes_count = 0;
> +		pie->avg_dq_time = 0;
> +		pie->last_measurement = time;
> +		pie->burst_allowance = pie_cfg->max_burst;
> +		pie->accu_prob = 0;
> +		pie->start_measurement = time;
> +	}
> +
> +	/* when queue has been idle for a while, turn off PIE and Reset
> counters */
> +	if (pie->active == 1 &&
> +		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
> +		pie->active =  0;
> +		pie->in_measurement = 0;
> +	}
> +
> +	/* Update PIE qlen parameter */
> +	pie->qlen++;
> +	pie->qlen_bytes += pkt_len;
> +
> +	/* No drop */
> +	return 0;
> +}
> +
> +/**
> + * @brief Decides if new packet should be enqueued or dropped
> + * Updates run time data and gives verdict whether to enqueue or drop 
> +the
> packet.
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> + parameter
> structure
> + * @param pie [in,out] data pointer to PIE runtime data
> + * @param qlen [in] queue length
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time stamp (measured in cpu cycles)
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet based on drop probability criteria  */ 
> +static inline int __rte_experimental rte_pie_enqueue(const struct 
> +rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	const unsigned int qlen,
> +	uint32_t pkt_len,
> +	const uint64_t time)
> +{
> +	RTE_ASSERT(pie_cfg != NULL);
> +	RTE_ASSERT(pie != NULL);
> +
> +	if (qlen != 0)
> +		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len,
> time);
> +	else
> +		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len); }
> +
> +/**
> + * @brief PIE rate estimation method
> + * Called on each packet departure.
> + *
> + * @param pie [in] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time stamp in cpu cycles  */ static 
> +inline void __rte_experimental rte_pie_dequeue(struct rte_pie *pie,
> +	uint32_t pkt_len,
> +	uint64_t time)
> +{
> +	/* Dequeue rate estimation */
> +	if (pie->in_measurement) {
> +		pie->departed_bytes_count += pkt_len;
> +
> +		/* Start a new measurement cycle when enough packets */
> +		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
> +			uint64_t dq_time = time - pie->start_measurement;
> +
> +			if (pie->avg_dq_time == 0)
> +				pie->avg_dq_time = dq_time;
> +			else
> +				pie->avg_dq_time = dq_time *
> RTE_DQ_WEIGHT + pie->avg_dq_time
> +					* (1 - RTE_DQ_WEIGHT);
> +
> +			pie->in_measurement = 0;
> +		}
> +	}
> +
> +	/* Start measurement cycle when enough data in the queue */
> +	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie-
> >in_measurement == 0)) {
> +		pie->in_measurement = 1;
> +		pie->start_measurement = time;
> +		pie->departed_bytes_count = 0;
> +	}
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* __RTE_PIE_H_INCLUDED__ */
> diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c index 
> a858f61f95..320435ed91 100644
> --- a/lib/sched/rte_sched.c
> +++ b/lib/sched/rte_sched.c
> @@ -89,8 +89,12 @@ struct rte_sched_queue {
> 
>  struct rte_sched_queue_extra {
>  	struct rte_sched_queue_stats stats;
> -#ifdef RTE_SCHED_RED
> -	struct rte_red red;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red red;
> +		struct rte_pie pie;
> +	};
>  #endif
>  };
> 
> @@ -183,8 +187,13 @@ struct rte_sched_subport {
>  	/* Pipe queues size */
>  	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	struct rte_red_config
> red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +	enum rte_sched_aqm_mode aqm;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red_config
> wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		struct rte_pie_config
> pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif
> 
>  	/* Scheduling loop detection */
> @@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port 
> *port, uint32_t n_subports)
>  	rte_free(port);
>  }
> 
> +#ifdef RTE_SCHED_AQM
> +
> +static int
> +rte_sched_red_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +
> +		uint32_t j;
> +
> +		for (j = 0; j < RTE_COLORS; j++) {
> +			/* if min/max are both zero, then RED is disabled */
> +			if ((params->wred_params[i][j].min_th |
> +				 params->wred_params[i][j].max_th) == 0) {
> +				continue;
> +			}
> +
> +			if (rte_red_config_init(&s->wred_config[i][j],
> +				params->wred_params[i][j].wq_log2,
> +				params->wred_params[i][j].min_th,
> +				params->wred_params[i][j].max_th,
> +				params->wred_params[i][j].maxp_inv) != 0) {
> +				rte_sched_free_memory(port, n_subports);
> +
> +				RTE_LOG(NOTICE, SCHED,
> +				"%s: RED configuration init fails\n",
> __func__);
> +				return -EINVAL;
> +			}
> +		}
> +	}
> +	s->aqm = RTE_SCHED_AQM_WRED;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_pie_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +		if (params->pie_params[i].tailq_th > params->qsize[i]) {
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE tailq threshold incorrect\n", __func__);
> +			return -EINVAL;
> +		}
> +
> +		if (rte_pie_config_init(&s->pie_config[i],
> +			params->pie_params[i].qdelay_ref,
> +			params->pie_params[i].dp_update_interval,
> +			params->pie_params[i].max_burst,
> +			params->pie_params[i].tailq_th) != 0) {
> +			rte_sched_free_memory(port, n_subports);
> +
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE configuration init fails\n", __func__);
> +			return -EINVAL;
> +			}
> +	}
> +	s->aqm = RTE_SCHED_AQM_PIE;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_aqm_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	if (params->aqm == RTE_SCHED_AQM_WRED)
> +		return rte_sched_red_config(port, s, params, n_subports);
> +
> +	else if (params->aqm == RTE_SCHED_AQM_PIE)
> +		return rte_sched_pie_config(port, s, params, n_subports);
> +
> +	return -EINVAL;
> +}
> +#endif
> +
>  int
>  rte_sched_subport_config(struct rte_sched_port *port,
>  	uint32_t subport_id,
> @@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port 
> *port,
>  		s->n_pipe_profiles = params->n_pipe_profiles;
>  		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -			uint32_t j;
> -
> -			for (j = 0; j < RTE_COLORS; j++) {
> -			/* if min/max are both zero, then RED is disabled */
> -				if ((params->red_params[i][j].min_th |
> -				     params->red_params[i][j].max_th) == 0) {
> -					continue;
> -				}
> -
> -				if (rte_red_config_init(&s->red_config[i][j],
> -				    params->red_params[i][j].wq_log2,
> -				    params->red_params[i][j].min_th,
> -				    params->red_params[i][j].max_th,
> -				    params->red_params[i][j].maxp_inv) != 0)
> {
> -					RTE_LOG(NOTICE, SCHED,
> -					"%s: RED configuration init fails\n",
> -					__func__);
> -					ret = -EINVAL;
> -					goto out;
> -				}
> -			}
> +#ifdef RTE_SCHED_AQM
> +		status = rte_sched_aqm_config(port, s, params,
> n_subports);
> +		if (status) {
> +			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration
> fails\n", __func__);
> +			return status;
>  		}
>  #endif
> 
> @@ -1718,29 +1794,20 @@ rte_sched_port_update_subport_stats(struct
> rte_sched_port *port,
>  	subport->stats.n_bytes_tc[tc_index] += pkt_len;  }
> 
> -#ifdef RTE_SCHED_RED
> -static inline void
> -rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port 
> *port,
> -	struct rte_sched_subport *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
>  static inline void
>  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port 
> *port,
>  	struct rte_sched_subport *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)
>  {
>  	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
>  	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len; -#ifdef 
> RTE_SCHED_RED
> -	subport->stats.n_pkts_red_dropped[tc_index] += red;
> +#ifdef RTE_SCHED_AQM
> +	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
>  #endif
>  }
> 
> @@ -1756,58 +1823,61 @@ rte_sched_port_update_queue_stats(struct
> rte_sched_subport *subport,
>  	qe->stats.n_bytes += pkt_len;
>  }
> 
> -#ifdef RTE_SCHED_RED
> -static inline void
> -rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport 
> *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
>  static inline void
>  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport 
> *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)
>  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	qe->stats.n_pkts_dropped += 1;
>  	qe->stats.n_bytes_dropped += pkt_len; -#ifdef RTE_SCHED_RED
> -	qe->stats.n_pkts_red_dropped += red;
> +#ifdef RTE_SCHED_AQM
> +	qe->stats.n_pkts_aqm_dropped += drops;
>  #endif
>  }
> 
>  #endif /* RTE_SCHED_COLLECT_STATS */
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static inline int
> -rte_sched_port_red_drop(struct rte_sched_port *port,
> +rte_sched_port_aqm_drop(struct rte_sched_port *port,
>  	struct rte_sched_subport *subport,
>  	struct rte_mbuf *pkt,
>  	uint32_t qindex,
>  	uint16_t qlen)
>  {
>  	struct rte_sched_queue_extra *qe;
> -	struct rte_red_config *red_cfg;
> -	struct rte_red *red;
>  	uint32_t tc_index;
> -	enum rte_color color;
> 
>  	tc_index = rte_sched_port_pipe_tc(port, qindex);
> -	color = rte_sched_port_pkt_read_color(pkt);
> -	red_cfg = &subport->red_config[tc_index][color];
> +	qe = subport->queue_extra + qindex;
> 
> -	if ((red_cfg->min_th | red_cfg->max_th) == 0)
> -		return 0;
> +	/* WRED */
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red_config *red_cfg;
> +		struct rte_red *red;
> +		enum rte_color color;
> 
> -	qe = subport->queue_extra + qindex;
> -	red = &qe->red;
> +		color = rte_sched_port_pkt_read_color(pkt);
> +		red_cfg = &subport->wred_config[tc_index][color];
> +
> +		if ((red_cfg->min_th | red_cfg->max_th) == 0)
> +			return 0;
> 
> -	return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +		red = &qe->red;
> +
> +		return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +	}
> +
> +	/* PIE */
> +	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
> +	struct rte_pie *pie = &qe->pie;
> +
> +	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port-
> >time_cpu_cycles);
>  }
> 
>  static inline void
> @@ -1815,14 +1885,29 @@
> rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
>  	struct rte_sched_subport *subport, uint32_t qindex)  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
> -	struct rte_red *red = &qe->red;
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red *red = &qe->red;
> +
> +		rte_red_mark_queue_empty(red, port->time);
> +	}
> +}
> +
> +static inline void
> +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport, 
> +uint32_t qindex, uint32_t pkt_len, uint64_t time) {
> +	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
> +	struct rte_pie *pie = &qe->pie;
> 
> -	rte_red_mark_queue_empty(red, port->time);
> +	/* Update queue length */
> +	pie->qlen -= 1;
> +	pie->qlen_bytes -= pkt_len;
> +
> +	rte_pie_dequeue(pie, pkt_len, time);
>  }
> 
>  #else
> 
> -static inline int rte_sched_port_red_drop(struct rte_sched_port *port 
> __rte_unused,
> +static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port
> __rte_unused,
>  	struct rte_sched_subport *subport __rte_unused,
>  	struct rte_mbuf *pkt __rte_unused,
>  	uint32_t qindex __rte_unused,
> @@ -1833,7 +1918,7 @@ static inline int rte_sched_port_red_drop(struct 
> rte_sched_port *port __rte_unus
> 
>  #define rte_sched_port_set_queue_empty_timestamp(port, subport,
> qindex)
> 
> -#endif /* RTE_SCHED_RED */
> +#endif /* RTE_SCHED_AQM */
> 
>  #ifdef RTE_SCHED_DEBUG
> 
> @@ -1929,7 +2014,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port 
> *port,
>  	qlen = q->qw - q->qr;
> 
>  	/* Drop the packet (and update drop stats) when queue is full */
> -	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex,
> qlen) ||
> +	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex,
> qlen) ||
>  		     (qlen >= qsize))) {
>  		rte_pktmbuf_free(pkt);
>  #ifdef RTE_SCHED_COLLECT_STATS
> @@ -2402,6 +2487,7 @@ grinder_schedule(struct rte_sched_port *port,  {
>  	struct rte_sched_grinder *grinder = subport->grinder + pos;
>  	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
> +	uint32_t qindex = grinder->qindex[grinder->qpos];
>  	struct rte_mbuf *pkt = grinder->pkt;
>  	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
>  	uint32_t be_tc_active;
> @@ -2421,15 +2507,19 @@ grinder_schedule(struct rte_sched_port *port,
>  		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
> 
>  	if (queue->qr == queue->qw) {
> -		uint32_t qindex = grinder->qindex[grinder->qpos];
> -
>  		rte_bitmap_clear(subport->bmp, qindex);
>  		grinder->qmask &= ~(1 << grinder->qpos);
>  		if (be_tc_active)
>  			grinder->wrr_mask[grinder->qpos] = 0;
> +
>  		rte_sched_port_set_queue_empty_timestamp(port,
> subport, qindex);
>  	}
> 
> +#ifdef RTE_SCHED_AQM
> +	if (subport->aqm == RTE_SCHED_AQM_PIE)
> +		rte_sched_port_pie_dequeue(subport, qindex, pkt_len,
> port->time_cpu_cycles);
> +#endif
> +
>  	/* Reset pipe loop detection */
>  	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
>  	grinder->productive = 1;
> diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h index 
> c1a772b70c..a5fe6266cd 100644
> --- a/lib/sched/rte_sched.h
> +++ b/lib/sched/rte_sched.h
> @@ -61,9 +61,10 @@ extern "C" {
>  #include <rte_mbuf.h>
>  #include <rte_meter.h>
> 
> -/** Random Early Detection (RED) */
> -#ifdef RTE_SCHED_RED
> +/** Active Queue Management */
> +#ifdef RTE_SCHED_AQM
>  #include "rte_red.h"
> +#include "rte_pie.h"
>  #endif
> 
>  /** Maximum number of queues per pipe.
> @@ -110,6 +111,28 @@ extern "C" {
>  #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
>  #endif
> 
> +/**
> + * Active Queue Management (AQM) mode
> + *
> + * This is used for controlling the admission of packets into a 
> +packet queue
> or
> + * group of packet queues on congestion.
> + *
> + * The *Random Early Detection (RED)* algorithm works by proactively
> dropping
> + * more and more input packets as the queue occupancy builds up. When
> the queue
> + * is full or almost full, RED effectively works as *tail drop*. The 
> + *Weighted
> + * RED* algorithm uses a separate set of RED thresholds for each 
> + packet
> color.
> + *
> + * Similar to RED, Proportional Integral Controller Enhanced (PIE) 
> + randomly
> + * drops a packet at the onset of the congestion and tries to control 
> + the
> + * latency around the target value. The congestion detection, 
> + however, is
> based
> + * on the queueing latency instead of the queue length like RED. For 
> +more
> + * information, refer RFC8033.
> + */
> +enum rte_sched_aqm_mode {
> +	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection
> (WRED) */
> +	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller
> Enhanced (PIE) */
> +};
> +
>  /*
>   * Pipe configuration parameters. The period and credits_per_period
>   * parameters are measured in bytes, with one byte meaning the time 
> @@ -174,9 +197,17 @@ struct rte_sched_subport_params {
>  	/** Max allowed profiles in the pipe profile table */
>  	uint32_t n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -	/** RED parameters */
> -	struct rte_red_params
> red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +#ifdef RTE_SCHED_AQM
> +	/** Active Queue Management mode */
> +	enum rte_sched_aqm_mode aqm;
> +
> +	RTE_STD_C11
> +	union {
> +		/** WRED parameters */
> +		struct rte_red_params
> wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		/** PIE parameters */
> +		struct rte_pie_params
> pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif
>  };
> 
> @@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
>  	/** Number of bytes dropped for each traffic class */
>  	uint64_t
> n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	/** Number of packets dropped by red */
> -	uint64_t
> n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +#ifdef RTE_SCHED_AQM
> +	/** Number of packets dropped by active queue management
> scheme */
> +	uint64_t
> n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
>  #endif
>  };
> 
> @@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
>  	/** Packets dropped */
>  	uint64_t n_pkts_dropped;
> 
> -#ifdef RTE_SCHED_RED
> -	/** Packets dropped by RED */
> -	uint64_t n_pkts_red_dropped;
> +#ifdef RTE_SCHED_AQM
> +	/** Packets dropped by active queue management scheme */
> +	uint64_t n_pkts_aqm_dropped;
>  #endif
> 
>  	/** Bytes successfully written */
> diff --git a/lib/sched/version.map b/lib/sched/version.map index 
> ace284b7de..3422821ac8 100644
> --- a/lib/sched/version.map
> +++ b/lib/sched/version.map
> @@ -30,4 +30,7 @@ EXPERIMENTAL {
>  	rte_sched_subport_pipe_profile_add;
>  	# added in 20.11
>  	rte_sched_port_subport_profile_add;
> +
> +	rte_pie_rt_data_init;
> +	rte_pie_config_init;
>  };
> --
> 2.25.1

NACK

I see that none of my previous comments from the V4 review got implemented. Is there any reason to silently discard all of them?

https://patches.dpdk.org/project/dpdk/patch/20210705080421.18736-2-wojciechx.liguzinski@intel.com/

I did not see any reply from you on my comments, so I assumed that you accepted and implemented most of them, but I see that none of them were picked up.

Also, I don't see any revision history, just the version counter gets incremented, so reviewing a new version of your patch requires re-reading every line of code, which is time consuming. Could you please add a revision history?

Thanks,
Cristian

------------------------------------

First thing - I'm very sorry that I haven't replied to your comments. It was not my intention to ignore them.
I was going through them and I was a bit confused, because most of your suggestions would actually revert the majority of my changes, which had been suggested by other community members, e.g. by Stephen Hemminger.
I wanted to get some opinions on how to proceed, but I got some additional tasks along the way, so it is my fault that I kept postponing that. Again - apologies.

I will go through them again and implement/respond to them as quickly as possible.

I was uploading the patches following the procedure that was presented to me (rebasing the changes so the new ones are applied to existing commits), so honestly I was not aware that I needed to provide a revision history when submitting another version of the patches.

Let me think of a way to provide you with such revision history.

Thanks,
Wojtek
  
Liguzinski, WojciechX Oct. 14, 2021, 4:02 p.m. UTC | #3
Hi Cristian,

-----Original Message-----
From: Liguzinski, WojciechX 
Sent: Tuesday, October 12, 2021 8:34 PM
To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>
Cc: Ajmera, Megha <megha.ajmera@intel.com>; Cybura, LukaszX <LukaszX.Cybura@intel.com>; Zegota, AnnaX <annax.zegota@intel.com>
Subject: RE: [PATCH v9 1/5] sched: add PIE based congestion management

Hi Cristian,

-----Original Message-----
From: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
Sent: Tuesday, October 12, 2021 6:00 PM
To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>; dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>
Cc: Ajmera, Megha <megha.ajmera@intel.com>
Subject: RE: [PATCH v9 1/5] sched: add PIE based congestion management

Hi Wojchech,

> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Monday, October 11, 2021 8:56 AM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; 
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v9 1/5] sched: add PIE based congestion management
> 
> Implement PIE based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_pie.c                      |  82 +++++
>  lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
>  lib/sched/rte_sched.c                    | 228 +++++++++----
>  lib/sched/rte_sched.h                    |  53 ++-
>  lib/sched/version.map                    |   3 +
>  7 files changed, 685 insertions(+), 90 deletions(-)  create mode
> 100644 lib/sched/rte_pie.c  create mode 100644 lib/sched/rte_pie.h
> 
> diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c
> b/drivers/net/softnic/rte_eth_softnic_tm.c
> index 90baba15ce..5b6c4e6d4b 100644
> --- a/drivers/net/softnic/rte_eth_softnic_tm.c
> +++ b/drivers/net/softnic/rte_eth_softnic_tm.c
> @@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
>  	return 0;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
>  #define WRED_SUPPORTED						1
>  #else
>  #define WRED_SUPPORTED						0
> @@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, 
> uint32_t tc_id)
>  	return NULL;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static void
>  wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id) @@
> -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t
> subport_id)
>  	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE;
> tc_id++)
>  		for (color = RTE_COLOR_GREEN; color < RTE_COLORS;
> color++) {
>  			struct rte_red_params *dst =
> -				&pp->red_params[tc_id][color];
> +				&pp->wred_params[tc_id][color];
>  			struct tm_wred_profile *src_wp =
>  				tm_tc_wred_profile_get(dev, tc_id);
>  			struct rte_tm_red_params *src =
> diff --git a/lib/sched/meson.build b/lib/sched/meson.build index
> b24f7b8775..e7ae9bcf19 100644
> --- a/lib/sched/meson.build
> +++ b/lib/sched/meson.build
> @@ -1,11 +1,7 @@
>  # SPDX-License-Identifier: BSD-3-Clause  # Copyright(c) 2017 Intel 
> Corporation
> 
> -sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c') -headers 
> = files(
> -        'rte_approx.h',
> -        'rte_red.h',
> -        'rte_sched.h',
> -        'rte_sched_common.h',
> -)
> +sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c',
> +'rte_pie.c') headers = files('rte_sched.h', 'rte_sched_common.h',
> +		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
>  deps += ['mbuf', 'meter']
> diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c new file mode
> 100644 index 0000000000..2fcecb2db4
> --- /dev/null
> +++ b/lib/sched/rte_pie.c
> @@ -0,0 +1,82 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation  */
> +
> +#include <stdlib.h>
> +
> +#include "rte_pie.h"
> +#include <rte_common.h>
> +#include <rte_cycles.h>
> +#include <rte_malloc.h>
> +
> +#ifdef __INTEL_COMPILER
> +#pragma warning(disable:2259) /* conversion may lose significant bits 
> +*/ #endif
> +
> +void
> +rte_pie_rt_data_init(struct rte_pie *pie) {
> +	if (pie == NULL) {
> +		/* Allocate memory to use the PIE data structure */
> +		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
> +
> +		if (pie == NULL)
> +			RTE_LOG(ERR, SCHED, "%s: Memory allocation
> fails\n", __func__);
> +	}
> +
> +	pie->active = 0;
> +	pie->in_measurement = 0;
> +	pie->departed_bytes_count = 0;
> +	pie->start_measurement = 0;
> +	pie->last_measurement = 0;
> +	pie->qlen = 0;
> +	pie->avg_dq_time = 0;
> +	pie->burst_allowance = 0;
> +	pie->qdelay_old = 0;
> +	pie->drop_prob = 0;
> +	pie->accu_prob = 0;
> +}
> +
> +int
> +rte_pie_config_init(struct rte_pie_config *pie_cfg,
> +	const uint16_t qdelay_ref,
> +	const uint16_t dp_update_interval,
> +	const uint16_t max_burst,
> +	const uint16_t tailq_th)
> +{
> +	uint64_t tsc_hz = rte_get_tsc_hz();
> +
> +	if (pie_cfg == NULL)
> +		return -1;
> +
> +	if (qdelay_ref <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for qdelay_ref\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (dp_update_interval <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for dp_update_interval\n",
> __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (max_burst <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for max_burst\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (tailq_th <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for tailq_th\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
> +	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) /
> 1000;
> +	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
> +	pie_cfg->tailq_th = tailq_th;
> +
> +	return 0;
> +}
> diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h new file mode
> 100644 index 0000000000..f83c95664f
> --- /dev/null
> +++ b/lib/sched/rte_pie.h
> @@ -0,0 +1,393 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation  */
> +
> +#ifndef __RTE_PIE_H_INCLUDED__
> +#define __RTE_PIE_H_INCLUDED__
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/**
> + * @file
> + * RTE Proportional Integral controller Enhanced (PIE)
> + *
> + *
> + ***/
> +
> +#include <stdint.h>
> +
> +#include <rte_random.h>
> +#include <rte_debug.h>
> +
> +#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
> +				     * to start measurement cycle (bytes)
> +				     */
> +#define RTE_DQ_WEIGHT      0.25    /**< Weight
> (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
> +#define RTE_ALPHA          0.125   /**< Weights in drop probability
> calculations */
> +#define RTE_BETA           1.25    /**< Weights in drop probability calculations
> */
> +#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number
> */
> +
> +
> +/**
> + * PIE configuration parameters passed by user
> + *
> + */
> +struct rte_pie_params {
> +	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
> +	uint16_t dp_update_interval;   /**< Update interval for drop
> probability (milliseconds) */
> +	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds)
> */
> +	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
> +};
> +
> +/**
> + * PIE configuration parameters
> + *
> + */
> +struct rte_pie_config {
> +	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
> +	uint64_t dp_update_interval;   /**< Update interval for drop
> probability (in CPU cycles) */
> +	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.)
> */
> +	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
> +};
> +
> +/**
> + * RED run-time data
> + */
> +struct rte_pie {
> +	uint16_t active;               /**< Flag for activating/deactivating pie */
> +	uint16_t in_measurement;       /**< Flag for activation of
> measurement cycle */
> +	uint32_t departed_bytes_count; /**< Number of bytes departed in
> current measurement cycle */
> +	uint64_t start_measurement;    /**< Time to start to measurement
> cycle (in cpu cycles) */
> +	uint64_t last_measurement;     /**< Time of last measurement (in
> cpu cycles) */
> +	uint64_t qlen;                 /**< Queue length (packets count) */
> +	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
> +	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in
> cpu cycles) */
> +	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
> +	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
> +	double drop_prob;              /**< Current packet drop probability */
> +	double accu_prob;              /**< Accumulated packet drop probability
> */
> +};
> +
> +/**
> + * @brief Initialises run-time data
> + *
> + * @param pie [in,out] data pointer to PIE runtime data  */ void 
> +__rte_experimental rte_pie_rt_data_init(struct rte_pie *pie);
> +
> +/**
> + * @brief Configures a single PIE configuration parameter structure.
> + *
> + * @param pie_cfg [in,out] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param qdelay_ref [in]  latency target(milliseconds)
> + * @param dp_update_interval [in] update interval for drop 
> + probability
> (milliseconds)
> + * @param max_burst [in] maximum burst allowance (milliseconds)
> + * @param tailq_th [in] tail drop threshold for the queue (number of
> packets)
> + *
> + * @return Operation status
> + * @retval 0 success
> + * @retval !0 error
> + */
> +int
> +__rte_experimental
> +rte_pie_config_init(struct rte_pie_config *pie_cfg,
> +	const uint16_t qdelay_ref,
> +	const uint16_t dp_update_interval,
> +	const uint16_t max_burst,
> +	const uint16_t tailq_th);
> +
> +/**
> + * @brief Decides packet enqueue when queue is empty
> + *
> + * Note: packet is never dropped in this particular case.
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval !0 drop the packet
> + */
> +static inline int
> +__rte_experimental
> +rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	uint32_t pkt_len)
> +{
> +	RTE_ASSERT(pkt_len != NULL);
> +
> +	/* Update the PIE qlen parameter */
> +	pie->qlen++;
> +	pie->qlen_bytes += pkt_len;
> +
> +	/**
> +	 * If the queue has been idle for a while, turn off PIE and Reset
> counters
> +	 */
> +	if ((pie->active == 1) &&
> +		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
> +		pie->active =  0;
> +		pie->in_measurement = 0;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * @brief make a decision to drop or enqueue a packet based on probability
> + *        criteria
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param time [in] current time (measured in cpu cycles)  */ static 
> +inline void __rte_experimental _calc_drop_probability(const struct 
> +rte_pie_config *pie_cfg,
> +	struct rte_pie *pie, uint64_t time)
> +{
> +	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
> +
> +	/* Note: can be implemented using integer multiply.
> +	 * DQ_THRESHOLD is power of 2 value.
> +	 */
> +	double current_qdelay = pie->qlen * (pie->avg_dq_time /
> RTE_DQ_THRESHOLD);
> +
> +	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
> +		RTE_BETA * (current_qdelay - pie->qdelay_old);
> +
> +	if (pie->drop_prob < 0.000001)
> +		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
> +	else if (pie->drop_prob < 0.00001)
> +		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
> +	else if (pie->drop_prob < 0.0001)
> +		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
> +	else if (pie->drop_prob < 0.001)
> +		p = p * 0.03125;                    /* (1/32) = 0.03125   */
> +	else if (pie->drop_prob < 0.01)
> +		p = p * 0.125;                      /* (1/8) = 0.125    */
> +	else if (pie->drop_prob < 0.1)
> +		p = p * 0.5;                        /* (1/2) = 0.5    */
> +
> +	if (pie->drop_prob >= 0.1 && p > 0.02)
> +		p = 0.02;
> +
> +	pie->drop_prob += p;
> +
> +	double qdelay = qdelay_ref * 0.5;
> +
> +	/*  Exponentially decay drop prob when congestion goes away  */
> +	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
> +		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
> +
> +	/* Bound drop probability */
> +	if (pie->drop_prob < 0)
> +		pie->drop_prob = 0;
> +	if (pie->drop_prob > 1)
> +		pie->drop_prob = 1;
> +
> +	pie->qdelay_old = current_qdelay;
> +	pie->last_measurement = time;
> +
> +	uint64_t burst_allowance = pie->burst_allowance - pie_cfg-
> >dp_update_interval;
> +
> +	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0; 
> +}
> +
> +/**
> + * @brief make a decision to drop or enqueue a packet based on probability
> + *        criteria
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + *
> + * @return operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet
> + */
> +static inline int
> +__rte_experimental
> +_rte_pie_drop(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie)
> +{
> +	uint64_t rand_value;
> +	double qdelay = pie_cfg->qdelay_ref * 0.5;
> +
> +	/* PIE is active but the queue is not congested: return 0 */
> +	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
> +		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
> +		return 0;
> +
> +	if (pie->drop_prob == 0)
> +		pie->accu_prob = 0;
> +
> +	/* For practical reasons, drop probability can be further scaled
> according
> +	 * to packet size, but one needs to set a bound to avoid unnecessary
> bias
> +	 * Random drop
> +	 */
> +	pie->accu_prob += pie->drop_prob;
> +
> +	if (pie->accu_prob < 0.85)
> +		return 0;
> +
> +	if (pie->accu_prob >= 8.5)
> +		return 1;
> +
> +	rand_value = rte_rand()/RTE_RAND_MAX;
> +
> +	if ((double)rand_value < pie->drop_prob) {
> +		pie->accu_prob = 0;
> +		return 1;
> +	}
> +
> +	/* No drop */
> +	return 0;
> +}
> +
> +/**
> + * @brief Decides if new packet should be enqeued or dropped for non-
> empty queue
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> + parameter
> structure
> + * @param pie [in,out] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time (measured in cpu cycles)
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet based on max threshold criterion
> + * @retval 2 drop the packet based on mark probability criterion  */ 
> +static inline int __rte_experimental rte_pie_enqueue_nonempty(const 
> +struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	uint32_t pkt_len,
> +	const uint64_t time)
> +{
> +	/* Check queue space against the tail drop threshold */
> +	if (pie->qlen >= pie_cfg->tailq_th) {
> +
> +		pie->accu_prob = 0;
> +		return 1;
> +	}
> +
> +	if (pie->active) {
> +		/* Update drop probability after certain interval */
> +		if ((time - pie->last_measurement) >= pie_cfg-
> >dp_update_interval)
> +			_calc_drop_probability(pie_cfg, pie, time);
> +
> +		/* Decide whether packet to be dropped or enqueued */
> +		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance ==
> 0)
> +			return 2;
> +	}
> +
> +	/* When queue occupancy is over a certain threshold, turn on PIE */
> +	if ((pie->active == 0) &&
> +		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
> +		pie->active = 1;
> +		pie->qdelay_old = 0;
> +		pie->drop_prob = 0;
> +		pie->in_measurement = 1;
> +		pie->departed_bytes_count = 0;
> +		pie->avg_dq_time = 0;
> +		pie->last_measurement = time;
> +		pie->burst_allowance = pie_cfg->max_burst;
> +		pie->accu_prob = 0;
> +		pie->start_measurement = time;
> +	}
> +
> +	/* when queue has been idle for a while, turn off PIE and Reset
> counters */
> +	if (pie->active == 1 &&
> +		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
> +		pie->active =  0;
> +		pie->in_measurement = 0;
> +	}
> +
> +	/* Update PIE qlen parameter */
> +	pie->qlen++;
> +	pie->qlen_bytes += pkt_len;
> +
> +	/* No drop */
> +	return 0;
> +}
> +
> +/**
> + * @brief Decides if new packet should be enqeued or dropped
> + * Updates run time data and gives verdict whether to enqueue or drop 
> +the
> packet.
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> + parameter
> structure
> + * @param pie [in,out] data pointer to PIE runtime data
> + * @param qlen [in] queue length
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time stamp (measured in cpu cycles)
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet based on drop probility criteria  */ 
> +static inline int __rte_experimental rte_pie_enqueue(const struct 
> +rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	const unsigned int qlen,
> +	uint32_t pkt_len,
> +	const uint64_t time)
> +{
> +	RTE_ASSERT(pie_cfg != NULL);
> +	RTE_ASSERT(pie != NULL);
> +
> +	if (qlen != 0)
> +		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len,
> time);
> +	else
> +		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len); }
> +
> +/**
> + * @brief PIE rate estimation method
> + * Called on each packet departure.
> + *
> + * @param pie [in] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time stamp in cpu cycles  */ static 
> +inline void __rte_experimental rte_pie_dequeue(struct rte_pie *pie,
> +	uint32_t pkt_len,
> +	uint64_t time)
> +{
> +	/* Dequeue rate estimation */
> +	if (pie->in_measurement) {
> +		pie->departed_bytes_count += pkt_len;
> +
> +		/* Start a new measurement cycle when enough packets */
> +		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
> +			uint64_t dq_time = time - pie->start_measurement;
> +
> +			if (pie->avg_dq_time == 0)
> +				pie->avg_dq_time = dq_time;
> +			else
> +				pie->avg_dq_time = dq_time *
> RTE_DQ_WEIGHT + pie->avg_dq_time
> +					* (1 - RTE_DQ_WEIGHT);
> +
> +			pie->in_measurement = 0;
> +		}
> +	}
> +
> +	/* Start measurement cycle when enough data in the queue */
> +	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie-
> >in_measurement == 0)) {
> +		pie->in_measurement = 1;
> +		pie->start_measurement = time;
> +		pie->departed_bytes_count = 0;
> +	}
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* __RTE_PIE_H_INCLUDED__ */
> diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c index
> a858f61f95..320435ed91 100644
> --- a/lib/sched/rte_sched.c
> +++ b/lib/sched/rte_sched.c
> @@ -89,8 +89,12 @@ struct rte_sched_queue {
> 
>  struct rte_sched_queue_extra {
>  	struct rte_sched_queue_stats stats;
> -#ifdef RTE_SCHED_RED
> -	struct rte_red red;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red red;
> +		struct rte_pie pie;
> +	};
>  #endif
>  };
> 
> @@ -183,8 +187,13 @@ struct rte_sched_subport {
>  	/* Pipe queues size */
>  	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	struct rte_red_config
> red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +	enum rte_sched_aqm_mode aqm;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red_config
> wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		struct rte_pie_config
> pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif
> 
>  	/* Scheduling loop detection */
> @@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port 
> *port, uint32_t n_subports)
>  	rte_free(port);
>  }
> 
> +#ifdef RTE_SCHED_AQM
> +
> +static int
> +rte_sched_red_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +
> +		uint32_t j;
> +
> +		for (j = 0; j < RTE_COLORS; j++) {
> +			/* if min/max are both zero, then RED is disabled */
> +			if ((params->wred_params[i][j].min_th |
> +				 params->wred_params[i][j].max_th) == 0) {
> +				continue;
> +			}
> +
> +			if (rte_red_config_init(&s->wred_config[i][j],
> +				params->wred_params[i][j].wq_log2,
> +				params->wred_params[i][j].min_th,
> +				params->wred_params[i][j].max_th,
> +				params->wred_params[i][j].maxp_inv) != 0) {
> +				rte_sched_free_memory(port, n_subports);
> +
> +				RTE_LOG(NOTICE, SCHED,
> +				"%s: RED configuration init fails\n",
> __func__);
> +				return -EINVAL;
> +			}
> +		}
> +	}
> +	s->aqm = RTE_SCHED_AQM_WRED;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_pie_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +		if (params->pie_params[i].tailq_th > params->qsize[i]) {
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE tailq threshold incorrect\n", __func__);
> +			return -EINVAL;
> +		}
> +
> +		if (rte_pie_config_init(&s->pie_config[i],
> +			params->pie_params[i].qdelay_ref,
> +			params->pie_params[i].dp_update_interval,
> +			params->pie_params[i].max_burst,
> +			params->pie_params[i].tailq_th) != 0) {
> +			rte_sched_free_memory(port, n_subports);
> +
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE configuration init fails\n", __func__);
> +			return -EINVAL;
> +			}
> +	}
> +	s->aqm = RTE_SCHED_AQM_PIE;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_aqm_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	if (params->aqm == RTE_SCHED_AQM_WRED)
> +		return rte_sched_red_config(port, s, params, n_subports);
> +
> +	else if (params->aqm == RTE_SCHED_AQM_PIE)
> +		return rte_sched_pie_config(port, s, params, n_subports);
> +
> +	return -EINVAL;
> +}
> +#endif
> +
>  int
>  rte_sched_subport_config(struct rte_sched_port *port,
>  	uint32_t subport_id,
> @@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port 
> *port,
>  		s->n_pipe_profiles = params->n_pipe_profiles;
>  		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -			uint32_t j;
> -
> -			for (j = 0; j < RTE_COLORS; j++) {
> -			/* if min/max are both zero, then RED is disabled */
> -				if ((params->red_params[i][j].min_th |
> -				     params->red_params[i][j].max_th) == 0) {
> -					continue;
> -				}
> -
> -				if (rte_red_config_init(&s->red_config[i][j],
> -				    params->red_params[i][j].wq_log2,
> -				    params->red_params[i][j].min_th,
> -				    params->red_params[i][j].max_th,
> -				    params->red_params[i][j].maxp_inv) != 0)
> {
> -					RTE_LOG(NOTICE, SCHED,
> -					"%s: RED configuration init fails\n",
> -					__func__);
> -					ret = -EINVAL;
> -					goto out;
> -				}
> -			}
> +#ifdef RTE_SCHED_AQM
> +		status = rte_sched_aqm_config(port, s, params,
> n_subports);
> +		if (status) {
> +			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration
> fails\n", __func__);
> +			return status;
>  		}
>  #endif
> 
> @@ -1718,29 +1794,20 @@ rte_sched_port_update_subport_stats(struct
> rte_sched_port *port,
>  	subport->stats.n_bytes_tc[tc_index] += pkt_len;  }
> 
> -#ifdef RTE_SCHED_RED
> -static inline void
> -rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port 
> *port,
> -	struct rte_sched_subport *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
>  static inline void
>  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port 
> *port,
>  	struct rte_sched_subport *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)
>  {
>  	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
>  	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len; -#ifdef 
> RTE_SCHED_RED
> -	subport->stats.n_pkts_red_dropped[tc_index] += red;
> +#ifdef RTE_SCHED_AQM
> +	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
>  #endif
>  }
> 
> @@ -1756,58 +1823,61 @@ rte_sched_port_update_queue_stats(struct
> rte_sched_subport *subport,
>  	qe->stats.n_bytes += pkt_len;
>  }
> 
> -#ifdef RTE_SCHED_RED
> -static inline void
> -rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport 
> *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
>  static inline void
>  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport 
> *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)
>  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	qe->stats.n_pkts_dropped += 1;
>  	qe->stats.n_bytes_dropped += pkt_len; -#ifdef RTE_SCHED_RED
> -	qe->stats.n_pkts_red_dropped += red;
> +#ifdef RTE_SCHED_AQM
> +	qe->stats.n_pkts_aqm_dropped += drops;
>  #endif
>  }
> 
>  #endif /* RTE_SCHED_COLLECT_STATS */
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static inline int
> -rte_sched_port_red_drop(struct rte_sched_port *port,
> +rte_sched_port_aqm_drop(struct rte_sched_port *port,
>  	struct rte_sched_subport *subport,
>  	struct rte_mbuf *pkt,
>  	uint32_t qindex,
>  	uint16_t qlen)
>  {
>  	struct rte_sched_queue_extra *qe;
> -	struct rte_red_config *red_cfg;
> -	struct rte_red *red;
>  	uint32_t tc_index;
> -	enum rte_color color;
> 
>  	tc_index = rte_sched_port_pipe_tc(port, qindex);
> -	color = rte_sched_port_pkt_read_color(pkt);
> -	red_cfg = &subport->red_config[tc_index][color];
> +	qe = subport->queue_extra + qindex;
> 
> -	if ((red_cfg->min_th | red_cfg->max_th) == 0)
> -		return 0;
> +	/* WRED */
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red_config *red_cfg;
> +		struct rte_red *red;
> +		enum rte_color color;
> 
> -	qe = subport->queue_extra + qindex;
> -	red = &qe->red;
> +		color = rte_sched_port_pkt_read_color(pkt);
> +		red_cfg = &subport->wred_config[tc_index][color];
> +
> +		if ((red_cfg->min_th | red_cfg->max_th) == 0)
> +			return 0;
> 
> -	return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +		red = &qe->red;
> +
> +		return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +	}
> +
> +	/* PIE */
> +	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
> +	struct rte_pie *pie = &qe->pie;
> +
> +	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port-
> >time_cpu_cycles);
>  }
> 
>  static inline void
> @@ -1815,14 +1885,29 @@
> rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
>  	struct rte_sched_subport *subport, uint32_t qindex)  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
> -	struct rte_red *red = &qe->red;
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red *red = &qe->red;
> +
> +		rte_red_mark_queue_empty(red, port->time);
> +	}
> +}
> +
> +static inline void
> +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport, 
> +uint32_t qindex, uint32_t pkt_len, uint64_t time) {
> +	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
> +	struct rte_pie *pie = &qe->pie;
> 
> -	rte_red_mark_queue_empty(red, port->time);
> +	/* Update queue length */
> +	pie->qlen -= 1;
> +	pie->qlen_bytes -= pkt_len;
> +
> +	rte_pie_dequeue(pie, pkt_len, time);
>  }
> 
>  #else
> 
> -static inline int rte_sched_port_red_drop(struct rte_sched_port *port 
> __rte_unused,
> +static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port
> __rte_unused,
>  	struct rte_sched_subport *subport __rte_unused,
>  	struct rte_mbuf *pkt __rte_unused,
>  	uint32_t qindex __rte_unused,
> @@ -1833,7 +1918,7 @@ static inline int rte_sched_port_red_drop(struct 
> rte_sched_port *port __rte_unus
> 
>  #define rte_sched_port_set_queue_empty_timestamp(port, subport,
> qindex)
> 
> -#endif /* RTE_SCHED_RED */
> +#endif /* RTE_SCHED_AQM */
> 
>  #ifdef RTE_SCHED_DEBUG
> 
> @@ -1929,7 +2014,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port 
> *port,
>  	qlen = q->qw - q->qr;
> 
>  	/* Drop the packet (and update drop stats) when queue is full */
> -	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex,
> qlen) ||
> +	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex,
> qlen) ||
>  		     (qlen >= qsize))) {
>  		rte_pktmbuf_free(pkt);
>  #ifdef RTE_SCHED_COLLECT_STATS
> @@ -2402,6 +2487,7 @@ grinder_schedule(struct rte_sched_port *port,  {
>  	struct rte_sched_grinder *grinder = subport->grinder + pos;
>  	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
> +	uint32_t qindex = grinder->qindex[grinder->qpos];
>  	struct rte_mbuf *pkt = grinder->pkt;
>  	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
>  	uint32_t be_tc_active;
> @@ -2421,15 +2507,19 @@ grinder_schedule(struct rte_sched_port *port,
>  		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
> 
>  	if (queue->qr == queue->qw) {
> -		uint32_t qindex = grinder->qindex[grinder->qpos];
> -
>  		rte_bitmap_clear(subport->bmp, qindex);
>  		grinder->qmask &= ~(1 << grinder->qpos);
>  		if (be_tc_active)
>  			grinder->wrr_mask[grinder->qpos] = 0;
> +
>  		rte_sched_port_set_queue_empty_timestamp(port,
> subport, qindex);
>  	}
> 
> +#ifdef RTE_SCHED_AQM
> +	if (subport->aqm == RTE_SCHED_AQM_PIE)
> +		rte_sched_port_pie_dequeue(subport, qindex, pkt_len,
> port->time_cpu_cycles);
> +#endif
> +
>  	/* Reset pipe loop detection */
>  	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
>  	grinder->productive = 1;
> diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h index 
> c1a772b70c..a5fe6266cd 100644
> --- a/lib/sched/rte_sched.h
> +++ b/lib/sched/rte_sched.h
> @@ -61,9 +61,10 @@ extern "C" {
>  #include <rte_mbuf.h>
>  #include <rte_meter.h>
> 
> -/** Random Early Detection (RED) */
> -#ifdef RTE_SCHED_RED
> +/** Active Queue Management */
> +#ifdef RTE_SCHED_AQM
>  #include "rte_red.h"
> +#include "rte_pie.h"
>  #endif
> 
>  /** Maximum number of queues per pipe.
> @@ -110,6 +111,28 @@ extern "C" {
>  #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
>  #endif
> 
> +/**
> + * Active Queue Management (AQM) mode
> + *
> + * This is used for controlling the admission of packets into a 
> +packet queue
> or
> + * group of packet queues on congestion.
> + *
> + * The *Random Early Detection (RED)* algorithm works by proactively
> dropping
> + * more and more input packets as the queue occupancy builds up. When
> the queue
> + * is full or almost full, RED effectively works as *tail drop*. The 
> + *Weighted
> + * RED* algorithm uses a separate set of RED thresholds for each 
> + packet
> color.
> + *
> + * Similar to RED, Proportional Integral Controller Enhanced (PIE) 
> + randomly
> + * drops a packet at the onset of the congestion and tries to control 
> + the
> + * latency around the target value. The congestion detection, 
> + however, is
> based
> + * on the queueing latency instead of the queue length like RED. For 
> +more
> + * information, refer RFC8033.
> + */
> +enum rte_sched_aqm_mode {
> +	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection
> (WRED) */
> +	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller
> Enhanced (PIE) */
> +};
> +
>  /*
>   * Pipe configuration parameters. The period and credits_per_period
>   * parameters are measured in bytes, with one byte meaning the time 
> @@ -174,9 +197,17 @@ struct rte_sched_subport_params {
>  	/** Max allowed profiles in the pipe profile table */
>  	uint32_t n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -	/** RED parameters */
> -	struct rte_red_params
> red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +#ifdef RTE_SCHED_AQM
> +	/** Active Queue Management mode */
> +	enum rte_sched_aqm_mode aqm;
> +
> +	RTE_STD_C11
> +	union {
> +		/** WRED parameters */
> +		struct rte_red_params
> wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		/** PIE parameters */
> +		struct rte_pie_params
> pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif
>  };
> 
> @@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
>  	/** Number of bytes dropped for each traffic class */
>  	uint64_t
> n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	/** Number of packets dropped by red */
> -	uint64_t
> n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +#ifdef RTE_SCHED_AQM
> +	/** Number of packets dropped by active queue management
> scheme */
> +	uint64_t
> n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
>  #endif
>  };
> 
> @@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
>  	/** Packets dropped */
>  	uint64_t n_pkts_dropped;
> 
> -#ifdef RTE_SCHED_RED
> -	/** Packets dropped by RED */
> -	uint64_t n_pkts_red_dropped;
> +#ifdef RTE_SCHED_AQM
> +	/** Packets dropped by active queue management scheme */
> +	uint64_t n_pkts_aqm_dropped;
>  #endif
> 
>  	/** Bytes successfully written */
> diff --git a/lib/sched/version.map b/lib/sched/version.map index
> ace284b7de..3422821ac8 100644
> --- a/lib/sched/version.map
> +++ b/lib/sched/version.map
> @@ -30,4 +30,7 @@ EXPERIMENTAL {
>  	rte_sched_subport_pipe_profile_add;
>  	# added in 20.11
>  	rte_sched_port_subport_profile_add;
> +
> +	rte_pie_rt_data_init;
> +	rte_pie_config_init;
>  };
> --
> 2.25.1

NACK

I see that none of my previous comments from the V4 review got implemented. Is there any reason to silently discard all of them?

https://patches.dpdk.org/project/dpdk/patch/20210705080421.18736-2-wojciechx.liguzinski@intel.com/

I did not see any reply from you on my comments, so I assumed that you accepted and implemented most of them, but I see that none of them were picked up.

Also, I don't see any revision history, just the version counter gets incremented, so reviewing a new version of your patch requires re-reading every line of code, which is time consuming. Could you please add a revision history?

Thanks,
Cristian

------------------------------------

First thing - I'm very sorry that I haven't replied to your comments. It was not my intention to ignore them.
I was going through them and I was a bit confused that most of your suggestions would actually revert the majority of the changes I had made at the suggestion of other community members, e.g. Stephen Hemminger.
I wanted to get an opinion on how to proceed, but I got some additional tasks along the way, so it is my fault that I kept postponing that. Again - apologies.

I will go through them again and implement/respond to them as quickly as possible.

I was uploading the patches following the procedure that was presented to me (rebasing the changes so the new ones are applied to existing commits), so honestly I was not aware that I need to provide a revision history when submitting another version of the patches.

Let me think of a way to provide you with such revision history.

Thanks,
Wojtek

----------------------


Thanks for your review comments.

Replying to them:
- The majority of them I have implemented/added to the code.
- Adding the extra else statement to address "declaring new variables in the middle of the function" had to be rejected, as it conflicted with checkpatch and resulted in a warning.
- Another thing that I needed to omit is using the generic struct rte_sched_cman_params in struct rte_sched_subport (rte_sched.c), as the config structures for RED and PIE are used there, and with limited time I did not want to take a risk with the implementation.
- Introducing the rte_sched_cman_params structure changed the code a bit, but I hope that I didn't break anything.
- Some changes (like in cfg_file.c & cfg_file.h) were the result of the previous point plus exceeding the line length.

So now, I'd like to ask you for a review.
Unfortunately, I haven't found a neat way to show you the revision history, but please have a look here: https://patchwork.dpdk.org/project/dpdk/list/?submitter=2195&state=%2A&archive=both


Thanks,
Wojtek
  

Patch

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..5b6c4e6d4b 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@  pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@  tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@  wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->wred_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@ 
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL)
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * RED run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
+	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	RTE_ASSERT(pkt_len != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
+
+	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	uint64_t rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	rand_value = rte_rand()/RTE_RAND_MAX;
+
+	if ((double)rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqeued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqeued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probility criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..320435ed91 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@  struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,13 @@  struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+	enum rte_sched_aqm_mode aqm;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red_config wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1087,91 @@  rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_AQM
+
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->wred_params[i][j].min_th |
+				 params->wred_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->wred_config[i][j],
+				params->wred_params[i][j].wq_log2,
+				params->wred_params[i][j].min_th,
+				params->wred_params[i][j].max_th,
+				params->wred_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->aqm = RTE_SCHED_AQM_WRED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->pie_params[i].qdelay_ref,
+			params->pie_params[i].dp_update_interval,
+			params->pie_params[i].max_burst,
+			params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->aqm = RTE_SCHED_AQM_PIE;
+	return 0;
+}
+
+static int
+rte_sched_aqm_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if (params->aqm == RTE_SCHED_AQM_WRED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->aqm == RTE_SCHED_AQM_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1261,11 @@  rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_AQM
+		status = rte_sched_aqm_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,29 +1794,20 @@  rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
+#ifdef RTE_SCHED_AQM
+	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
 #endif
 }
 
@@ -1756,58 +1823,61 @@  rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_AQM
+	qe->stats.n_pkts_aqm_dropped += drops;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->wred_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE: reached for any AQM mode other than WRED */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
 }
 
 static inline void
@@ -1815,14 +1885,29 @@  rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red *red = &qe->red;
+
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+	struct rte_pie *pie = &qe->pie;
 
-	rte_red_mark_queue_empty(red, port->time);
+	/* Remove the dequeued packet from PIE's packet and byte counts */
+	pie->qlen -= 1;
+	pie->qlen_bytes -= pkt_len;
+
+	rte_pie_dequeue(pie, pkt_len, time);
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1833,7 +1918,7 @@  static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 
 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2014,7 @@  rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2487,7 @@  grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2507,19 @@  grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
+
 		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+#ifdef RTE_SCHED_AQM
+	if (subport->aqm == RTE_SCHED_AQM_PIE)
+		rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+#endif
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..a5fe6266cd 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@  extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Active Queue Management */
+#ifdef RTE_SCHED_AQM
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@  extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Active Queue Management (AQM) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops packets at the onset of congestion and tries to keep the queueing
+ * latency around a target value. Congestion detection, however, is based on
+ * the queueing latency rather than on the queue length as in RED. For more
+ * information, refer to RFC 8033.
+ */
+enum rte_sched_aqm_mode {
+	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +197,17 @@  struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	/** Active Queue Management mode */
+	enum rte_sched_aqm_mode aqm;
+
+	RTE_STD_C11
+	union {
+		/** WRED parameters */
+		struct rte_red_params wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 };
 
@@ -208,9 +239,9 @@  struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+#ifdef RTE_SCHED_AQM
+	/** Number of packets dropped by the active queue management scheme */
+	uint64_t n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 #endif
 };
 
@@ -222,9 +253,9 @@  struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
+#ifdef RTE_SCHED_AQM
+	/** Packets dropped by the active queue management scheme */
+	uint64_t n_pkts_aqm_dropped;
 #endif
 
 	/** Bytes successfully written */
diff --git a/lib/sched/version.map b/lib/sched/version.map
index ace284b7de..3422821ac8 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,7 @@  EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+	# added in 21.11
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };