[v4,02/11] sched: add config flexibility to tc queue sizes

Message ID 20190712095729.159767-3-jasvinder.singh@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Cristian Dumitrescu
Headers
Series sched: feature enhancements |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Jasvinder Singh July 12, 2019, 9:57 a.m. UTC
  Add support for zero queue sizes of the traffic classes. The queues
which are not used can be set to zero size. This helps in reducing
memory footprint of the hierarchical scheduler.

Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com>
Signed-off-by: Abraham Tovar <abrahamx.tovar@intel.com>
Signed-off-by: Lukasz Krakowiak <lukaszx.krakowiak@intel.com>
---
 lib/librte_sched/rte_sched.c | 354 +++++++++++++++++++----------------
 lib/librte_sched/rte_sched.h |  12 +-
 2 files changed, 198 insertions(+), 168 deletions(-)
  

Comments

Cristian Dumitrescu July 16, 2019, 12:37 a.m. UTC | #1
> -----Original Message-----
> From: Singh, Jasvinder
> Sent: Friday, July 12, 2019 11:57 AM
> To: dev@dpdk.org
> Cc: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Tovar, AbrahamX
> <abrahamx.tovar@intel.com>; Krakowiak, LukaszX
> <lukaszx.krakowiak@intel.com>
> Subject: [PATCH v4 02/11] sched: add config flexibility to tc queue sizes
> 
> Add support for zero queue sizes of the traffic classes. The queues
> which are not used can be set to zero size. This helps in reducing
> memory footprint of the hierarchical scheduler.
> 
> Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com>
> Signed-off-by: Abraham Tovar <abrahamx.tovar@intel.com>
> Signed-off-by: Lukasz Krakowiak <lukaszx.krakowiak@intel.com>
> ---
>  lib/librte_sched/rte_sched.c | 354 +++++++++++++++++++----------------
>  lib/librte_sched/rte_sched.h |  12 +-
>  2 files changed, 198 insertions(+), 168 deletions(-)
> 
> diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
> index b1f521794..411924c36 100644
> --- a/lib/librte_sched/rte_sched.c
> +++ b/lib/librte_sched/rte_sched.c
> @@ -149,15 +149,15 @@ struct rte_sched_grinder {
>  	struct rte_sched_pipe_profile *pipe_params;
> 
>  	/* TC cache */
> -	uint8_t tccache_qmask[4];
> -	uint32_t tccache_qindex[4];
> +	uint8_t tccache_qmask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	uint32_t tccache_qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
>  	uint32_t tccache_w;
>  	uint32_t tccache_r;
> 
>  	/* Current TC */
>  	uint32_t tc_index;
> -	struct rte_sched_queue
> *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> -	struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	struct rte_sched_queue
> *queue[RTE_SCHED_MAX_QUEUES_PER_TC];
> +	struct rte_mbuf **qbase[RTE_SCHED_MAX_QUEUES_PER_TC];
>  	uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC];
>  	uint16_t qsize[RTE_SCHED_MAX_QUEUES_PER_TC];
>  	uint32_t qmask;
> @@ -178,7 +178,7 @@ struct rte_sched_port {
>  	uint32_t rate;
>  	uint32_t mtu;
>  	uint32_t frame_overhead;
> -	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	uint16_t qsize[RTE_SCHED_QUEUES_PER_PIPE];
>  	uint32_t n_pipe_profiles;
>  	uint32_t pipe_tc3_rate_max;
>  #ifdef RTE_SCHED_RED
> @@ -260,14 +260,13 @@ rte_sched_port_qbase(struct rte_sched_port
> *port, uint32_t qindex)
>  static inline uint16_t
>  rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex)
>  {
> -	uint32_t tc = (qindex >> 2) & 0x3;
> -
> -	return port->qsize[tc];
> +	uint32_t qpos = qindex & 0xF;
> +	return port->qsize[qpos];
>  }
> 
>  static int
>  pipe_profile_check(struct rte_sched_pipe_params *params,
> -	uint32_t rate)
> +	uint32_t rate, uint16_t *qsize)
>  {
>  	uint32_t i;
> 
> @@ -285,12 +284,17 @@ pipe_profile_check(struct rte_sched_pipe_params
> *params,
>  		return -12;
> 
>  	/* TC rate: non-zero, less than pipe rate */
> -	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -		if (params->tc_rate[i] == 0 ||
> -			params->tc_rate[i] > params->tb_rate)
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
> +		if ((qsize[i] == 0 && params->tc_rate[i] != 0) ||
> +			(qsize[i] != 0 && (params->tc_rate[i] == 0 ||
> +			params->tc_rate[i] > params->tb_rate)))
>  			return -13;
> +
>  	}
> 
> +	if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0)
> +		return -13;

You forgot to check that pipe BE TC rate is not bigger than the pipe rate, which is a check that is enforced for the other higher priority traffic classes.

> +
>  	/* TC period: non-zero */
>  	if (params->tc_period == 0)
>  		return -14;
> @@ -302,8 +306,10 @@ pipe_profile_check(struct rte_sched_pipe_params
> *params,
>  #endif
> 
>  	/* Queue WRR weights: non-zero */
> -	for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
> -		if (params->wrr_weights[i] == 0)
> +	for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++) {
> +		uint32_t qindex = RTE_SCHED_TRAFFIC_CLASS_BE + i;
> +		if ((qsize[qindex] != 0 && params->wrr_weights[i] == 0) ||
> +			(qsize[qindex] == 0 && params->wrr_weights[i] !=
> 0))
>  			return -16;
>  	}
>  	return 0;
> @@ -343,10 +349,10 @@ rte_sched_port_check_params(struct
> rte_sched_port_params *params)
>  	/* qsize: non-zero, power of 2,
>  	 * no bigger than 32K (due to 16-bit read/write pointers)
>  	 */
> -	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +	for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
>  		uint16_t qsize = params->qsize[i];
> -
> -		if (qsize == 0 || !rte_is_power_of_2(qsize))
> +		if ((qsize != 0 && !rte_is_power_of_2(qsize)) ||
> +			((i == RTE_SCHED_TRAFFIC_CLASS_BE) && (qsize ==
> 0)))
>  			return -8;
>  	}
> 
> @@ -360,7 +366,7 @@ rte_sched_port_check_params(struct
> rte_sched_port_params *params)
>  		struct rte_sched_pipe_params *p = params->pipe_profiles +
> i;
>  		int status;
> 
> -		status = pipe_profile_check(p, params->rate);
> +		status = pipe_profile_check(p, params->rate, &params-
> >qsize[0]);
>  		if (status != 0)
>  			return status;
>  	}
> @@ -389,9 +395,9 @@ rte_sched_port_get_array_base(struct
> rte_sched_port_params *params, enum rte_sch
>  	uint32_t base, i;
> 
>  	size_per_pipe_queue_array = 0;
> -	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -		size_per_pipe_queue_array +=
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS
> -			* params->qsize[i] * sizeof(struct rte_mbuf *);
> +	for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
> +		size_per_pipe_queue_array +=
> +			params->qsize[i] * sizeof(struct rte_mbuf *);
>  	}
>  	size_queue_array = n_pipes_per_port *
> size_per_pipe_queue_array;
> 
> @@ -451,31 +457,12 @@ rte_sched_port_get_memory_footprint(struct
> rte_sched_port_params *params)
>  static void
>  rte_sched_port_config_qsize(struct rte_sched_port *port)
>  {
> -	/* TC 0 */
> +	uint32_t i;
>  	port->qsize_add[0] = 0;
> -	port->qsize_add[1] = port->qsize_add[0] + port->qsize[0];
> -	port->qsize_add[2] = port->qsize_add[1] + port->qsize[0];
> -	port->qsize_add[3] = port->qsize_add[2] + port->qsize[0];
> -
> -	/* TC 1 */
> -	port->qsize_add[4] = port->qsize_add[3] + port->qsize[0];
> -	port->qsize_add[5] = port->qsize_add[4] + port->qsize[1];
> -	port->qsize_add[6] = port->qsize_add[5] + port->qsize[1];
> -	port->qsize_add[7] = port->qsize_add[6] + port->qsize[1];
> -
> -	/* TC 2 */
> -	port->qsize_add[8] = port->qsize_add[7] + port->qsize[1];
> -	port->qsize_add[9] = port->qsize_add[8] + port->qsize[2];
> -	port->qsize_add[10] = port->qsize_add[9] + port->qsize[2];
> -	port->qsize_add[11] = port->qsize_add[10] + port->qsize[2];
> -
> -	/* TC 3 */
> -	port->qsize_add[12] = port->qsize_add[11] + port->qsize[2];
> -	port->qsize_add[13] = port->qsize_add[12] + port->qsize[3];
> -	port->qsize_add[14] = port->qsize_add[13] + port->qsize[3];
> -	port->qsize_add[15] = port->qsize_add[14] + port->qsize[3];
> -
> -	port->qsize_sum = port->qsize_add[15] + port->qsize[3];
> +	for (i = 1; i < RTE_SCHED_QUEUES_PER_PIPE; i++)
> +		port->qsize_add[i] = port->qsize_add[i-1] + port->qsize[i-1];
> +
> +	port->qsize_sum = port->qsize_add[15] + port->qsize[15];
>  }
> 
>  static void
> @@ -484,10 +471,11 @@ rte_sched_port_log_pipe_profile(struct
> rte_sched_port *port, uint32_t i)
>  	struct rte_sched_pipe_profile *p = port->pipe_profiles + i;
> 
>  	RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
> -		"    Token bucket: period = %u, credits per period = %u, size =
> %u\n"
> -		"    Traffic classes: period = %u, credits per period = [%u, %u,
> %u, %u]\n"
> -		"    Traffic class 3 oversubscription: weight = %hhu\n"
> -		"    WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
> +		"	Token bucket: period = %u, credits per period = %u,
> size = %u\n"
> +		"	Traffic classes: period = %u,\n"
> +		"	credits per period = [%u, %u, %u, %u, %u, %u, %u,
> %u, %u, %u, %u, %u, %u]\n"
> +		"	Best-effort traffic class oversubscription: weight =
> %hhu\n"
> +		"	WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
>  		i,
> 
>  		/* Token bucket */
> @@ -501,8 +489,17 @@ rte_sched_port_log_pipe_profile(struct
> rte_sched_port *port, uint32_t i)
>  		p->tc_credits_per_period[1],
>  		p->tc_credits_per_period[2],
>  		p->tc_credits_per_period[3],
> -
> -		/* Traffic class 3 oversubscription */
> +		p->tc_credits_per_period[4],
> +		p->tc_credits_per_period[5],
> +		p->tc_credits_per_period[6],
> +		p->tc_credits_per_period[7],
> +		p->tc_credits_per_period[8],
> +		p->tc_credits_per_period[9],
> +		p->tc_credits_per_period[10],
> +		p->tc_credits_per_period[11],
> +		p->tc_credits_per_period[12],
> +
> +		/* Best-effort traffic class oversubscription */
>  		p->tc_ov_weight,
> 
>  		/* WRR */
> @@ -548,9 +545,10 @@ rte_sched_pipe_profile_convert(struct
> rte_sched_port *port,
>  						rate);
> 
>  	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> -		dst->tc_credits_per_period[i]
> -			= rte_sched_time_ms_to_bytes(src->tc_period,
> -				src->tc_rate[i]);
> +		if (port->qsize[i])
> +			dst->tc_credits_per_period[i]
> +				= rte_sched_time_ms_to_bytes(src-
> >tc_period,
> +					src->tc_rate[i]);
> 
>  #ifdef RTE_SCHED_SUBPORT_TC_OV

The TC_OV logic should be enabled for all the configuration/initialization code; the only place it should be conditionally enabled is the run-time code (e.g. grinder update/schedule), so no need for this #ifdef here. Same in many other places, should be easy to change for a consistent approach.

>  	dst->tc_ov_weight = src->tc_ov_weight;
> @@ -558,7 +556,7 @@ rte_sched_pipe_profile_convert(struct
> rte_sched_port *port,
> 
>  	/* WRR queues */
>  	for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++)
> -		if (port->qsize[i])
> +		if (port->qsize[RTE_SCHED_TRAFFIC_CLASS_BE + i])
>  			dst->n_be_queues++;
> 
>  	if (dst->n_be_queues == 1)
> @@ -620,7 +618,7 @@ rte_sched_port_config_pipe_profile_table(struct
> rte_sched_port *port,
>  	port->pipe_tc3_rate_max = 0;
>  	for (i = 0; i < port->n_pipe_profiles; i++) {
>  		struct rte_sched_pipe_params *src = params->pipe_profiles
> + i;
> -		uint32_t pipe_tc3_rate = src->tc_rate[3];
> +		uint32_t pipe_tc3_rate = src-
> >tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
> 
>  		if (port->pipe_tc3_rate_max < pipe_tc3_rate)
>  			port->pipe_tc3_rate_max = pipe_tc3_rate;
> @@ -762,12 +760,14 @@ rte_sched_port_free(struct rte_sched_port *port)
>  	for (qindex = 0; qindex < n_queues_per_port; qindex++) {
>  		struct rte_mbuf **mbufs = rte_sched_port_qbase(port,
> qindex);
>  		uint16_t qsize = rte_sched_port_qsize(port, qindex);
> -		struct rte_sched_queue *queue = port->queue + qindex;
> -		uint16_t qr = queue->qr & (qsize - 1);
> -		uint16_t qw = queue->qw & (qsize - 1);
> +		if (qsize != 0) {
> +			struct rte_sched_queue *queue = port->queue +
> qindex;
> +			uint16_t qr = queue->qr & (qsize - 1);
> +			uint16_t qw = queue->qw & (qsize - 1);
> 
> -		for (; qr != qw; qr = (qr + 1) & (qsize - 1))
> -			rte_pktmbuf_free(mbufs[qr]);
> +			for (; qr != qw; qr = (qr + 1) & (qsize - 1))
> +				rte_pktmbuf_free(mbufs[qr]);
> +		}
>  	}
> 
>  	rte_bitmap_free(port->bmp);
> @@ -780,9 +780,10 @@ rte_sched_port_log_subport_config(struct
> rte_sched_port *port, uint32_t i)
>  	struct rte_sched_subport *s = port->subport + i;
> 
>  	RTE_LOG(DEBUG, SCHED, "Low level config for subport %u:\n"
> -		"    Token bucket: period = %u, credits per period = %u, size =
> %u\n"
> -		"    Traffic classes: period = %u, credits per period = [%u, %u,
> %u, %u]\n"
> -		"    Traffic class 3 oversubscription: wm min = %u, wm max =
> %u\n",
> +		"	Token bucket: period = %u, credits per period = %u,
> size = %u\n"
> +		"	Traffic classes: period = %u\n"
> +		"	credits per period = [%u, %u, %u, %u, %u, %u, %u,
> %u, %u, %u, %u, %u, %u]\n"
> +		"	Best effort traffic class oversubscription: wm min =
> %u, wm max = %u\n",
>  		i,
> 
>  		/* Token bucket */
> @@ -796,8 +797,17 @@ rte_sched_port_log_subport_config(struct
> rte_sched_port *port, uint32_t i)
>  		s->tc_credits_per_period[1],
>  		s->tc_credits_per_period[2],
>  		s->tc_credits_per_period[3],
> -
> -		/* Traffic class 3 oversubscription */
> +		s->tc_credits_per_period[4],
> +		s->tc_credits_per_period[5],
> +		s->tc_credits_per_period[6],
> +		s->tc_credits_per_period[7],
> +		s->tc_credits_per_period[8],
> +		s->tc_credits_per_period[9],
> +		s->tc_credits_per_period[10],
> +		s->tc_credits_per_period[11],
> +		s->tc_credits_per_period[12],
> +
> +		/* Best effort traffic class oversubscription */
>  		s->tc_ov_wm_min,
>  		s->tc_ov_wm_max);
>  }
> @@ -808,7 +818,7 @@ rte_sched_subport_config(struct rte_sched_port
> *port,
>  	struct rte_sched_subport_params *params)
>  {
>  	struct rte_sched_subport *s;
> -	uint32_t i;
> +	uint32_t i, j;
> 
>  	/* Check user parameters */
>  	if (port == NULL ||
> @@ -822,12 +832,24 @@ rte_sched_subport_config(struct rte_sched_port
> *port,
>  	if (params->tb_size == 0)
>  		return -3;
> 
> -	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -		if (params->tc_rate[i] == 0 ||
> -		    params->tc_rate[i] > params->tb_rate)
> -			return -4;
> +	for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
> +		uint32_t tc_rate = params->tc_rate[j];
> +		uint16_t qsize = port->qsize[i];
> +
> +		if (((qsize == 0) &&
> +			((tc_rate != 0) && (j !=
> RTE_SCHED_TRAFFIC_CLASS_BE))) ||
> +			((qsize != 0) && (tc_rate == 0)) ||
> +			(tc_rate > params->tb_rate))
> +			return -3;
> +
> +		if (j < RTE_SCHED_TRAFFIC_CLASS_BE)
> +			j++;
>  	}
> 
> +	if (port->qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
> +		params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0)
> +		return -3;
> +
>  	if (params->tc_period == 0)
>  		return -5;
> 
> @@ -851,13 +873,16 @@ rte_sched_subport_config(struct rte_sched_port
> *port,
>  	/* Traffic Classes (TCs) */
>  	s->tc_period = rte_sched_time_ms_to_bytes(params->tc_period,
> port->rate);
>  	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -		s->tc_credits_per_period[i]
> -			= rte_sched_time_ms_to_bytes(params->tc_period,
> -						     params->tc_rate[i]);
> +		if (port->qsize[i])
> +			s->tc_credits_per_period[i]
> +				= rte_sched_time_ms_to_bytes(params-
> >tc_period,
> +								 params-
> >tc_rate[i]);
> +
>  	}
>  	s->tc_time = port->time + s->tc_period;
>  	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> -		s->tc_credits[i] = s->tc_credits_per_period[i];
> +		if (port->qsize[i])
> +			s->tc_credits[i] = s->tc_credits_per_period[i];
> 
>  #ifdef RTE_SCHED_SUBPORT_TC_OV
>  	/* TC oversubscription */
> @@ -910,9 +935,9 @@ rte_sched_pipe_config(struct rte_sched_port *port,
>  		params = port->pipe_profiles + p->profile;
> 
>  #ifdef RTE_SCHED_SUBPORT_TC_OV
> -		double subport_tc3_rate = (double) s-
> >tc_credits_per_period[3]
> +		double subport_tc3_rate = (double) s-
> >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
>  			/ (double) s->tc_period;
> -		double pipe_tc3_rate = (double) params-
> >tc_credits_per_period[3]
> +		double pipe_tc3_rate = (double) params-
> >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
>  			/ (double) params->tc_period;
>  		uint32_t tc3_ov = s->tc_ov;
> 
> @@ -945,15 +970,19 @@ rte_sched_pipe_config(struct rte_sched_port
> *port,
> 
>  	/* Traffic Classes (TCs) */
>  	p->tc_time = port->time + params->tc_period;
> +	p->n_be_queues = params->n_be_queues;
>  	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> -		p->tc_credits[i] = params->tc_credits_per_period[i];
> +		if (port->qsize[i])
> +			p->tc_credits[i] = params->tc_credits_per_period[i];
> 
>  #ifdef RTE_SCHED_SUBPORT_TC_OV
>  	{
>  		/* Subport TC3 oversubscription */
> -		double subport_tc3_rate = (double) s-
> >tc_credits_per_period[3]
> +		double subport_tc3_rate =
> +			(double) s-
> >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
>  			/ (double) s->tc_period;
> -		double pipe_tc3_rate = (double) params-
> >tc_credits_per_period[3]
> +		double pipe_tc3_rate =
> +			(double) params-
> >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
>  			/ (double) params->tc_period;
>  		uint32_t tc3_ov = s->tc_ov;
> 
> @@ -992,7 +1021,7 @@ rte_sched_port_pipe_profile_add(struct
> rte_sched_port *port,
>  		return -2;
> 
>  	/* Pipe params */
> -	status = pipe_profile_check(params, port->rate);
> +	status = pipe_profile_check(params, port->rate, &port->qsize[0]);
>  	if (status != 0)
>  		return status;
> 
> @@ -1008,8 +1037,8 @@ rte_sched_port_pipe_profile_add(struct
> rte_sched_port *port,
>  	*pipe_profile_id = port->n_pipe_profiles;
>  	port->n_pipe_profiles++;
> 
> -	if (port->pipe_tc3_rate_max < params->tc_rate[3])
> -		port->pipe_tc3_rate_max = params->tc_rate[3];
> +	if (port->pipe_tc3_rate_max < params-
> >tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE])
> +		port->pipe_tc3_rate_max = params-
> >tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
> 
>  	rte_sched_port_log_pipe_profile(port, *pipe_profile_id);
> 
> @@ -1020,15 +1049,12 @@ static inline uint32_t
>  rte_sched_port_qindex(struct rte_sched_port *port,
>  	uint32_t subport,
>  	uint32_t pipe,
> -	uint32_t traffic_class,
>  	uint32_t queue)
>  {
>  	return ((subport & (port->n_subports_per_port - 1)) <<
>  			(port->n_pipes_per_subport_log2 + 4)) |
>  			((pipe & (port->n_pipes_per_subport - 1)) << 4) |
> -			((traffic_class &
> -			    (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)) <<
> 2) |
> -			(queue &
> (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1));
> +			(queue & (RTE_SCHED_QUEUES_PER_PIPE - 1));
>  }
> 
>  void
> @@ -1038,8 +1064,8 @@ rte_sched_port_pkt_write(struct rte_sched_port
> *port,
>  			 uint32_t traffic_class,
>  			 uint32_t queue, enum rte_color color)
>  {
> -	uint32_t queue_id = rte_sched_port_qindex(port, subport, pipe,
> -			traffic_class, queue);
> +	uint32_t queue_id = rte_sched_port_qindex(port, subport, pipe,
> queue);
> +
>  	rte_mbuf_sched_set(pkt, queue_id, traffic_class, (uint8_t)color);
>  }

Ouch, this is a bug! The queue parameter of public function rte_sched_port_pkt_write() is not the "queue within the pipe" (0 .. 15), it is actually the "queue within the traffic class" (0 for high priority TCs, 0 .. 3 for BE TC), as explained in the API Doxygen description.

You removed the traffic class parameter in the above rte_sched_port_qindex, we should put it back and keep the initial meaning of the queue parameter for the _qindex function, which is internal function.

The way to quickly fix this is simply having a per-port pre-computed small table of 16 entries indexed by the traffic class ID and provides the ID of the first pipe queue of that traffic class; then you simply add "queue within traffic class"  ID on top of this to get the "queue within pipe", which provides the lower bits of the "queue within port" that this function is looking to compute. Makes sense?

It would be good to do some more tests to trace the packet through different pipe traffic classes and queues.

> 
> @@ -1050,12 +1076,12 @@ rte_sched_port_pkt_read_tree_path(struct
> rte_sched_port *port,
>  				  uint32_t *traffic_class, uint32_t *queue)
>  {
>  	uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
> +	uint32_t tc_id = rte_mbuf_sched_traffic_class_get(pkt);

I am afraid this is also incorrect, as although the mbuf->sched.traffic_class should be in sync with the traffic class built into the librte_sched hierarchy (and mbuf->sched.queue_id), there is no guarantee that:
1. The application is using and populating the mbuf->sched.traffic_class field
2. The application is assigning the same meaning or equivalence levels between the mbuf->sched.traffic_class and the librte_sched hierarchy traffic class.

We need to obtain the traffic class ID from queue ID, please. Thank you for your understanding!

There is also a quick way to do this as well through a similar small precomputed table: input = "queue within pipe" ID, outputs are traffic class ID and "queue within traffic class" ID. Makes sense?

We need to use this strategy for all the places in this file, so in this file we only use mbuf->sched.queue and not use mbuf->sched.traffic_class (nor its accessor function rte_mbuf_sched_traffic_class_get()) at all. This accessor function shows up 4 times currently.

> 
>  	*subport = queue_id >> (port->n_pipes_per_subport_log2 + 4);
>  	*pipe = (queue_id >> 4) & (port->n_pipes_per_subport - 1);
> -	*traffic_class = (queue_id >> 2) &
> -				(RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE -
> 1);
> -	*queue = queue_id & (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS -
> 1);
> +	*traffic_class = tc_id;
> +	*queue = queue_id & (RTE_SCHED_QUEUES_PER_PIPE - 1);
>  }
> 
>  enum rte_color
> @@ -1136,7 +1162,7 @@ static inline void
>  rte_sched_port_update_subport_stats(struct rte_sched_port *port,
> uint32_t qindex, struct rte_mbuf *pkt)
>  {
>  	struct rte_sched_subport *s = port->subport + (qindex /
> rte_sched_port_queues_per_subport(port));
> -	uint32_t tc_index = (qindex >> 2) & 0x3;
> +	uint32_t tc_index = rte_mbuf_sched_traffic_class_get(pkt);

See above comment.

>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	s->stats.n_pkts_tc[tc_index] += 1;
> @@ -1156,7 +1182,7 @@
> rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> *port,
>  #endif
>  {
>  	struct rte_sched_subport *s = port->subport + (qindex /
> rte_sched_port_queues_per_subport(port));
> -	uint32_t tc_index = (qindex >> 2) & 0x3;
> +	uint32_t tc_index = rte_mbuf_sched_traffic_class_get(pkt);

See above comment.

>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	s->stats.n_pkts_tc_dropped[tc_index] += 1;
> @@ -1211,7 +1237,7 @@ rte_sched_port_red_drop(struct rte_sched_port
> *port, struct rte_mbuf *pkt, uint3
>  	uint32_t tc_index;
>  	enum rte_color color;
> 
> -	tc_index = (qindex >> 2) & 0x3;
> +	tc_index = rte_mbuf_sched_traffic_class_get(pkt);
>  	color = rte_sched_port_pkt_read_color(pkt);
>  	red_cfg = &port->red_config[tc_index][color];
> 
> @@ -1528,6 +1554,7 @@ grinder_credits_update(struct rte_sched_port
> *port, uint32_t pos)
>  	struct rte_sched_pipe *pipe = grinder->pipe;
>  	struct rte_sched_pipe_profile *params = grinder->pipe_params;
>  	uint64_t n_periods;
> +	uint32_t i;
> 
>  	/* Subport TB */
>  	n_periods = (port->time - subport->tb_time) / subport->tb_period;
> @@ -1543,19 +1570,17 @@ grinder_credits_update(struct rte_sched_port
> *port, uint32_t pos)
> 
>  	/* Subport TCs */
>  	if (unlikely(port->time >= subport->tc_time)) {
> -		subport->tc_credits[0] = subport->tc_credits_per_period[0];
> -		subport->tc_credits[1] = subport->tc_credits_per_period[1];
> -		subport->tc_credits[2] = subport->tc_credits_per_period[2];
> -		subport->tc_credits[3] = subport->tc_credits_per_period[3];
> +		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
> +			subport->tc_credits[i] = subport-
> >tc_credits_per_period[i];
> +
>  		subport->tc_time = port->time + subport->tc_period;
>  	}
> 
>  	/* Pipe TCs */
>  	if (unlikely(port->time >= pipe->tc_time)) {
> -		pipe->tc_credits[0] = params->tc_credits_per_period[0];
> -		pipe->tc_credits[1] = params->tc_credits_per_period[1];
> -		pipe->tc_credits[2] = params->tc_credits_per_period[2];
> -		pipe->tc_credits[3] = params->tc_credits_per_period[3];
> +		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
> +			pipe->tc_credits[i] = params-
> >tc_credits_per_period[i];
> +
>  		pipe->tc_time = port->time + params->tc_period;
>  	}
>  }
> @@ -1568,21 +1593,29 @@ grinder_tc_ov_credits_update(struct
> rte_sched_port *port, uint32_t pos)
>  	struct rte_sched_grinder *grinder = port->grinder + pos;
>  	struct rte_sched_subport *subport = grinder->subport;
>  	uint32_t
> tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> -	uint32_t tc_ov_consumption_max;
> +	uint32_t tc_consumption = 0, tc_ov_consumption_max;
>  	uint32_t tc_ov_wm = subport->tc_ov_wm;
> +	uint32_t i;
> 
>  	if (subport->tc_ov == 0)
>  		return subport->tc_ov_wm_max;
> 
> -	tc_ov_consumption[0] = subport->tc_credits_per_period[0] -
> subport->tc_credits[0];
> -	tc_ov_consumption[1] = subport->tc_credits_per_period[1] -
> subport->tc_credits[1];
> -	tc_ov_consumption[2] = subport->tc_credits_per_period[2] -
> subport->tc_credits[2];
> -	tc_ov_consumption[3] = subport->tc_credits_per_period[3] -
> subport->tc_credits[3];
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
> +		tc_ov_consumption[i] =
> +			subport->tc_credits_per_period[i] - subport-
> >tc_credits[i];
> +		tc_consumption += tc_ov_consumption[i];
> +	}
> +
> +	tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] =
> +		subport-
> >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
> +		subport->tc_credits[RTE_SCHED_TRAFFIC_CLASS_BE];
> +
> 
> -	tc_ov_consumption_max = subport->tc_credits_per_period[3] -
> -		(tc_ov_consumption[0] + tc_ov_consumption[1] +
> tc_ov_consumption[2]);
> +	tc_ov_consumption_max =
> +		subport-
> >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] - tc_consumption;
> 
> -	if (tc_ov_consumption[3] > (tc_ov_consumption_max - port->mtu))
> {
> +	if (tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] >
> +		(tc_ov_consumption_max - port->mtu)) {
>  		tc_ov_wm  -= tc_ov_wm >> 7;
>  		if (tc_ov_wm < subport->tc_ov_wm_min)
>  			tc_ov_wm = subport->tc_ov_wm_min;
> @@ -1605,6 +1638,7 @@ grinder_credits_update(struct rte_sched_port
> *port, uint32_t pos)
>  	struct rte_sched_pipe *pipe = grinder->pipe;
>  	struct rte_sched_pipe_profile *params = grinder->pipe_params;
>  	uint64_t n_periods;
> +	uint32_t i;
> 
>  	/* Subport TB */
>  	n_periods = (port->time - subport->tb_time) / subport->tb_period;
> @@ -1622,10 +1656,8 @@ grinder_credits_update(struct rte_sched_port
> *port, uint32_t pos)
>  	if (unlikely(port->time >= subport->tc_time)) {
>  		subport->tc_ov_wm = grinder_tc_ov_credits_update(port,
> pos);
> 
> -		subport->tc_credits[0] = subport->tc_credits_per_period[0];
> -		subport->tc_credits[1] = subport->tc_credits_per_period[1];
> -		subport->tc_credits[2] = subport->tc_credits_per_period[2];
> -		subport->tc_credits[3] = subport->tc_credits_per_period[3];
> +		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
> +			subport->tc_credits[i] = subport-
> >tc_credits_per_period[i];
> 
>  		subport->tc_time = port->time + subport->tc_period;
>  		subport->tc_ov_period_id++;
> @@ -1633,10 +1665,8 @@ grinder_credits_update(struct rte_sched_port
> *port, uint32_t pos)
> 
>  	/* Pipe TCs */
>  	if (unlikely(port->time >= pipe->tc_time)) {
> -		pipe->tc_credits[0] = params->tc_credits_per_period[0];
> -		pipe->tc_credits[1] = params->tc_credits_per_period[1];
> -		pipe->tc_credits[2] = params->tc_credits_per_period[2];
> -		pipe->tc_credits[3] = params->tc_credits_per_period[3];
> +		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
> +			pipe->tc_credits[i] = params-
> >tc_credits_per_period[i];
>  		pipe->tc_time = port->time + params->tc_period;
>  	}
> 
> @@ -1701,11 +1731,18 @@ grinder_credits_check(struct rte_sched_port
> *port, uint32_t pos)
>  	uint32_t subport_tc_credits = subport->tc_credits[tc_index];
>  	uint32_t pipe_tb_credits = pipe->tb_credits;
>  	uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
> -	uint32_t pipe_tc_ov_mask1[] = {UINT32_MAX, UINT32_MAX,
> UINT32_MAX, pipe->tc_ov_credits};
> -	uint32_t pipe_tc_ov_mask2[] = {0, 0, 0, UINT32_MAX};
> -	uint32_t pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
> +	uint32_t
> pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	uint32_t
> pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
> +	uint32_t pipe_tc_ov_credits, i;
>  	int enough_credits;
> 
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> +		pipe_tc_ov_mask1[i] = UINT32_MAX;
> +
> +	pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe-
> >tc_ov_credits;
> +	pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] =
> UINT32_MAX;
> +	pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
> +
>  	/* Check pipe and subport credits */
>  	enough_credits = (pkt_len <= subport_tb_credits) &&
>  		(pkt_len <= subport_tc_credits) &&
> @@ -1860,68 +1897,65 @@ static inline void
>  grinder_tccache_populate(struct rte_sched_port *port, uint32_t pos,
> uint32_t qindex, uint16_t qmask)
>  {
>  	struct rte_sched_grinder *grinder = port->grinder + pos;
> -	uint8_t b[4];
> +	uint8_t b, i;
> 
>  	grinder->tccache_w = 0;
>  	grinder->tccache_r = 0;
> 
> -	b[0] = (uint8_t) (qmask & 0xF);
> -	b[1] = (uint8_t) ((qmask >> 4) & 0xF);
> -	b[2] = (uint8_t) ((qmask >> 8) & 0xF);
> -	b[3] = (uint8_t) ((qmask >> 12) & 0xF);
> -
> -	grinder->tccache_qmask[grinder->tccache_w] = b[0];
> -	grinder->tccache_qindex[grinder->tccache_w] = qindex;
> -	grinder->tccache_w += (b[0] != 0);
> -
> -	grinder->tccache_qmask[grinder->tccache_w] = b[1];
> -	grinder->tccache_qindex[grinder->tccache_w] = qindex + 4;
> -	grinder->tccache_w += (b[1] != 0);
> -
> -	grinder->tccache_qmask[grinder->tccache_w] = b[2];
> -	grinder->tccache_qindex[grinder->tccache_w] = qindex + 8;
> -	grinder->tccache_w += (b[2] != 0);
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
> +		b = (uint8_t) ((qmask >> i) & 0x1);
> +		grinder->tccache_qmask[grinder->tccache_w] = b;
> +		grinder->tccache_qindex[grinder->tccache_w] = qindex + i;
> +		grinder->tccache_w += (b != 0);
> +	}
> 
> -	grinder->tccache_qmask[grinder->tccache_w] = b[3];
> -	grinder->tccache_qindex[grinder->tccache_w] = qindex + 12;
> -	grinder->tccache_w += (b[3] != 0);
> +	b = (uint8_t) (qmask >> (RTE_SCHED_TRAFFIC_CLASS_BE));
> +	grinder->tccache_qmask[grinder->tccache_w] = b;
> +	grinder->tccache_qindex[grinder->tccache_w] = qindex +
> +		RTE_SCHED_TRAFFIC_CLASS_BE;
> +	grinder->tccache_w += (b != 0);
>  }
> 
>  static inline int
>  grinder_next_tc(struct rte_sched_port *port, uint32_t pos)
>  {
>  	struct rte_sched_grinder *grinder = port->grinder + pos;
> +	struct rte_sched_pipe *pipe = grinder->pipe;
>  	struct rte_mbuf **qbase;
> -	uint32_t qindex;
> +	uint32_t qindex, qpos = 0;
>  	uint16_t qsize;
> 
>  	if (grinder->tccache_r == grinder->tccache_w)
>  		return 0;
> 
>  	qindex = grinder->tccache_qindex[grinder->tccache_r];
> +	grinder->tc_index = qindex & 0xf;
>  	qbase = rte_sched_port_qbase(port, qindex);
> -	qsize = rte_sched_port_qsize(port, qindex);
> -
> -	grinder->tc_index = (qindex >> 2) & 0x3;
> -	grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
> -	grinder->qsize[grinder->tc_index] = qsize;
> 
> -	grinder->qindex[0] = qindex;
> -	grinder->qindex[1] = qindex + 1;
> -	grinder->qindex[2] = qindex + 2;
> -	grinder->qindex[3] = qindex + 3;
> +	if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
> +		qsize = rte_sched_port_qsize(port, qindex);
> 
> -	grinder->queue[0] = port->queue + qindex;
> -	grinder->queue[1] = port->queue + qindex + 1;
> -	grinder->queue[2] = port->queue + qindex + 2;
> -	grinder->queue[3] = port->queue + qindex + 3;
> +		grinder->queue[qpos] = port->queue + qindex;
> +		grinder->qbase[qpos] = qbase;
> +		grinder->qindex[qpos] = qindex;
> +		grinder->qsize[qpos] = qsize;
> +		grinder->qmask = grinder->tccache_qmask[grinder-
> >tccache_r];
> +		grinder->tccache_r++;
> 
> -	grinder->qbase[0] = qbase;
> -	grinder->qbase[1] = qbase + qsize;
> -	grinder->qbase[2] = qbase + 2 * qsize;
> -	grinder->qbase[3] = qbase + 3 * qsize;
> +		return 1;
> +	}
> 
> +	for ( ; qpos < pipe->n_be_queues; qpos++) {
> +		qsize = rte_sched_port_qsize(port, qindex + qpos);
> +		grinder->queue[qpos] = port->queue + qindex + qpos;
> +		grinder->qbase[qpos] = qbase + qpos * qsize;
> +		grinder->qindex[qpos] = qindex + qpos;
> +		grinder->qsize[qpos] = qsize;
> +	}
> +	grinder->tc_index = RTE_SCHED_TRAFFIC_CLASS_BE;
> +	grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
>  	grinder->tccache_r++;
> +
>  	return 1;
>  }
> 
> diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
> index 2a935998a..ae4dfb311 100644
> --- a/lib/librte_sched/rte_sched.h
> +++ b/lib/librte_sched/rte_sched.h
> @@ -83,9 +83,9 @@ extern "C" {
>  #define RTE_SCHED_BE_QUEUES_PER_PIPE    4
> 
>  /** Number of traffic classes per pipe (as well as subport).
> - * Cannot be changed.
>   */
> -#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    4
> +#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    \
> +(RTE_SCHED_QUEUES_PER_PIPE - RTE_SCHED_BE_QUEUES_PER_PIPE + 1)
> 
>  /** Number of queues per pipe traffic class. Cannot be changed. */
>  #define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS    4
> @@ -171,9 +171,7 @@ struct rte_sched_pipe_params {
>  	/**< Traffic class rates (measured in bytes per second) */
>  	uint32_t tc_period;
>  	/**< Enforcement period (measured in milliseconds) */
> -#ifdef RTE_SCHED_SUBPORT_TC_OV
>  	uint8_t tc_ov_weight;		 /**< Weight Traffic class 3
> oversubscription */
> -#endif
> 
>  	/* Pipe queues */
>  	uint8_t  wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE]; /**<
> WRR weights */
> @@ -206,11 +204,9 @@ struct rte_sched_port_params {
>  					  * (measured in bytes) */
>  	uint32_t n_subports_per_port;    /**< Number of subports */
>  	uint32_t n_pipes_per_subport;    /**< Number of pipes per subport
> */
> -	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	uint16_t qsize[RTE_SCHED_QUEUES_PER_PIPE];
>  	/**< Packet queue size for each traffic class.
> -	 * All queues within the same pipe traffic class have the same
> -	 * size. Queues from different pipes serving the same traffic
> -	 * class have the same size. */
> +	 * Queues which are not needed are allowed to have zero size. */
>  	struct rte_sched_pipe_params *pipe_profiles;
>  	/**< Pipe profile table.
>  	 * Every pipe is configured using one of the profiles from this table.
> */
> --
> 2.21.0
  
Cristian Dumitrescu July 16, 2019, 12:57 a.m. UTC | #2
> -----Original Message-----
> From: Singh, Jasvinder
> Sent: Friday, July 12, 2019 11:57 AM
> To: dev@dpdk.org
> Cc: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Tovar, AbrahamX
> <abrahamx.tovar@intel.com>; Krakowiak, LukaszX
> <lukaszx.krakowiak@intel.com>
> Subject: [PATCH v4 02/11] sched: add config flexibility to tc queue sizes
> 
> Add support for zero queue sizes of the traffic classes. The queues
> which are not used can be set to zero size. This helps in reducing
> memory footprint of the hierarchical scheduler.
> 
> Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com>
> Signed-off-by: Abraham Tovar <abrahamx.tovar@intel.com>
> Signed-off-by: Lukasz Krakowiak <lukaszx.krakowiak@intel.com>
> ---
>  lib/librte_sched/rte_sched.c | 354 +++++++++++++++++++----------------
>  lib/librte_sched/rte_sched.h |  12 +-
>  2 files changed, 198 insertions(+), 168 deletions(-)
> 
> diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
> index b1f521794..411924c36 100644
> --- a/lib/librte_sched/rte_sched.c
> +++ b/lib/librte_sched/rte_sched.c
> @@ -149,15 +149,15 @@ struct rte_sched_grinder {
>  	struct rte_sched_pipe_profile *pipe_params;
> 
>  	/* TC cache */
> -	uint8_t tccache_qmask[4];
> -	uint32_t tccache_qindex[4];
> +	uint8_t tccache_qmask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	uint32_t tccache_qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
>  	uint32_t tccache_w;
>  	uint32_t tccache_r;
> 
>  	/* Current TC */
>  	uint32_t tc_index;
> -	struct rte_sched_queue
> *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> -	struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	struct rte_sched_queue
> *queue[RTE_SCHED_MAX_QUEUES_PER_TC];
> +	struct rte_mbuf **qbase[RTE_SCHED_MAX_QUEUES_PER_TC];
>  	uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC];
>  	uint16_t qsize[RTE_SCHED_MAX_QUEUES_PER_TC];
>  	uint32_t qmask;
> @@ -178,7 +178,7 @@ struct rte_sched_port {
>  	uint32_t rate;
>  	uint32_t mtu;
>  	uint32_t frame_overhead;
> -	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	uint16_t qsize[RTE_SCHED_QUEUES_PER_PIPE];
>  	uint32_t n_pipe_profiles;
>  	uint32_t pipe_tc3_rate_max;
>  #ifdef RTE_SCHED_RED
> @@ -260,14 +260,13 @@ rte_sched_port_qbase(struct rte_sched_port
> *port, uint32_t qindex)
>  static inline uint16_t
>  rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex)
>  {
> -	uint32_t tc = (qindex >> 2) & 0x3;
> -
> -	return port->qsize[tc];
> +	uint32_t qpos = qindex & 0xF;
> +	return port->qsize[qpos];
>  }
> 
>  static int
>  pipe_profile_check(struct rte_sched_pipe_params *params,
> -	uint32_t rate)
> +	uint32_t rate, uint16_t *qsize)
>  {
>  	uint32_t i;
> 
> @@ -285,12 +284,17 @@ pipe_profile_check(struct rte_sched_pipe_params
> *params,
>  		return -12;
> 
>  	/* TC rate: non-zero, less than pipe rate */
> -	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -		if (params->tc_rate[i] == 0 ||
> -			params->tc_rate[i] > params->tb_rate)
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
> +		if ((qsize[i] == 0 && params->tc_rate[i] != 0) ||
> +			(qsize[i] != 0 && (params->tc_rate[i] == 0 ||
> +			params->tc_rate[i] > params->tb_rate)))
>  			return -13;
> +
>  	}
> 
> +	if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0)
> +		return -13;
> +
>  	/* TC period: non-zero */
>  	if (params->tc_period == 0)
>  		return -14;
> @@ -302,8 +306,10 @@ pipe_profile_check(struct rte_sched_pipe_params
> *params,
>  #endif
> 
>  	/* Queue WRR weights: non-zero */
> -	for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
> -		if (params->wrr_weights[i] == 0)
> +	for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++) {
> +		uint32_t qindex = RTE_SCHED_TRAFFIC_CLASS_BE + i;
> +		if ((qsize[qindex] != 0 && params->wrr_weights[i] == 0) ||
> +			(qsize[qindex] == 0 && params->wrr_weights[i] !=
> 0))
>  			return -16;
>  	}
>  	return 0;
> @@ -343,10 +349,10 @@ rte_sched_port_check_params(struct
> rte_sched_port_params *params)
>  	/* qsize: non-zero, power of 2,
>  	 * no bigger than 32K (due to 16-bit read/write pointers)
>  	 */
> -	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +	for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
>  		uint16_t qsize = params->qsize[i];
> -
> -		if (qsize == 0 || !rte_is_power_of_2(qsize))
> +		if ((qsize != 0 && !rte_is_power_of_2(qsize)) ||
> +			((i == RTE_SCHED_TRAFFIC_CLASS_BE) && (qsize ==
> 0)))
>  			return -8;
>  	}
> 
> @@ -360,7 +366,7 @@ rte_sched_port_check_params(struct
> rte_sched_port_params *params)
>  		struct rte_sched_pipe_params *p = params->pipe_profiles +
> i;
>  		int status;
> 
> -		status = pipe_profile_check(p, params->rate);
> +		status = pipe_profile_check(p, params->rate, &params-
> >qsize[0]);
>  		if (status != 0)
>  			return status;
>  	}
> @@ -389,9 +395,9 @@ rte_sched_port_get_array_base(struct
> rte_sched_port_params *params, enum rte_sch
>  	uint32_t base, i;
> 
>  	size_per_pipe_queue_array = 0;
> -	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -		size_per_pipe_queue_array +=
> RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS
> -			* params->qsize[i] * sizeof(struct rte_mbuf *);
> +	for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
> +		size_per_pipe_queue_array +=
> +			params->qsize[i] * sizeof(struct rte_mbuf *);
>  	}
>  	size_queue_array = n_pipes_per_port *
> size_per_pipe_queue_array;
> 
> @@ -451,31 +457,12 @@ rte_sched_port_get_memory_footprint(struct
> rte_sched_port_params *params)
>  static void
>  rte_sched_port_config_qsize(struct rte_sched_port *port)
>  {
> -	/* TC 0 */
> +	uint32_t i;
>  	port->qsize_add[0] = 0;
> -	port->qsize_add[1] = port->qsize_add[0] + port->qsize[0];
> -	port->qsize_add[2] = port->qsize_add[1] + port->qsize[0];
> -	port->qsize_add[3] = port->qsize_add[2] + port->qsize[0];
> -
> -	/* TC 1 */
> -	port->qsize_add[4] = port->qsize_add[3] + port->qsize[0];
> -	port->qsize_add[5] = port->qsize_add[4] + port->qsize[1];
> -	port->qsize_add[6] = port->qsize_add[5] + port->qsize[1];
> -	port->qsize_add[7] = port->qsize_add[6] + port->qsize[1];
> -
> -	/* TC 2 */
> -	port->qsize_add[8] = port->qsize_add[7] + port->qsize[1];
> -	port->qsize_add[9] = port->qsize_add[8] + port->qsize[2];
> -	port->qsize_add[10] = port->qsize_add[9] + port->qsize[2];
> -	port->qsize_add[11] = port->qsize_add[10] + port->qsize[2];
> -
> -	/* TC 3 */
> -	port->qsize_add[12] = port->qsize_add[11] + port->qsize[2];
> -	port->qsize_add[13] = port->qsize_add[12] + port->qsize[3];
> -	port->qsize_add[14] = port->qsize_add[13] + port->qsize[3];
> -	port->qsize_add[15] = port->qsize_add[14] + port->qsize[3];
> -
> -	port->qsize_sum = port->qsize_add[15] + port->qsize[3];
> +	for (i = 1; i < RTE_SCHED_QUEUES_PER_PIPE; i++)
> +		port->qsize_add[i] = port->qsize_add[i-1] + port->qsize[i-1];
> +
> +	port->qsize_sum = port->qsize_add[15] + port->qsize[15];
>  }
> 
>  static void
> @@ -484,10 +471,11 @@ rte_sched_port_log_pipe_profile(struct
> rte_sched_port *port, uint32_t i)
>  	struct rte_sched_pipe_profile *p = port->pipe_profiles + i;
> 
>  	RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
> -		"    Token bucket: period = %u, credits per period = %u, size =
> %u\n"
> -		"    Traffic classes: period = %u, credits per period = [%u, %u,
> %u, %u]\n"
> -		"    Traffic class 3 oversubscription: weight = %hhu\n"
> -		"    WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
> +		"	Token bucket: period = %u, credits per period = %u,
> size = %u\n"
> +		"	Traffic classes: period = %u,\n"
> +		"	credits per period = [%u, %u, %u, %u, %u, %u, %u,
> %u, %u, %u, %u, %u, %u]\n"
> +		"	Best-effort traffic class oversubscription: weight =
> %hhu\n"
> +		"	WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
>  		i,
> 
>  		/* Token bucket */
> @@ -501,8 +489,17 @@ rte_sched_port_log_pipe_profile(struct
> rte_sched_port *port, uint32_t i)
>  		p->tc_credits_per_period[1],
>  		p->tc_credits_per_period[2],
>  		p->tc_credits_per_period[3],
> -
> -		/* Traffic class 3 oversubscription */
> +		p->tc_credits_per_period[4],
> +		p->tc_credits_per_period[5],
> +		p->tc_credits_per_period[6],
> +		p->tc_credits_per_period[7],
> +		p->tc_credits_per_period[8],
> +		p->tc_credits_per_period[9],
> +		p->tc_credits_per_period[10],
> +		p->tc_credits_per_period[11],
> +		p->tc_credits_per_period[12],
> +
> +		/* Best-effort traffic class oversubscription */
>  		p->tc_ov_weight,
> 
>  		/* WRR */
> @@ -548,9 +545,10 @@ rte_sched_pipe_profile_convert(struct
> rte_sched_port *port,
>  						rate);
> 
>  	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> -		dst->tc_credits_per_period[i]
> -			= rte_sched_time_ms_to_bytes(src->tc_period,
> -				src->tc_rate[i]);
> +		if (port->qsize[i])
> +			dst->tc_credits_per_period[i]
> +				= rte_sched_time_ms_to_bytes(src-
> >tc_period,
> +					src->tc_rate[i]);
> 
>  #ifdef RTE_SCHED_SUBPORT_TC_OV
>  	dst->tc_ov_weight = src->tc_ov_weight;
> @@ -558,7 +556,7 @@ rte_sched_pipe_profile_convert(struct
> rte_sched_port *port,
> 
>  	/* WRR queues */
>  	for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++)
> -		if (port->qsize[i])
> +		if (port->qsize[RTE_SCHED_TRAFFIC_CLASS_BE + i])
>  			dst->n_be_queues++;
> 
>  	if (dst->n_be_queues == 1)
> @@ -620,7 +618,7 @@ rte_sched_port_config_pipe_profile_table(struct
> rte_sched_port *port,
>  	port->pipe_tc3_rate_max = 0;
>  	for (i = 0; i < port->n_pipe_profiles; i++) {
>  		struct rte_sched_pipe_params *src = params->pipe_profiles
> + i;
> -		uint32_t pipe_tc3_rate = src->tc_rate[3];
> +		uint32_t pipe_tc3_rate = src-
> >tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
> 
>  		if (port->pipe_tc3_rate_max < pipe_tc3_rate)
>  			port->pipe_tc3_rate_max = pipe_tc3_rate;
> @@ -762,12 +760,14 @@ rte_sched_port_free(struct rte_sched_port *port)
>  	for (qindex = 0; qindex < n_queues_per_port; qindex++) {
>  		struct rte_mbuf **mbufs = rte_sched_port_qbase(port,
> qindex);
>  		uint16_t qsize = rte_sched_port_qsize(port, qindex);
> -		struct rte_sched_queue *queue = port->queue + qindex;
> -		uint16_t qr = queue->qr & (qsize - 1);
> -		uint16_t qw = queue->qw & (qsize - 1);
> +		if (qsize != 0) {
> +			struct rte_sched_queue *queue = port->queue +
> qindex;
> +			uint16_t qr = queue->qr & (qsize - 1);
> +			uint16_t qw = queue->qw & (qsize - 1);
> 
> -		for (; qr != qw; qr = (qr + 1) & (qsize - 1))
> -			rte_pktmbuf_free(mbufs[qr]);
> +			for (; qr != qw; qr = (qr + 1) & (qsize - 1))
> +				rte_pktmbuf_free(mbufs[qr]);
> +		}
>  	}
> 
>  	rte_bitmap_free(port->bmp);
> @@ -780,9 +780,10 @@ rte_sched_port_log_subport_config(struct
> rte_sched_port *port, uint32_t i)
>  	struct rte_sched_subport *s = port->subport + i;
> 
>  	RTE_LOG(DEBUG, SCHED, "Low level config for subport %u:\n"
> -		"    Token bucket: period = %u, credits per period = %u, size =
> %u\n"
> -		"    Traffic classes: period = %u, credits per period = [%u, %u,
> %u, %u]\n"
> -		"    Traffic class 3 oversubscription: wm min = %u, wm max =
> %u\n",
> +		"	Token bucket: period = %u, credits per period = %u,
> size = %u\n"
> +		"	Traffic classes: period = %u\n"
> +		"	credits per period = [%u, %u, %u, %u, %u, %u, %u,
> %u, %u, %u, %u, %u, %u]\n"
> +		"	Best effort traffic class oversubscription: wm min =
> %u, wm max = %u\n",
>  		i,
> 
>  		/* Token bucket */
> @@ -796,8 +797,17 @@ rte_sched_port_log_subport_config(struct
> rte_sched_port *port, uint32_t i)
>  		s->tc_credits_per_period[1],
>  		s->tc_credits_per_period[2],
>  		s->tc_credits_per_period[3],
> -
> -		/* Traffic class 3 oversubscription */
> +		s->tc_credits_per_period[4],
> +		s->tc_credits_per_period[5],
> +		s->tc_credits_per_period[6],
> +		s->tc_credits_per_period[7],
> +		s->tc_credits_per_period[8],
> +		s->tc_credits_per_period[9],
> +		s->tc_credits_per_period[10],
> +		s->tc_credits_per_period[11],
> +		s->tc_credits_per_period[12],
> +
> +		/* Best effort traffic class oversubscription */
>  		s->tc_ov_wm_min,
>  		s->tc_ov_wm_max);
>  }
> @@ -808,7 +818,7 @@ rte_sched_subport_config(struct rte_sched_port
> *port,
>  	struct rte_sched_subport_params *params)
>  {
>  	struct rte_sched_subport *s;
> -	uint32_t i;
> +	uint32_t i, j;
> 
>  	/* Check user parameters */
>  	if (port == NULL ||
> @@ -822,12 +832,24 @@ rte_sched_subport_config(struct rte_sched_port
> *port,
>  	if (params->tb_size == 0)
>  		return -3;
> 
> -	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -		if (params->tc_rate[i] == 0 ||
> -		    params->tc_rate[i] > params->tb_rate)
> -			return -4;
> +	for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
> +		uint32_t tc_rate = params->tc_rate[j];
> +		uint16_t qsize = port->qsize[i];
> +
> +		if (((qsize == 0) &&
> +			((tc_rate != 0) && (j !=
> RTE_SCHED_TRAFFIC_CLASS_BE))) ||
> +			((qsize != 0) && (tc_rate == 0)) ||
> +			(tc_rate > params->tb_rate))
> +			return -3;
> +
> +		if (j < RTE_SCHED_TRAFFIC_CLASS_BE)
> +			j++;
>  	}
> 
> +	if (port->qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
> +		params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0)
> +		return -3;
> +
>  	if (params->tc_period == 0)
>  		return -5;
> 
> @@ -851,13 +873,16 @@ rte_sched_subport_config(struct rte_sched_port
> *port,
>  	/* Traffic Classes (TCs) */
>  	s->tc_period = rte_sched_time_ms_to_bytes(params->tc_period,
> port->rate);
>  	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -		s->tc_credits_per_period[i]
> -			= rte_sched_time_ms_to_bytes(params->tc_period,
> -						     params->tc_rate[i]);
> +		if (port->qsize[i])
> +			s->tc_credits_per_period[i]
> +				= rte_sched_time_ms_to_bytes(params-
> >tc_period,
> +								 params-
> >tc_rate[i]);
> +
>  	}
>  	s->tc_time = port->time + s->tc_period;
>  	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> -		s->tc_credits[i] = s->tc_credits_per_period[i];
> +		if (port->qsize[i])
> +			s->tc_credits[i] = s->tc_credits_per_period[i];
> 
>  #ifdef RTE_SCHED_SUBPORT_TC_OV
>  	/* TC oversubscription */
> @@ -910,9 +935,9 @@ rte_sched_pipe_config(struct rte_sched_port *port,
>  		params = port->pipe_profiles + p->profile;
> 
>  #ifdef RTE_SCHED_SUBPORT_TC_OV
> -		double subport_tc3_rate = (double) s-
> >tc_credits_per_period[3]
> +		double subport_tc3_rate = (double) s-
> >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
>  			/ (double) s->tc_period;
> -		double pipe_tc3_rate = (double) params-
> >tc_credits_per_period[3]
> +		double pipe_tc3_rate = (double) params-
> >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
>  			/ (double) params->tc_period;
>  		uint32_t tc3_ov = s->tc_ov;
> 
> @@ -945,15 +970,19 @@ rte_sched_pipe_config(struct rte_sched_port
> *port,
> 
>  	/* Traffic Classes (TCs) */
>  	p->tc_time = port->time + params->tc_period;
> +	p->n_be_queues = params->n_be_queues;
>  	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> -		p->tc_credits[i] = params->tc_credits_per_period[i];
> +		if (port->qsize[i])
> +			p->tc_credits[i] = params->tc_credits_per_period[i];
> 
>  #ifdef RTE_SCHED_SUBPORT_TC_OV
>  	{
>  		/* Subport TC3 oversubscription */
> -		double subport_tc3_rate = (double) s-
> >tc_credits_per_period[3]
> +		double subport_tc3_rate =
> +			(double) s-
> >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
>  			/ (double) s->tc_period;
> -		double pipe_tc3_rate = (double) params-
> >tc_credits_per_period[3]
> +		double pipe_tc3_rate =
> +			(double) params-
> >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
>  			/ (double) params->tc_period;
>  		uint32_t tc3_ov = s->tc_ov;
> 
> @@ -992,7 +1021,7 @@ rte_sched_port_pipe_profile_add(struct
> rte_sched_port *port,
>  		return -2;
> 
>  	/* Pipe params */
> -	status = pipe_profile_check(params, port->rate);
> +	status = pipe_profile_check(params, port->rate, &port->qsize[0]);
>  	if (status != 0)
>  		return status;
> 
> @@ -1008,8 +1037,8 @@ rte_sched_port_pipe_profile_add(struct
> rte_sched_port *port,
>  	*pipe_profile_id = port->n_pipe_profiles;
>  	port->n_pipe_profiles++;
> 
> -	if (port->pipe_tc3_rate_max < params->tc_rate[3])
> -		port->pipe_tc3_rate_max = params->tc_rate[3];
> +	if (port->pipe_tc3_rate_max < params-
> >tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE])
> +		port->pipe_tc3_rate_max = params-
> >tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
> 
>  	rte_sched_port_log_pipe_profile(port, *pipe_profile_id);
> 
> @@ -1020,15 +1049,12 @@ static inline uint32_t
>  rte_sched_port_qindex(struct rte_sched_port *port,
>  	uint32_t subport,
>  	uint32_t pipe,
> -	uint32_t traffic_class,
>  	uint32_t queue)
>  {
>  	return ((subport & (port->n_subports_per_port - 1)) <<
>  			(port->n_pipes_per_subport_log2 + 4)) |
>  			((pipe & (port->n_pipes_per_subport - 1)) << 4) |
> -			((traffic_class &
> -			    (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)) <<
> 2) |
> -			(queue &
> (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1));
> +			(queue & (RTE_SCHED_QUEUES_PER_PIPE - 1));
>  }
> 
>  void
> @@ -1038,8 +1064,8 @@ rte_sched_port_pkt_write(struct rte_sched_port
> *port,
>  			 uint32_t traffic_class,
>  			 uint32_t queue, enum rte_color color)
>  {
> -	uint32_t queue_id = rte_sched_port_qindex(port, subport, pipe,
> -			traffic_class, queue);
> +	uint32_t queue_id = rte_sched_port_qindex(port, subport, pipe,
> queue);
> +
>  	rte_mbuf_sched_set(pkt, queue_id, traffic_class, (uint8_t)color);
>  }
> 
> @@ -1050,12 +1076,12 @@ rte_sched_port_pkt_read_tree_path(struct
> rte_sched_port *port,
>  				  uint32_t *traffic_class, uint32_t *queue)
>  {
>  	uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
> +	uint32_t tc_id = rte_mbuf_sched_traffic_class_get(pkt);
> 
>  	*subport = queue_id >> (port->n_pipes_per_subport_log2 + 4);
>  	*pipe = (queue_id >> 4) & (port->n_pipes_per_subport - 1);
> -	*traffic_class = (queue_id >> 2) &
> -				(RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE -
> 1);
> -	*queue = queue_id & (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS -
> 1);
> +	*traffic_class = tc_id;
> +	*queue = queue_id & (RTE_SCHED_QUEUES_PER_PIPE - 1);
>  }
> 
>  enum rte_color
> @@ -1136,7 +1162,7 @@ static inline void
>  rte_sched_port_update_subport_stats(struct rte_sched_port *port,
> uint32_t qindex, struct rte_mbuf *pkt)
>  {
>  	struct rte_sched_subport *s = port->subport + (qindex /
> rte_sched_port_queues_per_subport(port));
> -	uint32_t tc_index = (qindex >> 2) & 0x3;
> +	uint32_t tc_index = rte_mbuf_sched_traffic_class_get(pkt);
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	s->stats.n_pkts_tc[tc_index] += 1;
> @@ -1156,7 +1182,7 @@
> rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> *port,
>  #endif
>  {
>  	struct rte_sched_subport *s = port->subport + (qindex /
> rte_sched_port_queues_per_subport(port));
> -	uint32_t tc_index = (qindex >> 2) & 0x3;
> +	uint32_t tc_index = rte_mbuf_sched_traffic_class_get(pkt);
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	s->stats.n_pkts_tc_dropped[tc_index] += 1;
> @@ -1211,7 +1237,7 @@ rte_sched_port_red_drop(struct rte_sched_port
> *port, struct rte_mbuf *pkt, uint3
>  	uint32_t tc_index;
>  	enum rte_color color;
> 
> -	tc_index = (qindex >> 2) & 0x3;
> +	tc_index = rte_mbuf_sched_traffic_class_get(pkt);
>  	color = rte_sched_port_pkt_read_color(pkt);
>  	red_cfg = &port->red_config[tc_index][color];
> 
> @@ -1528,6 +1554,7 @@ grinder_credits_update(struct rte_sched_port
> *port, uint32_t pos)
>  	struct rte_sched_pipe *pipe = grinder->pipe;
>  	struct rte_sched_pipe_profile *params = grinder->pipe_params;
>  	uint64_t n_periods;
> +	uint32_t i;
> 
>  	/* Subport TB */
>  	n_periods = (port->time - subport->tb_time) / subport->tb_period;
> @@ -1543,19 +1570,17 @@ grinder_credits_update(struct rte_sched_port
> *port, uint32_t pos)
> 
>  	/* Subport TCs */
>  	if (unlikely(port->time >= subport->tc_time)) {
> -		subport->tc_credits[0] = subport->tc_credits_per_period[0];
> -		subport->tc_credits[1] = subport->tc_credits_per_period[1];
> -		subport->tc_credits[2] = subport->tc_credits_per_period[2];
> -		subport->tc_credits[3] = subport->tc_credits_per_period[3];
> +		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
> +			subport->tc_credits[i] = subport-
> >tc_credits_per_period[i];
> +
>  		subport->tc_time = port->time + subport->tc_period;
>  	}
> 
>  	/* Pipe TCs */
>  	if (unlikely(port->time >= pipe->tc_time)) {
> -		pipe->tc_credits[0] = params->tc_credits_per_period[0];
> -		pipe->tc_credits[1] = params->tc_credits_per_period[1];
> -		pipe->tc_credits[2] = params->tc_credits_per_period[2];
> -		pipe->tc_credits[3] = params->tc_credits_per_period[3];
> +		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
> +			pipe->tc_credits[i] = params-
> >tc_credits_per_period[i];
> +
>  		pipe->tc_time = port->time + params->tc_period;
>  	}
>  }
> @@ -1568,21 +1593,29 @@ grinder_tc_ov_credits_update(struct
> rte_sched_port *port, uint32_t pos)
>  	struct rte_sched_grinder *grinder = port->grinder + pos;
>  	struct rte_sched_subport *subport = grinder->subport;
>  	uint32_t
> tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> -	uint32_t tc_ov_consumption_max;
> +	uint32_t tc_consumption = 0, tc_ov_consumption_max;
>  	uint32_t tc_ov_wm = subport->tc_ov_wm;
> +	uint32_t i;
> 
>  	if (subport->tc_ov == 0)
>  		return subport->tc_ov_wm_max;
> 
> -	tc_ov_consumption[0] = subport->tc_credits_per_period[0] -
> subport->tc_credits[0];
> -	tc_ov_consumption[1] = subport->tc_credits_per_period[1] -
> subport->tc_credits[1];
> -	tc_ov_consumption[2] = subport->tc_credits_per_period[2] -
> subport->tc_credits[2];
> -	tc_ov_consumption[3] = subport->tc_credits_per_period[3] -
> subport->tc_credits[3];
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
> +		tc_ov_consumption[i] =
> +			subport->tc_credits_per_period[i] - subport-
> >tc_credits[i];
> +		tc_consumption += tc_ov_consumption[i];
> +	}
> +
> +	tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] =
> +		subport-
> >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
> +		subport->tc_credits[RTE_SCHED_TRAFFIC_CLASS_BE];
> +
> 
> -	tc_ov_consumption_max = subport->tc_credits_per_period[3] -
> -		(tc_ov_consumption[0] + tc_ov_consumption[1] +
> tc_ov_consumption[2]);
> +	tc_ov_consumption_max =
> +		subport-
> >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] - tc_consumption;
> 
> -	if (tc_ov_consumption[3] > (tc_ov_consumption_max - port->mtu))
> {
> +	if (tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] >
> +		(tc_ov_consumption_max - port->mtu)) {
>  		tc_ov_wm  -= tc_ov_wm >> 7;
>  		if (tc_ov_wm < subport->tc_ov_wm_min)
>  			tc_ov_wm = subport->tc_ov_wm_min;
> @@ -1605,6 +1638,7 @@ grinder_credits_update(struct rte_sched_port
> *port, uint32_t pos)
>  	struct rte_sched_pipe *pipe = grinder->pipe;
>  	struct rte_sched_pipe_profile *params = grinder->pipe_params;
>  	uint64_t n_periods;
> +	uint32_t i;
> 
>  	/* Subport TB */
>  	n_periods = (port->time - subport->tb_time) / subport->tb_period;
> @@ -1622,10 +1656,8 @@ grinder_credits_update(struct rte_sched_port
> *port, uint32_t pos)
>  	if (unlikely(port->time >= subport->tc_time)) {
>  		subport->tc_ov_wm = grinder_tc_ov_credits_update(port,
> pos);
> 
> -		subport->tc_credits[0] = subport->tc_credits_per_period[0];
> -		subport->tc_credits[1] = subport->tc_credits_per_period[1];
> -		subport->tc_credits[2] = subport->tc_credits_per_period[2];
> -		subport->tc_credits[3] = subport->tc_credits_per_period[3];
> +		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
> +			subport->tc_credits[i] = subport-
> >tc_credits_per_period[i];
> 
>  		subport->tc_time = port->time + subport->tc_period;
>  		subport->tc_ov_period_id++;
> @@ -1633,10 +1665,8 @@ grinder_credits_update(struct rte_sched_port
> *port, uint32_t pos)
> 
>  	/* Pipe TCs */
>  	if (unlikely(port->time >= pipe->tc_time)) {
> -		pipe->tc_credits[0] = params->tc_credits_per_period[0];
> -		pipe->tc_credits[1] = params->tc_credits_per_period[1];
> -		pipe->tc_credits[2] = params->tc_credits_per_period[2];
> -		pipe->tc_credits[3] = params->tc_credits_per_period[3];
> +		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
> +			pipe->tc_credits[i] = params-
> >tc_credits_per_period[i];
>  		pipe->tc_time = port->time + params->tc_period;
>  	}
> 
> @@ -1701,11 +1731,18 @@ grinder_credits_check(struct rte_sched_port
> *port, uint32_t pos)
>  	uint32_t subport_tc_credits = subport->tc_credits[tc_index];
>  	uint32_t pipe_tb_credits = pipe->tb_credits;
>  	uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
> -	uint32_t pipe_tc_ov_mask1[] = {UINT32_MAX, UINT32_MAX,
> UINT32_MAX, pipe->tc_ov_credits};
> -	uint32_t pipe_tc_ov_mask2[] = {0, 0, 0, UINT32_MAX};
> -	uint32_t pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
> +	uint32_t
> pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	uint32_t
> pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
> +	uint32_t pipe_tc_ov_credits, i;
>  	int enough_credits;
> 
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> +		pipe_tc_ov_mask1[i] = UINT32_MAX;
> +
> +	pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe-
> >tc_ov_credits;
> +	pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] =
> UINT32_MAX;
> +	pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
> +
>  	/* Check pipe and subport credits */
>  	enough_credits = (pkt_len <= subport_tb_credits) &&
>  		(pkt_len <= subport_tc_credits) &&
> @@ -1860,68 +1897,65 @@ static inline void
>  grinder_tccache_populate(struct rte_sched_port *port, uint32_t pos,
> uint32_t qindex, uint16_t qmask)
>  {
>  	struct rte_sched_grinder *grinder = port->grinder + pos;
> -	uint8_t b[4];
> +	uint8_t b, i;
> 
>  	grinder->tccache_w = 0;
>  	grinder->tccache_r = 0;
> 
> -	b[0] = (uint8_t) (qmask & 0xF);
> -	b[1] = (uint8_t) ((qmask >> 4) & 0xF);
> -	b[2] = (uint8_t) ((qmask >> 8) & 0xF);
> -	b[3] = (uint8_t) ((qmask >> 12) & 0xF);
> -
> -	grinder->tccache_qmask[grinder->tccache_w] = b[0];
> -	grinder->tccache_qindex[grinder->tccache_w] = qindex;
> -	grinder->tccache_w += (b[0] != 0);
> -
> -	grinder->tccache_qmask[grinder->tccache_w] = b[1];
> -	grinder->tccache_qindex[grinder->tccache_w] = qindex + 4;
> -	grinder->tccache_w += (b[1] != 0);
> -
> -	grinder->tccache_qmask[grinder->tccache_w] = b[2];
> -	grinder->tccache_qindex[grinder->tccache_w] = qindex + 8;
> -	grinder->tccache_w += (b[2] != 0);
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
> +		b = (uint8_t) ((qmask >> i) & 0x1);
> +		grinder->tccache_qmask[grinder->tccache_w] = b;
> +		grinder->tccache_qindex[grinder->tccache_w] = qindex + i;
> +		grinder->tccache_w += (b != 0);
> +	}
> 
> -	grinder->tccache_qmask[grinder->tccache_w] = b[3];
> -	grinder->tccache_qindex[grinder->tccache_w] = qindex + 12;
> -	grinder->tccache_w += (b[3] != 0);
> +	b = (uint8_t) (qmask >> (RTE_SCHED_TRAFFIC_CLASS_BE));
> +	grinder->tccache_qmask[grinder->tccache_w] = b;
> +	grinder->tccache_qindex[grinder->tccache_w] = qindex +
> +		RTE_SCHED_TRAFFIC_CLASS_BE;
> +	grinder->tccache_w += (b != 0);
>  }
> 
>  static inline int
>  grinder_next_tc(struct rte_sched_port *port, uint32_t pos)
>  {
>  	struct rte_sched_grinder *grinder = port->grinder + pos;
> +	struct rte_sched_pipe *pipe = grinder->pipe;
>  	struct rte_mbuf **qbase;
> -	uint32_t qindex;
> +	uint32_t qindex, qpos = 0;
>  	uint16_t qsize;
> 
>  	if (grinder->tccache_r == grinder->tccache_w)
>  		return 0;
> 
>  	qindex = grinder->tccache_qindex[grinder->tccache_r];
> +	grinder->tc_index = qindex & 0xf;
>  	qbase = rte_sched_port_qbase(port, qindex);
> -	qsize = rte_sched_port_qsize(port, qindex);
> -
> -	grinder->tc_index = (qindex >> 2) & 0x3;
> -	grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
> -	grinder->qsize[grinder->tc_index] = qsize;
> 
> -	grinder->qindex[0] = qindex;
> -	grinder->qindex[1] = qindex + 1;
> -	grinder->qindex[2] = qindex + 2;
> -	grinder->qindex[3] = qindex + 3;
> +	if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
> +		qsize = rte_sched_port_qsize(port, qindex);
> 
> -	grinder->queue[0] = port->queue + qindex;
> -	grinder->queue[1] = port->queue + qindex + 1;
> -	grinder->queue[2] = port->queue + qindex + 2;
> -	grinder->queue[3] = port->queue + qindex + 3;
> +		grinder->queue[qpos] = port->queue + qindex;
> +		grinder->qbase[qpos] = qbase;
> +		grinder->qindex[qpos] = qindex;
> +		grinder->qsize[qpos] = qsize;
> +		grinder->qmask = grinder->tccache_qmask[grinder-
> >tccache_r];
> +		grinder->tccache_r++;
> 
> -	grinder->qbase[0] = qbase;
> -	grinder->qbase[1] = qbase + qsize;
> -	grinder->qbase[2] = qbase + 2 * qsize;
> -	grinder->qbase[3] = qbase + 3 * qsize;
> +		return 1;
> +	}
> 
> +	for ( ; qpos < pipe->n_be_queues; qpos++) {
> +		qsize = rte_sched_port_qsize(port, qindex + qpos);
> +		grinder->queue[qpos] = port->queue + qindex + qpos;
> +		grinder->qbase[qpos] = qbase + qpos * qsize;
> +		grinder->qindex[qpos] = qindex + qpos;
> +		grinder->qsize[qpos] = qsize;
> +	}
> +	grinder->tc_index = RTE_SCHED_TRAFFIC_CLASS_BE;
> +	grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
>  	grinder->tccache_r++;
> +
>  	return 1;
>  }
> 
> diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
> index 2a935998a..ae4dfb311 100644
> --- a/lib/librte_sched/rte_sched.h
> +++ b/lib/librte_sched/rte_sched.h
> @@ -83,9 +83,9 @@ extern "C" {
>  #define RTE_SCHED_BE_QUEUES_PER_PIPE    4
> 
>  /** Number of traffic classes per pipe (as well as subport).
> - * Cannot be changed.
>   */
> -#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    4
> +#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    \
> +(RTE_SCHED_QUEUES_PER_PIPE - RTE_SCHED_BE_QUEUES_PER_PIPE + 1)
> 
>  /** Number of queues per pipe traffic class. Cannot be changed. */
>  #define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS    4
> @@ -171,9 +171,7 @@ struct rte_sched_pipe_params {
>  	/**< Traffic class rates (measured in bytes per second) */
>  	uint32_t tc_period;
>  	/**< Enforcement period (measured in milliseconds) */
> -#ifdef RTE_SCHED_SUBPORT_TC_OV
>  	uint8_t tc_ov_weight;		 /**< Weight Traffic class 3
> oversubscription */
> -#endif
> 
>  	/* Pipe queues */
>  	uint8_t  wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE]; /**<
> WRR weights */
> @@ -206,11 +204,9 @@ struct rte_sched_port_params {
>  					  * (measured in bytes) */
>  	uint32_t n_subports_per_port;    /**< Number of subports */
>  	uint32_t n_pipes_per_subport;    /**< Number of pipes per subport
> */
> -	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	uint16_t qsize[RTE_SCHED_QUEUES_PER_PIPE];

Given that it does not make any sense to have queues of different lengths within the same traffic class, we should probably keep the size of this array as RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE. Internally though, it makes sense to have the port->qsize array of size RTE_SCHED_QUEUES_PER_PIPE with all BE queues of the same size. Also simplifies implementation, as no need to worry about the n_be_queues (see the other email).

>  	/**< Packet queue size for each traffic class.
> -	 * All queues within the same pipe traffic class have the same
> -	 * size. Queues from different pipes serving the same traffic
> -	 * class have the same size. */
> +	 * Queues which are not needed are allowed to have zero size. */
>  	struct rte_sched_pipe_params *pipe_profiles;
>  	/**< Pipe profile table.
>  	 * Every pipe is configured using one of the profiles from this table.
> */
> --
> 2.21.0
  
Jasvinder Singh July 17, 2019, 2:57 p.m. UTC | #3
<snip>
> >
> >  	/* TC rate: non-zero, less than pipe rate */
> > -	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> > -		if (params->tc_rate[i] == 0 ||
> > -			params->tc_rate[i] > params->tb_rate)
> > +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
> > +		if ((qsize[i] == 0 && params->tc_rate[i] != 0) ||
> > +			(qsize[i] != 0 && (params->tc_rate[i] == 0 ||
> > +			params->tc_rate[i] > params->tb_rate)))
> >  			return -13;
> > +
> >  	}
> >
> > +	if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0)
> > +		return -13;
> 
> You forgot to check that pipe BE TC rate is not bigger than the pipe rate, which
> is a check that is enforced for the other higher priority traffic classes.

Check introduced in v5.

> > +
> >  	/* TC period: non-zero */
> >  	if (params->tc_period == 0)
> >  		return -14;
> > @@ -302,8 +306,10 @@ pipe_profile_check(struct rte_sched_pipe_params
> > *params,  #endif
> >
> >  	/* Queue WRR weights: non-zero */
> > -	for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
> > -		if (params->wrr_weights[i] == 0)
> > +	for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++) {
> > +		uint32_t qindex = RTE_SCHED_TRAFFIC_CLASS_BE + i;
> > +		if ((qsize[qindex] != 0 && params->wrr_weights[i] == 0) ||
> > +			(qsize[qindex] == 0 && params->wrr_weights[i] !=
> > 0))
> >  			return -16;
> >  	}
> >  	return 0;
> > @@ -343,10 +349,10 @@ rte_sched_port_check_params(struct
> > rte_sched_port_params *params)
> >  	/* qsize: non-zero, power of 2,
> >  	 * no bigger than 32K (due to 16-bit read/write pointers)
> >  	 */
> > -	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> > +	for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
> >  		uint16_t qsize = params->qsize[i];
> > -
> > -		if (qsize == 0 || !rte_is_power_of_2(qsize))
> > +		if ((qsize != 0 && !rte_is_power_of_2(qsize)) ||
> > +			((i == RTE_SCHED_TRAFFIC_CLASS_BE) && (qsize ==
> > 0)))
> >  			return -8;
> >  	}
> >
> > @@ -360,7 +366,7 @@ rte_sched_port_check_params(struct
> > rte_sched_port_params *params)
> >  		struct rte_sched_pipe_params *p = params->pipe_profiles + i;
> >  		int status;
> >
> > -		status = pipe_profile_check(p, params->rate);
> > +		status = pipe_profile_check(p, params->rate, &params-
> > >qsize[0]);
> >  		if (status != 0)
> >  			return status;
> >  	}
> > @@ -389,9 +395,9 @@ rte_sched_port_get_array_base(struct
> > rte_sched_port_params *params, enum rte_sch
> >  	uint32_t base, i;
> >
> >  	size_per_pipe_queue_array = 0;
> > -	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> > -		size_per_pipe_queue_array +=
> > RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS
> > -			* params->qsize[i] * sizeof(struct rte_mbuf *);
> > +	for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
> > +		size_per_pipe_queue_array +=
> > +			params->qsize[i] * sizeof(struct rte_mbuf *);
> >  	}
> >  	size_queue_array = n_pipes_per_port * size_per_pipe_queue_array;
> >
> > @@ -451,31 +457,12 @@ rte_sched_port_get_memory_footprint(struct
> > rte_sched_port_params *params)
> >  static void
> >  rte_sched_port_config_qsize(struct rte_sched_port *port)  {
> > -	/* TC 0 */
> > +	uint32_t i;
> >  	port->qsize_add[0] = 0;
> > -	port->qsize_add[1] = port->qsize_add[0] + port->qsize[0];
> > -	port->qsize_add[2] = port->qsize_add[1] + port->qsize[0];
> > -	port->qsize_add[3] = port->qsize_add[2] + port->qsize[0];
> > -
> > -	/* TC 1 */
> > -	port->qsize_add[4] = port->qsize_add[3] + port->qsize[0];
> > -	port->qsize_add[5] = port->qsize_add[4] + port->qsize[1];
> > -	port->qsize_add[6] = port->qsize_add[5] + port->qsize[1];
> > -	port->qsize_add[7] = port->qsize_add[6] + port->qsize[1];
> > -
> > -	/* TC 2 */
> > -	port->qsize_add[8] = port->qsize_add[7] + port->qsize[1];
> > -	port->qsize_add[9] = port->qsize_add[8] + port->qsize[2];
> > -	port->qsize_add[10] = port->qsize_add[9] + port->qsize[2];
> > -	port->qsize_add[11] = port->qsize_add[10] + port->qsize[2];
> > -
> > -	/* TC 3 */
> > -	port->qsize_add[12] = port->qsize_add[11] + port->qsize[2];
> > -	port->qsize_add[13] = port->qsize_add[12] + port->qsize[3];
> > -	port->qsize_add[14] = port->qsize_add[13] + port->qsize[3];
> > -	port->qsize_add[15] = port->qsize_add[14] + port->qsize[3];
> > -
> > -	port->qsize_sum = port->qsize_add[15] + port->qsize[3];
> > +	for (i = 1; i < RTE_SCHED_QUEUES_PER_PIPE; i++)
> > +		port->qsize_add[i] = port->qsize_add[i-1] + port->qsize[i-1];
> > +
> > +	port->qsize_sum = port->qsize_add[15] + port->qsize[15];
> >  }
> >
> >  static void
> > @@ -484,10 +471,11 @@ rte_sched_port_log_pipe_profile(struct
> > rte_sched_port *port, uint32_t i)
> >  	struct rte_sched_pipe_profile *p = port->pipe_profiles + i;
> >
> >  	RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
> > -		"    Token bucket: period = %u, credits per period = %u, size =
> > %u\n"
> > -		"    Traffic classes: period = %u, credits per period = [%u, %u,
> > %u, %u]\n"
> > -		"    Traffic class 3 oversubscription: weight = %hhu\n"
> > -		"    WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
> > +		"	Token bucket: period = %u, credits per period = %u,
> > size = %u\n"
> > +		"	Traffic classes: period = %u,\n"
> > +		"	credits per period = [%u, %u, %u, %u, %u, %u, %u,
> > %u, %u, %u, %u, %u, %u]\n"
> > +		"	Best-effort traffic class oversubscription: weight =
> > %hhu\n"
> > +		"	WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
> >  		i,
> >
> >  		/* Token bucket */
> > @@ -501,8 +489,17 @@ rte_sched_port_log_pipe_profile(struct
> > rte_sched_port *port, uint32_t i)
> >  		p->tc_credits_per_period[1],
> >  		p->tc_credits_per_period[2],
> >  		p->tc_credits_per_period[3],
> > -
> > -		/* Traffic class 3 oversubscription */
> > +		p->tc_credits_per_period[4],
> > +		p->tc_credits_per_period[5],
> > +		p->tc_credits_per_period[6],
> > +		p->tc_credits_per_period[7],
> > +		p->tc_credits_per_period[8],
> > +		p->tc_credits_per_period[9],
> > +		p->tc_credits_per_period[10],
> > +		p->tc_credits_per_period[11],
> > +		p->tc_credits_per_period[12],
> > +
> > +		/* Best-effort traffic class oversubscription */
> >  		p->tc_ov_weight,
> >
> >  		/* WRR */
> > @@ -548,9 +545,10 @@ rte_sched_pipe_profile_convert(struct
> > rte_sched_port *port,
> >  						rate);
> >
> >  	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> > -		dst->tc_credits_per_period[i]
> > -			= rte_sched_time_ms_to_bytes(src->tc_period,
> > -				src->tc_rate[i]);
> > +		if (port->qsize[i])
> > +			dst->tc_credits_per_period[i]
> > +				= rte_sched_time_ms_to_bytes(src-
> > >tc_period,
> > +					src->tc_rate[i]);
> >
> >  #ifdef RTE_SCHED_SUBPORT_TC_OV
> 
> The TC_OV logic should be enabled for all the configuration/initialization code;
> the only place it should be conditionally enabled is the run-time code (e.g.
> grinder update/schedule), so no need for this #ifdef here. Same in many other
> places, should be easy to change for a consistent approach.

Enabled the oversubscription part for configuration and initlaisation in v5.

> >  	dst->tc_ov_weight = src->tc_ov_weight; @@ -558,7 +556,7 @@
> > rte_sched_pipe_profile_convert(struct
> > rte_sched_port *port,
> >
> >  	/* WRR queues */
> >  	for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++)
> > -		if (port->qsize[i])
> > +		if (port->qsize[RTE_SCHED_TRAFFIC_CLASS_BE + i])
> >  			dst->n_be_queues++;
> >
> >  	if (dst->n_be_queues == 1)
> > @@ -620,7 +618,7 @@ rte_sched_port_config_pipe_profile_table(struct
> > rte_sched_port *port,
> >  	port->pipe_tc3_rate_max = 0;
> >  	for (i = 0; i < port->n_pipe_profiles; i++) {
> >  		struct rte_sched_pipe_params *src = params->pipe_profiles
> > + i;
> > -		uint32_t pipe_tc3_rate = src->tc_rate[3];
> > +		uint32_t pipe_tc3_rate = src-
> > >tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
> >
> >  		if (port->pipe_tc3_rate_max < pipe_tc3_rate)
> >  			port->pipe_tc3_rate_max = pipe_tc3_rate; @@ -
> 762,12 +760,14 @@
> > rte_sched_port_free(struct rte_sched_port *port)
> >  	for (qindex = 0; qindex < n_queues_per_port; qindex++) {
> >  		struct rte_mbuf **mbufs = rte_sched_port_qbase(port,
> qindex);
> >  		uint16_t qsize = rte_sched_port_qsize(port, qindex);
> > -		struct rte_sched_queue *queue = port->queue + qindex;
> > -		uint16_t qr = queue->qr & (qsize - 1);
> > -		uint16_t qw = queue->qw & (qsize - 1);
> > +		if (qsize != 0) {
> > +			struct rte_sched_queue *queue = port->queue +
> > qindex;
> > +			uint16_t qr = queue->qr & (qsize - 1);
> > +			uint16_t qw = queue->qw & (qsize - 1);
> >
> > -		for (; qr != qw; qr = (qr + 1) & (qsize - 1))
> > -			rte_pktmbuf_free(mbufs[qr]);
> > +			for (; qr != qw; qr = (qr + 1) & (qsize - 1))
> > +				rte_pktmbuf_free(mbufs[qr]);
> > +		}
> >  	}
> >
> >  	rte_bitmap_free(port->bmp);
> > @@ -780,9 +780,10 @@ rte_sched_port_log_subport_config(struct
> > rte_sched_port *port, uint32_t i)
> >  	struct rte_sched_subport *s = port->subport + i;
> >
> >  	RTE_LOG(DEBUG, SCHED, "Low level config for subport %u:\n"
> > -		"    Token bucket: period = %u, credits per period = %u, size =
> > %u\n"
> > -		"    Traffic classes: period = %u, credits per period = [%u, %u,
> > %u, %u]\n"
> > -		"    Traffic class 3 oversubscription: wm min = %u, wm max =
> > %u\n",
> > +		"	Token bucket: period = %u, credits per period = %u,
> > size = %u\n"
> > +		"	Traffic classes: period = %u\n"
> > +		"	credits per period = [%u, %u, %u, %u, %u, %u, %u,
> > %u, %u, %u, %u, %u, %u]\n"
> > +		"	Best effort traffic class oversubscription: wm min =
> > %u, wm max = %u\n",
> >  		i,
> >
> >  		/* Token bucket */
> > @@ -796,8 +797,17 @@ rte_sched_port_log_subport_config(struct
> > rte_sched_port *port, uint32_t i)
> >  		s->tc_credits_per_period[1],
> >  		s->tc_credits_per_period[2],
> >  		s->tc_credits_per_period[3],
> > -
> > -		/* Traffic class 3 oversubscription */
> > +		s->tc_credits_per_period[4],
> > +		s->tc_credits_per_period[5],
> > +		s->tc_credits_per_period[6],
> > +		s->tc_credits_per_period[7],
> > +		s->tc_credits_per_period[8],
> > +		s->tc_credits_per_period[9],
> > +		s->tc_credits_per_period[10],
> > +		s->tc_credits_per_period[11],
> > +		s->tc_credits_per_period[12],
> > +
> > +		/* Best effort traffic class oversubscription */
> >  		s->tc_ov_wm_min,
> >  		s->tc_ov_wm_max);
> >  }
> > @@ -808,7 +818,7 @@ rte_sched_subport_config(struct rte_sched_port
> > *port,
> >  	struct rte_sched_subport_params *params)  {
> >  	struct rte_sched_subport *s;
> > -	uint32_t i;
> > +	uint32_t i, j;
> >
> >  	/* Check user parameters */
> >  	if (port == NULL ||
> > @@ -822,12 +832,24 @@ rte_sched_subport_config(struct rte_sched_port
> > *port,
> >  	if (params->tb_size == 0)
> >  		return -3;
> >
> > -	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> > -		if (params->tc_rate[i] == 0 ||
> > -		    params->tc_rate[i] > params->tb_rate)
> > -			return -4;
> > +	for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
> > +		uint32_t tc_rate = params->tc_rate[j];
> > +		uint16_t qsize = port->qsize[i];
> > +
> > +		if (((qsize == 0) &&
> > +			((tc_rate != 0) && (j !=
> > RTE_SCHED_TRAFFIC_CLASS_BE))) ||
> > +			((qsize != 0) && (tc_rate == 0)) ||
> > +			(tc_rate > params->tb_rate))
> > +			return -3;
> > +
> > +		if (j < RTE_SCHED_TRAFFIC_CLASS_BE)
> > +			j++;
> >  	}
> >
> > +	if (port->qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
> > +		params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0)
> > +		return -3;
> > +
> >  	if (params->tc_period == 0)
> >  		return -5;
> >
> > @@ -851,13 +873,16 @@ rte_sched_subport_config(struct rte_sched_port
> > *port,
> >  	/* Traffic Classes (TCs) */
> >  	s->tc_period = rte_sched_time_ms_to_bytes(params->tc_period,
> > port->rate);
> >  	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> > -		s->tc_credits_per_period[i]
> > -			= rte_sched_time_ms_to_bytes(params->tc_period,
> > -						     params->tc_rate[i]);
> > +		if (port->qsize[i])
> > +			s->tc_credits_per_period[i]
> > +				= rte_sched_time_ms_to_bytes(params-
> > >tc_period,
> > +								 params-
> > >tc_rate[i]);
> > +
> >  	}
> >  	s->tc_time = port->time + s->tc_period;
> >  	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> > -		s->tc_credits[i] = s->tc_credits_per_period[i];
> > +		if (port->qsize[i])
> > +			s->tc_credits[i] = s->tc_credits_per_period[i];
> >
> >  #ifdef RTE_SCHED_SUBPORT_TC_OV
> >  	/* TC oversubscription */
> > @@ -910,9 +935,9 @@ rte_sched_pipe_config(struct rte_sched_port *port,
> >  		params = port->pipe_profiles + p->profile;
> >
> >  #ifdef RTE_SCHED_SUBPORT_TC_OV
> > -		double subport_tc3_rate = (double) s-
> > >tc_credits_per_period[3]
> > +		double subport_tc3_rate = (double) s-
> > >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
> >  			/ (double) s->tc_period;
> > -		double pipe_tc3_rate = (double) params-
> > >tc_credits_per_period[3]
> > +		double pipe_tc3_rate = (double) params-
> > >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
> >  			/ (double) params->tc_period;
> >  		uint32_t tc3_ov = s->tc_ov;
> >
> > @@ -945,15 +970,19 @@ rte_sched_pipe_config(struct rte_sched_port
> > *port,
> >
> >  	/* Traffic Classes (TCs) */
> >  	p->tc_time = port->time + params->tc_period;
> > +	p->n_be_queues = params->n_be_queues;
> >  	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> > -		p->tc_credits[i] = params->tc_credits_per_period[i];
> > +		if (port->qsize[i])
> > +			p->tc_credits[i] = params->tc_credits_per_period[i];
> >
> >  #ifdef RTE_SCHED_SUBPORT_TC_OV
> >  	{
> >  		/* Subport TC3 oversubscription */
> > -		double subport_tc3_rate = (double) s-
> > >tc_credits_per_period[3]
> > +		double subport_tc3_rate =
> > +			(double) s-
> > >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
> >  			/ (double) s->tc_period;
> > -		double pipe_tc3_rate = (double) params-
> > >tc_credits_per_period[3]
> > +		double pipe_tc3_rate =
> > +			(double) params-
> > >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
> >  			/ (double) params->tc_period;
> >  		uint32_t tc3_ov = s->tc_ov;
> >
> > @@ -992,7 +1021,7 @@ rte_sched_port_pipe_profile_add(struct
> > rte_sched_port *port,
> >  		return -2;
> >
> >  	/* Pipe params */
> > -	status = pipe_profile_check(params, port->rate);
> > +	status = pipe_profile_check(params, port->rate, &port->qsize[0]);
> >  	if (status != 0)
> >  		return status;
> >
> > @@ -1008,8 +1037,8 @@ rte_sched_port_pipe_profile_add(struct
> > rte_sched_port *port,
> >  	*pipe_profile_id = port->n_pipe_profiles;
> >  	port->n_pipe_profiles++;
> >
> > -	if (port->pipe_tc3_rate_max < params->tc_rate[3])
> > -		port->pipe_tc3_rate_max = params->tc_rate[3];
> > +	if (port->pipe_tc3_rate_max < params-
> > >tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE])
> > +		port->pipe_tc3_rate_max = params-
> > >tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
> >
> >  	rte_sched_port_log_pipe_profile(port, *pipe_profile_id);
> >
> > @@ -1020,15 +1049,12 @@ static inline uint32_t
> > rte_sched_port_qindex(struct rte_sched_port *port,
> >  	uint32_t subport,
> >  	uint32_t pipe,
> > -	uint32_t traffic_class,
> >  	uint32_t queue)
> >  {
> >  	return ((subport & (port->n_subports_per_port - 1)) <<
> >  			(port->n_pipes_per_subport_log2 + 4)) |
> >  			((pipe & (port->n_pipes_per_subport - 1)) << 4) |
> > -			((traffic_class &
> > -			    (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)) <<
> > 2) |
> > -			(queue &
> > (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1));
> > +			(queue & (RTE_SCHED_QUEUES_PER_PIPE - 1));
> >  }
> >
> >  void
> > @@ -1038,8 +1064,8 @@ rte_sched_port_pkt_write(struct rte_sched_port
> > *port,
> >  			 uint32_t traffic_class,
> >  			 uint32_t queue, enum rte_color color)  {
> > -	uint32_t queue_id = rte_sched_port_qindex(port, subport, pipe,
> > -			traffic_class, queue);
> > +	uint32_t queue_id = rte_sched_port_qindex(port, subport, pipe,
> > queue);
> > +
> >  	rte_mbuf_sched_set(pkt, queue_id, traffic_class, (uint8_t)color);  }
> 
> Ouch, this is a bug! The queue parameter of public function
> rte_sched_port_pkt_write() is not the "queue within the pipe" (0 .. 15), it is
> actually the "queue within the traffic class" (0 for high priority TCs, 0 .. 3 for BE
> TC), as explained in the API Doxygen description.
> 
> You removed the traffic class parameter in the above rte_sched_port_qindex,
> we should put it back and keep the initial meaning of the queue parameter for
> the _qindex function, which is internal function.
> 
> The way to quickly fix this is simply having a per-port pre-computed small table
> of 16 entries indexed by the traffic class ID and provides the ID of the first pipe
> queue of that traffic class; then you simply add "queue within traffic class"  ID
> on top of this to get the "queue within pipe", which provides the lower bits of
> the "queue within port" that this function is looking to compute. Makes sense?
> 
> It would be good to do some more tests to trace the packet through different
> pipe traffic classes and queues.
> 

Thanks for the proposal. I have made changes as per above in the v5. Did some tests to verify the function, looks good.


> > @@ -1050,12 +1076,12 @@ rte_sched_port_pkt_read_tree_path(struct
> > rte_sched_port *port,
> >  				  uint32_t *traffic_class, uint32_t *queue)  {
> >  	uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
> > +	uint32_t tc_id = rte_mbuf_sched_traffic_class_get(pkt);
> 
> I am afraid this is also incorrect, as although the mbuf->sched.traffic_class
> should be in sync with the traffic class built into the librte_sched hierarchy (and
> mbuf->sched.queue_id), there is no guarantee that:
> 1. The application is using and populating the mbuf->sched.traffic_class field 2.
> The application is assigning the same meaning or equivalence levels between
> the mbuf->sched.traffic_class and the librte_sched hierarchy traffic class.
> 
> We need to obtain the traffic class ID from queue ID, please. Thank you for
> your understanding!
> 
> There is also a quick way to do this as well through a similar small
> precomputed table: input = "queue within pipe" ID, outputs are traffic class ID
> and "queue within traffic class" ID. Makes sense?

Yes, thanks for proposal.

 
> We need to use this strategy for all the places in this file, so in this file we only
> use mbuf->sched.queue and not use mbuf->sched.traffic_class (nor its accessor
> function rte_mbuf_sched_traffic_class_get()) at all. This accessor function
> shows up 4 times currently.
> 

Changes are done in v5.


> >  	*subport = queue_id >> (port->n_pipes_per_subport_log2 + 4);
> >  	*pipe = (queue_id >> 4) & (port->n_pipes_per_subport - 1);
> > -	*traffic_class = (queue_id >> 2) &
> > -				(RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE -
> > 1);
> > -	*queue = queue_id & (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS -
> > 1);
> > +	*traffic_class = tc_id;
> > +	*queue = queue_id & (RTE_SCHED_QUEUES_PER_PIPE - 1);
> >  }
> >
> >  enum rte_color
> > @@ -1136,7 +1162,7 @@ static inline void
> > rte_sched_port_update_subport_stats(struct rte_sched_port *port,
> > uint32_t qindex, struct rte_mbuf *pkt)  {
> >  	struct rte_sched_subport *s = port->subport + (qindex /
> > rte_sched_port_queues_per_subport(port));
> > -	uint32_t tc_index = (qindex >> 2) & 0x3;
> > +	uint32_t tc_index = rte_mbuf_sched_traffic_class_get(pkt);
> 
> See above comment.

fixed in v5.

> >  	uint32_t pkt_len = pkt->pkt_len;
> >
> >  	s->stats.n_pkts_tc[tc_index] += 1;
> > @@ -1156,7 +1182,7 @@
> > rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> > *port,  #endif  {
> >  	struct rte_sched_subport *s = port->subport + (qindex /
> > rte_sched_port_queues_per_subport(port));
> > -	uint32_t tc_index = (qindex >> 2) & 0x3;
> > +	uint32_t tc_index = rte_mbuf_sched_traffic_class_get(pkt);
> 
> See above comment.

Fixed in v5.
 
> >  	uint32_t pkt_len = pkt->pkt_len;
> >
> >  	s->stats.n_pkts_tc_dropped[tc_index] += 1; @@ -1211,7 +1237,7 @@
> > rte_sched_port_red_drop(struct rte_sched_port *port, struct rte_mbuf
> > *pkt, uint3
> >  	uint32_t tc_index;
> >  	enum rte_color color;
> >
> > -	tc_index = (qindex >> 2) & 0x3;
> > +	tc_index = rte_mbuf_sched_traffic_class_get(pkt);
> >  	color = rte_sched_port_pkt_read_color(pkt);
> >  	red_cfg = &port->red_config[tc_index][color];
> >
> > @@ -1528,6 +1554,7 @@ grinder_credits_update(struct rte_sched_port
> > *port, uint32_t pos)
> >  	struct rte_sched_pipe *pipe = grinder->pipe;
> >  	struct rte_sched_pipe_profile *params = grinder->pipe_params;
> >  	uint64_t n_periods;
> > +	uint32_t i;
> >
> >  	/* Subport TB */
> >  	n_periods = (port->time - subport->tb_time) / subport->tb_period; @@
> > -1543,19 +1570,17 @@ grinder_credits_update(struct rte_sched_port
> > *port, uint32_t pos)
> >
> >  	/* Subport TCs */
> >  	if (unlikely(port->time >= subport->tc_time)) {
> > -		subport->tc_credits[0] = subport->tc_credits_per_period[0];
> > -		subport->tc_credits[1] = subport->tc_credits_per_period[1];
> > -		subport->tc_credits[2] = subport->tc_credits_per_period[2];
> > -		subport->tc_credits[3] = subport->tc_credits_per_period[3];
> > +		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
> > +			subport->tc_credits[i] = subport-
> > >tc_credits_per_period[i];
> > +
> >  		subport->tc_time = port->time + subport->tc_period;
> >  	}
> >
> >  	/* Pipe TCs */
> >  	if (unlikely(port->time >= pipe->tc_time)) {
> > -		pipe->tc_credits[0] = params->tc_credits_per_period[0];
> > -		pipe->tc_credits[1] = params->tc_credits_per_period[1];
> > -		pipe->tc_credits[2] = params->tc_credits_per_period[2];
> > -		pipe->tc_credits[3] = params->tc_credits_per_period[3];
> > +		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
> > +			pipe->tc_credits[i] = params-
> > >tc_credits_per_period[i];
> > +
> >  		pipe->tc_time = port->time + params->tc_period;
> >  	}
> >  }
> > @@ -1568,21 +1593,29 @@ grinder_tc_ov_credits_update(struct
> > rte_sched_port *port, uint32_t pos)
> >  	struct rte_sched_grinder *grinder = port->grinder + pos;
> >  	struct rte_sched_subport *subport = grinder->subport;
> >  	uint32_t
> > tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> > -	uint32_t tc_ov_consumption_max;
> > +	uint32_t tc_consumption = 0, tc_ov_consumption_max;
> >  	uint32_t tc_ov_wm = subport->tc_ov_wm;
> > +	uint32_t i;
> >
> >  	if (subport->tc_ov == 0)
> >  		return subport->tc_ov_wm_max;
> >
> > -	tc_ov_consumption[0] = subport->tc_credits_per_period[0] -
> > subport->tc_credits[0];
> > -	tc_ov_consumption[1] = subport->tc_credits_per_period[1] -
> > subport->tc_credits[1];
> > -	tc_ov_consumption[2] = subport->tc_credits_per_period[2] -
> > subport->tc_credits[2];
> > -	tc_ov_consumption[3] = subport->tc_credits_per_period[3] -
> > subport->tc_credits[3];
> > +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
> > +		tc_ov_consumption[i] =
> > +			subport->tc_credits_per_period[i] - subport-
> > >tc_credits[i];
> > +		tc_consumption += tc_ov_consumption[i];
> > +	}
> > +
> > +	tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] =
> > +		subport-
> > >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
> > +		subport->tc_credits[RTE_SCHED_TRAFFIC_CLASS_BE];
> > +
> >
> > -	tc_ov_consumption_max = subport->tc_credits_per_period[3] -
> > -		(tc_ov_consumption[0] + tc_ov_consumption[1] +
> > tc_ov_consumption[2]);
> > +	tc_ov_consumption_max =
> > +		subport-
> > >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] - tc_consumption;
> >
> > -	if (tc_ov_consumption[3] > (tc_ov_consumption_max - port->mtu))
> > {
> > +	if (tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] >
> > +		(tc_ov_consumption_max - port->mtu)) {
> >  		tc_ov_wm  -= tc_ov_wm >> 7;
> >  		if (tc_ov_wm < subport->tc_ov_wm_min)
> >  			tc_ov_wm = subport->tc_ov_wm_min;
> > @@ -1605,6 +1638,7 @@ grinder_credits_update(struct rte_sched_port
> > *port, uint32_t pos)
> >  	struct rte_sched_pipe *pipe = grinder->pipe;
> >  	struct rte_sched_pipe_profile *params = grinder->pipe_params;
> >  	uint64_t n_periods;
> > +	uint32_t i;
> >
> >  	/* Subport TB */
> >  	n_periods = (port->time - subport->tb_time) / subport->tb_period; @@
> > -1622,10 +1656,8 @@ grinder_credits_update(struct rte_sched_port
> > *port, uint32_t pos)
> >  	if (unlikely(port->time >= subport->tc_time)) {
> >  		subport->tc_ov_wm = grinder_tc_ov_credits_update(port,
> > pos);
> >
> > -		subport->tc_credits[0] = subport->tc_credits_per_period[0];
> > -		subport->tc_credits[1] = subport->tc_credits_per_period[1];
> > -		subport->tc_credits[2] = subport->tc_credits_per_period[2];
> > -		subport->tc_credits[3] = subport->tc_credits_per_period[3];
> > +		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
> > +			subport->tc_credits[i] = subport-
> > >tc_credits_per_period[i];
> >
> >  		subport->tc_time = port->time + subport->tc_period;
> >  		subport->tc_ov_period_id++;
> > @@ -1633,10 +1665,8 @@ grinder_credits_update(struct rte_sched_port
> > *port, uint32_t pos)
> >
> >  	/* Pipe TCs */
> >  	if (unlikely(port->time >= pipe->tc_time)) {
> > -		pipe->tc_credits[0] = params->tc_credits_per_period[0];
> > -		pipe->tc_credits[1] = params->tc_credits_per_period[1];
> > -		pipe->tc_credits[2] = params->tc_credits_per_period[2];
> > -		pipe->tc_credits[3] = params->tc_credits_per_period[3];
> > +		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
> > +			pipe->tc_credits[i] = params-
> > >tc_credits_per_period[i];
> >  		pipe->tc_time = port->time + params->tc_period;
> >  	}
> >
> > @@ -1701,11 +1731,18 @@ grinder_credits_check(struct rte_sched_port
> > *port, uint32_t pos)
> >  	uint32_t subport_tc_credits = subport->tc_credits[tc_index];
> >  	uint32_t pipe_tb_credits = pipe->tb_credits;
> >  	uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
> > -	uint32_t pipe_tc_ov_mask1[] = {UINT32_MAX, UINT32_MAX,
> > UINT32_MAX, pipe->tc_ov_credits};
> > -	uint32_t pipe_tc_ov_mask2[] = {0, 0, 0, UINT32_MAX};
> > -	uint32_t pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
> > +	uint32_t
> > pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> > +	uint32_t
> > pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
> > +	uint32_t pipe_tc_ov_credits, i;
> >  	int enough_credits;
> >
> > +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> > +		pipe_tc_ov_mask1[i] = UINT32_MAX;
> > +
> > +	pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe-
> > >tc_ov_credits;
> > +	pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] =
> > UINT32_MAX;
> > +	pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
> > +
> >  	/* Check pipe and subport credits */
> >  	enough_credits = (pkt_len <= subport_tb_credits) &&
> >  		(pkt_len <= subport_tc_credits) &&
> > @@ -1860,68 +1897,65 @@ static inline void
> > grinder_tccache_populate(struct rte_sched_port *port, uint32_t pos,
> > uint32_t qindex, uint16_t qmask)  {
> >  	struct rte_sched_grinder *grinder = port->grinder + pos;
> > -	uint8_t b[4];
> > +	uint8_t b, i;
> >
> >  	grinder->tccache_w = 0;
> >  	grinder->tccache_r = 0;
> >
> > -	b[0] = (uint8_t) (qmask & 0xF);
> > -	b[1] = (uint8_t) ((qmask >> 4) & 0xF);
> > -	b[2] = (uint8_t) ((qmask >> 8) & 0xF);
> > -	b[3] = (uint8_t) ((qmask >> 12) & 0xF);
> > -
> > -	grinder->tccache_qmask[grinder->tccache_w] = b[0];
> > -	grinder->tccache_qindex[grinder->tccache_w] = qindex;
> > -	grinder->tccache_w += (b[0] != 0);
> > -
> > -	grinder->tccache_qmask[grinder->tccache_w] = b[1];
> > -	grinder->tccache_qindex[grinder->tccache_w] = qindex + 4;
> > -	grinder->tccache_w += (b[1] != 0);
> > -
> > -	grinder->tccache_qmask[grinder->tccache_w] = b[2];
> > -	grinder->tccache_qindex[grinder->tccache_w] = qindex + 8;
> > -	grinder->tccache_w += (b[2] != 0);
> > +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
> > +		b = (uint8_t) ((qmask >> i) & 0x1);
> > +		grinder->tccache_qmask[grinder->tccache_w] = b;
> > +		grinder->tccache_qindex[grinder->tccache_w] = qindex + i;
> > +		grinder->tccache_w += (b != 0);
> > +	}
> >
> > -	grinder->tccache_qmask[grinder->tccache_w] = b[3];
> > -	grinder->tccache_qindex[grinder->tccache_w] = qindex + 12;
> > -	grinder->tccache_w += (b[3] != 0);
> > +	b = (uint8_t) (qmask >> (RTE_SCHED_TRAFFIC_CLASS_BE));
> > +	grinder->tccache_qmask[grinder->tccache_w] = b;
> > +	grinder->tccache_qindex[grinder->tccache_w] = qindex +
> > +		RTE_SCHED_TRAFFIC_CLASS_BE;
> > +	grinder->tccache_w += (b != 0);
> >  }
> >
> >  static inline int
> >  grinder_next_tc(struct rte_sched_port *port, uint32_t pos)  {
> >  	struct rte_sched_grinder *grinder = port->grinder + pos;
> > +	struct rte_sched_pipe *pipe = grinder->pipe;
> >  	struct rte_mbuf **qbase;
> > -	uint32_t qindex;
> > +	uint32_t qindex, qpos = 0;
> >  	uint16_t qsize;
> >
> >  	if (grinder->tccache_r == grinder->tccache_w)
> >  		return 0;
> >
> >  	qindex = grinder->tccache_qindex[grinder->tccache_r];
> > +	grinder->tc_index = qindex & 0xf;
> >  	qbase = rte_sched_port_qbase(port, qindex);
> > -	qsize = rte_sched_port_qsize(port, qindex);
> > -
> > -	grinder->tc_index = (qindex >> 2) & 0x3;
> > -	grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
> > -	grinder->qsize[grinder->tc_index] = qsize;
> >
> > -	grinder->qindex[0] = qindex;
> > -	grinder->qindex[1] = qindex + 1;
> > -	grinder->qindex[2] = qindex + 2;
> > -	grinder->qindex[3] = qindex + 3;
> > +	if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
> > +		qsize = rte_sched_port_qsize(port, qindex);
> >
> > -	grinder->queue[0] = port->queue + qindex;
> > -	grinder->queue[1] = port->queue + qindex + 1;
> > -	grinder->queue[2] = port->queue + qindex + 2;
> > -	grinder->queue[3] = port->queue + qindex + 3;
> > +		grinder->queue[qpos] = port->queue + qindex;
> > +		grinder->qbase[qpos] = qbase;
> > +		grinder->qindex[qpos] = qindex;
> > +		grinder->qsize[qpos] = qsize;
> > +		grinder->qmask = grinder->tccache_qmask[grinder-
> > >tccache_r];
> > +		grinder->tccache_r++;
> >
> > -	grinder->qbase[0] = qbase;
> > -	grinder->qbase[1] = qbase + qsize;
> > -	grinder->qbase[2] = qbase + 2 * qsize;
> > -	grinder->qbase[3] = qbase + 3 * qsize;
> > +		return 1;
> > +	}
> >
> > +	for ( ; qpos < pipe->n_be_queues; qpos++) {
> > +		qsize = rte_sched_port_qsize(port, qindex + qpos);
> > +		grinder->queue[qpos] = port->queue + qindex + qpos;
> > +		grinder->qbase[qpos] = qbase + qpos * qsize;
> > +		grinder->qindex[qpos] = qindex + qpos;
> > +		grinder->qsize[qpos] = qsize;
> > +	}
> > +	grinder->tc_index = RTE_SCHED_TRAFFIC_CLASS_BE;
> > +	grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
> >  	grinder->tccache_r++;
> > +
> >  	return 1;
> >  }
> >
> > diff --git a/lib/librte_sched/rte_sched.h
> > b/lib/librte_sched/rte_sched.h index 2a935998a..ae4dfb311 100644
> > --- a/lib/librte_sched/rte_sched.h
> > +++ b/lib/librte_sched/rte_sched.h
> > @@ -83,9 +83,9 @@ extern "C" {
> >  #define RTE_SCHED_BE_QUEUES_PER_PIPE    4
> >
> >  /** Number of traffic classes per pipe (as well as subport).
> > - * Cannot be changed.
> >   */
> > -#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    4
> > +#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    \
> > +(RTE_SCHED_QUEUES_PER_PIPE - RTE_SCHED_BE_QUEUES_PER_PIPE + 1)
> >
> >  /** Number of queues per pipe traffic class. Cannot be changed. */
> >  #define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS    4
> > @@ -171,9 +171,7 @@ struct rte_sched_pipe_params {
> >  	/**< Traffic class rates (measured in bytes per second) */
> >  	uint32_t tc_period;
> >  	/**< Enforcement period (measured in milliseconds) */ -#ifdef
> > RTE_SCHED_SUBPORT_TC_OV
> >  	uint8_t tc_ov_weight;		 /**< Weight Traffic class 3
> > oversubscription */
> > -#endif
> >
> >  	/* Pipe queues */
> >  	uint8_t  wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE]; /**< WRR
> weights
> > */ @@ -206,11 +204,9 @@ struct rte_sched_port_params {
> >  					  * (measured in bytes) */
> >  	uint32_t n_subports_per_port;    /**< Number of subports */
> >  	uint32_t n_pipes_per_subport;    /**< Number of pipes per subport
> > */
> > -	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> > +	uint16_t qsize[RTE_SCHED_QUEUES_PER_PIPE];
> >  	/**< Packet queue size for each traffic class.
> > -	 * All queues within the same pipe traffic class have the same
> > -	 * size. Queues from different pipes serving the same traffic
> > -	 * class have the same size. */
> > +	 * Queues which are not needed are allowed to have zero size. */
> >  	struct rte_sched_pipe_params *pipe_profiles;
> >  	/**< Pipe profile table.
> >  	 * Every pipe is configured using one of the profiles from this table.
> > */
> > --
> > 2.21.0
  
Jasvinder Singh July 17, 2019, 3:03 p.m. UTC | #4
<snip>

> >
> >  /** Number of traffic classes per pipe (as well as subport).
> > - * Cannot be changed.
> >   */
> > -#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    4
> > +#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    \
> > +(RTE_SCHED_QUEUES_PER_PIPE - RTE_SCHED_BE_QUEUES_PER_PIPE + 1)
> >
> >  /** Number of queues per pipe traffic class. Cannot be changed. */
> >  #define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS    4
> > @@ -171,9 +171,7 @@ struct rte_sched_pipe_params {
> >  	/**< Traffic class rates (measured in bytes per second) */
> >  	uint32_t tc_period;
> >  	/**< Enforcement period (measured in milliseconds) */ -#ifdef
> > RTE_SCHED_SUBPORT_TC_OV
> >  	uint8_t tc_ov_weight;		 /**< Weight Traffic class 3
> > oversubscription */
> > -#endif
> >
> >  	/* Pipe queues */
> >  	uint8_t  wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE]; /**< WRR
> weights
> > */ @@ -206,11 +204,9 @@ struct rte_sched_port_params {
> >  					  * (measured in bytes) */
> >  	uint32_t n_subports_per_port;    /**< Number of subports */
> >  	uint32_t n_pipes_per_subport;    /**< Number of pipes per subport
> > */
> > -	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> > +	uint16_t qsize[RTE_SCHED_QUEUES_PER_PIPE];
> 
> Given that it does not make any sense to have queues of different lengths
> within the same traffic class, we should probably keep the size of this array as
> RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE. Internally though, it makes sense to
> have the port->qsize array of size RTE_SCHED_QUEUES_PER_PIPE with all BE
> queues of the same size. Also simplifies implementation, as no need to worry
> about the n_be_queues (see the other email).


array size is reversed to RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE in v5, thanks.


> >  	/**< Packet queue size for each traffic class.
> > -	 * All queues within the same pipe traffic class have the same
> > -	 * size. Queues from different pipes serving the same traffic
> > -	 * class have the same size. */
> > +	 * Queues which are not needed are allowed to have zero size. */
> >  	struct rte_sched_pipe_params *pipe_profiles;
> >  	/**< Pipe profile table.
> >  	 * Every pipe is configured using one of the profiles from this table.
> > */
> > --
> > 2.21.0
  

Patch

diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index b1f521794..411924c36 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -149,15 +149,15 @@  struct rte_sched_grinder {
 	struct rte_sched_pipe_profile *pipe_params;
 
 	/* TC cache */
-	uint8_t tccache_qmask[4];
-	uint32_t tccache_qindex[4];
+	uint8_t tccache_qmask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t tccache_qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 	uint32_t tccache_w;
 	uint32_t tccache_r;
 
 	/* Current TC */
 	uint32_t tc_index;
-	struct rte_sched_queue *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-	struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	struct rte_sched_queue *queue[RTE_SCHED_MAX_QUEUES_PER_TC];
+	struct rte_mbuf **qbase[RTE_SCHED_MAX_QUEUES_PER_TC];
 	uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC];
 	uint16_t qsize[RTE_SCHED_MAX_QUEUES_PER_TC];
 	uint32_t qmask;
@@ -178,7 +178,7 @@  struct rte_sched_port {
 	uint32_t rate;
 	uint32_t mtu;
 	uint32_t frame_overhead;
-	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint16_t qsize[RTE_SCHED_QUEUES_PER_PIPE];
 	uint32_t n_pipe_profiles;
 	uint32_t pipe_tc3_rate_max;
 #ifdef RTE_SCHED_RED
@@ -260,14 +260,13 @@  rte_sched_port_qbase(struct rte_sched_port *port, uint32_t qindex)
 static inline uint16_t
 rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex)
 {
-	uint32_t tc = (qindex >> 2) & 0x3;
-
-	return port->qsize[tc];
+	uint32_t qpos = qindex & 0xF;
+	return port->qsize[qpos];
 }
 
 static int
 pipe_profile_check(struct rte_sched_pipe_params *params,
-	uint32_t rate)
+	uint32_t rate, uint16_t *qsize)
 {
 	uint32_t i;
 
@@ -285,12 +284,17 @@  pipe_profile_check(struct rte_sched_pipe_params *params,
 		return -12;
 
 	/* TC rate: non-zero, less than pipe rate */
-	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-		if (params->tc_rate[i] == 0 ||
-			params->tc_rate[i] > params->tb_rate)
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
+		if ((qsize[i] == 0 && params->tc_rate[i] != 0) ||
+			(qsize[i] != 0 && (params->tc_rate[i] == 0 ||
+			params->tc_rate[i] > params->tb_rate)))
 			return -13;
+
 	}
 
+	if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0)
+		return -13;
+
 	/* TC period: non-zero */
 	if (params->tc_period == 0)
 		return -14;
@@ -302,8 +306,10 @@  pipe_profile_check(struct rte_sched_pipe_params *params,
 #endif
 
 	/* Queue WRR weights: non-zero */
-	for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
-		if (params->wrr_weights[i] == 0)
+	for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++) {
+		uint32_t qindex = RTE_SCHED_TRAFFIC_CLASS_BE + i;
+		if ((qsize[qindex] != 0 && params->wrr_weights[i] == 0) ||
+			(qsize[qindex] == 0 && params->wrr_weights[i] != 0))
 			return -16;
 	}
 	return 0;
@@ -343,10 +349,10 @@  rte_sched_port_check_params(struct rte_sched_port_params *params)
 	/* qsize: non-zero, power of 2,
 	 * no bigger than 32K (due to 16-bit read/write pointers)
 	 */
-	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+	for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
 		uint16_t qsize = params->qsize[i];
-
-		if (qsize == 0 || !rte_is_power_of_2(qsize))
+		if ((qsize != 0 && !rte_is_power_of_2(qsize)) ||
+			((i == RTE_SCHED_TRAFFIC_CLASS_BE) && (qsize == 0)))
 			return -8;
 	}
 
@@ -360,7 +366,7 @@  rte_sched_port_check_params(struct rte_sched_port_params *params)
 		struct rte_sched_pipe_params *p = params->pipe_profiles + i;
 		int status;
 
-		status = pipe_profile_check(p, params->rate);
+		status = pipe_profile_check(p, params->rate, &params->qsize[0]);
 		if (status != 0)
 			return status;
 	}
@@ -389,9 +395,9 @@  rte_sched_port_get_array_base(struct rte_sched_port_params *params, enum rte_sch
 	uint32_t base, i;
 
 	size_per_pipe_queue_array = 0;
-	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-		size_per_pipe_queue_array += RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS
-			* params->qsize[i] * sizeof(struct rte_mbuf *);
+	for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
+		size_per_pipe_queue_array +=
+			params->qsize[i] * sizeof(struct rte_mbuf *);
 	}
 	size_queue_array = n_pipes_per_port * size_per_pipe_queue_array;
 
@@ -451,31 +457,12 @@  rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params)
 static void
 rte_sched_port_config_qsize(struct rte_sched_port *port)
 {
-	/* TC 0 */
+	uint32_t i;
 	port->qsize_add[0] = 0;
-	port->qsize_add[1] = port->qsize_add[0] + port->qsize[0];
-	port->qsize_add[2] = port->qsize_add[1] + port->qsize[0];
-	port->qsize_add[3] = port->qsize_add[2] + port->qsize[0];
-
-	/* TC 1 */
-	port->qsize_add[4] = port->qsize_add[3] + port->qsize[0];
-	port->qsize_add[5] = port->qsize_add[4] + port->qsize[1];
-	port->qsize_add[6] = port->qsize_add[5] + port->qsize[1];
-	port->qsize_add[7] = port->qsize_add[6] + port->qsize[1];
-
-	/* TC 2 */
-	port->qsize_add[8] = port->qsize_add[7] + port->qsize[1];
-	port->qsize_add[9] = port->qsize_add[8] + port->qsize[2];
-	port->qsize_add[10] = port->qsize_add[9] + port->qsize[2];
-	port->qsize_add[11] = port->qsize_add[10] + port->qsize[2];
-
-	/* TC 3 */
-	port->qsize_add[12] = port->qsize_add[11] + port->qsize[2];
-	port->qsize_add[13] = port->qsize_add[12] + port->qsize[3];
-	port->qsize_add[14] = port->qsize_add[13] + port->qsize[3];
-	port->qsize_add[15] = port->qsize_add[14] + port->qsize[3];
-
-	port->qsize_sum = port->qsize_add[15] + port->qsize[3];
+	for (i = 1; i < RTE_SCHED_QUEUES_PER_PIPE; i++)
+		port->qsize_add[i] = port->qsize_add[i-1] + port->qsize[i-1];
+
+	port->qsize_sum = port->qsize_add[15] + port->qsize[15];
 }
 
 static void
@@ -484,10 +471,11 @@  rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i)
 	struct rte_sched_pipe_profile *p = port->pipe_profiles + i;
 
 	RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
-		"    Token bucket: period = %u, credits per period = %u, size = %u\n"
-		"    Traffic classes: period = %u, credits per period = [%u, %u, %u, %u]\n"
-		"    Traffic class 3 oversubscription: weight = %hhu\n"
-		"    WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
+		"	Token bucket: period = %u, credits per period = %u, size = %u\n"
+		"	Traffic classes: period = %u,\n"
+		"	credits per period = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u]\n"
+		"	Best-effort traffic class oversubscription: weight = %hhu\n"
+		"	WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
 		i,
 
 		/* Token bucket */
@@ -501,8 +489,17 @@  rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i)
 		p->tc_credits_per_period[1],
 		p->tc_credits_per_period[2],
 		p->tc_credits_per_period[3],
-
-		/* Traffic class 3 oversubscription */
+		p->tc_credits_per_period[4],
+		p->tc_credits_per_period[5],
+		p->tc_credits_per_period[6],
+		p->tc_credits_per_period[7],
+		p->tc_credits_per_period[8],
+		p->tc_credits_per_period[9],
+		p->tc_credits_per_period[10],
+		p->tc_credits_per_period[11],
+		p->tc_credits_per_period[12],
+
+		/* Best-effort traffic class oversubscription */
 		p->tc_ov_weight,
 
 		/* WRR */
@@ -548,9 +545,10 @@  rte_sched_pipe_profile_convert(struct rte_sched_port *port,
 						rate);
 
 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
-		dst->tc_credits_per_period[i]
-			= rte_sched_time_ms_to_bytes(src->tc_period,
-				src->tc_rate[i]);
+		if (port->qsize[i])
+			dst->tc_credits_per_period[i]
+				= rte_sched_time_ms_to_bytes(src->tc_period,
+					src->tc_rate[i]);
 
 #ifdef RTE_SCHED_SUBPORT_TC_OV
 	dst->tc_ov_weight = src->tc_ov_weight;
@@ -558,7 +556,7 @@  rte_sched_pipe_profile_convert(struct rte_sched_port *port,
 
 	/* WRR queues */
 	for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++)
-		if (port->qsize[i])
+		if (port->qsize[RTE_SCHED_TRAFFIC_CLASS_BE + i])
 			dst->n_be_queues++;
 
 	if (dst->n_be_queues == 1)
@@ -620,7 +618,7 @@  rte_sched_port_config_pipe_profile_table(struct rte_sched_port *port,
 	port->pipe_tc3_rate_max = 0;
 	for (i = 0; i < port->n_pipe_profiles; i++) {
 		struct rte_sched_pipe_params *src = params->pipe_profiles + i;
-		uint32_t pipe_tc3_rate = src->tc_rate[3];
+		uint32_t pipe_tc3_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
 
 		if (port->pipe_tc3_rate_max < pipe_tc3_rate)
 			port->pipe_tc3_rate_max = pipe_tc3_rate;
@@ -762,12 +760,14 @@  rte_sched_port_free(struct rte_sched_port *port)
 	for (qindex = 0; qindex < n_queues_per_port; qindex++) {
 		struct rte_mbuf **mbufs = rte_sched_port_qbase(port, qindex);
 		uint16_t qsize = rte_sched_port_qsize(port, qindex);
-		struct rte_sched_queue *queue = port->queue + qindex;
-		uint16_t qr = queue->qr & (qsize - 1);
-		uint16_t qw = queue->qw & (qsize - 1);
+		if (qsize != 0) {
+			struct rte_sched_queue *queue = port->queue + qindex;
+			uint16_t qr = queue->qr & (qsize - 1);
+			uint16_t qw = queue->qw & (qsize - 1);
 
-		for (; qr != qw; qr = (qr + 1) & (qsize - 1))
-			rte_pktmbuf_free(mbufs[qr]);
+			for (; qr != qw; qr = (qr + 1) & (qsize - 1))
+				rte_pktmbuf_free(mbufs[qr]);
+		}
 	}
 
 	rte_bitmap_free(port->bmp);
@@ -780,9 +780,10 @@  rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i)
 	struct rte_sched_subport *s = port->subport + i;
 
 	RTE_LOG(DEBUG, SCHED, "Low level config for subport %u:\n"
-		"    Token bucket: period = %u, credits per period = %u, size = %u\n"
-		"    Traffic classes: period = %u, credits per period = [%u, %u, %u, %u]\n"
-		"    Traffic class 3 oversubscription: wm min = %u, wm max = %u\n",
+		"	Token bucket: period = %u, credits per period = %u, size = %u\n"
+		"	Traffic classes: period = %u\n"
+		"	credits per period = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u]\n"
+		"	Best effort traffic class oversubscription: wm min = %u, wm max = %u\n",
 		i,
 
 		/* Token bucket */
@@ -796,8 +797,17 @@  rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i)
 		s->tc_credits_per_period[1],
 		s->tc_credits_per_period[2],
 		s->tc_credits_per_period[3],
-
-		/* Traffic class 3 oversubscription */
+		s->tc_credits_per_period[4],
+		s->tc_credits_per_period[5],
+		s->tc_credits_per_period[6],
+		s->tc_credits_per_period[7],
+		s->tc_credits_per_period[8],
+		s->tc_credits_per_period[9],
+		s->tc_credits_per_period[10],
+		s->tc_credits_per_period[11],
+		s->tc_credits_per_period[12],
+
+		/* Best effort traffic class oversubscription */
 		s->tc_ov_wm_min,
 		s->tc_ov_wm_max);
 }
@@ -808,7 +818,7 @@  rte_sched_subport_config(struct rte_sched_port *port,
 	struct rte_sched_subport_params *params)
 {
 	struct rte_sched_subport *s;
-	uint32_t i;
+	uint32_t i, j;
 
 	/* Check user parameters */
 	if (port == NULL ||
@@ -822,12 +832,24 @@  rte_sched_subport_config(struct rte_sched_port *port,
 	if (params->tb_size == 0)
 		return -3;
 
-	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-		if (params->tc_rate[i] == 0 ||
-		    params->tc_rate[i] > params->tb_rate)
-			return -4;
+	for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
+		uint32_t tc_rate = params->tc_rate[j];
+		uint16_t qsize = port->qsize[i];
+
+		if (((qsize == 0) &&
+			((tc_rate != 0) && (j != RTE_SCHED_TRAFFIC_CLASS_BE))) ||
+			((qsize != 0) && (tc_rate == 0)) ||
+			(tc_rate > params->tb_rate))
+			return -3;
+
+		if (j < RTE_SCHED_TRAFFIC_CLASS_BE)
+			j++;
 	}
 
+	if (port->qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
+		params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0)
+		return -3;
+
 	if (params->tc_period == 0)
 		return -5;
 
@@ -851,13 +873,16 @@  rte_sched_subport_config(struct rte_sched_port *port,
 	/* Traffic Classes (TCs) */
 	s->tc_period = rte_sched_time_ms_to_bytes(params->tc_period, port->rate);
 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-		s->tc_credits_per_period[i]
-			= rte_sched_time_ms_to_bytes(params->tc_period,
-						     params->tc_rate[i]);
+		if (port->qsize[i])
+			s->tc_credits_per_period[i]
+				= rte_sched_time_ms_to_bytes(params->tc_period,
+								 params->tc_rate[i]);
+
 	}
 	s->tc_time = port->time + s->tc_period;
 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
-		s->tc_credits[i] = s->tc_credits_per_period[i];
+		if (port->qsize[i])
+			s->tc_credits[i] = s->tc_credits_per_period[i];
 
 #ifdef RTE_SCHED_SUBPORT_TC_OV
 	/* TC oversubscription */
@@ -910,9 +935,9 @@  rte_sched_pipe_config(struct rte_sched_port *port,
 		params = port->pipe_profiles + p->profile;
 
 #ifdef RTE_SCHED_SUBPORT_TC_OV
-		double subport_tc3_rate = (double) s->tc_credits_per_period[3]
+		double subport_tc3_rate = (double) s->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
 			/ (double) s->tc_period;
-		double pipe_tc3_rate = (double) params->tc_credits_per_period[3]
+		double pipe_tc3_rate = (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
 			/ (double) params->tc_period;
 		uint32_t tc3_ov = s->tc_ov;
 
@@ -945,15 +970,19 @@  rte_sched_pipe_config(struct rte_sched_port *port,
 
 	/* Traffic Classes (TCs) */
 	p->tc_time = port->time + params->tc_period;
+	p->n_be_queues = params->n_be_queues;
 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
-		p->tc_credits[i] = params->tc_credits_per_period[i];
+		if (port->qsize[i])
+			p->tc_credits[i] = params->tc_credits_per_period[i];
 
 #ifdef RTE_SCHED_SUBPORT_TC_OV
 	{
 		/* Subport TC3 oversubscription */
-		double subport_tc3_rate = (double) s->tc_credits_per_period[3]
+		double subport_tc3_rate =
+			(double) s->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
 			/ (double) s->tc_period;
-		double pipe_tc3_rate = (double) params->tc_credits_per_period[3]
+		double pipe_tc3_rate =
+			(double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
 			/ (double) params->tc_period;
 		uint32_t tc3_ov = s->tc_ov;
 
@@ -992,7 +1021,7 @@  rte_sched_port_pipe_profile_add(struct rte_sched_port *port,
 		return -2;
 
 	/* Pipe params */
-	status = pipe_profile_check(params, port->rate);
+	status = pipe_profile_check(params, port->rate, &port->qsize[0]);
 	if (status != 0)
 		return status;
 
@@ -1008,8 +1037,8 @@  rte_sched_port_pipe_profile_add(struct rte_sched_port *port,
 	*pipe_profile_id = port->n_pipe_profiles;
 	port->n_pipe_profiles++;
 
-	if (port->pipe_tc3_rate_max < params->tc_rate[3])
-		port->pipe_tc3_rate_max = params->tc_rate[3];
+	if (port->pipe_tc3_rate_max < params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE])
+		port->pipe_tc3_rate_max = params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
 
 	rte_sched_port_log_pipe_profile(port, *pipe_profile_id);
 
@@ -1020,15 +1049,12 @@  static inline uint32_t
 rte_sched_port_qindex(struct rte_sched_port *port,
 	uint32_t subport,
 	uint32_t pipe,
-	uint32_t traffic_class,
 	uint32_t queue)
 {
 	return ((subport & (port->n_subports_per_port - 1)) <<
 			(port->n_pipes_per_subport_log2 + 4)) |
 			((pipe & (port->n_pipes_per_subport - 1)) << 4) |
-			((traffic_class &
-			    (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)) << 2) |
-			(queue & (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1));
+			(queue & (RTE_SCHED_QUEUES_PER_PIPE - 1));
 }
 
 void
@@ -1038,8 +1064,8 @@  rte_sched_port_pkt_write(struct rte_sched_port *port,
 			 uint32_t traffic_class,
 			 uint32_t queue, enum rte_color color)
 {
-	uint32_t queue_id = rte_sched_port_qindex(port, subport, pipe,
-			traffic_class, queue);
+	uint32_t queue_id = rte_sched_port_qindex(port, subport, pipe, queue);
+
 	rte_mbuf_sched_set(pkt, queue_id, traffic_class, (uint8_t)color);
 }
 
@@ -1050,12 +1076,12 @@  rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
 				  uint32_t *traffic_class, uint32_t *queue)
 {
 	uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
+	uint32_t tc_id = rte_mbuf_sched_traffic_class_get(pkt);
 
 	*subport = queue_id >> (port->n_pipes_per_subport_log2 + 4);
 	*pipe = (queue_id >> 4) & (port->n_pipes_per_subport - 1);
-	*traffic_class = (queue_id >> 2) &
-				(RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1);
-	*queue = queue_id & (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1);
+	*traffic_class = tc_id;
+	*queue = queue_id & (RTE_SCHED_QUEUES_PER_PIPE - 1);
 }
 
 enum rte_color
@@ -1136,7 +1162,7 @@  static inline void
 rte_sched_port_update_subport_stats(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
 {
 	struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
-	uint32_t tc_index = (qindex >> 2) & 0x3;
+	uint32_t tc_index = rte_mbuf_sched_traffic_class_get(pkt);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	s->stats.n_pkts_tc[tc_index] += 1;
@@ -1156,7 +1182,7 @@  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 #endif
 {
 	struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
-	uint32_t tc_index = (qindex >> 2) & 0x3;
+	uint32_t tc_index = rte_mbuf_sched_traffic_class_get(pkt);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	s->stats.n_pkts_tc_dropped[tc_index] += 1;
@@ -1211,7 +1237,7 @@  rte_sched_port_red_drop(struct rte_sched_port *port, struct rte_mbuf *pkt, uint3
 	uint32_t tc_index;
 	enum rte_color color;
 
-	tc_index = (qindex >> 2) & 0x3;
+	tc_index = rte_mbuf_sched_traffic_class_get(pkt);
 	color = rte_sched_port_pkt_read_color(pkt);
 	red_cfg = &port->red_config[tc_index][color];
 
@@ -1528,6 +1554,7 @@  grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
 	struct rte_sched_pipe *pipe = grinder->pipe;
 	struct rte_sched_pipe_profile *params = grinder->pipe_params;
 	uint64_t n_periods;
+	uint32_t i;
 
 	/* Subport TB */
 	n_periods = (port->time - subport->tb_time) / subport->tb_period;
@@ -1543,19 +1570,17 @@  grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
 
 	/* Subport TCs */
 	if (unlikely(port->time >= subport->tc_time)) {
-		subport->tc_credits[0] = subport->tc_credits_per_period[0];
-		subport->tc_credits[1] = subport->tc_credits_per_period[1];
-		subport->tc_credits[2] = subport->tc_credits_per_period[2];
-		subport->tc_credits[3] = subport->tc_credits_per_period[3];
+		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
+			subport->tc_credits[i] = subport->tc_credits_per_period[i];
+
 		subport->tc_time = port->time + subport->tc_period;
 	}
 
 	/* Pipe TCs */
 	if (unlikely(port->time >= pipe->tc_time)) {
-		pipe->tc_credits[0] = params->tc_credits_per_period[0];
-		pipe->tc_credits[1] = params->tc_credits_per_period[1];
-		pipe->tc_credits[2] = params->tc_credits_per_period[2];
-		pipe->tc_credits[3] = params->tc_credits_per_period[3];
+		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
+			pipe->tc_credits[i] = params->tc_credits_per_period[i];
+
 		pipe->tc_time = port->time + params->tc_period;
 	}
 }
@@ -1568,21 +1593,29 @@  grinder_tc_ov_credits_update(struct rte_sched_port *port, uint32_t pos)
 	struct rte_sched_grinder *grinder = port->grinder + pos;
 	struct rte_sched_subport *subport = grinder->subport;
 	uint32_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-	uint32_t tc_ov_consumption_max;
+	uint32_t tc_consumption = 0, tc_ov_consumption_max;
 	uint32_t tc_ov_wm = subport->tc_ov_wm;
+	uint32_t i;
 
 	if (subport->tc_ov == 0)
 		return subport->tc_ov_wm_max;
 
-	tc_ov_consumption[0] = subport->tc_credits_per_period[0] - subport->tc_credits[0];
-	tc_ov_consumption[1] = subport->tc_credits_per_period[1] - subport->tc_credits[1];
-	tc_ov_consumption[2] = subport->tc_credits_per_period[2] - subport->tc_credits[2];
-	tc_ov_consumption[3] = subport->tc_credits_per_period[3] - subport->tc_credits[3];
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
+		tc_ov_consumption[i] =
+			subport->tc_credits_per_period[i] - subport->tc_credits[i];
+		tc_consumption += tc_ov_consumption[i];
+	}
+
+	tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] =
+		subport->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
+		subport->tc_credits[RTE_SCHED_TRAFFIC_CLASS_BE];
+
 
-	tc_ov_consumption_max = subport->tc_credits_per_period[3] -
-		(tc_ov_consumption[0] + tc_ov_consumption[1] + tc_ov_consumption[2]);
+	tc_ov_consumption_max =
+		subport->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] - tc_consumption;
 
-	if (tc_ov_consumption[3] > (tc_ov_consumption_max - port->mtu)) {
+	if (tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] >
+		(tc_ov_consumption_max - port->mtu)) {
 		tc_ov_wm  -= tc_ov_wm >> 7;
 		if (tc_ov_wm < subport->tc_ov_wm_min)
 			tc_ov_wm = subport->tc_ov_wm_min;
@@ -1605,6 +1638,7 @@  grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
 	struct rte_sched_pipe *pipe = grinder->pipe;
 	struct rte_sched_pipe_profile *params = grinder->pipe_params;
 	uint64_t n_periods;
+	uint32_t i;
 
 	/* Subport TB */
 	n_periods = (port->time - subport->tb_time) / subport->tb_period;
@@ -1622,10 +1656,8 @@  grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
 	if (unlikely(port->time >= subport->tc_time)) {
 		subport->tc_ov_wm = grinder_tc_ov_credits_update(port, pos);
 
-		subport->tc_credits[0] = subport->tc_credits_per_period[0];
-		subport->tc_credits[1] = subport->tc_credits_per_period[1];
-		subport->tc_credits[2] = subport->tc_credits_per_period[2];
-		subport->tc_credits[3] = subport->tc_credits_per_period[3];
+		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
+			subport->tc_credits[i] = subport->tc_credits_per_period[i];
 
 		subport->tc_time = port->time + subport->tc_period;
 		subport->tc_ov_period_id++;
@@ -1633,10 +1665,8 @@  grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
 
 	/* Pipe TCs */
 	if (unlikely(port->time >= pipe->tc_time)) {
-		pipe->tc_credits[0] = params->tc_credits_per_period[0];
-		pipe->tc_credits[1] = params->tc_credits_per_period[1];
-		pipe->tc_credits[2] = params->tc_credits_per_period[2];
-		pipe->tc_credits[3] = params->tc_credits_per_period[3];
+		for (i = 0; i <= RTE_SCHED_TRAFFIC_CLASS_BE; i++)
+			pipe->tc_credits[i] = params->tc_credits_per_period[i];
 		pipe->tc_time = port->time + params->tc_period;
 	}
 
@@ -1701,11 +1731,18 @@  grinder_credits_check(struct rte_sched_port *port, uint32_t pos)
 	uint32_t subport_tc_credits = subport->tc_credits[tc_index];
 	uint32_t pipe_tb_credits = pipe->tb_credits;
 	uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
-	uint32_t pipe_tc_ov_mask1[] = {UINT32_MAX, UINT32_MAX, UINT32_MAX, pipe->tc_ov_credits};
-	uint32_t pipe_tc_ov_mask2[] = {0, 0, 0, UINT32_MAX};
-	uint32_t pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
+	uint32_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
+	uint32_t pipe_tc_ov_credits, i;
 	int enough_credits;
 
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
+		pipe_tc_ov_mask1[i] = UINT32_MAX;
+
+	pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe->tc_ov_credits;
+	pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = UINT32_MAX;
+	pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
+
 	/* Check pipe and subport credits */
 	enough_credits = (pkt_len <= subport_tb_credits) &&
 		(pkt_len <= subport_tc_credits) &&
@@ -1860,68 +1897,65 @@  static inline void
 grinder_tccache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t qindex, uint16_t qmask)
 {
 	struct rte_sched_grinder *grinder = port->grinder + pos;
-	uint8_t b[4];
+	uint8_t b, i;
 
 	grinder->tccache_w = 0;
 	grinder->tccache_r = 0;
 
-	b[0] = (uint8_t) (qmask & 0xF);
-	b[1] = (uint8_t) ((qmask >> 4) & 0xF);
-	b[2] = (uint8_t) ((qmask >> 8) & 0xF);
-	b[3] = (uint8_t) ((qmask >> 12) & 0xF);
-
-	grinder->tccache_qmask[grinder->tccache_w] = b[0];
-	grinder->tccache_qindex[grinder->tccache_w] = qindex;
-	grinder->tccache_w += (b[0] != 0);
-
-	grinder->tccache_qmask[grinder->tccache_w] = b[1];
-	grinder->tccache_qindex[grinder->tccache_w] = qindex + 4;
-	grinder->tccache_w += (b[1] != 0);
-
-	grinder->tccache_qmask[grinder->tccache_w] = b[2];
-	grinder->tccache_qindex[grinder->tccache_w] = qindex + 8;
-	grinder->tccache_w += (b[2] != 0);
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
+		b = (uint8_t) ((qmask >> i) & 0x1);
+		grinder->tccache_qmask[grinder->tccache_w] = b;
+		grinder->tccache_qindex[grinder->tccache_w] = qindex + i;
+		grinder->tccache_w += (b != 0);
+	}
 
-	grinder->tccache_qmask[grinder->tccache_w] = b[3];
-	grinder->tccache_qindex[grinder->tccache_w] = qindex + 12;
-	grinder->tccache_w += (b[3] != 0);
+	b = (uint8_t) (qmask >> (RTE_SCHED_TRAFFIC_CLASS_BE));
+	grinder->tccache_qmask[grinder->tccache_w] = b;
+	grinder->tccache_qindex[grinder->tccache_w] = qindex +
+		RTE_SCHED_TRAFFIC_CLASS_BE;
+	grinder->tccache_w += (b != 0);
 }
 
 static inline int
 grinder_next_tc(struct rte_sched_port *port, uint32_t pos)
 {
 	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_pipe *pipe = grinder->pipe;
 	struct rte_mbuf **qbase;
-	uint32_t qindex;
+	uint32_t qindex, qpos = 0;
 	uint16_t qsize;
 
 	if (grinder->tccache_r == grinder->tccache_w)
 		return 0;
 
 	qindex = grinder->tccache_qindex[grinder->tccache_r];
+	grinder->tc_index = qindex & 0xf;
 	qbase = rte_sched_port_qbase(port, qindex);
-	qsize = rte_sched_port_qsize(port, qindex);
-
-	grinder->tc_index = (qindex >> 2) & 0x3;
-	grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
-	grinder->qsize[grinder->tc_index] = qsize;
 
-	grinder->qindex[0] = qindex;
-	grinder->qindex[1] = qindex + 1;
-	grinder->qindex[2] = qindex + 2;
-	grinder->qindex[3] = qindex + 3;
+	if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
+		qsize = rte_sched_port_qsize(port, qindex);
 
-	grinder->queue[0] = port->queue + qindex;
-	grinder->queue[1] = port->queue + qindex + 1;
-	grinder->queue[2] = port->queue + qindex + 2;
-	grinder->queue[3] = port->queue + qindex + 3;
+		grinder->queue[qpos] = port->queue + qindex;
+		grinder->qbase[qpos] = qbase;
+		grinder->qindex[qpos] = qindex;
+		grinder->qsize[qpos] = qsize;
+		grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
+		grinder->tccache_r++;
 
-	grinder->qbase[0] = qbase;
-	grinder->qbase[1] = qbase + qsize;
-	grinder->qbase[2] = qbase + 2 * qsize;
-	grinder->qbase[3] = qbase + 3 * qsize;
+		return 1;
+	}
 
+	for ( ; qpos < pipe->n_be_queues; qpos++) {
+		qsize = rte_sched_port_qsize(port, qindex + qpos);
+		grinder->queue[qpos] = port->queue + qindex + qpos;
+		grinder->qbase[qpos] = qbase + qpos * qsize;
+		grinder->qindex[qpos] = qindex + qpos;
+		grinder->qsize[qpos] = qsize;
+	}
+	grinder->tc_index = RTE_SCHED_TRAFFIC_CLASS_BE;
+	grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
 	grinder->tccache_r++;
+
 	return 1;
 }
 
diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
index 2a935998a..ae4dfb311 100644
--- a/lib/librte_sched/rte_sched.h
+++ b/lib/librte_sched/rte_sched.h
@@ -83,9 +83,9 @@  extern "C" {
 #define RTE_SCHED_BE_QUEUES_PER_PIPE    4
 
 /** Number of traffic classes per pipe (as well as subport).
- * Cannot be changed.
  */
-#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    4
+#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    \
+(RTE_SCHED_QUEUES_PER_PIPE - RTE_SCHED_BE_QUEUES_PER_PIPE + 1)
 
 /** Number of queues per pipe traffic class. Cannot be changed. */
 #define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS    4
@@ -171,9 +171,7 @@  struct rte_sched_pipe_params {
 	/**< Traffic class rates (measured in bytes per second) */
 	uint32_t tc_period;
 	/**< Enforcement period (measured in milliseconds) */
-#ifdef RTE_SCHED_SUBPORT_TC_OV
 	uint8_t tc_ov_weight;		 /**< Weight Traffic class 3 oversubscription */
-#endif
 
 	/* Pipe queues */
 	uint8_t  wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE]; /**< WRR weights */
@@ -206,11 +204,9 @@  struct rte_sched_port_params {
 					  * (measured in bytes) */
 	uint32_t n_subports_per_port;    /**< Number of subports */
 	uint32_t n_pipes_per_subport;    /**< Number of pipes per subport */
-	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint16_t qsize[RTE_SCHED_QUEUES_PER_PIPE];
 	/**< Packet queue size for each traffic class.
-	 * All queues within the same pipe traffic class have the same
-	 * size. Queues from different pipes serving the same traffic
-	 * class have the same size. */
+	 * Queues which are not needed are allowed to have zero size. */
 	struct rte_sched_pipe_params *pipe_profiles;
 	/**< Pipe profile table.
 	 * Every pipe is configured using one of the profiles from this table. */