[v3] sched: enable/disable TC OV at runtime

Message ID 20220427085848.491395-1-marcinx.danilewicz@intel.com (mailing list archive)
State Changes Requested, archived
Delegated to: Thomas Monjalon
Headers
Series [v3] sched: enable/disable TC OV at runtime |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-abi-testing success Testing PASS

Commit Message

Danilewicz, MarcinX April 27, 2022, 8:58 a.m. UTC
  Add a new API to enable or disable TC oversubscription (TC OV) for the
best-effort traffic class at the subport level.
This version includes changes made after review and improves throughput.

By default TC OV is disabled.

Signed-off-by: Marcin Danilewicz <marcinx.danilewicz@intel.com>
---
 lib/sched/rte_sched.c | 189 +++++++++++++++++++++++++++++++++++-------
 lib/sched/rte_sched.h |  18 ++++
 lib/sched/version.map |   3 +
 3 files changed, 178 insertions(+), 32 deletions(-)
  

Comments

Cristian Dumitrescu April 27, 2022, 9:36 a.m. UTC | #1
Marcin,

Every time you send a new version, you need to copy the maintainers and the other relevant people; otherwise, there is a high chance we will not see your patch. Thanks! I only saw this one by pure chance ;)

Regards,
Cristian

> -----Original Message-----
> From: Marcin Danilewicz <marcinx.danilewicz@intel.com>
> Sent: Wednesday, April 27, 2022 9:59 AM
> To: dev@dpdk.org
> Subject: [PATCH v3] sched: enable/disable TC OV at runtime
> 
> Added new API to enable or disable TC over subscription for best
> effort traffic class at subport level.
> Added changes after review and increased throughput.
> 
> By default TC OV is disabled.
> 
> Signed-off-by: Marcin Danilewicz <marcinx.danilewicz@intel.com>
> ---
>  lib/sched/rte_sched.c | 189 +++++++++++++++++++++++++++++++++++-------
>  lib/sched/rte_sched.h |  18 ++++
>  lib/sched/version.map |   3 +
>  3 files changed, 178 insertions(+), 32 deletions(-)
> 
> diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
> index ec74bee939..6e7d81df46 100644
> --- a/lib/sched/rte_sched.c
> +++ b/lib/sched/rte_sched.c
> @@ -213,6 +213,9 @@ struct rte_sched_subport {
>  	uint8_t *bmp_array;
>  	struct rte_mbuf **queue_array;
>  	uint8_t memory[0] __rte_cache_aligned;
> +
> +	/* TC oversubscription activation */
> +	int is_tc_ov_enabled;
>  } __rte_cache_aligned;
> 
>  struct rte_sched_port {
> @@ -1165,6 +1168,45 @@ rte_sched_cman_config(struct rte_sched_port
> *port,
>  }
>  #endif
> 
> +int
> +rte_sched_subport_tc_ov_config(struct rte_sched_port *port,
> +	uint32_t subport_id,
> +	bool tc_ov_enable)
> +{
> +	struct rte_sched_subport *s;
> +	struct rte_sched_subport_profile *profile;
> +
> +	if (port == NULL) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for parameter port\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (subport_id >= port->n_subports_per_port) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for parameter subport id\n",
> __func__);
> +		return  -EINVAL;
> +	}
> +
> +	s = port->subports[subport_id];
> +	s->is_tc_ov_enabled = tc_ov_enable ? 1 : 0;
> +
> +	if (s->is_tc_ov_enabled) {
> +		/* TC oversubscription */
> +		s->tc_ov_wm_min = port->mtu;
> +		s->tc_ov_period_id = 0;
> +		s->tc_ov = 0;
> +		s->tc_ov_n = 0;
> +		s->tc_ov_rate = 0;
> +
> +		profile = port->subport_profiles + s->profile;
> +		s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile-
> >tc_period,
> +				s->pipe_tc_be_rate_max);
> +		s->tc_ov_wm = s->tc_ov_wm_max;
> +	}
> +	return 0;
> +}
> +
>  int
>  rte_sched_subport_config(struct rte_sched_port *port,
>  	uint32_t subport_id,
> @@ -1254,6 +1296,9 @@ rte_sched_subport_config(struct rte_sched_port
> *port,
>  		s->n_pipe_profiles = params->n_pipe_profiles;
>  		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
> 
> +		/* TC over-subscription is disabled by default */
> +		s->is_tc_ov_enabled = 0;
> +
>  #ifdef RTE_SCHED_CMAN
>  		if (params->cman_params != NULL) {
>  			s->cman_enabled = true;
> @@ -1316,13 +1361,6 @@ rte_sched_subport_config(struct rte_sched_port
> *port,
> 
>  		for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
>  			s->grinder_base_bmp_pos[i] =
> RTE_SCHED_PIPE_INVALID;
> -
> -		/* TC oversubscription */
> -		s->tc_ov_wm_min = port->mtu;
> -		s->tc_ov_period_id = 0;
> -		s->tc_ov = 0;
> -		s->tc_ov_n = 0;
> -		s->tc_ov_rate = 0;
>  	}
> 
>  	{
> @@ -1342,9 +1380,6 @@ rte_sched_subport_config(struct rte_sched_port
> *port,
>  			else
>  				profile->tc_credits_per_period[i] = 0;
> 
> -		s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile-
> >tc_period,
> -							s-
> >pipe_tc_be_rate_max);
> -		s->tc_ov_wm = s->tc_ov_wm_max;
>  		s->profile = subport_profile_id;
> 
>  	}
> @@ -1417,17 +1452,20 @@ rte_sched_pipe_config(struct rte_sched_port
> *port,
>  		double pipe_tc_be_rate =
>  			(double) params-
> >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
>  			/ (double) params->tc_period;
> -		uint32_t tc_be_ov = s->tc_ov;
> 
> -		/* Unplug pipe from its subport */
> -		s->tc_ov_n -= params->tc_ov_weight;
> -		s->tc_ov_rate -= pipe_tc_be_rate;
> -		s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
> +		if (s->is_tc_ov_enabled) {
> +			uint32_t tc_be_ov = s->tc_ov;
> 
> -		if (s->tc_ov != tc_be_ov) {
> -			RTE_LOG(DEBUG, SCHED,
> -				"Subport %u Best-effort TC oversubscription is
> OFF (%.4lf >= %.4lf)\n",
> -				subport_id, subport_tc_be_rate, s-
> >tc_ov_rate);
> +			/* Unplug pipe from its subport */
> +			s->tc_ov_n -= params->tc_ov_weight;
> +			s->tc_ov_rate -= pipe_tc_be_rate;
> +			s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
> +
> +			if (s->tc_ov != tc_be_ov) {
> +				RTE_LOG(DEBUG, SCHED,
> +					"Subport %u Best-effort TC
> oversubscription is OFF (%.4lf >= %.4lf)\n",
> +					subport_id, subport_tc_be_rate, s-
> >tc_ov_rate);
> +			}
>  		}
> 
>  		/* Reset the pipe */
> @@ -1460,19 +1498,22 @@ rte_sched_pipe_config(struct rte_sched_port
> *port,
>  		double pipe_tc_be_rate =
>  			(double) params-
> >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
>  			/ (double) params->tc_period;
> -		uint32_t tc_be_ov = s->tc_ov;
> 
> -		s->tc_ov_n += params->tc_ov_weight;
> -		s->tc_ov_rate += pipe_tc_be_rate;
> -		s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
> +		if (s->is_tc_ov_enabled) {
> +			uint32_t tc_be_ov = s->tc_ov;
> +
> +			s->tc_ov_n += params->tc_ov_weight;
> +			s->tc_ov_rate += pipe_tc_be_rate;
> +			s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
> 
> -		if (s->tc_ov != tc_be_ov) {
> -			RTE_LOG(DEBUG, SCHED,
> -				"Subport %u Best effort TC oversubscription is
> ON (%.4lf < %.4lf)\n",
> -				subport_id, subport_tc_be_rate, s-
> >tc_ov_rate);
> +			if (s->tc_ov != tc_be_ov) {
> +				RTE_LOG(DEBUG, SCHED,
> +					"Subport %u Best effort TC
> oversubscription is ON (%.4lf < %.4lf)\n",
> +					subport_id, subport_tc_be_rate, s-
> >tc_ov_rate);
> +			}
> +			p->tc_ov_period_id = s->tc_ov_period_id;
> +			p->tc_ov_credits = s->tc_ov_wm;
>  		}
> -		p->tc_ov_period_id = s->tc_ov_period_id;
> -		p->tc_ov_credits = s->tc_ov_wm;
>  	}
> 
>  	return 0;
> @@ -2318,6 +2359,45 @@ grinder_credits_update(struct rte_sched_port
> *port,
>  	pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
>  	pipe->tb_time += n_periods * params->tb_period;
> 
> +	/* Subport TCs */
> +	if (unlikely(port->time >= subport->tc_time)) {
> +		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> +			subport->tc_credits[i] = sp->tc_credits_per_period[i];
> +
> +		subport->tc_time = port->time + sp->tc_period;
> +	}
> +
> +	/* Pipe TCs */
> +	if (unlikely(port->time >= pipe->tc_time)) {
> +		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> +			pipe->tc_credits[i] = params->tc_credits_per_period[i];
> +		pipe->tc_time = port->time + params->tc_period;
> +	}
> +}
> +
> +static inline void
> +grinder_credits_update_with_tc_ov(struct rte_sched_port *port,
> +	struct rte_sched_subport *subport, uint32_t pos)
> +{
> +	struct rte_sched_grinder *grinder = subport->grinder + pos;
> +	struct rte_sched_pipe *pipe = grinder->pipe;
> +	struct rte_sched_pipe_profile *params = grinder->pipe_params;
> +	struct rte_sched_subport_profile *sp = grinder->subport_params;
> +	uint64_t n_periods;
> +	uint32_t i;
> +
> +	/* Subport TB */
> +	n_periods = (port->time - subport->tb_time) / sp->tb_period;
> +	subport->tb_credits += n_periods * sp->tb_credits_per_period;
> +	subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
> +	subport->tb_time += n_periods * sp->tb_period;
> +
> +	/* Pipe TB */
> +	n_periods = (port->time - pipe->tb_time) / params->tb_period;
> +	pipe->tb_credits += n_periods * params->tb_credits_per_period;
> +	pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
> +	pipe->tb_time += n_periods * params->tb_period;
> +
>  	/* Subport TCs */
>  	if (unlikely(port->time >= subport->tc_time)) {
>  		subport->tc_ov_wm =
> @@ -2348,6 +2428,39 @@ grinder_credits_update(struct rte_sched_port
> *port,
>  static inline int
>  grinder_credits_check(struct rte_sched_port *port,
>  	struct rte_sched_subport *subport, uint32_t pos)
> +{
> +	struct rte_sched_grinder *grinder = subport->grinder + pos;
> +	struct rte_sched_pipe *pipe = grinder->pipe;
> +	struct rte_mbuf *pkt = grinder->pkt;
> +	uint32_t tc_index = grinder->tc_index;
> +	uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
> +	uint64_t subport_tb_credits = subport->tb_credits;
> +	uint64_t subport_tc_credits = subport->tc_credits[tc_index];
> +	uint64_t pipe_tb_credits = pipe->tb_credits;
> +	uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
> +	int enough_credits;
> +
> +	/* Check pipe and subport credits */
> +	enough_credits = (pkt_len <= subport_tb_credits) &&
> +		(pkt_len <= subport_tc_credits) &&
> +		(pkt_len <= pipe_tb_credits) &&
> +		(pkt_len <= pipe_tc_credits);
> +
> +	if (!enough_credits)
> +		return 0;
> +
> +	/* Update pipe and subport credits */
> +	subport->tb_credits -= pkt_len;
> +	subport->tc_credits[tc_index] -= pkt_len;
> +	pipe->tb_credits -= pkt_len;
> +	pipe->tc_credits[tc_index] -= pkt_len;
> +
> +	return 1;
> +}
> +
> +static inline int
> +grinder_credits_check_with_tc_ov(struct rte_sched_port *port,
> +	struct rte_sched_subport *subport, uint32_t pos)
>  {
>  	struct rte_sched_grinder *grinder = subport->grinder + pos;
>  	struct rte_sched_pipe *pipe = grinder->pipe;
> @@ -2403,8 +2516,16 @@ grinder_schedule(struct rte_sched_port *port,
>  	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
>  	uint32_t be_tc_active;
> 
> -	if (!grinder_credits_check(port, subport, pos))
> -		return 0;
> +	switch (subport->is_tc_ov_enabled) {
> +	case 1:
> +		if (!grinder_credits_check_with_tc_ov(port, subport, pos))
> +			return 0;
> +		break;
> +	case 0:
> +		if (!grinder_credits_check(port, subport, pos))
> +			return 0;
> +		break;
> +	}
> 
>  	/* Advance port time */
>  	port->time += pkt_len;
> @@ -2770,7 +2891,11 @@ grinder_handle(struct rte_sched_port *port,
>  						subport->profile;
> 
>  		grinder_prefetch_tc_queue_arrays(subport, pos);
> -		grinder_credits_update(port, subport, pos);
> +
> +		if (unlikely(subport->is_tc_ov_enabled))
> +			grinder_credits_update_with_tc_ov(port, subport, pos);
> +		else
> +			grinder_credits_update(port, subport, pos);
> 
>  		grinder->state = e_GRINDER_PREFETCH_MBUF;
>  		return 0;
> diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
> index 5ece64e527..94febe1d94 100644
> --- a/lib/sched/rte_sched.h
> +++ b/lib/sched/rte_sched.h
> @@ -579,6 +579,24 @@ rte_sched_port_enqueue(struct rte_sched_port *port,
> struct rte_mbuf **pkts, uint
>  int
>  rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts,
> uint32_t n_pkts);
> 
> +/**
> + * Hierarchical scheduler subport TC OV enable/disable config.
> + * Note that this function is safe to use at runtime
> + * to enable/disable TC OV for subport.
> + *
> + * @param port
> + *   Handle to port scheduler instance
> + * @param subport_id
> + *   Subport ID
> + * @param tc_ov_enable
> + *  Boolean flag to enable/disable TC OV
> + * @return
> + *   0 upon success, error code otherwise
> + */
> +__rte_experimental
> +int
> +rte_sched_subport_tc_ov_config(struct rte_sched_port *port, uint32_t
> subport_id, bool tc_ov_enable);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/sched/version.map b/lib/sched/version.map
> index d22c07fc9f..c6e994d8df 100644
> --- a/lib/sched/version.map
> +++ b/lib/sched/version.map
> @@ -34,4 +34,7 @@ EXPERIMENTAL {
>  	# added in 21.11
>  	rte_pie_rt_data_init;
>  	rte_pie_config_init;
> +
> +	# added in 22.03
> +	rte_sched_subport_tc_ov_config;
>  };
> --
> 2.25.1
> 
> --------------------------------------------------------------
> Intel Research and Development Ireland Limited
> Registered in Ireland
> Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
> Registered Number: 308263
> 
> 
> This e-mail and any attachments may contain confidential material for the sole
> use of the intended recipient(s). Any review or distribution by others is
> strictly prohibited. If you are not the intended recipient, please contact the
> sender and delete all copies.
  
Cristian Dumitrescu April 27, 2022, 9:37 a.m. UTC | #2
Adding Jasvinder

> -----Original Message-----
> From: Dumitrescu, Cristian
> Sent: Wednesday, April 27, 2022 10:37 AM
> To: Marcin Danilewicz <marcinx.danilewicz@intel.com>; dev@dpdk.org
> Subject: RE: [PATCH v3] sched: enable/disable TC OV at runtime
> 
> Marcin,
> 
> Every time you send a new version, you need to copy the maintainers and the
> other relevant people, otherwise there is a high chance we are not going to see
> your patch, thanks! I only saw this one due to pure chance ;)
> 
> Regards,
> Cristian
> 
> > -----Original Message-----
> > From: Marcin Danilewicz <marcinx.danilewicz@intel.com>
> > Sent: Wednesday, April 27, 2022 9:59 AM
> > To: dev@dpdk.org
> > Subject: [PATCH v3] sched: enable/disable TC OV at runtime
> >
> > Added new API to enable or disable TC over subscription for best
> > effort traffic class at subport level.
> > Added changes after review and increased throughput.
> >
> > By default TC OV is disabled.
> >
> > Signed-off-by: Marcin Danilewicz <marcinx.danilewicz@intel.com>
> > ---
> >  lib/sched/rte_sched.c | 189 +++++++++++++++++++++++++++++++++++-------
> >  lib/sched/rte_sched.h |  18 ++++
> >  lib/sched/version.map |   3 +
> >  3 files changed, 178 insertions(+), 32 deletions(-)
> >
> > diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
> > index ec74bee939..6e7d81df46 100644
> > --- a/lib/sched/rte_sched.c
> > +++ b/lib/sched/rte_sched.c
> > @@ -213,6 +213,9 @@ struct rte_sched_subport {
> >  	uint8_t *bmp_array;
> >  	struct rte_mbuf **queue_array;
> >  	uint8_t memory[0] __rte_cache_aligned;
> > +
> > +	/* TC oversubscription activation */
> > +	int is_tc_ov_enabled;
> >  } __rte_cache_aligned;
> >
> >  struct rte_sched_port {
> > @@ -1165,6 +1168,45 @@ rte_sched_cman_config(struct rte_sched_port
> > *port,
> >  }
> >  #endif
> >
> > +int
> > +rte_sched_subport_tc_ov_config(struct rte_sched_port *port,
> > +	uint32_t subport_id,
> > +	bool tc_ov_enable)
> > +{
> > +	struct rte_sched_subport *s;
> > +	struct rte_sched_subport_profile *profile;
> > +
> > +	if (port == NULL) {
> > +		RTE_LOG(ERR, SCHED,
> > +			"%s: Incorrect value for parameter port\n", __func__);
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (subport_id >= port->n_subports_per_port) {
> > +		RTE_LOG(ERR, SCHED,
> > +			"%s: Incorrect value for parameter subport id\n",
> > __func__);
> > +		return  -EINVAL;
> > +	}
> > +
> > +	s = port->subports[subport_id];
> > +	s->is_tc_ov_enabled = tc_ov_enable ? 1 : 0;
> > +
> > +	if (s->is_tc_ov_enabled) {
> > +		/* TC oversubscription */
> > +		s->tc_ov_wm_min = port->mtu;
> > +		s->tc_ov_period_id = 0;
> > +		s->tc_ov = 0;
> > +		s->tc_ov_n = 0;
> > +		s->tc_ov_rate = 0;
> > +
> > +		profile = port->subport_profiles + s->profile;
> > +		s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile-
> > >tc_period,
> > +				s->pipe_tc_be_rate_max);
> > +		s->tc_ov_wm = s->tc_ov_wm_max;
> > +	}
> > +	return 0;
> > +}
> > +
> >  int
> >  rte_sched_subport_config(struct rte_sched_port *port,
> >  	uint32_t subport_id,
> > @@ -1254,6 +1296,9 @@ rte_sched_subport_config(struct rte_sched_port
> > *port,
> >  		s->n_pipe_profiles = params->n_pipe_profiles;
> >  		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
> >
> > +		/* TC over-subscription is disabled by default */
> > +		s->is_tc_ov_enabled = 0;
> > +
> >  #ifdef RTE_SCHED_CMAN
> >  		if (params->cman_params != NULL) {
> >  			s->cman_enabled = true;
> > @@ -1316,13 +1361,6 @@ rte_sched_subport_config(struct rte_sched_port
> > *port,
> >
> >  		for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
> >  			s->grinder_base_bmp_pos[i] =
> > RTE_SCHED_PIPE_INVALID;
> > -
> > -		/* TC oversubscription */
> > -		s->tc_ov_wm_min = port->mtu;
> > -		s->tc_ov_period_id = 0;
> > -		s->tc_ov = 0;
> > -		s->tc_ov_n = 0;
> > -		s->tc_ov_rate = 0;
> >  	}
> >
> >  	{
> > @@ -1342,9 +1380,6 @@ rte_sched_subport_config(struct rte_sched_port
> > *port,
> >  			else
> >  				profile->tc_credits_per_period[i] = 0;
> >
> > -		s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile-
> > >tc_period,
> > -							s-
> > >pipe_tc_be_rate_max);
> > -		s->tc_ov_wm = s->tc_ov_wm_max;
> >  		s->profile = subport_profile_id;
> >
> >  	}
> > @@ -1417,17 +1452,20 @@ rte_sched_pipe_config(struct rte_sched_port
> > *port,
> >  		double pipe_tc_be_rate =
> >  			(double) params-
> > >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
> >  			/ (double) params->tc_period;
> > -		uint32_t tc_be_ov = s->tc_ov;
> >
> > -		/* Unplug pipe from its subport */
> > -		s->tc_ov_n -= params->tc_ov_weight;
> > -		s->tc_ov_rate -= pipe_tc_be_rate;
> > -		s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
> > +		if (s->is_tc_ov_enabled) {
> > +			uint32_t tc_be_ov = s->tc_ov;
> >
> > -		if (s->tc_ov != tc_be_ov) {
> > -			RTE_LOG(DEBUG, SCHED,
> > -				"Subport %u Best-effort TC oversubscription is
> > OFF (%.4lf >= %.4lf)\n",
> > -				subport_id, subport_tc_be_rate, s-
> > >tc_ov_rate);
> > +			/* Unplug pipe from its subport */
> > +			s->tc_ov_n -= params->tc_ov_weight;
> > +			s->tc_ov_rate -= pipe_tc_be_rate;
> > +			s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
> > +
> > +			if (s->tc_ov != tc_be_ov) {
> > +				RTE_LOG(DEBUG, SCHED,
> > +					"Subport %u Best-effort TC
> > oversubscription is OFF (%.4lf >= %.4lf)\n",
> > +					subport_id, subport_tc_be_rate, s-
> > >tc_ov_rate);
> > +			}
> >  		}
> >
> >  		/* Reset the pipe */
> > @@ -1460,19 +1498,22 @@ rte_sched_pipe_config(struct rte_sched_port
> > *port,
> >  		double pipe_tc_be_rate =
> >  			(double) params-
> > >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
> >  			/ (double) params->tc_period;
> > -		uint32_t tc_be_ov = s->tc_ov;
> >
> > -		s->tc_ov_n += params->tc_ov_weight;
> > -		s->tc_ov_rate += pipe_tc_be_rate;
> > -		s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
> > +		if (s->is_tc_ov_enabled) {
> > +			uint32_t tc_be_ov = s->tc_ov;
> > +
> > +			s->tc_ov_n += params->tc_ov_weight;
> > +			s->tc_ov_rate += pipe_tc_be_rate;
> > +			s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
> >
> > -		if (s->tc_ov != tc_be_ov) {
> > -			RTE_LOG(DEBUG, SCHED,
> > -				"Subport %u Best effort TC oversubscription is
> > ON (%.4lf < %.4lf)\n",
> > -				subport_id, subport_tc_be_rate, s-
> > >tc_ov_rate);
> > +			if (s->tc_ov != tc_be_ov) {
> > +				RTE_LOG(DEBUG, SCHED,
> > +					"Subport %u Best effort TC
> > oversubscription is ON (%.4lf < %.4lf)\n",
> > +					subport_id, subport_tc_be_rate, s-
> > >tc_ov_rate);
> > +			}
> > +			p->tc_ov_period_id = s->tc_ov_period_id;
> > +			p->tc_ov_credits = s->tc_ov_wm;
> >  		}
> > -		p->tc_ov_period_id = s->tc_ov_period_id;
> > -		p->tc_ov_credits = s->tc_ov_wm;
> >  	}
> >
> >  	return 0;
> > @@ -2318,6 +2359,45 @@ grinder_credits_update(struct rte_sched_port
> > *port,
> >  	pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
> >  	pipe->tb_time += n_periods * params->tb_period;
> >
> > +	/* Subport TCs */
> > +	if (unlikely(port->time >= subport->tc_time)) {
> > +		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> > +			subport->tc_credits[i] = sp->tc_credits_per_period[i];
> > +
> > +		subport->tc_time = port->time + sp->tc_period;
> > +	}
> > +
> > +	/* Pipe TCs */
> > +	if (unlikely(port->time >= pipe->tc_time)) {
> > +		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> > +			pipe->tc_credits[i] = params->tc_credits_per_period[i];
> > +		pipe->tc_time = port->time + params->tc_period;
> > +	}
> > +}
> > +
> > +static inline void
> > +grinder_credits_update_with_tc_ov(struct rte_sched_port *port,
> > +	struct rte_sched_subport *subport, uint32_t pos)
> > +{
> > +	struct rte_sched_grinder *grinder = subport->grinder + pos;
> > +	struct rte_sched_pipe *pipe = grinder->pipe;
> > +	struct rte_sched_pipe_profile *params = grinder->pipe_params;
> > +	struct rte_sched_subport_profile *sp = grinder->subport_params;
> > +	uint64_t n_periods;
> > +	uint32_t i;
> > +
> > +	/* Subport TB */
> > +	n_periods = (port->time - subport->tb_time) / sp->tb_period;
> > +	subport->tb_credits += n_periods * sp->tb_credits_per_period;
> > +	subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
> > +	subport->tb_time += n_periods * sp->tb_period;
> > +
> > +	/* Pipe TB */
> > +	n_periods = (port->time - pipe->tb_time) / params->tb_period;
> > +	pipe->tb_credits += n_periods * params->tb_credits_per_period;
> > +	pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
> > +	pipe->tb_time += n_periods * params->tb_period;
> > +
> >  	/* Subport TCs */
> >  	if (unlikely(port->time >= subport->tc_time)) {
> >  		subport->tc_ov_wm =
> > @@ -2348,6 +2428,39 @@ grinder_credits_update(struct rte_sched_port
> > *port,
> >  static inline int
> >  grinder_credits_check(struct rte_sched_port *port,
> >  	struct rte_sched_subport *subport, uint32_t pos)
> > +{
> > +	struct rte_sched_grinder *grinder = subport->grinder + pos;
> > +	struct rte_sched_pipe *pipe = grinder->pipe;
> > +	struct rte_mbuf *pkt = grinder->pkt;
> > +	uint32_t tc_index = grinder->tc_index;
> > +	uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
> > +	uint64_t subport_tb_credits = subport->tb_credits;
> > +	uint64_t subport_tc_credits = subport->tc_credits[tc_index];
> > +	uint64_t pipe_tb_credits = pipe->tb_credits;
> > +	uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
> > +	int enough_credits;
> > +
> > +	/* Check pipe and subport credits */
> > +	enough_credits = (pkt_len <= subport_tb_credits) &&
> > +		(pkt_len <= subport_tc_credits) &&
> > +		(pkt_len <= pipe_tb_credits) &&
> > +		(pkt_len <= pipe_tc_credits);
> > +
> > +	if (!enough_credits)
> > +		return 0;
> > +
> > +	/* Update pipe and subport credits */
> > +	subport->tb_credits -= pkt_len;
> > +	subport->tc_credits[tc_index] -= pkt_len;
> > +	pipe->tb_credits -= pkt_len;
> > +	pipe->tc_credits[tc_index] -= pkt_len;
> > +
> > +	return 1;
> > +}
> > +
> > +static inline int
> > +grinder_credits_check_with_tc_ov(struct rte_sched_port *port,
> > +	struct rte_sched_subport *subport, uint32_t pos)
> >  {
> >  	struct rte_sched_grinder *grinder = subport->grinder + pos;
> >  	struct rte_sched_pipe *pipe = grinder->pipe;
> > @@ -2403,8 +2516,16 @@ grinder_schedule(struct rte_sched_port *port,
> >  	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
> >  	uint32_t be_tc_active;
> >
> > -	if (!grinder_credits_check(port, subport, pos))
> > -		return 0;
> > +	switch (subport->is_tc_ov_enabled) {
> > +	case 1:
> > +		if (!grinder_credits_check_with_tc_ov(port, subport, pos))
> > +			return 0;
> > +		break;
> > +	case 0:
> > +		if (!grinder_credits_check(port, subport, pos))
> > +			return 0;
> > +		break;
> > +	}
> >
> >  	/* Advance port time */
> >  	port->time += pkt_len;
> > @@ -2770,7 +2891,11 @@ grinder_handle(struct rte_sched_port *port,
> >  						subport->profile;
> >
> >  		grinder_prefetch_tc_queue_arrays(subport, pos);
> > -		grinder_credits_update(port, subport, pos);
> > +
> > +		if (unlikely(subport->is_tc_ov_enabled))
> > +			grinder_credits_update_with_tc_ov(port, subport, pos);
> > +		else
> > +			grinder_credits_update(port, subport, pos);
> >
> >  		grinder->state = e_GRINDER_PREFETCH_MBUF;
> >  		return 0;
> > diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
> > index 5ece64e527..94febe1d94 100644
> > --- a/lib/sched/rte_sched.h
> > +++ b/lib/sched/rte_sched.h
> > @@ -579,6 +579,24 @@ rte_sched_port_enqueue(struct rte_sched_port
> *port,
> > struct rte_mbuf **pkts, uint
> >  int
> >  rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf
> **pkts,
> > uint32_t n_pkts);
> >
> > +/**
> > + * Hierarchical scheduler subport TC OV enable/disable config.
> > + * Note that this function is safe to use at runtime
> > + * to enable/disable TC OV for subport.
> > + *
> > + * @param port
> > + *   Handle to port scheduler instance
> > + * @param subport_id
> > + *   Subport ID
> > + * @param tc_ov_enable
> > + *  Boolean flag to enable/disable TC OV
> > + * @return
> > + *   0 upon success, error code otherwise
> > + */
> > +__rte_experimental
> > +int
> > +rte_sched_subport_tc_ov_config(struct rte_sched_port *port, uint32_t
> > subport_id, bool tc_ov_enable);
> > +
> >  #ifdef __cplusplus
> >  }
> >  #endif
> > diff --git a/lib/sched/version.map b/lib/sched/version.map
> > index d22c07fc9f..c6e994d8df 100644
> > --- a/lib/sched/version.map
> > +++ b/lib/sched/version.map
> > @@ -34,4 +34,7 @@ EXPERIMENTAL {
> >  	# added in 21.11
> >  	rte_pie_rt_data_init;
> >  	rte_pie_config_init;
> > +
> > +	# added in 22.03
> > +	rte_sched_subport_tc_ov_config;
> >  };
> > --
> > 2.25.1
> >
> > --------------------------------------------------------------
> > Intel Research and Development Ireland Limited
> > Registered in Ireland
> > Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
> > Registered Number: 308263
> >
> >
> > This e-mail and any attachments may contain confidential material for the
> sole
> > use of the intended recipient(s). Any review or distribution by others is
> > strictly prohibited. If you are not the intended recipient, please contact the
> > sender and delete all copies.
  
Stephen Hemminger April 27, 2022, 3:53 p.m. UTC | #3
Unaddressed
On Wed, 27 Apr 2022 08:58:48 +0000
Marcin Danilewicz <marcinx.danilewicz@intel.com> wrote:

> Added new API to enable or disable TC over subscription for best
> effort traffic class at subport level.
> Added changes after review and increased throughput.
> 
> By default TC OV is disabled.
> 
> Signed-off-by: Marcin Danilewicz <marcinx.danilewicz@intel.com>
> ---
>  lib/sched/rte_sched.c | 189 +++++++++++++++++++++++++++++++++++-------
>  lib/sched/rte_sched.h |  18 ++++
>  lib/sched/version.map |   3 +
>  3 files changed, 178 insertions(+), 32 deletions(-)
> 
> diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
> index ec74bee939..6e7d81df46 100644
> --- a/lib/sched/rte_sched.c
> +++ b/lib/sched/rte_sched.c
> @@ -213,6 +213,9 @@ struct rte_sched_subport {
>  	uint8_t *bmp_array;
>  	struct rte_mbuf **queue_array;
>  	uint8_t memory[0] __rte_cache_aligned;
> +
> +	/* TC oversubscription activation */
> +	int is_tc_ov_enabled;
>  } __rte_cache_aligned;

Since this is a flag, why not use either uint8_t or bool?
Also, there are holes in that data structure that should be used.


struct rte_sched_port {
	uint32_t                   n_subports_per_port;  /*     0     4 */
	uint32_t                   n_pipes_per_subport;  /*     4     4 */
	uint32_t                   n_pipes_per_subport_log2; /*     8     4 */
	uint16_t                   pipe_queue[13];       /*    12    26 */
	uint8_t                    pipe_tc[16];          /*    38    16 */
	uint8_t                    tc_queue[16];         /*    54    16 */

	/* XXX 2 bytes hole, try to pack */

	/* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
	uint32_t                   n_subport_profiles;   /*    72     4 */
	uint32_t                   n_max_subport_profiles; /*    76     4 */
	uint64_t                   rate;                 /*    80     8 */
	uint32_t                   mtu;                  /*    88     4 */
	uint32_t                   frame_overhead;       /*    92     4 */
	int                        socket;               /*    96     4 */

	/* XXX 4 bytes hole, try to pack */

	uint64_t                   time_cpu_cycles;      /*   104     8 */
	uint64_t                   time_cpu_bytes;       /*   112     8 */
	uint64_t                   time;                 /*   120     8 */
	/* --- cacheline 2 boundary (128 bytes) --- */
	struct rte_reciprocal      inv_cycles_per_byte;  /*   128     8 */

	/* XXX last struct has 2 bytes of padding */

	uint64_t                   cycles_per_byte;      /*   136     8 */
	struct rte_mbuf * *        pkts_out;             /*   144     8 */
	uint32_t                   n_pkts_out;           /*   152     4 */
	uint32_t                   subport_id;           /*   156     4 */
	struct rte_sched_subport_profile * subport_profiles; /*   160     8 */

	/* XXX 24 bytes hole, try to pack */

	/* --- cacheline 3 boundary (192 bytes) --- */
	struct rte_sched_subport * subports[] __attribute__((__aligned__(64))); /*   192     0 */

	/* size: 192, cachelines: 3, members: 22 */
	/* sum members: 162, holes: 3, sum holes: 30 */
	/* paddings: 1, sum paddings: 2 */
	/* forced alignments: 1, forced holes: 1, sum forced holes: 24 */
} __attribute__((__aligned__(64)));



>  
>  struct rte_sched_port {
> @@ -1165,6 +1168,45 @@ rte_sched_cman_config(struct rte_sched_port *port,
>  }
>  #endif
>  
> +int
> +rte_sched_subport_tc_ov_config(struct rte_sched_port *port,
> +	uint32_t subport_id,
> +	bool tc_ov_enable)
> +{
> +	struct rte_sched_subport *s;
> +	struct rte_sched_subport_profile *profile;
> +
> +	if (port == NULL) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for parameter port\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (subport_id >= port->n_subports_per_port) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for parameter subport id\n", __func__);
> +		return  -EINVAL;
> +	}
> +
> +	s = port->subports[subport_id];
> +	s->is_tc_ov_enabled = tc_ov_enable ? 1 : 0;
> +
> +	if (s->is_tc_ov_enabled) {
> +		/* TC oversubscription */
> +		s->tc_ov_wm_min = port->mtu;
> +		s->tc_ov_period_id = 0;
> +		s->tc_ov = 0;
> +		s->tc_ov_n = 0;
> +		s->tc_ov_rate = 0;
> +
> +		profile = port->subport_profiles + s->profile;
> +		s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
> +				s->pipe_tc_be_rate_max);
> +		s->tc_ov_wm = s->tc_ov_wm_max;


All of the tc_ov_XX fields could be grouped into a sub-structure, which might make the init cleaner.

> +	}
> +	return 0;
> +}
> +
>  int
>  rte_sched_subport_config(struct rte_sched_port *port,
>  	uint32_t subport_id,
> @@ -1254,6 +1296,9 @@ rte_sched_subport_config(struct rte_sched_port *port,
>  		s->n_pipe_profiles = params->n_pipe_profiles;
>  		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
>  
> +		/* TC over-subscription is disabled by default */
> +		s->is_tc_ov_enabled = 0;
> +
>  #ifdef RTE_SCHED_CMAN
>  		if (params->cman_params != NULL) {
>  			s->cman_enabled = true;
> @@ -1316,13 +1361,6 @@ rte_sched_subport_config(struct rte_sched_port *port,
>  
>  		for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
>  			s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;
> -
> -		/* TC oversubscription */
> -		s->tc_ov_wm_min = port->mtu;
> -		s->tc_ov_period_id = 0;
> -		s->tc_ov = 0;
> -		s->tc_ov_n = 0;
> -		s->tc_ov_rate = 0;
>  	}
>  
>  	{
> @@ -1342,9 +1380,6 @@ rte_sched_subport_config(struct rte_sched_port *port,
>  			else
>  				profile->tc_credits_per_period[i] = 0;
>  
> -		s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
> -							s->pipe_tc_be_rate_max);
> -		s->tc_ov_wm = s->tc_ov_wm_max;
>  		s->profile = subport_profile_id;
>  
>  	}
> @@ -1417,17 +1452,20 @@ rte_sched_pipe_config(struct rte_sched_port *port,
>  		double pipe_tc_be_rate =
>  			(double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
>  			/ (double) params->tc_period;
> -		uint32_t tc_be_ov = s->tc_ov;
>  
> -		/* Unplug pipe from its subport */
> -		s->tc_ov_n -= params->tc_ov_weight;
> -		s->tc_ov_rate -= pipe_tc_be_rate;
> -		s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
> +		if (s->is_tc_ov_enabled) {
> +			uint32_t tc_be_ov = s->tc_ov;
>  
> -		if (s->tc_ov != tc_be_ov) {
> -			RTE_LOG(DEBUG, SCHED,
> -				"Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n",
> -				subport_id, subport_tc_be_rate, s->tc_ov_rate);
> +			/* Unplug pipe from its subport */
> +			s->tc_ov_n -= params->tc_ov_weight;
> +			s->tc_ov_rate -= pipe_tc_be_rate;
> +			s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
> +
> +			if (s->tc_ov != tc_be_ov) {
> +				RTE_LOG(DEBUG, SCHED,
> +					"Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n",
> +					subport_id, subport_tc_be_rate, s->tc_ov_rate);
> +			}
>  		}
>  
>  		/* Reset the pipe */
> @@ -1460,19 +1498,22 @@ rte_sched_pipe_config(struct rte_sched_port *port,
>  		double pipe_tc_be_rate =
>  			(double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
>  			/ (double) params->tc_period;
> -		uint32_t tc_be_ov = s->tc_ov;
>  
> -		s->tc_ov_n += params->tc_ov_weight;
> -		s->tc_ov_rate += pipe_tc_be_rate;
> -		s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
> +		if (s->is_tc_ov_enabled) {
> +			uint32_t tc_be_ov = s->tc_ov;
> +
> +			s->tc_ov_n += params->tc_ov_weight;
> +			s->tc_ov_rate += pipe_tc_be_rate;
> +			s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
>  
> -		if (s->tc_ov != tc_be_ov) {
> -			RTE_LOG(DEBUG, SCHED,
> -				"Subport %u Best effort TC oversubscription is ON (%.4lf < %.4lf)\n",
> -				subport_id, subport_tc_be_rate, s->tc_ov_rate);
> +			if (s->tc_ov != tc_be_ov) {
> +				RTE_LOG(DEBUG, SCHED,
> +					"Subport %u Best effort TC oversubscription is ON (%.4lf < %.4lf)\n",
> +					subport_id, subport_tc_be_rate, s->tc_ov_rate);
> +			}
> +			p->tc_ov_period_id = s->tc_ov_period_id;
> +			p->tc_ov_credits = s->tc_ov_wm;
>  		}
> -		p->tc_ov_period_id = s->tc_ov_period_id;
> -		p->tc_ov_credits = s->tc_ov_wm;
>  	}
>  
>  	return 0;
> @@ -2318,6 +2359,45 @@ grinder_credits_update(struct rte_sched_port *port,
>  	pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
>  	pipe->tb_time += n_periods * params->tb_period;
>  
> +	/* Subport TCs */
> +	if (unlikely(port->time >= subport->tc_time)) {
> +		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> +			subport->tc_credits[i] = sp->tc_credits_per_period[i];
> +
> +		subport->tc_time = port->time + sp->tc_period;
> +	}
> +
> +	/* Pipe TCs */
> +	if (unlikely(port->time >= pipe->tc_time)) {
> +		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
> +			pipe->tc_credits[i] = params->tc_credits_per_period[i];
> +		pipe->tc_time = port->time + params->tc_period;
> +	}
> +}
> +
> +static inline void
> +grinder_credits_update_with_tc_ov(struct rte_sched_port *port,
> +	struct rte_sched_subport *subport, uint32_t pos)
> +{
> +	struct rte_sched_grinder *grinder = subport->grinder + pos;
> +	struct rte_sched_pipe *pipe = grinder->pipe;
> +	struct rte_sched_pipe_profile *params = grinder->pipe_params;
> +	struct rte_sched_subport_profile *sp = grinder->subport_params;
> +	uint64_t n_periods;
> +	uint32_t i;
> +
> +	/* Subport TB */
> +	n_periods = (port->time - subport->tb_time) / sp->tb_period;
> +	subport->tb_credits += n_periods * sp->tb_credits_per_period;
> +	subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
> +	subport->tb_time += n_periods * sp->tb_period;
> +
> +	/* Pipe TB */
> +	n_periods = (port->time - pipe->tb_time) / params->tb_period;
> +	pipe->tb_credits += n_periods * params->tb_credits_per_period;
> +	pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
> +	pipe->tb_time += n_periods * params->tb_period;
> +
>  	/* Subport TCs */
>  	if (unlikely(port->time >= subport->tc_time)) {
>  		subport->tc_ov_wm =
> @@ -2348,6 +2428,39 @@ grinder_credits_update(struct rte_sched_port *port,
>  static inline int
>  grinder_credits_check(struct rte_sched_port *port,
>  	struct rte_sched_subport *subport, uint32_t pos)

Either return a negative errno, or make the return type a boolean.

> +{
> +	struct rte_sched_grinder *grinder = subport->grinder + pos;
> +	struct rte_sched_pipe *pipe = grinder->pipe;
> +	struct rte_mbuf *pkt = grinder->pkt;
> +	uint32_t tc_index = grinder->tc_index;
> +	uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
> +	uint64_t subport_tb_credits = subport->tb_credits;
> +	uint64_t subport_tc_credits = subport->tc_credits[tc_index];
> +	uint64_t pipe_tb_credits = pipe->tb_credits;
> +	uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
> +	int enough_credits;
> +
> +	/* Check pipe and subport credits */
> +	enough_credits = (pkt_len <= subport_tb_credits) &&
> +		(pkt_len <= subport_tc_credits) &&
> +		(pkt_len <= pipe_tb_credits) &&
> +		(pkt_len <= pipe_tc_credits);
> +
> +	if (!enough_credits)
> +		return 0;
> +
> +	/* Update pipe and subport credits */
> +	subport->tb_credits -= pkt_len;
> +	subport->tc_credits[tc_index] -= pkt_len;
> +	pipe->tb_credits -= pkt_len;
> +	pipe->tc_credits[tc_index] -= pkt_len;
> +
> +	return 1;
> +}
> +
> +static inline int
> +grinder_credits_check_with_tc_ov(struct rte_sched_port *port,
> +	struct rte_sched_subport *subport, uint32_t pos)
>  {
>  	struct rte_sched_grinder *grinder = subport->grinder + pos;
>  	struct rte_sched_pipe *pipe = grinder->pipe;
> @@ -2403,8 +2516,16 @@ grinder_schedule(struct rte_sched_port *port,
>  	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
>  	uint32_t be_tc_active;
>  
> -	if (!grinder_credits_check(port, subport, pos))
> -		return 0;
> +	switch (subport->is_tc_ov_enabled) {
> +	case 1:
> +		if (!grinder_credits_check_with_tc_ov(port, subport, pos))
> +			return 0;
> +		break;
> +	case 0:
> +		if (!grinder_credits_check(port, subport, pos))
> +			return 0;
> +		break;
> +	}
>  
>  	/* Advance port time */
>  	port->time += pkt_len;
> @@ -2770,7 +2891,11 @@ grinder_handle(struct rte_sched_port *port,
>  						subport->profile;
>  
>  		grinder_prefetch_tc_queue_arrays(subport, pos);
> -		grinder_credits_update(port, subport, pos);
> +
> +		if (unlikely(subport->is_tc_ov_enabled))
> +			grinder_credits_update_with_tc_ov(port, subport, pos);
> +		else
> +			grinder_credits_update(port, subport, pos);
>  
>  		grinder->state = e_GRINDER_PREFETCH_MBUF;
>  		return 0;
> diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
> index 5ece64e527..94febe1d94 100644
> --- a/lib/sched/rte_sched.h
> +++ b/lib/sched/rte_sched.h
> @@ -579,6 +579,24 @@ rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint
>  int
>  rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
>  
> +/**
> + * Hierarchical scheduler subport TC OV enable/disable config.
> + * Note that this function is safe to use at runtime
> + * to enable/disable TC OV for subport.
> + *
> + * @param port
> + *   Handle to port scheduler instance
> + * @param subport_id
> + *   Subport ID
> + * @param tc_ov_enable
> + *  Boolean flag to enable/disable TC OV
> + * @return
> + *   0 upon success, error code otherwise
> + */
> +__rte_experimental
> +int
> +rte_sched_subport_tc_ov_config(struct rte_sched_port *port, uint32_t subport_id, bool tc_ov_enable);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/sched/version.map b/lib/sched/version.map
> index d22c07fc9f..c6e994d8df 100644
> --- a/lib/sched/version.map
> +++ b/lib/sched/version.map
> @@ -34,4 +34,7 @@ EXPERIMENTAL {
>  	# added in 21.11
>  	rte_pie_rt_data_init;
>  	rte_pie_config_init;
> +
> +	# added in 22.03
> +	rte_sched_subport_tc_ov_config;
>  };
  

Patch

diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index ec74bee939..6e7d81df46 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -213,6 +213,9 @@  struct rte_sched_subport {
 	uint8_t *bmp_array;
 	struct rte_mbuf **queue_array;
 	uint8_t memory[0] __rte_cache_aligned;
+
+	/* TC oversubscription activation */
+	int is_tc_ov_enabled;
 } __rte_cache_aligned;
 
 struct rte_sched_port {
@@ -1165,6 +1168,45 @@  rte_sched_cman_config(struct rte_sched_port *port,
 }
 #endif
 
+int
+rte_sched_subport_tc_ov_config(struct rte_sched_port *port,
+	uint32_t subport_id,
+	bool tc_ov_enable)
+{
+	struct rte_sched_subport *s;
+	struct rte_sched_subport_profile *profile;
+
+	if (port == NULL) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for parameter port\n", __func__);
+		return -EINVAL;
+	}
+
+	if (subport_id >= port->n_subports_per_port) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for parameter subport id\n", __func__);
+		return  -EINVAL;
+	}
+
+	s = port->subports[subport_id];
+	s->is_tc_ov_enabled = tc_ov_enable ? 1 : 0;
+
+	if (s->is_tc_ov_enabled) {
+		/* TC oversubscription */
+		s->tc_ov_wm_min = port->mtu;
+		s->tc_ov_period_id = 0;
+		s->tc_ov = 0;
+		s->tc_ov_n = 0;
+		s->tc_ov_rate = 0;
+
+		profile = port->subport_profiles + s->profile;
+		s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
+				s->pipe_tc_be_rate_max);
+		s->tc_ov_wm = s->tc_ov_wm_max;
+	}
+	return 0;
+}
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1254,6 +1296,9 @@  rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
+		/* TC over-subscription is disabled by default */
+		s->is_tc_ov_enabled = 0;
+
 #ifdef RTE_SCHED_CMAN
 		if (params->cman_params != NULL) {
 			s->cman_enabled = true;
@@ -1316,13 +1361,6 @@  rte_sched_subport_config(struct rte_sched_port *port,
 
 		for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
 			s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;
-
-		/* TC oversubscription */
-		s->tc_ov_wm_min = port->mtu;
-		s->tc_ov_period_id = 0;
-		s->tc_ov = 0;
-		s->tc_ov_n = 0;
-		s->tc_ov_rate = 0;
 	}
 
 	{
@@ -1342,9 +1380,6 @@  rte_sched_subport_config(struct rte_sched_port *port,
 			else
 				profile->tc_credits_per_period[i] = 0;
 
-		s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
-							s->pipe_tc_be_rate_max);
-		s->tc_ov_wm = s->tc_ov_wm_max;
 		s->profile = subport_profile_id;
 
 	}
@@ -1417,17 +1452,20 @@  rte_sched_pipe_config(struct rte_sched_port *port,
 		double pipe_tc_be_rate =
 			(double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
 			/ (double) params->tc_period;
-		uint32_t tc_be_ov = s->tc_ov;
 
-		/* Unplug pipe from its subport */
-		s->tc_ov_n -= params->tc_ov_weight;
-		s->tc_ov_rate -= pipe_tc_be_rate;
-		s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
+		if (s->is_tc_ov_enabled) {
+			uint32_t tc_be_ov = s->tc_ov;
 
-		if (s->tc_ov != tc_be_ov) {
-			RTE_LOG(DEBUG, SCHED,
-				"Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n",
-				subport_id, subport_tc_be_rate, s->tc_ov_rate);
+			/* Unplug pipe from its subport */
+			s->tc_ov_n -= params->tc_ov_weight;
+			s->tc_ov_rate -= pipe_tc_be_rate;
+			s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
+
+			if (s->tc_ov != tc_be_ov) {
+				RTE_LOG(DEBUG, SCHED,
+					"Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n",
+					subport_id, subport_tc_be_rate, s->tc_ov_rate);
+			}
 		}
 
 		/* Reset the pipe */
@@ -1460,19 +1498,22 @@  rte_sched_pipe_config(struct rte_sched_port *port,
 		double pipe_tc_be_rate =
 			(double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
 			/ (double) params->tc_period;
-		uint32_t tc_be_ov = s->tc_ov;
 
-		s->tc_ov_n += params->tc_ov_weight;
-		s->tc_ov_rate += pipe_tc_be_rate;
-		s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
+		if (s->is_tc_ov_enabled) {
+			uint32_t tc_be_ov = s->tc_ov;
+
+			s->tc_ov_n += params->tc_ov_weight;
+			s->tc_ov_rate += pipe_tc_be_rate;
+			s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
 
-		if (s->tc_ov != tc_be_ov) {
-			RTE_LOG(DEBUG, SCHED,
-				"Subport %u Best effort TC oversubscription is ON (%.4lf < %.4lf)\n",
-				subport_id, subport_tc_be_rate, s->tc_ov_rate);
+			if (s->tc_ov != tc_be_ov) {
+				RTE_LOG(DEBUG, SCHED,
+					"Subport %u Best effort TC oversubscription is ON (%.4lf < %.4lf)\n",
+					subport_id, subport_tc_be_rate, s->tc_ov_rate);
+			}
+			p->tc_ov_period_id = s->tc_ov_period_id;
+			p->tc_ov_credits = s->tc_ov_wm;
 		}
-		p->tc_ov_period_id = s->tc_ov_period_id;
-		p->tc_ov_credits = s->tc_ov_wm;
 	}
 
 	return 0;
@@ -2318,6 +2359,45 @@  grinder_credits_update(struct rte_sched_port *port,
 	pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
 	pipe->tb_time += n_periods * params->tb_period;
 
+	/* Subport TCs */
+	if (unlikely(port->time >= subport->tc_time)) {
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
+			subport->tc_credits[i] = sp->tc_credits_per_period[i];
+
+		subport->tc_time = port->time + sp->tc_period;
+	}
+
+	/* Pipe TCs */
+	if (unlikely(port->time >= pipe->tc_time)) {
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
+			pipe->tc_credits[i] = params->tc_credits_per_period[i];
+		pipe->tc_time = port->time + params->tc_period;
+	}
+}
+
+static inline void
+grinder_credits_update_with_tc_ov(struct rte_sched_port *port,
+	struct rte_sched_subport *subport, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = subport->grinder + pos;
+	struct rte_sched_pipe *pipe = grinder->pipe;
+	struct rte_sched_pipe_profile *params = grinder->pipe_params;
+	struct rte_sched_subport_profile *sp = grinder->subport_params;
+	uint64_t n_periods;
+	uint32_t i;
+
+	/* Subport TB */
+	n_periods = (port->time - subport->tb_time) / sp->tb_period;
+	subport->tb_credits += n_periods * sp->tb_credits_per_period;
+	subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
+	subport->tb_time += n_periods * sp->tb_period;
+
+	/* Pipe TB */
+	n_periods = (port->time - pipe->tb_time) / params->tb_period;
+	pipe->tb_credits += n_periods * params->tb_credits_per_period;
+	pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
+	pipe->tb_time += n_periods * params->tb_period;
+
 	/* Subport TCs */
 	if (unlikely(port->time >= subport->tc_time)) {
 		subport->tc_ov_wm =
@@ -2348,6 +2428,39 @@  grinder_credits_update(struct rte_sched_port *port,
 static inline int
 grinder_credits_check(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = subport->grinder + pos;
+	struct rte_sched_pipe *pipe = grinder->pipe;
+	struct rte_mbuf *pkt = grinder->pkt;
+	uint32_t tc_index = grinder->tc_index;
+	uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
+	uint64_t subport_tb_credits = subport->tb_credits;
+	uint64_t subport_tc_credits = subport->tc_credits[tc_index];
+	uint64_t pipe_tb_credits = pipe->tb_credits;
+	uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
+	int enough_credits;
+
+	/* Check pipe and subport credits */
+	enough_credits = (pkt_len <= subport_tb_credits) &&
+		(pkt_len <= subport_tc_credits) &&
+		(pkt_len <= pipe_tb_credits) &&
+		(pkt_len <= pipe_tc_credits);
+
+	if (!enough_credits)
+		return 0;
+
+	/* Update pipe and subport credits */
+	subport->tb_credits -= pkt_len;
+	subport->tc_credits[tc_index] -= pkt_len;
+	pipe->tb_credits -= pkt_len;
+	pipe->tc_credits[tc_index] -= pkt_len;
+
+	return 1;
+}
+
+static inline int
+grinder_credits_check_with_tc_ov(struct rte_sched_port *port,
+	struct rte_sched_subport *subport, uint32_t pos)
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_pipe *pipe = grinder->pipe;
@@ -2403,8 +2516,16 @@  grinder_schedule(struct rte_sched_port *port,
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
 
-	if (!grinder_credits_check(port, subport, pos))
-		return 0;
+	switch (subport->is_tc_ov_enabled) {
+	case 1:
+		if (!grinder_credits_check_with_tc_ov(port, subport, pos))
+			return 0;
+		break;
+	case 0:
+		if (!grinder_credits_check(port, subport, pos))
+			return 0;
+		break;
+	}
 
 	/* Advance port time */
 	port->time += pkt_len;
@@ -2770,7 +2891,11 @@  grinder_handle(struct rte_sched_port *port,
 						subport->profile;
 
 		grinder_prefetch_tc_queue_arrays(subport, pos);
-		grinder_credits_update(port, subport, pos);
+
+		if (unlikely(subport->is_tc_ov_enabled))
+			grinder_credits_update_with_tc_ov(port, subport, pos);
+		else
+			grinder_credits_update(port, subport, pos);
 
 		grinder->state = e_GRINDER_PREFETCH_MBUF;
 		return 0;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index 5ece64e527..94febe1d94 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -579,6 +579,24 @@  rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint
 int
 rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
 
+/**
+ * Hierarchical scheduler subport TC OV enable/disable config.
+ * Note that this function is safe to use at runtime
+ * to enable/disable TC OV for subport.
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param subport_id
+ *   Subport ID
+ * @param tc_ov_enable
+ *  Boolean flag to enable/disable TC OV
+ * @return
+ *   0 upon success, error code otherwise
+ */
+__rte_experimental
+int
+rte_sched_subport_tc_ov_config(struct rte_sched_port *port, uint32_t subport_id, bool tc_ov_enable);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/sched/version.map b/lib/sched/version.map
index d22c07fc9f..c6e994d8df 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -34,4 +34,7 @@  EXPERIMENTAL {
 	# added in 21.11
 	rte_pie_rt_data_init;
 	rte_pie_config_init;
+
+	# added in 22.03
+	rte_sched_subport_tc_ov_config;
 };