[v2,1/6] ethdev: support setting lanes

Message ID 20240322070923.244417-2-huangdengdui@huawei.com (mailing list archive)
State Under Review
Delegated to: Ferruh Yigit
Headers
Series support setting lanes |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation warning apply patch failure
ci/iol-testing warning apply patch failure

Commit Message

huangdengdui March 22, 2024, 7:09 a.m. UTC
  Some speeds can be achieved with different numbers of lanes. For example,
100Gbps can be achieved using two lanes of 50Gbps or four lanes of 25Gbps.
When different lanes are used, the port cannot come up. This patch adds
support for setting lanes and reporting lanes.

In addition, add a device capability RTE_ETH_DEV_CAPA_SETTING_LANES.
When the device does not support it, if a speed supports different
numbers of lanes, the application does not know which lane number
is used by the device.

Signed-off-by: Dengdui Huang <huangdengdui@huawei.com>
---
 doc/guides/rel_notes/release_24_03.rst |   6 +
 drivers/net/bnxt/bnxt_ethdev.c         |   3 +-
 drivers/net/hns3/hns3_ethdev.c         |   1 +
 lib/ethdev/ethdev_linux_ethtool.c      | 208 ++++++++++++-------------
 lib/ethdev/ethdev_private.h            |   4 +
 lib/ethdev/ethdev_trace.h              |   4 +-
 lib/ethdev/meson.build                 |   2 +
 lib/ethdev/rte_ethdev.c                |  85 +++++++---
 lib/ethdev/rte_ethdev.h                |  75 ++++++---
 lib/ethdev/version.map                 |   6 +
 10 files changed, 250 insertions(+), 144 deletions(-)
  

Comments

Thomas Monjalon March 22, 2024, 1:58 p.m. UTC | #1
22/03/2024 08:09, Dengdui Huang:
> -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
> -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
> -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
> -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
> -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
> -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
> -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
> -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
> -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
> +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
> +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
> +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
> +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
> +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
> +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
> +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
> +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
> +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
> +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
> +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
> +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
> +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
> +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
> +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */

I don't think it is a good idea to make this more complex.
It brings nothing as far as I can see, compared to having speed and lanes separated.
Can we have lanes information a separate value? no need for bitmask.
  
Ajit Khaparde March 22, 2024, 3:15 p.m. UTC | #2
On Fri, Mar 22, 2024 at 6:58 AM Thomas Monjalon <thomas@monjalon.net> wrote:
>
> 22/03/2024 08:09, Dengdui Huang:
> > -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
> > -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
> > -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
> > -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
> > -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
> > -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
> > -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
> > -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
> > -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
> > +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
> > +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
> > +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
> > +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
> > +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
> > +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
> > +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
> > +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
> > +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
> > +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
> > +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
> > +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
> > +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
> > +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
> > +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
>
> I don't think it is a good idea to make this more complex.
> It brings nothing as far as I can see, compared to having speed and lanes separated.
> Can we have lanes information a separate value? no need for bitmask.
I agree.

>
>
  
Tyler Retzlaff March 22, 2024, 5:32 p.m. UTC | #3
On Fri, Mar 22, 2024 at 08:15:00AM -0700, Ajit Khaparde wrote:
> On Fri, Mar 22, 2024 at 6:58 AM Thomas Monjalon <thomas@monjalon.net> wrote:
> >
> > 22/03/2024 08:09, Dengdui Huang:
> > > -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
> > > -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
> > > -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
> > > -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
> > > -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
> > > -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
> > > -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
> > > -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
> > > -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
> > > +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
> > > +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
> > > +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
> > > +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
> > > +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
> > > +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
> > > +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
> > > +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
> > > +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
> > > +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
> > > +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
> > > +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
> > > +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
> > > +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
> > > +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
> >
> > I don't think it is a good idea to make this more complex.
> > It brings nothing as far as I can see, compared to having speed and lanes separated.
> > Can we have lanes information a separate value? no need for bitmask.
> I agree.

+1 api design coupling the two together is definitely undesirable it
seems like half the time you end up with redundant RTE_ETH_LANES_UNKNOWN.

> 
> >
> >
  
Damodharam Ammepalli March 22, 2024, 10:30 p.m. UTC | #4
On Fri, Mar 22, 2024 at 10:32 AM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> On Fri, Mar 22, 2024 at 08:15:00AM -0700, Ajit Khaparde wrote:
> > On Fri, Mar 22, 2024 at 6:58 AM Thomas Monjalon <thomas@monjalon.net> wrote:
> > >
> > > 22/03/2024 08:09, Dengdui Huang:
> > > > -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
> > > > -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
> > > > -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
> > > > -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
> > > > -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
> > > > -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
> > > > -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
> > > > -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
> > > > -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
> > > > +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
> > > > +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
> > > > +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
> > > > +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
> > > > +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
> > > > +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
> > > > +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
> > > > +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
> > > > +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
> > > > +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
> > > > +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
> > > > +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
> > > > +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
> > > > +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
> > > > +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
> > >
> > > I don't think it is a good idea to make this more complex.
> > > It brings nothing as far as I can see, compared to having speed and lanes separated.
> > > Can we have lanes information a separate value? no need for bitmask.
> > I agree.
>
> +1 api design coupling the two together is definitely undesirable it
> seems like half the time you end up with redundant RTE_ETH_LANES_UNKNOWN.

https://patchwork.dpdk.org/project/dpdk/list/?series=31606
This is how we have implemented internally and we could use this as a reference.
Ethtool 6.x allows lanes configuration this way.
ethtool -s ens6f0np0 speed <speed> duplex full autoneg off lanes < int >
>
> >
> > >
> > >
>
>
  
huangdengdui March 25, 2024, 6:24 a.m. UTC | #5
On 2024/3/22 21:58, Thomas Monjalon wrote:
> 22/03/2024 08:09, Dengdui Huang:
>> -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
>> -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
>> -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
>> -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
>> -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
>> -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
>> -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
>> -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
>> -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
>> +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
>> +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
>> +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
>> +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
>> +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
>> +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
>> +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
>> +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
>> +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
>> +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
>> +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
>> +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
>> +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
>> +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
>> +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
> 
> I don't think it is a good idea to make this more complex.
> It brings nothing as far as I can see, compared to having speed and lanes separated.
> Can we have lanes information a separate value? no need for bitmask.
> 
Hi,Thomas, Ajit, roretzla, damodharam

I also considered the option at the beginning of the design.
But this option is not used due to the following reasons:
1. For the user, ethtool couples speed and lanes.
The result of querying the NIC capability is as follows:
Supported link modes:
        100000baseSR4/Full
        100000baseSR2/Full
The NIC capability is configured as follows:
ethtool -s eth1 speed 100000 lanes 4 autoneg off
ethtool -s eth1 speed 100000 lanes 2 autoneg off

Therefore, users are more accustomed to the coupling of speed and lanes.

2. For the PHY, When the physical layer capability is configured through the MDIO,
the speed and lanes are also coupled.
For example:
Table 45–7—PMA/PMD control 2 register bit definitions[1]
PMA/PMD type selection
                        1 0 0 1 0 1 0 = 100GBASE-SR2 PMA/PMD
                        0 1 0 1 1 1 1 = 100GBASE-SR4 PMA/PMD

Therefore, coupling speeds and lanes is easier to understand.
And it is easier for the driver to report the support lanes.

In addition, the code implementation is compatible with the old version.
When the driver does not support the lanes setting, the code does not need to be modified.

So I think the speed and lanes coupling is better.

[1]
https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9844436
  
Thomas Monjalon March 25, 2024, 9:30 a.m. UTC | #6
25/03/2024 07:24, huangdengdui:
> 
> On 2024/3/22 21:58, Thomas Monjalon wrote:
> > 22/03/2024 08:09, Dengdui Huang:
> >> -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
> >> -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
> >> -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
> >> -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
> >> -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
> >> -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
> >> -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
> >> -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
> >> -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
> >> +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
> >> +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
> >> +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
> >> +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
> >> +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
> >> +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
> >> +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
> >> +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
> >> +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
> >> +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
> >> +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
> >> +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
> >> +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
> >> +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
> >> +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
> > 
> > I don't think it is a good idea to make this more complex.
> > It brings nothing as far as I can see, compared to having speed and lanes separated.
> > Can we have lanes information a separate value? no need for bitmask.
> > 
> Hi,Thomas, Ajit, roretzla, damodharam
> 
> I also considered the option at the beginning of the design.
> But this option is not used due to the following reasons:
> 1. For the user, ethtool couples speed and lanes.
> The result of querying the NIC capability is as follows:
> Supported link modes:
>         100000baseSR4/Full
>         100000baseSR2/Full
> The NIC capability is configured as follows:
> ethtool -s eth1 speed 100000 lanes 4 autoneg off
> ethtool -s eth1 speed 100000 lanes 2 autoneg off
> 
> Therefore, users are more accustomed to the coupling of speed and lanes.
> 
> 2. For the PHY, When the physical layer capability is configured through the MDIO,
> the speed and lanes are also coupled.
> For example:
> Table 45–7—PMA/PMD control 2 register bit definitions[1]
> PMA/PMD type selection
>                         1 0 0 1 0 1 0 = 100GBASE-SR2 PMA/PMD
>                         0 1 0 1 1 1 1 = 100GBASE-SR4 PMA/PMD
> 
> Therefore, coupling speeds and lanes is easier to understand.
> And it is easier for the driver to report the support lanes.
> 
> In addition, the code implementation is compatible with the old version.
> When the driver does not support the lanes setting, the code does not need to be modified.
> 
> So I think the speed and lanes coupling is better.

I don't think so.
You are mixing hardware implementation, user tool, and API.
Having a separate and simple API is cleaner and not more difficult to handle
in some get/set style functions.
  
Damodharam Ammepalli March 25, 2024, 9:14 p.m. UTC | #7
On Mon, Mar 25, 2024 at 2:30 AM Thomas Monjalon <thomas@monjalon.net> wrote:
>
> 25/03/2024 07:24, huangdengdui:
> >
> > On 2024/3/22 21:58, Thomas Monjalon wrote:
> > > 22/03/2024 08:09, Dengdui Huang:
> > >> -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
> > >> -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
> > >> -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
> > >> -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
> > >> -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
> > >> -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
> > >> -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
> > >> -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
> > >> -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
> > >> +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
> > >> +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
> > >> +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
> > >> +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
> > >> +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
> > >> +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
> > >> +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
> > >> +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
> > >> +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
> > >> +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
> > >> +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
> > >> +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
> > >> +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
> > >> +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
> > >> +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
> > >
> > > I don't think it is a good idea to make this more complex.
> > > It brings nothing as far as I can see, compared to having speed and lanes separated.
> > > Can we have lanes information a separate value? no need for bitmask.
> > >
> > Hi,Thomas, Ajit, roretzla, damodharam
> >
> > I also considered the option at the beginning of the design.
> > But this option is not used due to the following reasons:
> > 1. For the user, ethtool couples speed and lanes.
> > The result of querying the NIC capability is as follows:
> > Supported link modes:
> >         100000baseSR4/Full
> >         100000baseSR2/Full
> > The NIC capability is configured as follows:
> > ethtool -s eth1 speed 100000 lanes 4 autoneg off
> > ethtool -s eth1 speed 100000 lanes 2 autoneg off
> >
> > Therefore, users are more accustomed to the coupling of speed and lanes.
> >
> > 2. For the PHY, When the physical layer capability is configured through the MDIO,
> > the speed and lanes are also coupled.
> > For example:
> > Table 45–7—PMA/PMD control 2 register bit definitions[1]
> > PMA/PMD type selection
> >                         1 0 0 1 0 1 0 = 100GBASE-SR2 PMA/PMD
> >                         0 1 0 1 1 1 1 = 100GBASE-SR4 PMA/PMD
> >
> > Therefore, coupling speeds and lanes is easier to understand.
> > And it is easier for the driver to report the support lanes.
> >
> > In addition, the code implementation is compatible with the old version.
> > When the driver does not support the lanes setting, the code does not need to be modified.
> >
> > So I think the speed and lanes coupling is better.
>
> I don't think so.
> You are mixing hardware implementation, user tool, and API.
> Having a separate and simple API is cleaner and not more difficult to handle
> in some get/set style functions.
>
>
>

Agree with Thomas. DPDK lib/apps should be independent of HW implementation.
  
lihuisong (C) March 26, 2024, 1:42 a.m. UTC | #8
在 2024/3/25 17:30, Thomas Monjalon 写道:
> 25/03/2024 07:24, huangdengdui:
>> On 2024/3/22 21:58, Thomas Monjalon wrote:
>>> 22/03/2024 08:09, Dengdui Huang:
>>>> -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
>>>> -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
>>>> -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
>>>> -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
>>>> -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
>>>> -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
>>>> -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
>>>> -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
>>>> -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
>>>> +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
>>>> +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
>>>> +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
>>>> +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
>>>> +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
>>>> +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
>>>> +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
>>>> +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
>>>> +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
>>>> +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
>>>> +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
>>>> +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
>>>> +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
>>>> +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
>>>> +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
>>> I don't think it is a good idea to make this more complex.
>>> It brings nothing as far as I can see, compared to having speed and lanes separated.
>>> Can we have lanes information a separate value? no need for bitmask.
>>>
>> Hi,Thomas, Ajit, roretzla, damodharam
>>
>> I also considered the option at the beginning of the design.
>> But this option is not used due to the following reasons:
>> 1. For the user, ethtool couples speed and lanes.
>> The result of querying the NIC capability is as follows:
>> Supported link modes:
>>          100000baseSR4/Full
>>          100000baseSR2/Full
>> The NIC capability is configured as follows:
>> ethtool -s eth1 speed 100000 lanes 4 autoneg off
>> ethtool -s eth1 speed 100000 lanes 2 autoneg off
>>
>> Therefore, users are more accustomed to the coupling of speed and lanes.
>>
>> 2. For the PHY, When the physical layer capability is configured through the MDIO,
>> the speed and lanes are also coupled.
>> For example:
>> Table 45–7—PMA/PMD control 2 register bit definitions[1]
>> PMA/PMD type selection
>>                          1 0 0 1 0 1 0 = 100GBASE-SR2 PMA/PMD
>>                          0 1 0 1 1 1 1 = 100GBASE-SR4 PMA/PMD
>>
>> Therefore, coupling speeds and lanes is easier to understand.
>> And it is easier for the driver to report the support lanes.
>>
>> In addition, the code implementation is compatible with the old version.
>> When the driver does not support the lanes setting, the code does not need to be modified.
>>
>> So I think the speed and lanes coupling is better.
> I don't think so.
> You are mixing hardware implementation, user tool, and API.
> Having a separate and simple API is cleaner and not more difficult to handle
> in some get/set style functions.
Having a separate and simple API is cleaner. It's good.
But supported lane capabilities have a lot to do with the specified 
speed. This is determined by the related specification.
If we add a separate API for speed lanes, it probably is hard to check 
the validity of the configuration for speed and lanes.
And a lane-setting API separated from speed is not good for 
unifying all PMDs' behavior in the ethdev layer.

The patch[1] is an example of this separate API.
I think it is not very good. It cannot tell the user and the PMD the following points:
1) the user doesn't know what lanes should or can be set for a specified speed 
on one NIC.
2) what should the PMD do for a supported lane count in their HW?

Anyway, if we add the speed-lanes setting feature, we must report and set 
speed and lanes capabilities for the user well.
Otherwise, users will be more confused.

[1] https://patchwork.dpdk.org/project/dpdk/list/?series=31606

BR,
/Huisong
>
>
>
> .
  
Ajit Khaparde March 26, 2024, 3:45 a.m. UTC | #9
On Mon, Mar 25, 2024 at 6:42 PM lihuisong (C) <lihuisong@huawei.com> wrote:
>
>
> 在 2024/3/25 17:30, Thomas Monjalon 写道:
> > 25/03/2024 07:24, huangdengdui:
> >> On 2024/3/22 21:58, Thomas Monjalon wrote:
> >>> 22/03/2024 08:09, Dengdui Huang:
> >>>> -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
> >>>> +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
> >>>> +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
> >>>> +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
> >>>> +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
> >>>> +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
> >>> I don't think it is a good idea to make this more complex.
> >>> It brings nothing as far as I can see, compared to having speed and lanes separated.
> >>> Can we have lanes information a separate value? no need for bitmask.
> >>>
> >> Hi,Thomas, Ajit, roretzla, damodharam
> >>
> >> I also considered the option at the beginning of the design.
> >> But this option is not used due to the following reasons:
> >> 1. For the user, ethtool couples speed and lanes.
> >> The result of querying the NIC capability is as follows:
> >> Supported link modes:
> >>          100000baseSR4/Full
> >>          100000baseSR2/Full
So if DPDK provides a get lanes API, it should be able to tell the
number of lanes supported.
After that, the user should be able to pick one of the supported lane counts?

> >> The NIC capability is configured as follows:
> >> ethtool -s eth1 speed 100000 lanes 4 autoneg off
> >> ethtool -s eth1 speed 100000 lanes 2 autoneg off
> >>
> >> Therefore, users are more accustomed to the coupling of speed and lanes.
> >>
> >> 2. For the PHY, When the physical layer capability is configured through the MDIO,
> >> the speed and lanes are also coupled.
> >> For example:
> >> Table 45–7—PMA/PMD control 2 register bit definitions[1]
> >> PMA/PMD type selection
> >>                          1 0 0 1 0 1 0 = 100GBASE-SR2 PMA/PMD
> >>                          0 1 0 1 1 1 1 = 100GBASE-SR4 PMA/PMD
> >>
> >> Therefore, coupling speeds and lanes is easier to understand.
> >> And it is easier for the driver to report the support lanes.
> >>
> >> In addition, the code implementation is compatible with the old version.
> >> When the driver does not support the lanes setting, the code does not need to be modified.
> >>
> >> So I think the speed and lanes coupling is better.
> > I don't think so.
> > You are mixing hardware implementation, user tool, and API.
> > Having a separate and simple API is cleaner and not more difficult to handle
> > in some get/set style functions.
> Having a separate and simple API is cleaner. It's good.
> But supported lane capabilities have a lot to do with the specified
> speed. This is determined by releated specification.
> If we add a separate API for speed lanes, it probably is hard to check
> the validity of the configuration for speed and lanes.
> And the setting lane API sepparated from speed is not good for
> uniforming all PMD's behavior in ethdev layer.
>
> The patch[1] is an example for this separate API.
> I think it is not very good. It cannot tell user and PMD the follow points:
> 1) user don't know what lanes should or can be set for a specified speed
> on one NIC.
> 2) how should PMD do for a supported lanes in their HW?
>
> Anyway, if we add setting speed lanes feature, we must report and set
> speed and lanes capabilities for user well.
> otherwise, user will be more confused.
>
> [1] https://patchwork.dpdk.org/project/dpdk/list/?series=31606
>
> BR,
> /Huisong
> >
> >
> >
> > .
  
Thomas Monjalon March 26, 2024, 10:30 a.m. UTC | #10
26/03/2024 02:42, lihuisong (C):
> 
> 在 2024/3/25 17:30, Thomas Monjalon 写道:
> > 25/03/2024 07:24, huangdengdui:
> >> On 2024/3/22 21:58, Thomas Monjalon wrote:
> >>> 22/03/2024 08:09, Dengdui Huang:
> >>>> -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
> >>>> -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
> >>>> +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
> >>>> +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
> >>>> +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
> >>>> +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
> >>>> +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
> >>>> +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
> >>> I don't think it is a good idea to make this more complex.
> >>> It brings nothing as far as I can see, compared to having speed and lanes separated.
> >>> Can we have lanes information a separate value? no need for bitmask.
> >>>
> >> Hi,Thomas, Ajit, roretzla, damodharam
> >>
> >> I also considered the option at the beginning of the design.
> >> But this option is not used due to the following reasons:
> >> 1. For the user, ethtool couples speed and lanes.
> >> The result of querying the NIC capability is as follows:
> >> Supported link modes:
> >>          100000baseSR4/Full
> >>          100000baseSR2/Full
> >> The NIC capability is configured as follows:
> >> ethtool -s eth1 speed 100000 lanes 4 autoneg off
> >> ethtool -s eth1 speed 100000 lanes 2 autoneg off
> >>
> >> Therefore, users are more accustomed to the coupling of speed and lanes.
> >>
> >> 2. For the PHY, When the physical layer capability is configured through the MDIO,
> >> the speed and lanes are also coupled.
> >> For example:
> >> Table 45–7—PMA/PMD control 2 register bit definitions[1]
> >> PMA/PMD type selection
> >>                          1 0 0 1 0 1 0 = 100GBASE-SR2 PMA/PMD
> >>                          0 1 0 1 1 1 1 = 100GBASE-SR4 PMA/PMD
> >>
> >> Therefore, coupling speeds and lanes is easier to understand.
> >> And it is easier for the driver to report the support lanes.
> >>
> >> In addition, the code implementation is compatible with the old version.
> >> When the driver does not support the lanes setting, the code does not need to be modified.
> >>
> >> So I think the speed and lanes coupling is better.
> > I don't think so.
> > You are mixing hardware implementation, user tool, and API.
> > Having a separate and simple API is cleaner and not more difficult to handle
> > in some get/set style functions.
> Having a separate and simple API is cleaner. It's good.
> But supported lane capabilities have a lot to do with the specified 
> speed. This is determined by releated specification.
> If we add a separate API for speed lanes, it probably is hard to check 
> the validity of the configuration for speed and lanes.
> And the setting lane API sepparated from speed is not good for 
> uniforming all PMD's behavior in ethdev layer.

Please let's be more specific.
There are 3 needs:
	- set PHY lane config
	- get PHY lane config
	- get PHY lane capabilities

There is no problem providing a function to get the number of PHY lanes.
It is possible to set PHY lanes number after defining a fixed speed.

> The patch[1] is an example for this separate API.
> I think it is not very good. It cannot tell user and PMD the follow points:
> 1) user don't know what lanes should or can be set for a specified speed 
> on one NIC.

This is about capabilities.
Can we say a HW will support a maximum number of PHY lanes in general?
We may need to associate the maximum speed per lane?
Do we really need to associate PHY lane and PHY speed numbers for capabilities?
Example: if a HW supports 100G-4-lanes and 200G-2-lanes,
may we assume it is also supporting 200G-4-lanes?

> 2) how should PMD do for a supported lanes in their HW?

I don't understand this question. Please rephrase.

> Anyway, if we add setting speed lanes feature, we must report and set 
> speed and lanes capabilities for user well.
> otherwise, user will be more confused.

Well is not necessarily exposing all raw combinations as ethtool does.

> [1] https://patchwork.dpdk.org/project/dpdk/list/?series=31606
  
lihuisong (C) March 26, 2024, 11:15 a.m. UTC | #11
在 2024/3/26 18:30, Thomas Monjalon 写道:
> 26/03/2024 02:42, lihuisong (C):
>> 在 2024/3/25 17:30, Thomas Monjalon 写道:
>>> 25/03/2024 07:24, huangdengdui:
>>>> On 2024/3/22 21:58, Thomas Monjalon wrote:
>>>>> 22/03/2024 08:09, Dengdui Huang:
>>>>>> -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
>>>>>> -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
>>>>>> -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
>>>>>> -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
>>>>>> -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
>>>>>> -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
>>>>>> -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
>>>>>> -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
>>>>>> -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
>>>>>> +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
>>>>>> +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
>>>>>> +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
>>>>>> +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
>>>>>> +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
>>>>>> +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
>>>>>> +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
>>>>>> +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
>>>>>> +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
>>>>>> +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
>>>>>> +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
>>>>>> +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
>>>>>> +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
>>>>>> +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
>>>>>> +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
>>>>> I don't think it is a good idea to make this more complex.
>>>>> It brings nothing as far as I can see, compared to having speed and lanes separated.
>>>>> Can we have lanes information a separate value? no need for bitmask.
>>>>>
>>>> Hi,Thomas, Ajit, roretzla, damodharam
>>>>
>>>> I also considered the option at the beginning of the design.
>>>> But this option is not used due to the following reasons:
>>>> 1. For the user, ethtool couples speed and lanes.
>>>> The result of querying the NIC capability is as follows:
>>>> Supported link modes:
>>>>           100000baseSR4/Full
>>>>           100000baseSR2/Full
>>>> The NIC capability is configured as follows:
>>>> ethtool -s eth1 speed 100000 lanes 4 autoneg off
>>>> ethtool -s eth1 speed 100000 lanes 2 autoneg off
>>>>
>>>> Therefore, users are more accustomed to the coupling of speed and lanes.
>>>>
>>>> 2. For the PHY, When the physical layer capability is configured through the MDIO,
>>>> the speed and lanes are also coupled.
>>>> For example:
>>>> Table 45–7—PMA/PMD control 2 register bit definitions[1]
>>>> PMA/PMD type selection
>>>>                           1 0 0 1 0 1 0 = 100GBASE-SR2 PMA/PMD
>>>>                           0 1 0 1 1 1 1 = 100GBASE-SR4 PMA/PMD
>>>>
>>>> Therefore, coupling speeds and lanes is easier to understand.
>>>> And it is easier for the driver to report the support lanes.
>>>>
>>>> In addition, the code implementation is compatible with the old version.
>>>> When the driver does not support the lanes setting, the code does not need to be modified.
>>>>
>>>> So I think the speed and lanes coupling is better.
>>> I don't think so.
>>> You are mixing hardware implementation, user tool, and API.
>>> Having a separate and simple API is cleaner and not more difficult to handle
>>> in some get/set style functions.
>> Having a separate and simple API is cleaner. It's good.
>> But supported lane capabilities have a lot to do with the specified
>> speed. This is determined by releated specification.
>> If we add a separate API for speed lanes, it probably is hard to check
>> the validity of the configuration for speed and lanes.
>> And the setting lane API sepparated from speed is not good for
>> uniforming all PMD's behavior in ethdev layer.
> Please let's be more specific.
> There are 3 needs:
> 	- set PHY lane config
> 	- get PHY lane config
> 	- get PHY lane capabilities
IMO, these lane capabilities should be reported based on the supported speed 
capabilities.
>
> There is no problem providing a function to get the number of PHY lanes.
> It is possible to set PHY lanes number after defining a fixed speed.
yes it's ok.
>
>> The patch[1] is an example for this separate API.
>> I think it is not very good. It cannot tell user and PMD the follow points:
>> 1) user don't know what lanes should or can be set for a specified speed
>> on one NIC.
> This is about capabilities.
> Can we say a HW will support a maximum number of PHY lanes in general?
> We may need to associate the maximum speed per lane?
> Do we really need to associate PHY lane and PHY speed numbers for capabilities?
Personally, it should contain the below relationship at least.
speed 10G  --> 1lane | 4lane
speed 100G  --> 2lane | 4lane
> Example: if a HW supports 100G-4-lanes and 200G-2-lanes,
> may we assume it is also supporting 200G-4-lanes?
I think we cannot assume that the NIC also supports 200G-4-lanes.
Because it has a lot to do with HW design.
>
>> 2) how should PMD do for a supported lanes in their HW?
> I don't understand this question. Please rephrase.
I mean that the PMD doesn't know how many lanes to set when the lanes requested 
by the user are not supported at a fixed speed by the PMD.
So the ethdev layer should limit the available lane number based on a fixed 
speed.
>
>> Anyway, if we add setting speed lanes feature, we must report and set
>> speed and lanes capabilities for user well.
>> otherwise, user will be more confused.
> Well is not necessarily exposing all raw combinations as ethtool does.
Agreed.
>
>> [1] https://patchwork.dpdk.org/project/dpdk/list/?series=31606
>
>
> .
  
Ajit Khaparde March 26, 2024, 1:47 p.m. UTC | #12
On Tue, Mar 26, 2024 at 4:15 AM lihuisong (C) <lihuisong@huawei.com> wrote:
>
>
> 在 2024/3/26 18:30, Thomas Monjalon 写道:
> > 26/03/2024 02:42, lihuisong (C):
> >> 在 2024/3/25 17:30, Thomas Monjalon 写道:
> >>> 25/03/2024 07:24, huangdengdui:
> >>>> On 2024/3/22 21:58, Thomas Monjalon wrote:
> >>>>> 22/03/2024 08:09, Dengdui Huang:
> >>>>>> -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
> >>>>>> -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
> >>>>>> -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
> >>>>>> -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
> >>>>>> -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
> >>>>>> -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
> >>>>>> -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
> >>>>>> -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
> >>>>>> -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
> >>>>>> +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
> >>>>>> +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
> >>>>>> +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
> >>>>>> +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
> >>>>>> +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
> >>>>>> +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
> >>>>>> +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
> >>>>>> +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
> >>>>>> +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
> >>>>>> +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
> >>>>>> +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
> >>>>>> +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
> >>>>>> +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
> >>>>>> +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
> >>>>>> +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
> >>>>> I don't think it is a good idea to make this more complex.
> >>>>> It brings nothing as far as I can see, compared to having speed and lanes separated.
> >>>>> Can we have lanes information a separate value? no need for bitmask.
> >>>>>
> >>>> Hi,Thomas, Ajit, roretzla, damodharam
> >>>>
> >>>> I also considered the option at the beginning of the design.
> >>>> But this option is not used due to the following reasons:
> >>>> 1. For the user, ethtool couples speed and lanes.
> >>>> The result of querying the NIC capability is as follows:
> >>>> Supported link modes:
> >>>>           100000baseSR4/Full
> >>>>           100000baseSR2/Full
> >>>> The NIC capability is configured as follows:
> >>>> ethtool -s eth1 speed 100000 lanes 4 autoneg off
> >>>> ethtool -s eth1 speed 100000 lanes 2 autoneg off
> >>>>
> >>>> Therefore, users are more accustomed to the coupling of speed and lanes.
> >>>>
> >>>> 2. For the PHY, When the physical layer capability is configured through the MDIO,
> >>>> the speed and lanes are also coupled.
> >>>> For example:
> >>>> Table 45–7—PMA/PMD control 2 register bit definitions[1]
> >>>> PMA/PMD type selection
> >>>>                           1 0 0 1 0 1 0 = 100GBASE-SR2 PMA/PMD
> >>>>                           0 1 0 1 1 1 1 = 100GBASE-SR4 PMA/PMD
> >>>>
> >>>> Therefore, coupling speeds and lanes is easier to understand.
> >>>> And it is easier for the driver to report the support lanes.
> >>>>
> >>>> In addition, the code implementation is compatible with the old version.
> >>>> When the driver does not support the lanes setting, the code does not need to be modified.
> >>>>
> >>>> So I think the speed and lanes coupling is better.
> >>> I don't think so.
> >>> You are mixing hardware implementation, user tool, and API.
> >>> Having a separate and simple API is cleaner and not more difficult to handle
> >>> in some get/set style functions.
> >> Having a separate and simple API is cleaner. It's good.
> >> But supported lane capabilities have a lot to do with the specified
> >> speed. This is determined by releated specification.
> >> If we add a separate API for speed lanes, it probably is hard to check
> >> the validity of the configuration for speed and lanes.
> >> And the setting lane API sepparated from speed is not good for
> >> uniforming all PMD's behavior in ethdev layer.
> > Please let's be more specific.
> > There are 3 needs:
> >       - set PHY lane config
> >       - get PHY lane config
> >       - get PHY lane capabilities
> IMO, this lane capabilities should be reported based on supported speed
> capabilities.
> >
> > There is no problem providing a function to get the number of PHY lanes.
> > It is possible to set PHY lanes number after defining a fixed speed.
> yes it's ok.
> >
> >> The patch[1] is an example for this separate API.
> >> I think it is not very good. It cannot tell user and PMD the follow points:
> >> 1) user don't know what lanes should or can be set for a specified speed
> >> on one NIC.
> > This is about capabilities.
> > Can we say a HW will support a maximum number of PHY lanes in general?
> > We may need to associate the maximum speed per lane?
> > Do we really need to associate PHY lane and PHY speed numbers for capabilities?
> Personally, it should contain the below releationship at least.
> speed 10G  --> 1lane | 4lane
> speed 100G  --> 2lane | 4lane
> > Example: if a HW supports 100G-4-lanes and 200G-2-lanes,
> > may we assume it is also supporting 200G-4-lanes?
> I think we cannot assume that NIC also support 200G-4-lanes.
> Beause it has a lot to do with HW design.
> >
> >> 2) how should PMD do for a supported lanes in their HW?
> > I don't understand this question. Please rephrase.
> I mean that PMD don't know set how many lanes when the lanes from user
> is not supported on a fixed speed by PMD.
> So ethdev layer should limit the avaiable lane number based on a fixed
> speed.

ethdev layer has generally been opaque. We should keep it that way.
The PMD should know what the HW supports.
So it should show the capabilities correctly. Right?
And if the user provides incorrect settings, it should reject it.

> >
> >> Anyway, if we add setting speed lanes feature, we must report and set
> >> speed and lanes capabilities for user well.
> >> otherwise, user will be more confused.
> > Well is not necessarily exposing all raw combinations as ethtool does.
> Agreed.
> >
> >> [1] https://patchwork.dpdk.org/project/dpdk/list/?series=31606
> >
> >
> > .
  
Ajit Khaparde March 26, 2024, 6:11 p.m. UTC | #13
On Tue, Mar 26, 2024 at 6:47 AM Ajit Khaparde
<ajit.khaparde@broadcom.com> wrote:
>
> On Tue, Mar 26, 2024 at 4:15 AM lihuisong (C) <lihuisong@huawei.com> wrote:
> >
> >
> > 在 2024/3/26 18:30, Thomas Monjalon 写道:
> > > 26/03/2024 02:42, lihuisong (C):
> > >> 在 2024/3/25 17:30, Thomas Monjalon 写道:
> > >>> 25/03/2024 07:24, huangdengdui:
> > >>>> On 2024/3/22 21:58, Thomas Monjalon wrote:
> > >>>>> 22/03/2024 08:09, Dengdui Huang:
> > >>>>>> -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
> > >>>>>> -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
> > >>>>>> -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
> > >>>>>> -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
> > >>>>>> -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
> > >>>>>> -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
> > >>>>>> -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
> > >>>>>> -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
> > >>>>>> -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
> > >>>>>> +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
> > >>>>> I don't think it is a good idea to make this more complex.
> > >>>>> It brings nothing as far as I can see, compared to having speed and lanes separated.
> > >>>>> Can we have lanes information a separate value? no need for bitmask.
> > >>>>>
> > >>>> Hi,Thomas, Ajit, roretzla, damodharam
> > >>>>
> > >>>> I also considered the option at the beginning of the design.
> > >>>> But this option is not used due to the following reasons:
> > >>>> 1. For the user, ethtool couples speed and lanes.
> > >>>> The result of querying the NIC capability is as follows:
> > >>>> Supported link modes:
> > >>>>           100000baseSR4/Full
> > >>>>           100000baseSR2/Full
> > >>>> The NIC capability is configured as follows:
> > >>>> ethtool -s eth1 speed 100000 lanes 4 autoneg off
> > >>>> ethtool -s eth1 speed 100000 lanes 2 autoneg off
> > >>>>
> > >>>> Therefore, users are more accustomed to the coupling of speed and lanes.
> > >>>>
> > >>>> 2. For the PHY, When the physical layer capability is configured through the MDIO,
> > >>>> the speed and lanes are also coupled.
> > >>>> For example:
> > >>>> Table 45–7—PMA/PMD control 2 register bit definitions[1]
> > >>>> PMA/PMD type selection
> > >>>>                           1 0 0 1 0 1 0 = 100GBASE-SR2 PMA/PMD
> > >>>>                           0 1 0 1 1 1 1 = 100GBASE-SR4 PMA/PMD
> > >>>>
> > >>>> Therefore, coupling speeds and lanes is easier to understand.
> > >>>> And it is easier for the driver to report the support lanes.
> > >>>>
> > >>>> In addition, the code implementation is compatible with the old version.
> > >>>> When the driver does not support the lanes setting, the code does not need to be modified.
> > >>>>
> > >>>> So I think the speed and lanes coupling is better.
> > >>> I don't think so.
> > >>> You are mixing hardware implementation, user tool, and API.
> > >>> Having a separate and simple API is cleaner and not more difficult to handle
> > >>> in some get/set style functions.
> > >> Having a separate and simple API is cleaner. It's good.
> > >> But supported lane capabilities have a lot to do with the specified
> > >> speed. This is determined by releated specification.
> > >> If we add a separate API for speed lanes, it probably is hard to check
> > >> the validity of the configuration for speed and lanes.
> > >> And the setting lane API sepparated from speed is not good for
> > >> uniforming all PMD's behavior in ethdev layer.
> > > Please let's be more specific.
> > > There are 3 needs:
> > >       - set PHY lane config
> > >       - get PHY lane config
> > >       - get PHY lane capabilities
> > IMO, this lane capabilities should be reported based on supported speed
> > capabilities.
> > >
> > > There is no problem providing a function to get the number of PHY lanes.
> > > It is possible to set PHY lanes number after defining a fixed speed.
> > yes it's ok.
> > >
> > >> The patch[1] is an example for this separate API.
> > >> I think it is not very good. It cannot tell user and PMD the follow points:
> > >> 1) user don't know what lanes should or can be set for a specified speed
> > >> on one NIC.
> > > This is about capabilities.
> > > Can we say a HW will support a maximum number of PHY lanes in general?
> > > We may need to associate the maximum speed per lane?
> > > Do we really need to associate PHY lane and PHY speed numbers for capabilities?
> > Personally, it should contain the below releationship at least.
> > speed 10G  --> 1lane | 4lane
> > speed 100G  --> 2lane | 4lane
> > > Example: if a HW supports 100G-4-lanes and 200G-2-lanes,
> > > may we assume it is also supporting 200G-4-lanes?
> > I think we cannot assume that NIC also support 200G-4-lanes.
> > Beause it has a lot to do with HW design.
> > >
> > >> 2) how should PMD do for a supported lanes in their HW?
> > > I don't understand this question. Please rephrase.
> > I mean that PMD don't know set how many lanes when the lanes from user
> > is not supported on a fixed speed by PMD.
> > So ethdev layer should limit the avaiable lane number based on a fixed
> > speed.
>
> ethdev layer has generally been opaque. We should keep it that way.
I mis-typed.
%s/opaque/transparent


> The PMD should know what the HW supports.
> So it should show the capabilities correctly. Right?
> And if the user provides incorrect settings, it should reject it.
>
> > >
> > >> Anyway, if we add setting speed lanes feature, we must report and set
> > >> speed and lanes capabilities for user well.
> > >> otherwise, user will be more confused.
> > > Well is not necessarily exposing all raw combinations as ethtool does.
> > Agreed.
> > >
> > >> [1] https://patchwork.dpdk.org/project/dpdk/list/?series=31606
> > >
> > >
> > > .
  
Damodharam Ammepalli March 26, 2024, 6:21 p.m. UTC | #14
On Tue, Mar 26, 2024 at 11:12 AM Ajit Khaparde
<ajit.khaparde@broadcom.com> wrote:
>
> On Tue, Mar 26, 2024 at 6:47 AM Ajit Khaparde
> <ajit.khaparde@broadcom.com> wrote:
> >
> > On Tue, Mar 26, 2024 at 4:15 AM lihuisong (C) <lihuisong@huawei.com> wrote:
> > >
> > >
> > > 在 2024/3/26 18:30, Thomas Monjalon 写道:
> > > > 26/03/2024 02:42, lihuisong (C):
> > > >> 在 2024/3/25 17:30, Thomas Monjalon 写道:
> > > >>> 25/03/2024 07:24, huangdengdui:
> > > >>>> On 2024/3/22 21:58, Thomas Monjalon wrote:
> > > >>>>> 22/03/2024 08:09, Dengdui Huang:
> > > >>>>>> -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
> > > >>>>>> -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
> > > >>>>>> -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
> > > >>>>>> -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
> > > >>>>>> -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
> > > >>>>>> -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
> > > >>>>>> -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
> > > >>>>>> -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
> > > >>>>>> -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
> > > >>>>>> +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
> > > >>>>> I don't think it is a good idea to make this more complex.
> > > >>>>> It brings nothing as far as I can see, compared to having speed and lanes separated.
> > > >>>>> Can we have lanes information a separate value? no need for bitmask.
> > > >>>>>
> > > >>>> Hi,Thomas, Ajit, roretzla, damodharam
> > > >>>>
> > > >>>> I also considered the option at the beginning of the design.
> > > >>>> But this option is not used due to the following reasons:
> > > >>>> 1. For the user, ethtool couples speed and lanes.
> > > >>>> The result of querying the NIC capability is as follows:
> > > >>>> Supported link modes:
> > > >>>>           100000baseSR4/Full
> > > >>>>           100000baseSR2/Full
> > > >>>> The NIC capability is configured as follows:
> > > >>>> ethtool -s eth1 speed 100000 lanes 4 autoneg off
> > > >>>> ethtool -s eth1 speed 100000 lanes 2 autoneg off
> > > >>>>
> > > >>>> Therefore, users are more accustomed to the coupling of speed and lanes.
> > > >>>>
> > > >>>> 2. For the PHY, When the physical layer capability is configured through the MDIO,
> > > >>>> the speed and lanes are also coupled.
> > > >>>> For example:
> > > >>>> Table 45–7—PMA/PMD control 2 register bit definitions[1]
> > > >>>> PMA/PMD type selection
> > > >>>>                           1 0 0 1 0 1 0 = 100GBASE-SR2 PMA/PMD
> > > >>>>                           0 1 0 1 1 1 1 = 100GBASE-SR4 PMA/PMD
> > > >>>>
> > > >>>> Therefore, coupling speeds and lanes is easier to understand.
> > > >>>> And it is easier for the driver to report the support lanes.
> > > >>>>
> > > >>>> In addition, the code implementation is compatible with the old version.
> > > >>>> When the driver does not support the lanes setting, the code does not need to be modified.
> > > >>>>
> > > >>>> So I think the speed and lanes coupling is better.
> > > >>> I don't think so.
> > > >>> You are mixing hardware implementation, user tool, and API.
> > > >>> Having a separate and simple API is cleaner and not more difficult to handle
> > > >>> in some get/set style functions.
> > > >> Having a separate and simple API is cleaner. It's good.
> > > >> But supported lane capabilities have a lot to do with the specified
> > > >> speed. This is determined by releated specification.
> > > >> If we add a separate API for speed lanes, it probably is hard to check
> > > >> the validity of the configuration for speed and lanes.
> > > >> And the setting lane API sepparated from speed is not good for
> > > >> uniforming all PMD's behavior in ethdev layer.
> > > > Please let's be more specific.
> > > > There are 3 needs:
> > > >       - set PHY lane config
> > > >       - get PHY lane config
> > > >       - get PHY lane capabilities
> > > IMO, this lane capabilities should be reported based on supported speed
> > > capabilities.
> > > >
> > > > There is no problem providing a function to get the number of PHY lanes.
> > > > It is possible to set PHY lanes number after defining a fixed speed.
> > > yes it's ok.
> > > >
> > > >> The patch[1] is an example for this separate API.
> > > >> I think it is not very good. It cannot tell user and PMD the follow points:
> > > >> 1) user don't know what lanes should or can be set for a specified speed
> > > >> on one NIC.
> > > > This is about capabilities.
> > > > Can we say a HW will support a maximum number of PHY lanes in general?
> > > > We may need to associate the maximum speed per lane?
> > > > Do we really need to associate PHY lane and PHY speed numbers for capabilities?
> > > Personally, it should contain the below releationship at least.
> > > speed 10G  --> 1lane | 4lane
> > > speed 100G  --> 2lane | 4lane
> > > > Example: if a HW supports 100G-4-lanes and 200G-2-lanes,
> > > > may we assume it is also supporting 200G-4-lanes?
> > > I think we cannot assume that NIC also support 200G-4-lanes.
> > > Beause it has a lot to do with HW design.
> > > >
> > > >> 2) how should PMD do for a supported lanes in their HW?
> > > > I don't understand this question. Please rephrase.
> > > I mean that PMD don't know set how many lanes when the lanes from user
> > > is not supported on a fixed speed by PMD.
> > > So ethdev layer should limit the avaiable lane number based on a fixed
> > > speed.
> >
> > ethdev layer has generally been opaque. We should keep it that way.
> I mis-typed.
> %s/opaque/transparent
>
>
> > The PMD should know what the HW supports.
> > So it should show the capabilities correctly. Right?
> > And if the user provides incorrect settings, it should reject it.
> >
> > > >
> > > >> Anyway, if we add setting speed lanes feature, we must report and set
> > > >> speed and lanes capabilities for user well.
> > > >> otherwise, user will be more confused.
> > > > Well is not necessarily exposing all raw combinations as ethtool does.
> > > Agreed.
> > > >
> > > >> [1] https://patchwork.dpdk.org/project/dpdk/list/?series=31606
> > > >
> > > >
> > > >
Our RFC patch's cmdline design is inspired by how ethtool works as it
provides carriage return at user choice,
which makes it backward compatible for no lanes config also. testpmd
does not have that flexibility
in the speed command and we resorted to a separate command for lanes
and for all other reasons mentioned
earlier.

2nd, the lanes validation logic resting place, irrespective of lanes
in speed or separate lanes command,
like others said, the AutoNegotiation itself should suffice for link
train and up. Taking this example,
if the link came up at 100G PAM4-112 AN'd, and user for whatever
reasons, even others mentioned earlier,
may want to force 100Gb NRZ, which is 25G per lane (4 lanes); the
user should be aware of the commands the tool offers,
and the driver can do final validation, for anomalies.

In any case, in RFC patch we are doing lanes validation in
cmd_validate_lanes(portid_t pid, uint32_t *lanes), that gets populated
by hw/driver based on current
AN's link up speed and signalling type.

Today 400Gig is 4(pam4_56), 8(pam4_112) lanes and in future with a new
HW design,
it may be 2 x 200Gig lanes; at that time, we don't need to update
testpmd — we can handle it in the driver.
Maybe for a new speed 800Gb+, can demand an update to app/lib entries.
  
lihuisong (C) March 29, 2024, 3:25 a.m. UTC | #15
在 2024/3/26 21:47, Ajit Khaparde 写道:
> On Tue, Mar 26, 2024 at 4:15 AM lihuisong (C) <lihuisong@huawei.com> wrote:
>>
>> 在 2024/3/26 18:30, Thomas Monjalon 写道:
>>> 26/03/2024 02:42, lihuisong (C):
>>>> 在 2024/3/25 17:30, Thomas Monjalon 写道:
>>>>> 25/03/2024 07:24, huangdengdui:
>>>>>> On 2024/3/22 21:58, Thomas Monjalon wrote:
>>>>>>> 22/03/2024 08:09, Dengdui Huang:
>>>>>>>> -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
>>>>>>>> -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
>>>>>>>> -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
>>>>>>>> -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
>>>>>>>> -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
>>>>>>>> -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
>>>>>>>> -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
>>>>>>>> -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
>>>>>>>> -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
>>>>>>>> +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
>>>>>>> I don't think it is a good idea to make this more complex.
>>>>>>> It brings nothing as far as I can see, compared to having speed and lanes separated.
>>>>>>> Can we have lanes information a separate value? no need for bitmask.
>>>>>>>
>>>>>> Hi,Thomas, Ajit, roretzla, damodharam
>>>>>>
>>>>>> I also considered the option at the beginning of the design.
>>>>>> But this option is not used due to the following reasons:
>>>>>> 1. For the user, ethtool couples speed and lanes.
>>>>>> The result of querying the NIC capability is as follows:
>>>>>> Supported link modes:
>>>>>>            100000baseSR4/Full
>>>>>>            100000baseSR2/Full
>>>>>> The NIC capability is configured as follows:
>>>>>> ethtool -s eth1 speed 100000 lanes 4 autoneg off
>>>>>> ethtool -s eth1 speed 100000 lanes 2 autoneg off
>>>>>>
>>>>>> Therefore, users are more accustomed to the coupling of speed and lanes.
>>>>>>
>>>>>> 2. For the PHY, When the physical layer capability is configured through the MDIO,
>>>>>> the speed and lanes are also coupled.
>>>>>> For example:
>>>>>> Table 45–7—PMA/PMD control 2 register bit definitions[1]
>>>>>> PMA/PMD type selection
>>>>>>                            1 0 0 1 0 1 0 = 100GBASE-SR2 PMA/PMD
>>>>>>                            0 1 0 1 1 1 1 = 100GBASE-SR4 PMA/PMD
>>>>>>
>>>>>> Therefore, coupling speeds and lanes is easier to understand.
>>>>>> And it is easier for the driver to report the support lanes.
>>>>>>
>>>>>> In addition, the code implementation is compatible with the old version.
>>>>>> When the driver does not support the lanes setting, the code does not need to be modified.
>>>>>>
>>>>>> So I think the speed and lanes coupling is better.
>>>>> I don't think so.
>>>>> You are mixing hardware implementation, user tool, and API.
>>>>> Having a separate and simple API is cleaner and not more difficult to handle
>>>>> in some get/set style functions.
>>>> Having a separate and simple API is cleaner. It's good.
>>>> But supported lane capabilities have a lot to do with the specified
>>>> speed. This is determined by releated specification.
>>>> If we add a separate API for speed lanes, it probably is hard to check
>>>> the validity of the configuration for speed and lanes.
>>>> And the setting lane API sepparated from speed is not good for
>>>> uniforming all PMD's behavior in ethdev layer.
>>> Please let's be more specific.
>>> There are 3 needs:
>>>        - set PHY lane config
>>>        - get PHY lane config
>>>        - get PHY lane capabilities
>> IMO, this lane capabilities should be reported based on supported speed
>> capabilities.
>>> There is no problem providing a function to get the number of PHY lanes.
>>> It is possible to set PHY lanes number after defining a fixed speed.
>> yes it's ok.
>>>> The patch[1] is an example for this separate API.
>>>> I think it is not very good. It cannot tell user and PMD the follow points:
>>>> 1) user don't know what lanes should or can be set for a specified speed
>>>> on one NIC.
>>> This is about capabilities.
>>> Can we say a HW will support a maximum number of PHY lanes in general?
>>> We may need to associate the maximum speed per lane?
>>> Do we really need to associate PHY lane and PHY speed numbers for capabilities?
>> Personally, it should contain the below releationship at least.
>> speed 10G  --> 1lane | 4lane
>> speed 100G  --> 2lane | 4lane
>>> Example: if a HW supports 100G-4-lanes and 200G-2-lanes,
>>> may we assume it is also supporting 200G-4-lanes?
>> I think we cannot assume that NIC also support 200G-4-lanes.
>> Beause it has a lot to do with HW design.
>>>> 2) how should PMD do for a supported lanes in their HW?
>>> I don't understand this question. Please rephrase.
>> I mean that PMD don't know set how many lanes when the lanes from user
>> is not supported on a fixed speed by PMD.
>> So ethdev layer should limit the avaiable lane number based on a fixed
>> speed.
> ethdev layer has generally been opaque. We should keep it that way.
> The PMD should know what the HW supports.
If an operation is common to all PMDs, it is recommended to do it in the
ethdev layer.
> So it should show the capabilities correctly. Right?
Yes, the lane capabilities of different speeds are necessary for the user.
> And if the user provides incorrect settings, it should reject it.
There is no reason to reject it in this case.
>
>>>> Anyway, if we add setting speed lanes feature, we must report and set
>>>> speed and lanes capabilities for user well.
>>>> otherwise, user will be more confused.
>>> Well is not necessarily exposing all raw combinations as ethtool does.
>> Agreed.
>>>> [1] https://patchwork.dpdk.org/project/dpdk/list/?series=31606
>>>
>>> .
  
huangdengdui March 30, 2024, 11:38 a.m. UTC | #16
On 2024/3/27 2:21, Damodharam Ammepalli wrote:
> On Tue, Mar 26, 2024 at 11:12 AM Ajit Khaparde
> <ajit.khaparde@broadcom.com> wrote:
>>
>> On Tue, Mar 26, 2024 at 6:47 AM Ajit Khaparde
>> <ajit.khaparde@broadcom.com> wrote:
>>>
>>> On Tue, Mar 26, 2024 at 4:15 AM lihuisong (C) <lihuisong@huawei.com> wrote:
>>>>
>>>>
>>>> 在 2024/3/26 18:30, Thomas Monjalon 写道:
>>>>> 26/03/2024 02:42, lihuisong (C):
>>>>>> 在 2024/3/25 17:30, Thomas Monjalon 写道:
>>>>>>> 25/03/2024 07:24, huangdengdui:
>>>>>>>> On 2024/3/22 21:58, Thomas Monjalon wrote:
>>>>>>>>> 22/03/2024 08:09, Dengdui Huang:
>>>>>>>>>> -#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**< 10 Gbps */
>>>>>>>>>> -#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**< 20 Gbps */
>>>>>>>>>> -#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**< 25 Gbps */
>>>>>>>>>> -#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**< 40 Gbps */
>>>>>>>>>> -#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**< 50 Gbps */
>>>>>>>>>> -#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**< 56 Gbps */
>>>>>>>>>> -#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
>>>>>>>>>> -#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
>>>>>>>>>> -#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**< 10 Gbps */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**< 20 Gbps 2lanes */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**< 25 Gbps */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**< 40 Gbps 4lanes */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**< 50 Gbps */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**< 56 Gbps 4lanes */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17)  /**< 10 Gbps 4lanes */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**< 50 Gbps 2 lanes */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
>>>>>>>>>> +#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
>>>>>>>>> I don't think it is a good idea to make this more complex.
>>>>>>>>> It brings nothing as far as I can see, compared to having speed and lanes separated.
>>>>>>>>> Can we have lanes information a separate value? no need for bitmask.
>>>>>>>>>
>>>>>>>> Hi,Thomas, Ajit, roretzla, damodharam
>>>>>>>>
>>>>>>>> I also considered the option at the beginning of the design.
>>>>>>>> But this option is not used due to the following reasons:
>>>>>>>> 1. For the user, ethtool couples speed and lanes.
>>>>>>>> The result of querying the NIC capability is as follows:
>>>>>>>> Supported link modes:
>>>>>>>>           100000baseSR4/Full
>>>>>>>>           100000baseSR2/Full
>>>>>>>> The NIC capability is configured as follows:
>>>>>>>> ethtool -s eth1 speed 100000 lanes 4 autoneg off
>>>>>>>> ethtool -s eth1 speed 100000 lanes 2 autoneg off
>>>>>>>>
>>>>>>>> Therefore, users are more accustomed to the coupling of speed and lanes.
>>>>>>>>
>>>>>>>> 2. For the PHY, When the physical layer capability is configured through the MDIO,
>>>>>>>> the speed and lanes are also coupled.
>>>>>>>> For example:
>>>>>>>> Table 45–7—PMA/PMD control 2 register bit definitions[1]
>>>>>>>> PMA/PMD type selection
>>>>>>>>                           1 0 0 1 0 1 0 = 100GBASE-SR2 PMA/PMD
>>>>>>>>                           0 1 0 1 1 1 1 = 100GBASE-SR4 PMA/PMD
>>>>>>>>
>>>>>>>> Therefore, coupling speeds and lanes is easier to understand.
>>>>>>>> And it is easier for the driver to report the support lanes.
>>>>>>>>
>>>>>>>> In addition, the code implementation is compatible with the old version.
>>>>>>>> When the driver does not support the lanes setting, the code does not need to be modified.
>>>>>>>>
>>>>>>>> So I think the speed and lanes coupling is better.
>>>>>>> I don't think so.
>>>>>>> You are mixing hardware implementation, user tool, and API.
>>>>>>> Having a separate and simple API is cleaner and not more difficult to handle
>>>>>>> in some get/set style functions.
>>>>>> Having a separate and simple API is cleaner. It's good.
>>>>>> But supported lane capabilities have a lot to do with the specified
>>>>>> speed. This is determined by releated specification.
>>>>>> If we add a separate API for speed lanes, it probably is hard to check
>>>>>> the validity of the configuration for speed and lanes.
>>>>>> And the setting lane API sepparated from speed is not good for
>>>>>> uniforming all PMD's behavior in ethdev layer.
>>>>> Please let's be more specific.
>>>>> There are 3 needs:
>>>>>       - set PHY lane config
>>>>>       - get PHY lane config
>>>>>       - get PHY lane capabilities
>>>> IMO, this lane capabilities should be reported based on supported speed
>>>> capabilities.
>>>>>
>>>>> There is no problem providing a function to get the number of PHY lanes.
>>>>> It is possible to set PHY lanes number after defining a fixed speed.
>>>> yes it's ok.
>>>>>
>>>>>> The patch[1] is an example for this separate API.
>>>>>> I think it is not very good. It cannot tell user and PMD the follow points:
>>>>>> 1) user don't know what lanes should or can be set for a specified speed
>>>>>> on one NIC.
>>>>> This is about capabilities.
>>>>> Can we say a HW will support a maximum number of PHY lanes in general?
>>>>> We may need to associate the maximum speed per lane?
>>>>> Do we really need to associate PHY lane and PHY speed numbers for capabilities?
>>>> Personally, it should contain the below releationship at least.
>>>> speed 10G  --> 1lane | 4lane
>>>> speed 100G  --> 2lane | 4lane
>>>>> Example: if a HW supports 100G-4-lanes and 200G-2-lanes,
>>>>> may we assume it is also supporting 200G-4-lanes?
>>>> I think we cannot assume that NIC also support 200G-4-lanes.
>>>> Beause it has a lot to do with HW design.
>>>>>
>>>>>> 2) how should PMD do for a supported lanes in their HW?
>>>>> I don't understand this question. Please rephrase.
>>>> I mean that PMD don't know set how many lanes when the lanes from user
>>>> is not supported on a fixed speed by PMD.
>>>> So ethdev layer should limit the avaiable lane number based on a fixed
>>>> speed.
>>>
>>> ethdev layer has generally been opaque. We should keep it that way.
>> I mis-typed.
>> %s/opaque/transparent
>>
>>
>>> The PMD should know what the HW supports.
>>> So it should show the capabilities correctly. Right?
>>> And if the user provides incorrect settings, it should reject it.
>>>
>>>>>
>>>>>> Anyway, if we add setting speed lanes feature, we must report and set
>>>>>> speed and lanes capabilities for user well.
>>>>>> otherwise, user will be more confused.
>>>>> Well is not necessarily exposing all raw combinations as ethtool does.
>>>> Agreed.
>>>>>
>>>>>> [1] https://patchwork.dpdk.org/project/dpdk/list/?series=31606
>>>>>
>>>>>
>>>>>
> Our RFC patch's cmdline design is inspired by how ethtool works as it
> provides carriage return at user choice,
> which makes it backward compatible for no lanes config also. testpmd
> does not have that flexibility
> in the speed command and we resorted to a separate command for lanes
> and for all other reasons mentioned
> earlier.
> 
> 2nd, the lanes validation logic resting place, irrespective of lanes
> in speed or separate lanes command,
> like others said, the AutoNegotiation itself should suffice for link
> train and up. Taking this example,
> if the link came up at 100G PAM4-112 AN'd, and user for whatever
> reasons, even others mentioned earlier,
> may want it to force 100Gb NRZ which is 25G per lane 4, lanes), the
> user should aware of cmds the tool offers,
> and the driver can do final validation, for anomalies.
> 
> In any case, in RFC patch we are doing lanes validation in
> cmd_validate_lanes(portid_t pid, uint32_t *lanes), that gets populated
> by hw/driver based on current
> AN's link up speed and signalling type.
> 
> Today 400Gig is 4(pam4_56), 8(pam4_112) lanes and in future with a new
> HW design,
> it may be 2 x 200Gig lanes, at that time. we don't need to update
> testpmd, handle it in the driver.
> Maybe for a new speed 800Gb+, can demand an update to app/lib entries.
> 
As thomas said, There are 3 needs:
 - set the number of lanes for a device.
 - get number of current lanes for a device.
 - get the number of lanes supported by the device.

For the first two needs, similar to Damodharam's RFC patch [1],
the lane setting and speed setting are separated, I'm happy to set
the lanes this way.

But, there are different solutions for the device to report the setting
lane capability, as following:
1. Like the current patch, reporting device capabilities in speed and
   lane coupling mode. However, if we use this solution, we will have
   to couple the lanes setting with the speed setting.

2. Like Damodharam's RFC patch [1], the device reports the maximum
   number of supported lanes. Users can configure any lane count,
   which is completely separate from the speed.

3. Similar to the FEC capability reported by a device, the device reports the
   relationship table of the number of lanes supported by the speed,
   for example:
      speed    lanes_capa
      50G      1,2
      100G     1,2,4
      200G     2,4

Options 1 and 2 have been discussed a lot above.

For solution 1, the speed and lanes are over-coupled, and the implementation is too
complex. But I think it's easier to understand and easier for the device to report
capabilities. In addition, the ethtool reporting capability also uses this mode.

For solution 2, as huisong said, the user doesn't know which lanes should or can be set
for a specified speed on one NIC.

I think that when the device reports the capability, the lanes should be associated
with the speed. In this way, users can know which lanes are supported by the current
speed and verify the configuration validity.

So I think solution 3 is better. What do you think?

[1] https://patchwork.dpdk.org/project/dpdk/list/?series=31606
  
Thomas Monjalon April 1, 2024, 8:07 p.m. UTC | #17
30/03/2024 12:38, huangdengdui:
> But, there are different solutions for the device to report the setting
> lane capability, as following:
> 1. Like the current patch, reporting device capabilities in speed and
>    lane coupling mode. However, if we use this solution, we will have
>    to couple the the lanes setting with speed setting.
> 
> 2. Like the Damodharam's RFC patch [1], the device reports the maximum
>    number of supported lanes. Users can config a lane randomly,
>    which is completely separated from the speed.
> 
> 3. Similar to the FEC capability reported by a device, the device reports the
>    relationship table of the number of lanes supported by the speed,
>    for example:
>       speed    lanes_capa
>       50G      1,2
>       100G     1,2,4
>       200G     2,4
> 
> Options 1 and 2 have been discussed a lot above.
> 
> For solution 1, the speed and lanes are over-coupled, and the implementation is too
> complex. But I think it's easier to understand and easier for the device to report
> capabilities. In addition, the ethtool reporting capability also uses this mode.
> 
> For solution 2, as huisong said that user don't know what lanes should or can be set
> for a specified speed on one NIC.
> 
> I think that when the device reports the capability, the lanes should be associated
> with the speed. In this way, users can know which lanes are supported by the current
> speed and verify the configuration validity.
> 
> So I think solution 3 is better. What do you think?

I don't understand your proposals.
Please could you show the function signature for each option?
  
Damodharam Ammepalli April 1, 2024, 10:29 p.m. UTC | #18
On Mon, Apr 1, 2024 at 1:07 PM Thomas Monjalon <thomas@monjalon.net> wrote:
>
> 30/03/2024 12:38, huangdengdui:
> > But, there are different solutions for the device to report the setting
> > lane capability, as following:
> > 1. Like the current patch, reporting device capabilities in speed and
> >    lane coupling mode. However, if we use this solution, we will have
> >    to couple the the lanes setting with speed setting.
> >
> > 2. Like the Damodharam's RFC patch [1], the device reports the maximum
> >    number of supported lanes. Users can config a lane randomly,
> >    which is completely separated from the speed.
> >
> > 3. Similar to the FEC capability reported by a device, the device reports the
> >    relationship table of the number of lanes supported by the speed,
> >    for example:
> >       speed    lanes_capa
> >       50G      1,2
> >       100G     1,2,4
> >       200G     2,4
> >
> > Options 1 and 2 have been discussed a lot above.
> >
> > For solution 1, the speed and lanes are over-coupled, and the implementation is too
> > complex. But I think it's easier to understand and easier for the device to report
> > capabilities. In addition, the ethtool reporting capability also uses this mode.
> >
> > For solution 2, as huisong said that user don't know what lanes should or can be set
> > for a specified speed on one NIC.
> >
> > I think that when the device reports the capability, the lanes should be associated
> > with the speed. In this way, users can know which lanes are supported by the current
> > speed and verify the configuration validity.
> >
> > So I think solution 3 is better. What do you think?
>
> I don't understand your proposals.
> Please could you show the function signature for each option?
>
>
>
testpmd can query the driver, and driver can export latest bit-map say in,
rte_eth_speed_lanes_get()->supported_bmap

0  1Gb   link speed
1  10Gb  (NRZ: 10G per lane, 1 lane) link speed
2  25Gb  (NRZ: 25G per lane, 1 lane) link speed
3  40Gb  (NRZ: 10G per lane, 4 lanes) link speed
4  50Gb  (NRZ: 25G per lane, 2 lanes) link speed
5  100Gb (NRZ: 25G per lane, 4 lanes) link speed
6  50Gb  (PAM4-56: 50G per lane, 1 lane) link speed
7  100Gb (PAM4-56: 50G per lane, 2 lanes) link speed
8  200Gb (PAM4-56: 50G per lane, 4 lanes) link speed
9  400Gb (PAM4-56: 50G per lane, 8 lanes) link speed
10 100Gb (PAM4-112: 100G per lane, 1 lane) link speed
11 200Gb (PAM4-112: 100G per lane, 2 lanes) link speed
12 400Gb (PAM4-112: 100G per lane, 4 lanes) link speed
13 800Gb (PAM4-112: 100G per lane, 8 lanes) link speed
14 For future

In cmd_config_speed_specific_parsed()
   if (parse_and_check_speed_duplex(res->value1, res->value2, &link_speed) < 0)
        return;
+ /* validate speed x lanes combo */
+ if (!cmd_validate_lanes(res->id, link_speed))
+ return;

Driver can validate the rest of other internal link parameters in
rte_eth_dev_start() before
applying the config to the hardware.
  
huangdengdui April 2, 2024, 8:37 a.m. UTC | #19
On 2024/4/2 4:07, Thomas Monjalon wrote:
> 30/03/2024 12:38, huangdengdui:
>> But, there are different solutions for the device to report the setting
>> lane capability, as following:
>> 1. Like the current patch, reporting device capabilities in speed and
>>    lane coupling mode. However, if we use this solution, we will have
>>    to couple the the lanes setting with speed setting.
>>
>> 2. Like the Damodharam's RFC patch [1], the device reports the maximum
>>    number of supported lanes. Users can config a lane randomly,
>>    which is completely separated from the speed.
>>
>> 3. Similar to the FEC capability reported by a device, the device reports the
>>    relationship table of the number of lanes supported by the speed,
>>    for example:
>>       speed    lanes_capa
>>       50G      1,2
>>       100G     1,2,4
>>       200G     2,4
>>
>> Options 1 and 2 have been discussed a lot above.
>>
>> For solution 1, the speed and lanes are over-coupled, and the implementation is too
>> complex. But I think it's easier to understand and easier for the device to report
>> capabilities. In addition, the ethtool reporting capability also uses this mode.
>>
>> For solution 2, as huisong said that user don't know what lanes should or can be set
>> for a specified speed on one NIC.
>>
>> I think that when the device reports the capability, the lanes should be associated
>> with the speed. In this way, users can know which lanes are supported by the current
>> speed and verify the configuration validity.
>>
>> So I think solution 3 is better. What do you think?
> 
> I don't understand your proposals.
> Please could you show the function signature for each option?
> 
> 

I agree with separating the lanes setting from the speed setting.
I have a different proposal for device lanes capability reporting.

Three interfaces are added to the lib/ethdev like FEC interfaces.
1. rte_eth_lanes_get(uint16_t port_id, uint32_t *capa)       /* get current lanes */
2. rte_eth_lanes_set(uint16_t port_id, uint32_t capa)
3. rte_eth_lanes_get_capa(uint16_t port_id, struct rte_eth_lanes_capa *speed_lanes_capa)

/* A structure used to get capabilities per link speed */
struct rte_eth_lanes_capa {
	uint32_t speed; /**< Link speed (see RTE_ETH_SPEED_NUM_*) */
	uint32_t capa;  /**< lanes capabilities bitmask */
};

For example, an ethdev report the following lanes capability array:
struct rte_eth_lanes_capa[] device_capa = {
	{ RTE_ETH_SPEED_NUM_50G, 0x0003 }, //supports lanes 1 and 2 for 50G
	{ RTE_ETH_SPEED_NUM_100G, 0x000B } //supports lanes 1, 2 and 4 for 100G
};

The application can know which lanes are supported at a specified speed.

I think it's better to implement the setting lanes feature in this way.

Welcome to jump into the discussion.
  
Stephen Hemminger April 2, 2024, 3:28 p.m. UTC | #20
On Tue, 2 Apr 2024 16:37:39 +0800
huangdengdui <huangdengdui@huawei.com> wrote:

> On 2024/4/2 4:07, Thomas Monjalon wrote:
> > 30/03/2024 12:38, huangdengdui:  
> >> But, there are different solutions for the device to report the setting
> >> lane capability, as following:
> >> 1. Like the current patch, reporting device capabilities in speed and
> >>    lane coupling mode. However, if we use this solution, we will have
> >>    to couple the lanes setting with the speed setting.
> >>
> >> 2. Like the Damodharam's RFC patch [1], the device reports the maximum
> >>    number of supported lanes. Users can config a lane randomly,
> >>    which is completely separated from the speed.
> >>
> >> 3. Similar to the FEC capability reported by a device, the device reports the
> >>    relationship table of the number of lanes supported by the speed,
> >>    for example:
> >>       speed    lanes_capa
> >>       50G      1,2
> >>       100G     1,2,4
> >>       200G     2,4
> >>
> >> Options 1 and 2 have been discussed a lot above.
> >>
> >> For solution 1, the speed and lanes are over-coupled, and the implementation is too
> >> complex. But I think it's easier to understand and easier for the device to report
> >> capabilities. In addition, the ethtool reporting capability also uses this mode.
> >>
> >> For solution 2, as huisong said, users don't know what lanes should or can be set
> >> for a specified speed on one NIC.
> >>
> >> I think that when the device reports the capability, the lanes should be associated
> >> with the speed. In this way, users can know which lanes are supported by the current
> >> speed and verify the configuration validity.
> >>
> >> So I think solution 3 is better. What do you think?  
> > 
> > I don't understand your proposals.
> > Please could you show the function signature for each option?
> > 
> >   
> 
> I agree with separating the lanes setting from the speed setting.
> I have a different proposal for device lanes capability reporting.
> 
> Three interfaces are added to the lib/ethdev like FEC interfaces.
> 1. rte_eth_lanes_get(uint16_t port_id, uint32_t *capa)       /* get current lanes */
> 2. rte_eth_lanes_set(uint16_t port_id, uint32_t capa)
> 3. rte_eth_lanes_get_capa(uint16_t port_id, struct rte_eth_lanes_capa *speed_lanes_capa)
> 
> /* A structure used to get capabilities per link speed */
> struct rte_eth_lanes_capa {
> 	uint32_t speed; /**< Link speed (see RTE_ETH_SPEED_NUM_*) */
> 	uint32_t capa;  /**< lanes capabilities bitmask */
> };
> 
> For example, an ethdev report the following lanes capability array:
> struct rte_eth_lanes_capa[] device_capa = {
> 	{ RTE_ETH_SPEED_NUM_50G, 0x0003 }, //supports lanes 1 and 2 for 50G
> 	{ RTE_ETH_SPEED_NUM_100G, 0x000B } //supports lanes 1, 2 and 4 for 100G
> };
> 
> The application can know which lanes are supported at a specified speed.
> 
> I think it's better to implement the setting lanes feature in this way.
> 
> Welcome to jump into the discussion.

Wouldn't the best way to handle this be to make lanes as similar as possible
to how link speed is handled in ethdev now.  It would mean holding off until 24.11
release to do it right.  Things like adding link_lanes to rte_eth_link struct
  
Ferruh Yigit April 4, 2024, 1:45 p.m. UTC | #21
On 4/2/2024 9:37 AM, huangdengdui wrote:
> 
> 
> On 2024/4/2 4:07, Thomas Monjalon wrote:
>> 30/03/2024 12:38, huangdengdui:
>>> But, there are different solutions for the device to report the setting
>>> lane capability, as following:
>>> 1. Like the current patch, reporting device capabilities in speed and
>>>    lane coupling mode. However, if we use this solution, we will have
>>>    to couple the lanes setting with the speed setting.
>>>
>>> 2. Like the Damodharam's RFC patch [1], the device reports the maximum
>>>    number of supported lanes. Users can config a lane randomly,
>>>    which is completely separated from the speed.
>>>
>>> 3. Similar to the FEC capability reported by a device, the device reports the
>>>    relationship table of the number of lanes supported by the speed,
>>>    for example:
>>>       speed    lanes_capa
>>>       50G      1,2
>>>       100G     1,2,4
>>>       200G     2,4
>>>
>>> Options 1 and 2 have been discussed a lot above.
>>>
>>> For solution 1, the speed and lanes are over-coupled, and the implementation is too
>>> complex. But I think it's easier to understand and easier for the device to report
>>> capabilities. In addition, the ethtool reporting capability also uses this mode.
>>>
>>> For solution 2, as huisong said, users don't know what lanes should or can be set
>>> for a specified speed on one NIC.
>>>
>>> I think that when the device reports the capability, the lanes should be associated
>>> with the speed. In this way, users can know which lanes are supported by the current
>>> speed and verify the configuration validity.
>>>
>>> So I think solution 3 is better. What do you think?
>>
>> I don't understand your proposals.
>> Please could you show the function signature for each option?
>>
>>
> 
> I agree with separating the lanes setting from the speed setting.
> I have a different proposal for device lanes capability reporting.
> 
> Three interfaces are added to the lib/ethdev like FEC interfaces.
> 1. rte_eth_lanes_get(uint16_t port_id, uint32_t *capa)       /* get current lanes */
> 2. rte_eth_lanes_set(uint16_t port_id, uint32_t capa)
> 3. rte_eth_lanes_get_capa(uint16_t port_id, struct rte_eth_lanes_capa *speed_lanes_capa)
> 
> /* A structure used to get capabilities per link speed */
> struct rte_eth_lanes_capa {
> 	uint32_t speed; /**< Link speed (see RTE_ETH_SPEED_NUM_*) */
> 	uint32_t capa;  /**< lanes capabilities bitmask */
> };
> 
> For example, an ethdev report the following lanes capability array:
> struct rte_eth_lanes_capa[] device_capa = {
> 	{ RTE_ETH_SPEED_NUM_50G, 0x0003 }, //supports lanes 1 and 2 for 50G
> 	{ RTE_ETH_SPEED_NUM_100G, 0x000B } //supports lanes 1, 2 and 4 for 100G
> };
> 
> The application can know which lanes are supported at a specified speed.
> 
> I think it's better to implement the setting lanes feature in this way.
> 
> Welcome to jump into the discussion.
>

Hi Dengdui,

+1 to option 3, as lane capability is coupled with speed, it makes sense to
expose lane capability per speed, as done in FEC capability.
  
Ferruh Yigit May 22, 2024, 8:44 p.m. UTC | #22
On 4/1/2024 11:29 PM, Damodharam Ammepalli wrote:
> On Mon, Apr 1, 2024 at 1:07 PM Thomas Monjalon <thomas@monjalon.net> wrote:
>>
>> 30/03/2024 12:38, huangdengdui:
>>> But, there are different solutions for the device to report the setting
>>> lane capability, as following:
>>> 1. Like the current patch, reporting device capabilities in speed and
>>>    lane coupling mode. However, if we use this solution, we will have
>>>    to couple the the lanes setting with speed setting.
>>>
>>> 2. Like the Damodharam's RFC patch [1], the device reports the maximum
>>>    number of supported lanes. Users can config a lane randomly,
>>>    which is completely separated from the speed.
>>>
>>> 3. Similar to the FEC capability reported by a device, the device reports the
>>>    relationship table of the number of lanes supported by the speed,
>>>    for example:
>>>       speed    lanes_capa
>>>       50G      1,2
>>>       100G     1,2,4
>>>       200G     2,4
>>>
>>> Options 1 and 2 have been discussed a lot above.
>>>
>>> For solution 1, the speed and lanes are over-coupled, and the implementation is too
>>> complex. But I think it's easier to understand and easier for the device to report
>>> capabilities. In addition, the ethtool reporting capability also uses this mode.
>>>
>>> For solution 2, as huisong said, users don't know what lanes should or can be set
>>> for a specified speed on one NIC.
>>>
>>> I think that when the device reports the capability, the lanes should be associated
>>> with the speed. In this way, users can know which lanes are supported by the current
>>> speed and verify the configuration validity.
>>>
>>> So I think solution 3 is better. What do you think?
>>
>> I don't understand your proposals.
>> Please could you show the function signature for each option?
>>
>>
>>
> testpmd can query the driver, and driver can export latest bit-map say in,
> rte_eth_speed_lanes_get()->supported_bmap
> 
> 0  1Gb   link speed
> 1  10Gb  (NRZ: 10G per lane, 1 lane) link speed
> 2  25Gb  (NRZ: 25G per lane, 1 lane) link speed
> 3  40Gb  (NRZ: 10G per lane, 4 lanes) link speed
> 4  50Gb  (NRZ: 25G per lane, 2 lanes) link speed
> 5  100Gb (NRZ: 25G per lane, 4 lanes) link speed
> 6  50Gb  (PAM4-56: 50G per lane, 1 lane) link speed
> 7  100Gb (PAM4-56: 50G per lane, 2 lanes) link speed
> 8  200Gb (PAM4-56: 50G per lane, 4 lanes) link speed
> 9  400Gb (PAM4-56: 50G per lane, 8 lanes) link speed
> 10 100Gb (PAM4-112: 100G per lane, 1 lane) link speed
> 11 200Gb (PAM4-112: 100G per lane, 2 lanes) link speed
> 12 400Gb (PAM4-112: 100G per lane, 4 lanes) link speed
> 13 800Gb (PAM4-112: 100G per lane, 8 lanes) link speed
> 14 For future
>

Dengdui & Huisong mentioned that in HW, speed and lane are coupled; when
we have two different APIs for speed and lane, users may end up providing
an invalid configuration.

If lane capability report is tied with speed, this will enable user to
figure out correct lane for speed.

We already have similar implementation for FEC, for similar concern:
```
rte_eth_fec_get_capability(port_id, struct rte_eth_fec_capa 		
		*speed_fec_capa, num);

struct rte_eth_fec_capa {
	uint32_t speed;
	uint32_t capa;
}
```


@Damodharam, can you please check 'rte_eth_fec_get_capability()'?

If we can have similar lane capability reporting per speed, user can
provide lane value based on this. And rest of the validation can be done
by driver.

I will comment on your RFC.


> In cmd_config_speed_specific_parsed()
>    if (parse_and_check_speed_duplex(res->value1, res->value2, &link_speed) < 0)
>         return;
> + /* validate speed x lanes combo */
> + if (!cmd_validate_lanes(res->id, link_speed))
> + return;
> 
> Driver can validate the rest of other internal link parameters in
> rte_eth_dev_start() before
> applying the config to the hardware.
>
  

Patch

diff --git a/doc/guides/rel_notes/release_24_03.rst b/doc/guides/rel_notes/release_24_03.rst
index 7bd9ceab27..4621689c68 100644
--- a/doc/guides/rel_notes/release_24_03.rst
+++ b/doc/guides/rel_notes/release_24_03.rst
@@ -76,6 +76,9 @@  New Features
   * Added a fath path function ``rte_eth_tx_queue_count``
     to get the number of used descriptors of a Tx queue.
 
+* **Support setting lanes for ethdev.**
+  * Support setting lanes by extended ``RTE_ETH_LINK_SPEED_*``.
+
 * **Added hash calculation of an encapsulated packet as done by the HW.**
 
   Added function to calculate hash when doing tunnel encapsulation:
@@ -254,6 +257,9 @@  ABI Changes
 
 * No ABI change that would break compatibility with 23.11.
 
+* ethdev: Convert a numerical speed to a bitmap flag with lanes:
+  The function ``rte_eth_speed_bitflag`` add lanes parameters.
+
 
 Known Issues
 ------------
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index ba31ae9286..e881a7f3cc 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -711,7 +711,8 @@  static int bnxt_update_phy_setting(struct bnxt *bp)
 	}
 
 	/* convert to speedbit flag */
-	curr_speed_bit = rte_eth_speed_bitflag((uint32_t)link->link_speed, 1);
+	curr_speed_bit = rte_eth_speed_bitflag((uint32_t)link->link_speed,
+					       RTE_ETH_LANES_UNKNOWN, 1);
 
 	/*
 	 * Device is not obliged link down in certain scenarios, even
diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c
index b10d1216d2..ecd3b2ef64 100644
--- a/drivers/net/hns3/hns3_ethdev.c
+++ b/drivers/net/hns3/hns3_ethdev.c
@@ -5969,6 +5969,7 @@  hns3_get_speed_fec_capa(struct rte_eth_fec_capa *speed_fec_capa,
 	for (i = 0; i < RTE_DIM(speed_fec_capa_tbl); i++) {
 		speed_bit =
 			rte_eth_speed_bitflag(speed_fec_capa_tbl[i].speed,
+					      RTE_ETH_LANES_UNKNOWN,
 					      RTE_ETH_LINK_FULL_DUPLEX);
 		if ((speed_capa & speed_bit) == 0)
 			continue;
diff --git a/lib/ethdev/ethdev_linux_ethtool.c b/lib/ethdev/ethdev_linux_ethtool.c
index e792204b01..6412845161 100644
--- a/lib/ethdev/ethdev_linux_ethtool.c
+++ b/lib/ethdev/ethdev_linux_ethtool.c
@@ -7,6 +7,10 @@ 
 #include "rte_ethdev.h"
 #include "ethdev_linux_ethtool.h"
 
+#define RTE_ETH_LINK_MODES_INDEX_SPEED	0
+#define RTE_ETH_LINK_MODES_INDEX_DUPLEX	1
+#define RTE_ETH_LINK_MODES_INDEX_LANES	2
+
 /* Link modes sorted with index as defined in ethtool.
  * Values are speed in Mbps with LSB indicating duplex.
  *
@@ -15,123 +19,119 @@ 
  * and allows to compile with new bits included even on an old kernel.
  *
  * The array below is built from bit definitions with this shell command:
- *   sed -rn 's;.*(ETHTOOL_LINK_MODE_)([0-9]+)([0-9a-zA-Z_]*).*= *([0-9]*).*;'\
- *           '[\4] = \2, /\* \1\2\3 *\/;p' /usr/include/linux/ethtool.h |
- *   awk '/_Half_/{$3=$3+1","}1'
+ *   sed -rn 's;.*(ETHTOOL_LINK_MODE_)([0-9]+)([a-zA-Z]+)([0-9_]+)([0-9a-zA-Z_]*)
+ *   .*= *([0-9]*).*;'\ '[\6] = {\2, 1, \4}, /\* \1\2\3\4\5 *\/;p'
+ *    /usr/include/linux/ethtool.h | awk '/_Half_/{$4=0","}1' |
+ *    awk '/, _}/{$5=1"},"}1' | awk '{sub(/_}/,"\}");}1'
  */
-static const uint32_t link_modes[] = {
-	  [0] =      11, /* ETHTOOL_LINK_MODE_10baseT_Half_BIT */
-	  [1] =      10, /* ETHTOOL_LINK_MODE_10baseT_Full_BIT */
-	  [2] =     101, /* ETHTOOL_LINK_MODE_100baseT_Half_BIT */
-	  [3] =     100, /* ETHTOOL_LINK_MODE_100baseT_Full_BIT */
-	  [4] =    1001, /* ETHTOOL_LINK_MODE_1000baseT_Half_BIT */
-	  [5] =    1000, /* ETHTOOL_LINK_MODE_1000baseT_Full_BIT */
-	 [12] =   10000, /* ETHTOOL_LINK_MODE_10000baseT_Full_BIT */
-	 [15] =    2500, /* ETHTOOL_LINK_MODE_2500baseX_Full_BIT */
-	 [17] =    1000, /* ETHTOOL_LINK_MODE_1000baseKX_Full_BIT */
-	 [18] =   10000, /* ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT */
-	 [19] =   10000, /* ETHTOOL_LINK_MODE_10000baseKR_Full_BIT */
-	 [20] =   10000, /* ETHTOOL_LINK_MODE_10000baseR_FEC_BIT */
-	 [21] =   20000, /* ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT */
-	 [22] =   20000, /* ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT */
-	 [23] =   40000, /* ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT */
-	 [24] =   40000, /* ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT */
-	 [25] =   40000, /* ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT */
-	 [26] =   40000, /* ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT */
-	 [27] =   56000, /* ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT */
-	 [28] =   56000, /* ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT */
-	 [29] =   56000, /* ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT */
-	 [30] =   56000, /* ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT */
-	 [31] =   25000, /* ETHTOOL_LINK_MODE_25000baseCR_Full_BIT */
-	 [32] =   25000, /* ETHTOOL_LINK_MODE_25000baseKR_Full_BIT */
-	 [33] =   25000, /* ETHTOOL_LINK_MODE_25000baseSR_Full_BIT */
-	 [34] =   50000, /* ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT */
-	 [35] =   50000, /* ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT */
-	 [36] =  100000, /* ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT */
-	 [37] =  100000, /* ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT */
-	 [38] =  100000, /* ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT */
-	 [39] =  100000, /* ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT */
-	 [40] =   50000, /* ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT */
-	 [41] =    1000, /* ETHTOOL_LINK_MODE_1000baseX_Full_BIT */
-	 [42] =   10000, /* ETHTOOL_LINK_MODE_10000baseCR_Full_BIT */
-	 [43] =   10000, /* ETHTOOL_LINK_MODE_10000baseSR_Full_BIT */
-	 [44] =   10000, /* ETHTOOL_LINK_MODE_10000baseLR_Full_BIT */
-	 [45] =   10000, /* ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT */
-	 [46] =   10000, /* ETHTOOL_LINK_MODE_10000baseER_Full_BIT */
-	 [47] =    2500, /* ETHTOOL_LINK_MODE_2500baseT_Full_BIT */
-	 [48] =    5000, /* ETHTOOL_LINK_MODE_5000baseT_Full_BIT */
-	 [52] =   50000, /* ETHTOOL_LINK_MODE_50000baseKR_Full_BIT */
-	 [53] =   50000, /* ETHTOOL_LINK_MODE_50000baseSR_Full_BIT */
-	 [54] =   50000, /* ETHTOOL_LINK_MODE_50000baseCR_Full_BIT */
-	 [55] =   50000, /* ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT */
-	 [56] =   50000, /* ETHTOOL_LINK_MODE_50000baseDR_Full_BIT */
-	 [57] =  100000, /* ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT */
-	 [58] =  100000, /* ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT */
-	 [59] =  100000, /* ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT */
-	 [60] =  100000, /* ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT */
-	 [61] =  100000, /* ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT */
-	 [62] =  200000, /* ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT */
-	 [63] =  200000, /* ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT */
-	 [64] =  200000, /* ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT */
-	 [65] =  200000, /* ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT */
-	 [66] =  200000, /* ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT */
-	 [67] =     100, /* ETHTOOL_LINK_MODE_100baseT1_Full_BIT */
-	 [68] =    1000, /* ETHTOOL_LINK_MODE_1000baseT1_Full_BIT */
-	 [69] =  400000, /* ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT */
-	 [70] =  400000, /* ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT */
-	 [71] =  400000, /* ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT */
-	 [72] =  400000, /* ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT */
-	 [73] =  400000, /* ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT */
-	 [75] =  100000, /* ETHTOOL_LINK_MODE_100000baseKR_Full_BIT */
-	 [76] =  100000, /* ETHTOOL_LINK_MODE_100000baseSR_Full_BIT */
-	 [77] =  100000, /* ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT */
-	 [78] =  100000, /* ETHTOOL_LINK_MODE_100000baseCR_Full_BIT */
-	 [79] =  100000, /* ETHTOOL_LINK_MODE_100000baseDR_Full_BIT */
-	 [80] =  200000, /* ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT */
-	 [81] =  200000, /* ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT */
-	 [82] =  200000, /* ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT */
-	 [83] =  200000, /* ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT */
-	 [84] =  200000, /* ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT */
-	 [85] =  400000, /* ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT */
-	 [86] =  400000, /* ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT */
-	 [87] =  400000, /* ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT */
-	 [88] =  400000, /* ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT */
-	 [89] =  400000, /* ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT */
-	 [90] =     101, /* ETHTOOL_LINK_MODE_100baseFX_Half_BIT */
-	 [91] =     100, /* ETHTOOL_LINK_MODE_100baseFX_Full_BIT */
-	 [92] =      10, /* ETHTOOL_LINK_MODE_10baseT1L_Full_BIT */
-	 [93] =  800000, /* ETHTOOL_LINK_MODE_800000baseCR8_Full_BIT */
-	 [94] =  800000, /* ETHTOOL_LINK_MODE_800000baseKR8_Full_BIT */
-	 [95] =  800000, /* ETHTOOL_LINK_MODE_800000baseDR8_Full_BIT */
-	 [96] =  800000, /* ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT */
-	 [97] =  800000, /* ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT */
-	 [98] =  800000, /* ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT */
-	 [99] =      10, /* ETHTOOL_LINK_MODE_10baseT1S_Full_BIT */
-	[100] =      11, /* ETHTOOL_LINK_MODE_10baseT1S_Half_BIT */
-	[101] =      11, /* ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT */
+static const uint32_t link_modes[][3] = {
+	[0]   = {10, 0, 1},     /* ETHTOOL_LINK_MODE_10baseT_Half_BIT */
+	[1]   = {10, 1, 1},     /* ETHTOOL_LINK_MODE_10baseT_Full_BIT */
+	[2]   = {100, 0, 1},    /* ETHTOOL_LINK_MODE_100baseT_Half_BIT */
+	[3]   = {100, 1, 1},    /* ETHTOOL_LINK_MODE_100baseT_Full_BIT */
+	[4]   = {1000, 0, 1},   /* ETHTOOL_LINK_MODE_1000baseT_Half_BIT */
+	[5]   = {1000, 1, 1},   /* ETHTOOL_LINK_MODE_1000baseT_Full_BIT */
+	[12]  = {10000, 1, 1},  /* ETHTOOL_LINK_MODE_10000baseT_Full_BIT */
+	[15]  = {2500, 1, 1},   /* ETHTOOL_LINK_MODE_2500baseX_Full_BIT */
+	[17]  = {1000, 1, 1},   /* ETHTOOL_LINK_MODE_1000baseKX_Full_BIT */
+	[18]  = {10000, 1, 4},  /* ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT */
+	[19]  = {10000, 1, 1},  /* ETHTOOL_LINK_MODE_10000baseKR_Full_BIT */
+	[20]  = {10000, 1, 1},  /* ETHTOOL_LINK_MODE_10000baseR_FEC_BIT */
+	[21]  = {20000, 1, 2},  /* ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT */
+	[22]  = {20000, 1, 2},  /* ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT */
+	[23]  = {40000, 1, 4},  /* ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT */
+	[24]  = {40000, 1, 4},  /* ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT */
+	[25]  = {40000, 1, 4},  /* ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT */
+	[26]  = {40000, 1, 4},  /* ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT */
+	[27]  = {56000, 1, 4},  /* ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT */
+	[28]  = {56000, 1, 4},  /* ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT */
+	[29]  = {56000, 1, 4},  /* ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT */
+	[30]  = {56000, 1, 4},  /* ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT */
+	[31]  = {25000, 1, 1},  /* ETHTOOL_LINK_MODE_25000baseCR_Full_BIT */
+	[32]  = {25000, 1, 1},  /* ETHTOOL_LINK_MODE_25000baseKR_Full_BIT */
+	[33]  = {25000, 1, 1},  /* ETHTOOL_LINK_MODE_25000baseSR_Full_BIT */
+	[34]  = {50000, 1, 2},  /* ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT */
+	[35]  = {50000, 1, 2},  /* ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT */
+	[36]  = {100000, 1, 4}, /* ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT */
+	[37]  = {100000, 1, 4}, /* ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT */
+	[38]  = {100000, 1, 4}, /* ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT */
+	[39]  = {100000, 1, 4}, /* ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT */
+	[40]  = {50000, 1, 2},  /* ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT */
+	[41]  = {1000, 1, 1},   /* ETHTOOL_LINK_MODE_1000baseX_Full_BIT */
+	[42]  = {10000, 1, 1},  /* ETHTOOL_LINK_MODE_10000baseCR_Full_BIT */
+	[43]  = {10000, 1, 1},  /* ETHTOOL_LINK_MODE_10000baseSR_Full_BIT */
+	[44]  = {10000, 1, 1},  /* ETHTOOL_LINK_MODE_10000baseLR_Full_BIT */
+	[45]  = {10000, 1, 1},  /* ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT */
+	[46]  = {10000, 1, 1},  /* ETHTOOL_LINK_MODE_10000baseER_Full_BIT */
+	[47]  = {2500, 1, 1},   /* ETHTOOL_LINK_MODE_2500baseT_Full_BIT */
+	[48]  = {5000, 1, 1},   /* ETHTOOL_LINK_MODE_5000baseT_Full_BIT */
+	[52]  = {50000, 1, 1},  /* ETHTOOL_LINK_MODE_50000baseKR_Full_BIT */
+	[53]  = {50000, 1, 1},  /* ETHTOOL_LINK_MODE_50000baseSR_Full_BIT */
+	[54]  = {50000, 1, 1},  /* ETHTOOL_LINK_MODE_50000baseCR_Full_BIT */
+	[55]  = {50000, 1, 1},  /* ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT */
+	[56]  = {50000, 1, 1},  /* ETHTOOL_LINK_MODE_50000baseDR_Full_BIT */
+	[57]  = {100000, 1, 2}, /* ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT */
+	[58]  = {100000, 1, 2}, /* ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT */
+	[59]  = {100000, 1, 2}, /* ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT */
+	[60]  = {100000, 1, 2}, /* ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT */
+	[61]  = {100000, 1, 2}, /* ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT */
+	[62]  = {200000, 1, 4}, /* ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT */
+	[63]  = {200000, 1, 4}, /* ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT */
+	[64]  = {200000, 1, 4}, /* ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT */
+	[65]  = {200000, 1, 4}, /* ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT */
+	[66]  = {200000, 1, 4}, /* ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT */
+	[67]  = {100, 1, 1},    /* ETHTOOL_LINK_MODE_100baseT1_Full_BIT */
+	[68]  = {1000, 1, 1},   /* ETHTOOL_LINK_MODE_1000baseT1_Full_BIT */
+	[69]  = {400000, 1, 8}, /* ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT */
+	[70]  = {400000, 1, 8}, /* ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT */
+	[71]  = {400000, 1, 8}, /* ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT */
+	[72]  = {400000, 1, 8}, /* ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT */
+	[73]  = {400000, 1, 8}, /* ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT */
+	[75]  = {100000, 1, 1}, /* ETHTOOL_LINK_MODE_100000baseKR_Full_BIT */
+	[76]  = {100000, 1, 1}, /* ETHTOOL_LINK_MODE_100000baseSR_Full_BIT */
+	[77]  = {100000, 1, 1}, /* ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT */
+	[78]  = {100000, 1, 1}, /* ETHTOOL_LINK_MODE_100000baseCR_Full_BIT */
+	[79]  = {100000, 1, 1}, /* ETHTOOL_LINK_MODE_100000baseDR_Full_BIT */
+	[80]  = {200000, 1, 2}, /* ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT */
+	[81]  = {200000, 1, 2}, /* ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT */
+	[82]  = {200000, 1, 2}, /* ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT */
+	[83]  = {200000, 1, 2}, /* ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT */
+	[84]  = {200000, 1, 2}, /* ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT */
+	[85]  = {400000, 1, 4}, /* ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT */
+	[86]  = {400000, 1, 4}, /* ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT */
+	[87]  = {400000, 1, 4}, /* ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT */
+	[88]  = {400000, 1, 4}, /* ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT */
+	[89]  = {400000, 1, 4}, /* ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT */
+	[90]  = {100, 0, 1},    /* ETHTOOL_LINK_MODE_100baseFX_Half_BIT */
+	[91]  = {100, 1, 1},    /* ETHTOOL_LINK_MODE_100baseFX_Full_BIT */
+	[92]  = {10, 1, 1},     /* ETHTOOL_LINK_MODE_10baseT1L_Full_BIT */
+	[93]  = {800000, 1, 8}, /* ETHTOOL_LINK_MODE_800000baseCR8_Full_BIT */
+	[94]  = {800000, 1, 8}, /* ETHTOOL_LINK_MODE_800000baseKR8_Full_BIT */
+	[95]  = {800000, 1, 8}, /* ETHTOOL_LINK_MODE_800000baseDR8_Full_BIT */
+	[96]  = {800000, 1, 8}, /* ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT */
+	[97]  = {800000, 1, 8}, /* ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT */
+	[98]  = {800000, 1, 8}, /* ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT */
+	[99]  = {10, 1, 1},     /* ETHTOOL_LINK_MODE_10baseT1S_Full_BIT */
+	[100] = {10, 0, 1},     /* ETHTOOL_LINK_MODE_10baseT1S_Half_BIT */
+	[101] = {10, 0, 1},     /* ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT */
 };
 
 uint32_t
 rte_eth_link_speed_ethtool(enum ethtool_link_mode_bit_indices bit)
 {
-	uint32_t speed;
-	int duplex;
+	uint32_t speed, duplex, lanes;
 
 	/* get mode from array */
 	if (bit >= RTE_DIM(link_modes))
 		return RTE_ETH_LINK_SPEED_AUTONEG;
-	speed = link_modes[bit];
-	if (speed == 0)
+	if (link_modes[bit][RTE_ETH_LINK_MODES_INDEX_SPEED] == 0)
 		return RTE_ETH_LINK_SPEED_AUTONEG;
 	RTE_BUILD_BUG_ON(RTE_ETH_LINK_SPEED_AUTONEG != 0);
 
-	/* duplex is LSB */
-	duplex = (speed & 1) ?
-			RTE_ETH_LINK_HALF_DUPLEX :
-			RTE_ETH_LINK_FULL_DUPLEX;
-	speed &= RTE_GENMASK32(31, 1);
-
-	return rte_eth_speed_bitflag(speed, duplex);
+	speed = link_modes[bit][RTE_ETH_LINK_MODES_INDEX_SPEED];
+	duplex = link_modes[bit][RTE_ETH_LINK_MODES_INDEX_DUPLEX];
+	lanes = link_modes[bit][RTE_ETH_LINK_MODES_INDEX_LANES];
+	return rte_eth_speed_bitflag(speed, duplex, lanes);
 }
 
 uint32_t
diff --git a/lib/ethdev/ethdev_private.h b/lib/ethdev/ethdev_private.h
index 0d36b9c30f..9092ab3a9e 100644
--- a/lib/ethdev/ethdev_private.h
+++ b/lib/ethdev/ethdev_private.h
@@ -79,4 +79,8 @@  void eth_dev_txq_release(struct rte_eth_dev *dev, uint16_t qid);
 int eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues);
 int eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues);
 
+/* versioned functions */
+uint32_t rte_eth_speed_bitflag_v24(uint32_t speed, int duplex);
+uint32_t rte_eth_speed_bitflag_v25(uint32_t speed, uint8_t lanes, int duplex);
+
 #endif /* _ETH_PRIVATE_H_ */
diff --git a/lib/ethdev/ethdev_trace.h b/lib/ethdev/ethdev_trace.h
index 3bec87bfdb..5547b49cab 100644
--- a/lib/ethdev/ethdev_trace.h
+++ b/lib/ethdev/ethdev_trace.h
@@ -183,8 +183,10 @@  RTE_TRACE_POINT(
 
 RTE_TRACE_POINT(
 	rte_eth_trace_speed_bitflag,
-	RTE_TRACE_POINT_ARGS(uint32_t speed, int duplex, uint32_t ret),
+	RTE_TRACE_POINT_ARGS(uint32_t speed, uint8_t lanes, int duplex,
+			     uint32_t ret),
 	rte_trace_point_emit_u32(speed);
+	rte_trace_point_emit_u8(lanes);
 	rte_trace_point_emit_int(duplex);
 	rte_trace_point_emit_u32(ret);
 )
diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
index f1d2586591..2c9588d0b3 100644
--- a/lib/ethdev/meson.build
+++ b/lib/ethdev/meson.build
@@ -62,3 +62,5 @@  endif
 if get_option('buildtype').contains('debug')
     cflags += ['-DRTE_FLOW_DEBUG']
 endif
+
+use_function_versioning = true
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index f1c658f49e..6571116fbf 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -26,6 +26,7 @@ 
 #include <rte_class.h>
 #include <rte_ether.h>
 #include <rte_telemetry.h>
+#include <rte_function_versioning.h>
 
 #include "rte_ethdev.h"
 #include "rte_ethdev_trace_fp.h"
@@ -991,63 +992,101 @@  rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id)
 	return ret;
 }
 
-uint32_t
-rte_eth_speed_bitflag(uint32_t speed, int duplex)
+uint32_t __vsym
+rte_eth_speed_bitflag_v25(uint32_t speed, uint8_t lanes, int duplex)
 {
-	uint32_t ret;
+	uint32_t ret = 0;
 
 	switch (speed) {
 	case RTE_ETH_SPEED_NUM_10M:
-		ret = duplex ? RTE_ETH_LINK_SPEED_10M : RTE_ETH_LINK_SPEED_10M_HD;
+		if (lanes == RTE_ETH_LANES_UNKNOWN || lanes == RTE_ETH_LANES_1)
+			ret = duplex ? RTE_ETH_LINK_SPEED_10M : RTE_ETH_LINK_SPEED_10M_HD;
 		break;
 	case RTE_ETH_SPEED_NUM_100M:
-		ret = duplex ? RTE_ETH_LINK_SPEED_100M : RTE_ETH_LINK_SPEED_100M_HD;
+		if (lanes == RTE_ETH_LANES_UNKNOWN || lanes == RTE_ETH_LANES_1)
+			ret = duplex ? RTE_ETH_LINK_SPEED_100M : RTE_ETH_LINK_SPEED_100M_HD;
 		break;
 	case RTE_ETH_SPEED_NUM_1G:
-		ret = RTE_ETH_LINK_SPEED_1G;
+		if (lanes == RTE_ETH_LANES_UNKNOWN || lanes == RTE_ETH_LANES_1)
+			ret = RTE_ETH_LINK_SPEED_1G;
 		break;
 	case RTE_ETH_SPEED_NUM_2_5G:
-		ret = RTE_ETH_LINK_SPEED_2_5G;
+		if (lanes == RTE_ETH_LANES_UNKNOWN || lanes == RTE_ETH_LANES_1)
+			ret = RTE_ETH_LINK_SPEED_2_5G;
 		break;
 	case RTE_ETH_SPEED_NUM_5G:
-		ret = RTE_ETH_LINK_SPEED_5G;
+		if (lanes == RTE_ETH_LANES_UNKNOWN || lanes == RTE_ETH_LANES_1)
+			ret = RTE_ETH_LINK_SPEED_5G;
 		break;
 	case RTE_ETH_SPEED_NUM_10G:
-		ret = RTE_ETH_LINK_SPEED_10G;
+		if (lanes == RTE_ETH_LANES_UNKNOWN || lanes == RTE_ETH_LANES_1)
+			ret = RTE_ETH_LINK_SPEED_10G;
+		else if (lanes == RTE_ETH_LANES_4)
+			ret = RTE_ETH_LINK_SPEED_10G_4LANES;
 		break;
 	case RTE_ETH_SPEED_NUM_20G:
-		ret = RTE_ETH_LINK_SPEED_20G;
+		if (lanes == RTE_ETH_LANES_UNKNOWN || lanes == RTE_ETH_LANES_2)
+			ret = RTE_ETH_LINK_SPEED_20G_2LANES;
 		break;
 	case RTE_ETH_SPEED_NUM_25G:
-		ret = RTE_ETH_LINK_SPEED_25G;
+		if (lanes == RTE_ETH_LANES_UNKNOWN || lanes == RTE_ETH_LANES_1)
+			ret = RTE_ETH_LINK_SPEED_25G;
 		break;
 	case RTE_ETH_SPEED_NUM_40G:
-		ret = RTE_ETH_LINK_SPEED_40G;
+		if (lanes == RTE_ETH_LANES_UNKNOWN || lanes == RTE_ETH_LANES_4)
+			ret = RTE_ETH_LINK_SPEED_40G_4LANES;
 		break;
 	case RTE_ETH_SPEED_NUM_50G:
-		ret = RTE_ETH_LINK_SPEED_50G;
+		if (lanes == RTE_ETH_LANES_UNKNOWN || lanes == RTE_ETH_LANES_1)
+			ret = RTE_ETH_LINK_SPEED_50G;
+		else if (lanes == RTE_ETH_LANES_2)
+			ret = RTE_ETH_LINK_SPEED_50G_2LANES;
 		break;
 	case RTE_ETH_SPEED_NUM_56G:
-		ret = RTE_ETH_LINK_SPEED_56G;
+		if (lanes == RTE_ETH_LANES_UNKNOWN || lanes == RTE_ETH_LANES_4)
+			ret = RTE_ETH_LINK_SPEED_56G_4LANES;
 		break;
 	case RTE_ETH_SPEED_NUM_100G:
-		ret = RTE_ETH_LINK_SPEED_100G;
+		if (lanes == RTE_ETH_LANES_UNKNOWN || lanes == RTE_ETH_LANES_1)
+			ret = RTE_ETH_LINK_SPEED_100G;
+		else if (lanes == RTE_ETH_LANES_2)
+			ret = RTE_ETH_LINK_SPEED_100G_2LANES;
+		else if (lanes == RTE_ETH_LANES_4)
+			ret = RTE_ETH_LINK_SPEED_100G_4LANES;
 		break;
 	case RTE_ETH_SPEED_NUM_200G:
-		ret = RTE_ETH_LINK_SPEED_200G;
+		if (lanes == RTE_ETH_LANES_UNKNOWN || lanes == RTE_ETH_LANES_4)
+			ret = RTE_ETH_LINK_SPEED_200G_4LANES;
+		else if (lanes == RTE_ETH_LANES_2)
+			ret = RTE_ETH_LINK_SPEED_200G_2LANES;
 		break;
 	case RTE_ETH_SPEED_NUM_400G:
-		ret = RTE_ETH_LINK_SPEED_400G;
+		if (lanes == RTE_ETH_LANES_UNKNOWN || lanes == RTE_ETH_LANES_4)
+			ret = RTE_ETH_LINK_SPEED_400G_4LANES;
+		else if (lanes == RTE_ETH_LANES_8)
+			ret = RTE_ETH_LINK_SPEED_400G_8LANES;
 		break;
 	default:
 		ret = 0;
 	}
 
-	rte_eth_trace_speed_bitflag(speed, duplex, ret);
+	rte_eth_trace_speed_bitflag(speed, lanes, duplex, ret);
 
 	return ret;
 }
 
+uint32_t __vsym
+rte_eth_speed_bitflag_v24(uint32_t speed, int duplex)
+{
+	return rte_eth_speed_bitflag_v25(speed, RTE_ETH_LANES_UNKNOWN, duplex);
+}
+
+/* mark the v24 function as the older version, and v25 as the default version */
+VERSION_SYMBOL(rte_eth_speed_bitflag, _v24, 24);
+BIND_DEFAULT_SYMBOL(rte_eth_speed_bitflag, _v25, 25);
+MAP_STATIC_SYMBOL(uint32_t rte_eth_speed_bitflag(uint32_t speed, uint8_t lanes, int duplex),
+		  rte_eth_speed_bitflag_v25);
+
 const char *
 rte_eth_dev_rx_offload_name(uint64_t offload)
 {
@@ -3110,13 +3149,21 @@  rte_eth_link_to_str(char *str, size_t len, const struct rte_eth_link *eth_link)
 
 	if (eth_link->link_status == RTE_ETH_LINK_DOWN)
 		ret = snprintf(str, len, "Link down");
-	else
+	else if (eth_link->link_lanes == RTE_ETH_LANES_UNKNOWN)
 		ret = snprintf(str, len, "Link up at %s %s %s",
 			rte_eth_link_speed_to_str(eth_link->link_speed),
 			(eth_link->link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ?
 			"FDX" : "HDX",
 			(eth_link->link_autoneg == RTE_ETH_LINK_AUTONEG) ?
 			"Autoneg" : "Fixed");
+	else
+		ret = snprintf(str, len, "Link up at %s %u lanes %s %s",
+			rte_eth_link_speed_to_str(eth_link->link_speed),
+			eth_link->link_lanes,
+			(eth_link->link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ?
+			"FDX" : "HDX",
+			(eth_link->link_autoneg == RTE_ETH_LINK_AUTONEG) ?
+			"Autoneg" : "Fixed");
 
 	rte_eth_trace_link_to_str(len, eth_link, str, ret);
 
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 147257d6a2..123b771046 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -288,24 +288,40 @@  struct rte_eth_stats {
 /**@{@name Link speed capabilities
  * Device supported speeds bitmap flags
  */
-#define RTE_ETH_LINK_SPEED_AUTONEG 0             /**< Autonegotiate (all speeds) */
-#define RTE_ETH_LINK_SPEED_FIXED   RTE_BIT32(0)  /**< Disable autoneg (fixed speed) */
-#define RTE_ETH_LINK_SPEED_10M_HD  RTE_BIT32(1)  /**<  10 Mbps half-duplex */
-#define RTE_ETH_LINK_SPEED_10M     RTE_BIT32(2)  /**<  10 Mbps full-duplex */
-#define RTE_ETH_LINK_SPEED_100M_HD RTE_BIT32(3)  /**< 100 Mbps half-duplex */
-#define RTE_ETH_LINK_SPEED_100M    RTE_BIT32(4)  /**< 100 Mbps full-duplex */
-#define RTE_ETH_LINK_SPEED_1G      RTE_BIT32(5)  /**<   1 Gbps */
-#define RTE_ETH_LINK_SPEED_2_5G    RTE_BIT32(6)  /**< 2.5 Gbps */
-#define RTE_ETH_LINK_SPEED_5G      RTE_BIT32(7)  /**<   5 Gbps */
-#define RTE_ETH_LINK_SPEED_10G     RTE_BIT32(8)  /**<  10 Gbps */
-#define RTE_ETH_LINK_SPEED_20G     RTE_BIT32(9)  /**<  20 Gbps */
-#define RTE_ETH_LINK_SPEED_25G     RTE_BIT32(10) /**<  25 Gbps */
-#define RTE_ETH_LINK_SPEED_40G     RTE_BIT32(11) /**<  40 Gbps */
-#define RTE_ETH_LINK_SPEED_50G     RTE_BIT32(12) /**<  50 Gbps */
-#define RTE_ETH_LINK_SPEED_56G     RTE_BIT32(13) /**<  56 Gbps */
-#define RTE_ETH_LINK_SPEED_100G    RTE_BIT32(14) /**< 100 Gbps */
-#define RTE_ETH_LINK_SPEED_200G    RTE_BIT32(15) /**< 200 Gbps */
-#define RTE_ETH_LINK_SPEED_400G    RTE_BIT32(16) /**< 400 Gbps */
+#define RTE_ETH_LINK_SPEED_AUTONEG        0             /**< Autonegotiate (all speeds) */
+#define RTE_ETH_LINK_SPEED_FIXED          RTE_BIT32(0)  /**< Disable autoneg (fixed speed) */
+#define RTE_ETH_LINK_SPEED_10M_HD         RTE_BIT32(1)  /**<  10 Mbps half-duplex */
+#define RTE_ETH_LINK_SPEED_10M            RTE_BIT32(2)  /**<  10 Mbps full-duplex */
+#define RTE_ETH_LINK_SPEED_100M_HD        RTE_BIT32(3)  /**< 100 Mbps half-duplex */
+#define RTE_ETH_LINK_SPEED_100M           RTE_BIT32(4)  /**< 100 Mbps full-duplex */
+#define RTE_ETH_LINK_SPEED_1G             RTE_BIT32(5)  /**<   1 Gbps */
+#define RTE_ETH_LINK_SPEED_2_5G           RTE_BIT32(6)  /**< 2.5 Gbps */
+#define RTE_ETH_LINK_SPEED_5G             RTE_BIT32(7)  /**<   5 Gbps */
+#define RTE_ETH_LINK_SPEED_10G            RTE_BIT32(8)  /**<  10 Gbps */
+#define RTE_ETH_LINK_SPEED_20G            RTE_BIT32(9)  /**<  20 Gbps 2lanes */
+#define RTE_ETH_LINK_SPEED_25G            RTE_BIT32(10) /**<  25 Gbps */
+#define RTE_ETH_LINK_SPEED_40G            RTE_BIT32(11) /**<  40 Gbps 4lanes */
+#define RTE_ETH_LINK_SPEED_50G            RTE_BIT32(12) /**<  50 Gbps */
+#define RTE_ETH_LINK_SPEED_56G            RTE_BIT32(13) /**<  56 Gbps  4lanes */
+#define RTE_ETH_LINK_SPEED_100G           RTE_BIT32(14) /**< 100 Gbps */
+#define RTE_ETH_LINK_SPEED_200G           RTE_BIT32(15) /**< 200 Gbps 4lanes */
+#define RTE_ETH_LINK_SPEED_400G           RTE_BIT32(16) /**< 400 Gbps 4lanes */
+#define RTE_ETH_LINK_SPEED_10G_4LANES     RTE_BIT32(17) /**<  10 Gbps 4 lanes */
+#define RTE_ETH_LINK_SPEED_50G_2LANES     RTE_BIT32(18) /**<  50 Gbps 2 lanes */
+#define RTE_ETH_LINK_SPEED_100G_2LANES    RTE_BIT32(19) /**< 100 Gbps 2 lanes */
+#define RTE_ETH_LINK_SPEED_100G_4LANES    RTE_BIT32(20) /**< 100 Gbps 4lanes */
+#define RTE_ETH_LINK_SPEED_200G_2LANES    RTE_BIT32(21) /**< 200 Gbps 2lanes */
+#define RTE_ETH_LINK_SPEED_400G_8LANES    RTE_BIT32(22) /**< 400 Gbps 8lanes */
+/**@}*/
+
+/**@{@name Link speed capabilities
+ * Default lane counts, kept for compatibility with earlier versions
+ */
+#define RTE_ETH_LINK_SPEED_20G_2LANES	RTE_ETH_LINK_SPEED_20G
+#define RTE_ETH_LINK_SPEED_40G_4LANES	RTE_ETH_LINK_SPEED_40G
+#define RTE_ETH_LINK_SPEED_56G_4LANES	RTE_ETH_LINK_SPEED_56G
+#define RTE_ETH_LINK_SPEED_200G_4LANES	RTE_ETH_LINK_SPEED_200G
+#define RTE_ETH_LINK_SPEED_400G_4LANES	RTE_ETH_LINK_SPEED_400G
 /**@}*/
 
 /**@{@name Link speed
@@ -329,6 +345,16 @@  struct rte_eth_stats {
 #define RTE_ETH_SPEED_NUM_UNKNOWN UINT32_MAX /**< Unknown */
 /**@}*/
 
+/**@{@name Link lane number
+ * Ethernet lane number
+ */
+#define RTE_ETH_LANES_UNKNOWN    0 /**< Unknown */
+#define RTE_ETH_LANES_1          1 /**< 1 lane */
+#define RTE_ETH_LANES_2          2 /**< 2 lanes */
+#define RTE_ETH_LANES_4          4 /**< 4 lanes */
+#define RTE_ETH_LANES_8          8 /**< 8 lanes */
+/**@}*/
+
 /**
  * A structure used to retrieve link-level information of an Ethernet port.
  */
@@ -338,6 +364,7 @@  struct __rte_aligned(8) rte_eth_link { /**< aligned for atomic64 read/write */
 	uint16_t link_duplex  : 1;  /**< RTE_ETH_LINK_[HALF/FULL]_DUPLEX */
 	uint16_t link_autoneg : 1;  /**< RTE_ETH_LINK_[AUTONEG/FIXED] */
 	uint16_t link_status  : 1;  /**< RTE_ETH_LINK_[DOWN/UP] */
+	uint16_t link_lanes   : 4;  /**< RTE_ETH_LANES_ */
 };
 
 /**@{@name Link negotiation
@@ -1641,6 +1668,12 @@  struct rte_eth_conf {
 #define RTE_ETH_DEV_CAPA_FLOW_RULE_KEEP         RTE_BIT64(3)
 /** Device supports keeping shared flow objects across restart. */
 #define RTE_ETH_DEV_CAPA_FLOW_SHARED_OBJECT_KEEP RTE_BIT64(4)
+/**
+ * Device supports setting lanes. When the device does not support it,
+ * if a speed can be achieved with different numbers of lanes, the
+ * application cannot know which lane count the device is using.
+ */
+#define RTE_ETH_DEV_CAPA_SETTING_LANES RTE_BIT64(5)
 /**@}*/
 
 /*
@@ -2301,12 +2334,16 @@  uint16_t rte_eth_dev_count_total(void);
  *
  * @param speed
  *   Numerical speed value in Mbps
+ * @param lanes
+ *   Number of lanes (RTE_ETH_LANES_x).
+ *   RTE_ETH_LANES_UNKNOWN is always used when the device does not support
+ *   setting lanes.
  * @param duplex
  *   RTE_ETH_LINK_[HALF/FULL]_DUPLEX (only for 10/100M speeds)
  * @return
  *   0 if the speed cannot be mapped
  */
-uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex);
+uint32_t rte_eth_speed_bitflag(uint32_t speed, uint8_t lanes, int duplex);
 
 /**
  * Get RTE_ETH_RX_OFFLOAD_* flag name.
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 79f6f5293b..9fa2439976 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -169,6 +169,12 @@  DPDK_24 {
 	local: *;
 };
 
+DPDK_25 {
+	global:
+
+	rte_eth_speed_bitflag;
+} DPDK_24;
+
 EXPERIMENTAL {
 	global: