[v6,10/19] net/ngbe: support link update

Message ID 20210617110005.4132926-11-jiawenwu@trustnetic.com (mailing list archive)
State Changes Requested, archived
Delegated to: Andrew Rybchenko
Headers
Series net: ngbe PMD |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Jiawen Wu June 17, 2021, 10:59 a.m. UTC
  Register to handle device interrupt.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 doc/guides/nics/features/ngbe.ini  |   3 +
 doc/guides/nics/ngbe.rst           |   5 +
 drivers/net/ngbe/base/ngbe_dummy.h |   6 +
 drivers/net/ngbe/base/ngbe_type.h  |  11 +
 drivers/net/ngbe/ngbe_ethdev.c     | 376 ++++++++++++++++++++++++++++-
 drivers/net/ngbe/ngbe_ethdev.h     |  34 +++
 6 files changed, 434 insertions(+), 1 deletion(-)
  

Comments

Andrew Rybchenko July 2, 2021, 4:24 p.m. UTC | #1
On 6/17/21 1:59 PM, Jiawen Wu wrote:
> Register to handle device interrupt.
> 
> Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>

[snip]

> diff --git a/doc/guides/nics/ngbe.rst b/doc/guides/nics/ngbe.rst
> index 54d0665db9..0918cc2918 100644
> --- a/doc/guides/nics/ngbe.rst
> +++ b/doc/guides/nics/ngbe.rst
> @@ -8,6 +8,11 @@ The NGBE PMD (librte_pmd_ngbe) provides poll mode driver support
>  for Wangxun 1 Gigabit Ethernet NICs.
>  
>  
> +Features
> +--------
> +
> +- Link state information
> +

Two empty lines before the section.

>  Prerequisites
>  -------------
>  

[snip]

> diff --git a/drivers/net/ngbe/ngbe_ethdev.c b/drivers/net/ngbe/ngbe_ethdev.c
> index deca64137d..c952023e8b 100644
> --- a/drivers/net/ngbe/ngbe_ethdev.c
> +++ b/drivers/net/ngbe/ngbe_ethdev.c
> @@ -141,6 +175,23 @@ eth_ngbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused)
>  		return -ENOMEM;
>  	}
>  
> +	ctrl_ext = rd32(hw, NGBE_PORTCTL);
> +	/* let hardware know driver is loaded */
> +	ctrl_ext |= NGBE_PORTCTL_DRVLOAD;
> +	/* Set PF Reset Done bit so PF/VF Mail Ops can work */
> +	ctrl_ext |= NGBE_PORTCTL_RSTDONE;
> +	wr32(hw, NGBE_PORTCTL, ctrl_ext);
> +	ngbe_flush(hw);
> +
> +	rte_intr_callback_register(intr_handle,
> +				   ngbe_dev_interrupt_handler, eth_dev);
> +
> +	/* enable uio/vfio intr/eventfd mapping */
> +	rte_intr_enable(intr_handle);
> +
> +	/* enable support intr */
> +	ngbe_enable_intr(eth_dev);
> +

I don't understand why it is done unconditionally regardless
of the corresponding bit in dev_conf.

>  	return 0;
>  }
>  
> @@ -180,11 +231,25 @@ static int eth_ngbe_pci_remove(struct rte_pci_device *pci_dev)
>  
>  static struct rte_pci_driver rte_ngbe_pmd = {
>  	.id_table = pci_id_ngbe_map,
> -	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
> +	.drv_flags = RTE_PCI_DRV_NEED_MAPPING |
> +		     RTE_PCI_DRV_INTR_LSC,
>  	.probe = eth_ngbe_pci_probe,
>  	.remove = eth_ngbe_pci_remove,
>  };
>  
> +static int
> +ngbe_dev_configure(struct rte_eth_dev *dev)
> +{
> +	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	/* set flag to update link status after init */
> +	intr->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
> +
> +	return 0;
> +}
> +
>  /*
>   * Reset and stop device.
>   */
> @@ -198,6 +263,315 @@ ngbe_dev_close(struct rte_eth_dev *dev)
>  	return -EINVAL;
>  }
>  
> +static int
> +ngbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
> +{
> +	RTE_SET_USED(dev);
> +
> +	dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_100M |
> +				ETH_LINK_SPEED_10M;
> +
> +	return 0;
> +}
> +
> +/* return 0 means link status changed, -1 means not changed */
> +int
> +ngbe_dev_link_update_share(struct rte_eth_dev *dev,
> +			    int wait_to_complete)
> +{
> +	struct ngbe_hw *hw = ngbe_dev_hw(dev);
> +	struct rte_eth_link link;
> +	u32 link_speed = NGBE_LINK_SPEED_UNKNOWN;
> +	u32 lan_speed = 0;
> +	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
> +	bool link_up;
> +	int err;
> +	int wait = 1;
> +
> +	memset(&link, 0, sizeof(link));
> +	link.link_status = ETH_LINK_DOWN;
> +	link.link_speed = ETH_SPEED_NUM_NONE;
> +	link.link_duplex = ETH_LINK_HALF_DUPLEX;
> +	link.link_autoneg = !(dev->data->dev_conf.link_speeds &
> +			~ETH_LINK_SPEED_AUTONEG);
> +
> +	hw->mac.get_link_status = true;
> +
> +	if (intr->flags & NGBE_FLAG_NEED_LINK_CONFIG)
> +		return rte_eth_linkstatus_set(dev, &link);
> +
> +	/* check if it needs to wait to complete, if lsc interrupt is enabled */
> +	if (wait_to_complete == 0 || dev->data->dev_conf.intr_conf.lsc != 0)
> +		wait = 0;
> +
> +	err = hw->mac.check_link(hw, &link_speed, &link_up, wait);
> +

Please, remove empty line which adds unnecessary logical
separation of the operation and its result check.

> +	if (err != 0) {
> +		link.link_speed = ETH_SPEED_NUM_100M;
> +		link.link_duplex = ETH_LINK_FULL_DUPLEX;
> +		return rte_eth_linkstatus_set(dev, &link);
> +	}
> +
> +	if (!link_up)
> +		return rte_eth_linkstatus_set(dev, &link);
> +
> +	intr->flags &= ~NGBE_FLAG_NEED_LINK_CONFIG;
> +	link.link_status = ETH_LINK_UP;
> +	link.link_duplex = ETH_LINK_FULL_DUPLEX;
> +
> +	switch (link_speed) {
> +	default:
> +	case NGBE_LINK_SPEED_UNKNOWN:
> +		link.link_duplex = ETH_LINK_FULL_DUPLEX;
> +		link.link_speed = ETH_SPEED_NUM_100M;

May be ETH_SPEED_NUM_NONE?

> +		break;
> +
> +	case NGBE_LINK_SPEED_10M_FULL:
> +		link.link_speed = ETH_SPEED_NUM_10M;
> +		lan_speed = 0;
> +		break;
> +
> +	case NGBE_LINK_SPEED_100M_FULL:
> +		link.link_speed = ETH_SPEED_NUM_100M;
> +		lan_speed = 1;
> +		break;
> +
> +	case NGBE_LINK_SPEED_1GB_FULL:
> +		link.link_speed = ETH_SPEED_NUM_1G;
> +		lan_speed = 2;
> +		break;
> +	}
> +
> +	if (hw->is_pf) {
> +		wr32m(hw, NGBE_LAN_SPEED, NGBE_LAN_SPEED_MASK, lan_speed);
> +		if (link_speed & (NGBE_LINK_SPEED_1GB_FULL |
> +			NGBE_LINK_SPEED_100M_FULL | NGBE_LINK_SPEED_10M_FULL)) {

Such indent is very confusing since it is the same as on the
next line. Please, add extra TAB to indent above line further.

> +			wr32m(hw, NGBE_MACTXCFG, NGBE_MACTXCFG_SPEED_MASK,
> +				NGBE_MACTXCFG_SPEED_1G | NGBE_MACTXCFG_TE);
> +		}
> +	}
> +
> +	return rte_eth_linkstatus_set(dev, &link);
> +}
> +
> +static int
> +ngbe_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete)
> +{
> +	return ngbe_dev_link_update_share(dev, wait_to_complete);
> +}
> +
> +/*
> + * It reads ICR and sets flag for the link_update.
> + *
> + * @param dev
> + *  Pointer to struct rte_eth_dev.
> + *
> + * @return
> + *  - On success, zero.
> + *  - On failure, a negative value.
> + */
> +static int
> +ngbe_dev_interrupt_get_status(struct rte_eth_dev *dev)
> +{
> +	uint32_t eicr;
> +	struct ngbe_hw *hw = ngbe_dev_hw(dev);
> +	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
> +
> +	/* clear all cause mask */
> +	ngbe_disable_intr(hw);
> +
> +	/* read-on-clear nic registers here */
> +	eicr = ((u32 *)hw->isb_mem)[NGBE_ISB_MISC];
> +	PMD_DRV_LOG(DEBUG, "eicr %x", eicr);
> +
> +	intr->flags = 0;
> +
> +	/* set flag for async link update */
> +	if (eicr & NGBE_ICRMISC_PHY)
> +		intr->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
> +
> +	if (eicr & NGBE_ICRMISC_VFMBX)
> +		intr->flags |= NGBE_FLAG_MAILBOX;
> +
> +	if (eicr & NGBE_ICRMISC_LNKSEC)
> +		intr->flags |= NGBE_FLAG_MACSEC;
> +
> +	if (eicr & NGBE_ICRMISC_GPIO)
> +		intr->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
> +
> +	return 0;
> +}
> +
> +/**
> + * It gets and then prints the link status.
> + *
> + * @param dev
> + *  Pointer to struct rte_eth_dev.
> + *
> + * @return
> + *  - On success, zero.
> + *  - On failure, a negative value.
> + */
> +static void
> +ngbe_dev_link_status_print(struct rte_eth_dev *dev)
> +{
> +	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> +	struct rte_eth_link link;
> +
> +	rte_eth_linkstatus_get(dev, &link);
> +
> +	if (link.link_status == ETH_LINK_UP) {
> +		PMD_INIT_LOG(INFO, "Port %d: Link Up - speed %u Mbps - %s",
> +					(int)(dev->data->port_id),
> +					(unsigned int)link.link_speed,
> +			link.link_duplex == ETH_LINK_FULL_DUPLEX ?
> +					"full-duplex" : "half-duplex");
> +	} else {
> +		PMD_INIT_LOG(INFO, " Port %d: Link Down",
> +				(int)(dev->data->port_id));
> +	}
> +	PMD_INIT_LOG(DEBUG, "PCI Address: " PCI_PRI_FMT,
> +				pci_dev->addr.domain,
> +				pci_dev->addr.bus,
> +				pci_dev->addr.devid,
> +				pci_dev->addr.function);
> +}
> +
> +/*
> + * It executes link_update after knowing an interrupt occurred.
> + *
> + * @param dev
> + *  Pointer to struct rte_eth_dev.
> + *
> + * @return
> + *  - On success, zero.
> + *  - On failure, a negative value.
> + */
> +static int
> +ngbe_dev_interrupt_action(struct rte_eth_dev *dev)
> +{
> +	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
> +	int64_t timeout;
> +
> +	PMD_DRV_LOG(DEBUG, "intr action type %d", intr->flags);
> +
> +	if (intr->flags & NGBE_FLAG_NEED_LINK_UPDATE) {
> +		struct rte_eth_link link;
> +
> +		/*get the link status before link update, for predicting later*/
> +		rte_eth_linkstatus_get(dev, &link);
> +
> +		ngbe_dev_link_update(dev, 0);
> +
> +		/* likely to up */
> +		if (link.link_status != ETH_LINK_UP)
> +			/* handle it 1 sec later, wait it being stable */
> +			timeout = NGBE_LINK_UP_CHECK_TIMEOUT;
> +		/* likely to down */
> +		else
> +			/* handle it 4 sec later, wait it being stable */
> +			timeout = NGBE_LINK_DOWN_CHECK_TIMEOUT;
> +
> +		ngbe_dev_link_status_print(dev);
> +		if (rte_eal_alarm_set(timeout * 1000,
> +				      ngbe_dev_interrupt_delayed_handler,
> +				      (void *)dev) < 0) {
> +			PMD_DRV_LOG(ERR, "Error setting alarm");
> +		} else {
> +			/* remember original mask */
> +			intr->mask_misc_orig = intr->mask_misc;
> +			/* only disable lsc interrupt */
> +			intr->mask_misc &= ~NGBE_ICRMISC_PHY;
> +
> +			intr->mask_orig = intr->mask;
> +			/* only disable all misc interrupts */
> +			intr->mask &= ~(1ULL << NGBE_MISC_VEC_ID);
> +		}
> +	}
> +
> +	PMD_DRV_LOG(DEBUG, "enable intr immediately");
> +	ngbe_enable_intr(dev);
> +
> +	return 0;
> +}
> +
> +/**
> + * Interrupt handler which shall be registered for alarm callback for delayed
> + * handling specific interrupt to wait for the stable nic state. As the
> + * NIC interrupt state is not stable for ngbe after link is just down,
> + * it needs to wait 4 seconds to get the stable status.
> + *
> + * @param handle
> + *  Pointer to interrupt handle.
> + * @param param
> + *  The address of parameter (struct rte_eth_dev *) registered before.
> + *
> + * @return
> + *  void

Such documentation of the void functions is useless.
It may be simply omited.

> + */
> +static void
> +ngbe_dev_interrupt_delayed_handler(void *param)
> +{
> +	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
> +	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
> +	struct ngbe_hw *hw = ngbe_dev_hw(dev);
> +	uint32_t eicr;
> +
> +	ngbe_disable_intr(hw);
> +
> +	eicr = ((u32 *)hw->isb_mem)[NGBE_ISB_MISC];
> +
> +	if (intr->flags & NGBE_FLAG_NEED_LINK_UPDATE) {
> +		ngbe_dev_link_update(dev, 0);
> +		intr->flags &= ~NGBE_FLAG_NEED_LINK_UPDATE;
> +		ngbe_dev_link_status_print(dev);
> +		rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
> +					      NULL);
> +	}
> +
> +	if (intr->flags & NGBE_FLAG_MACSEC) {
> +		rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_MACSEC,
> +					      NULL);
> +		intr->flags &= ~NGBE_FLAG_MACSEC;
> +	}
> +
> +	/* restore original mask */
> +	intr->mask_misc = intr->mask_misc_orig;
> +	intr->mask_misc_orig = 0;
> +	intr->mask = intr->mask_orig;
> +	intr->mask_orig = 0;
> +
> +	PMD_DRV_LOG(DEBUG, "enable intr in delayed handler S[%08x]", eicr);
> +	ngbe_enable_intr(dev);
> +}
> +
> +/**
> + * Interrupt handler triggered by NIC  for handling
> + * specific interrupt.
> + *
> + * @param handle
> + *  Pointer to interrupt handle.
> + * @param param
> + *  The address of parameter (struct rte_eth_dev *) registered before.
> + *
> + * @return
> + *  void

Such documentation of the void functions is useless.
It may be simply omited.

> + */
> +static void
> +ngbe_dev_interrupt_handler(void *param)
> +{
> +	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
> +
> +	ngbe_dev_interrupt_get_status(dev);
> +	ngbe_dev_interrupt_action(dev);
> +}
> +
> +static const struct eth_dev_ops ngbe_eth_dev_ops = {
> +	.dev_configure              = ngbe_dev_configure,
> +	.dev_infos_get              = ngbe_dev_info_get,
> +	.link_update                = ngbe_dev_link_update,
> +};
> +
>  RTE_PMD_REGISTER_PCI(net_ngbe, rte_ngbe_pmd);
>  RTE_PMD_REGISTER_PCI_TABLE(net_ngbe, pci_id_ngbe_map);
>  RTE_PMD_REGISTER_KMOD_DEP(net_ngbe, "* igb_uio | uio_pci_generic | vfio-pci");
> diff --git a/drivers/net/ngbe/ngbe_ethdev.h b/drivers/net/ngbe/ngbe_ethdev.h
> index 87cc1cff6b..b67508a3de 100644
> --- a/drivers/net/ngbe/ngbe_ethdev.h
> +++ b/drivers/net/ngbe/ngbe_ethdev.h
> @@ -6,11 +6,30 @@
>  #ifndef _NGBE_ETHDEV_H_
>  #define _NGBE_ETHDEV_H_
>  
> +/* need update link, bit flag */
> +#define NGBE_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
> +#define NGBE_FLAG_MAILBOX          (uint32_t)(1 << 1)
> +#define NGBE_FLAG_PHY_INTERRUPT    (uint32_t)(1 << 2)
> +#define NGBE_FLAG_MACSEC           (uint32_t)(1 << 3)
> +#define NGBE_FLAG_NEED_LINK_CONFIG (uint32_t)(1 << 4)
> +
> +#define NGBE_MISC_VEC_ID               RTE_INTR_VEC_ZERO_OFFSET
> +
> +/* structure for interrupt relative data */
> +struct ngbe_interrupt {
> +	uint32_t flags;
> +	uint32_t mask_misc;
> +	uint32_t mask_misc_orig; /* save mask during delayed handler */
> +	uint64_t mask;
> +	uint64_t mask_orig; /* save mask during delayed handler */
> +};
> +
>  /*
>   * Structure to store private data for each driver instance (for each port).
>   */
>  struct ngbe_adapter {
>  	struct ngbe_hw             hw;
> +	struct ngbe_interrupt      intr;
>  };
>  
>  static inline struct ngbe_adapter *
> @@ -30,6 +49,21 @@ ngbe_dev_hw(struct rte_eth_dev *dev)
>  	return hw;
>  }
>  
> +static inline struct ngbe_interrupt *
> +ngbe_dev_intr(struct rte_eth_dev *dev)
> +{
> +	struct ngbe_adapter *ad = ngbe_dev_adapter(dev);
> +	struct ngbe_interrupt *intr = &ad->intr;
> +
> +	return intr;
> +}
> +
> +int
> +ngbe_dev_link_update_share(struct rte_eth_dev *dev,
> +		int wait_to_complete);
> +
> +#define NGBE_LINK_DOWN_CHECK_TIMEOUT 4000 /* ms */
> +#define NGBE_LINK_UP_CHECK_TIMEOUT   1000 /* ms */
>  #define NGBE_VMDQ_NUM_UC_MAC         4096 /* Maximum nb. of UC MAC addr. */
>  
>  #endif /* _NGBE_ETHDEV_H_ */
>
  
Jiawen Wu July 5, 2021, 7:10 a.m. UTC | #2
On July 3, 2021 12:24 AM, Andrew Rybchenko wrote:
> On 6/17/21 1:59 PM, Jiawen Wu wrote:
> > Register to handle device interrupt.
> >
> > Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
> 
> [snip]
> 
> > diff --git a/doc/guides/nics/ngbe.rst b/doc/guides/nics/ngbe.rst index
> > 54d0665db9..0918cc2918 100644
> > --- a/doc/guides/nics/ngbe.rst
> > +++ b/doc/guides/nics/ngbe.rst
> > @@ -8,6 +8,11 @@ The NGBE PMD (librte_pmd_ngbe) provides poll mode
> > driver support  for Wangxun 1 Gigabit Ethernet NICs.
> >
> >
> > +Features
> > +--------
> > +
> > +- Link state information
> > +
> 
> Two empty lines before the section.
> 
> >  Prerequisites
> >  -------------
> >
> 
> [snip]
> 
> > diff --git a/drivers/net/ngbe/ngbe_ethdev.c
> > b/drivers/net/ngbe/ngbe_ethdev.c index deca64137d..c952023e8b 100644
> > --- a/drivers/net/ngbe/ngbe_ethdev.c
> > +++ b/drivers/net/ngbe/ngbe_ethdev.c
> > @@ -141,6 +175,23 @@ eth_ngbe_dev_init(struct rte_eth_dev *eth_dev,
> void *init_params __rte_unused)
> >  		return -ENOMEM;
> >  	}
> >
> > +	ctrl_ext = rd32(hw, NGBE_PORTCTL);
> > +	/* let hardware know driver is loaded */
> > +	ctrl_ext |= NGBE_PORTCTL_DRVLOAD;
> > +	/* Set PF Reset Done bit so PF/VF Mail Ops can work */
> > +	ctrl_ext |= NGBE_PORTCTL_RSTDONE;
> > +	wr32(hw, NGBE_PORTCTL, ctrl_ext);
> > +	ngbe_flush(hw);
> > +
> > +	rte_intr_callback_register(intr_handle,
> > +				   ngbe_dev_interrupt_handler, eth_dev);
> > +
> > +	/* enable uio/vfio intr/eventfd mapping */
> > +	rte_intr_enable(intr_handle);
> > +
> > +	/* enable support intr */
> > +	ngbe_enable_intr(eth_dev);
> > +
> 
> I don't understand why it is done unconditionally regardless of the
> corresponding bit in dev_conf.

Sorry...I don't quite understand what you mean.

> 
> >  	return 0;
> >  }
> >
  
Andrew Rybchenko July 5, 2021, 8:58 a.m. UTC | #3
On 7/5/21 10:10 AM, Jiawen Wu wrote:
> On July 3, 2021 12:24 AM, Andrew Rybchenko wrote:
>> On 6/17/21 1:59 PM, Jiawen Wu wrote:
>>> Register to handle device interrupt.
>>>
>>> Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
>>
>> [snip]
>>
>>> diff --git a/doc/guides/nics/ngbe.rst b/doc/guides/nics/ngbe.rst index
>>> 54d0665db9..0918cc2918 100644
>>> --- a/doc/guides/nics/ngbe.rst
>>> +++ b/doc/guides/nics/ngbe.rst
>>> @@ -8,6 +8,11 @@ The NGBE PMD (librte_pmd_ngbe) provides poll mode
>>> driver support  for Wangxun 1 Gigabit Ethernet NICs.
>>>
>>>
>>> +Features
>>> +--------
>>> +
>>> +- Link state information
>>> +
>>
>> Two empty lines before the section.
>>
>>>  Prerequisites
>>>  -------------
>>>
>>
>> [snip]
>>
>>> diff --git a/drivers/net/ngbe/ngbe_ethdev.c
>>> b/drivers/net/ngbe/ngbe_ethdev.c index deca64137d..c952023e8b 100644
>>> --- a/drivers/net/ngbe/ngbe_ethdev.c
>>> +++ b/drivers/net/ngbe/ngbe_ethdev.c
>>> @@ -141,6 +175,23 @@ eth_ngbe_dev_init(struct rte_eth_dev *eth_dev,
>> void *init_params __rte_unused)
>>>  		return -ENOMEM;
>>>  	}
>>>
>>> +	ctrl_ext = rd32(hw, NGBE_PORTCTL);
>>> +	/* let hardware know driver is loaded */
>>> +	ctrl_ext |= NGBE_PORTCTL_DRVLOAD;
>>> +	/* Set PF Reset Done bit so PF/VF Mail Ops can work */
>>> +	ctrl_ext |= NGBE_PORTCTL_RSTDONE;
>>> +	wr32(hw, NGBE_PORTCTL, ctrl_ext);
>>> +	ngbe_flush(hw);
>>> +
>>> +	rte_intr_callback_register(intr_handle,
>>> +				   ngbe_dev_interrupt_handler, eth_dev);
>>> +
>>> +	/* enable uio/vfio intr/eventfd mapping */
>>> +	rte_intr_enable(intr_handle);
>>> +
>>> +	/* enable support intr */
>>> +	ngbe_enable_intr(eth_dev);
>>> +
>>
>> I don't understand why it is done unconditionally regardless of the
>> corresponding bit in dev_conf.
> 
> Sorry...I don't quite understand what you mean.
May be it is OK and it is specific to your HW, but typically
interrupts are only enabled and used if requested via either
dev_conf->intr_conf.lsc for link status change OR
dev_conf->intr_conf.rxq for Rx queues.
  

Patch

diff --git a/doc/guides/nics/features/ngbe.ini b/doc/guides/nics/features/ngbe.ini
index 977286ac04..291a542a42 100644
--- a/doc/guides/nics/features/ngbe.ini
+++ b/doc/guides/nics/features/ngbe.ini
@@ -4,6 +4,9 @@ 
 ; Refer to default.ini for the full list of available PMD features.
 ;
 [Features]
+Speed capabilities   = Y
+Link status          = Y
+Link status event    = Y
 Multiprocess aware   = Y
 Linux                = Y
 ARMv8                = Y
diff --git a/doc/guides/nics/ngbe.rst b/doc/guides/nics/ngbe.rst
index 54d0665db9..0918cc2918 100644
--- a/doc/guides/nics/ngbe.rst
+++ b/doc/guides/nics/ngbe.rst
@@ -8,6 +8,11 @@  The NGBE PMD (librte_pmd_ngbe) provides poll mode driver support
 for Wangxun 1 Gigabit Ethernet NICs.
 
 
+Features
+--------
+
+- Link state information
+
 Prerequisites
 -------------
 
diff --git a/drivers/net/ngbe/base/ngbe_dummy.h b/drivers/net/ngbe/base/ngbe_dummy.h
index 8462d6d1cb..4273e5af36 100644
--- a/drivers/net/ngbe/base/ngbe_dummy.h
+++ b/drivers/net/ngbe/base/ngbe_dummy.h
@@ -64,6 +64,11 @@  static inline void ngbe_mac_release_swfw_sync_dummy(struct ngbe_hw *TUP0,
 					u32 TUP1)
 {
 }
+static inline s32 ngbe_mac_check_link_dummy(struct ngbe_hw *TUP0, u32 *TUP1,
+					bool *TUP3, bool TUP4)
+{
+	return NGBE_ERR_OPS_DUMMY;
+}
 static inline s32 ngbe_mac_set_rar_dummy(struct ngbe_hw *TUP0, u32 TUP1,
 					u8 *TUP2, u32 TUP3, u32 TUP4)
 {
@@ -135,6 +140,7 @@  static inline void ngbe_init_ops_dummy(struct ngbe_hw *hw)
 	hw->mac.get_mac_addr = ngbe_mac_get_mac_addr_dummy;
 	hw->mac.acquire_swfw_sync = ngbe_mac_acquire_swfw_sync_dummy;
 	hw->mac.release_swfw_sync = ngbe_mac_release_swfw_sync_dummy;
+	hw->mac.check_link = ngbe_mac_check_link_dummy;
 	hw->mac.set_rar = ngbe_mac_set_rar_dummy;
 	hw->mac.clear_rar = ngbe_mac_clear_rar_dummy;
 	hw->mac.set_vmdq = ngbe_mac_set_vmdq_dummy;
diff --git a/drivers/net/ngbe/base/ngbe_type.h b/drivers/net/ngbe/base/ngbe_type.h
index 517db3380d..04c1cac422 100644
--- a/drivers/net/ngbe/base/ngbe_type.h
+++ b/drivers/net/ngbe/base/ngbe_type.h
@@ -97,6 +97,8 @@  struct ngbe_mac_info {
 	s32 (*acquire_swfw_sync)(struct ngbe_hw *hw, u32 mask);
 	void (*release_swfw_sync)(struct ngbe_hw *hw, u32 mask);
 
+	s32 (*check_link)(struct ngbe_hw *hw, u32 *speed,
+			       bool *link_up, bool link_up_wait_to_complete);
 	/* RAR */
 	s32 (*set_rar)(struct ngbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
 			  u32 enable_addr);
@@ -117,6 +119,7 @@  struct ngbe_mac_info {
 	u32 num_rar_entries;
 	u32 max_tx_queues;
 	u32 max_rx_queues;
+	bool get_link_status;
 	struct ngbe_thermal_sensor_data  thermal_sensor_data;
 	bool set_lben;
 };
@@ -142,6 +145,14 @@  struct ngbe_phy_info {
 	bool reset_disable;
 };
 
+enum ngbe_isb_idx {
+	NGBE_ISB_HEADER,
+	NGBE_ISB_MISC,
+	NGBE_ISB_VEC0,
+	NGBE_ISB_VEC1,
+	NGBE_ISB_MAX
+};
+
 struct ngbe_hw {
 	void IOMEM *hw_addr;
 	void *back;
diff --git a/drivers/net/ngbe/ngbe_ethdev.c b/drivers/net/ngbe/ngbe_ethdev.c
index deca64137d..c952023e8b 100644
--- a/drivers/net/ngbe/ngbe_ethdev.c
+++ b/drivers/net/ngbe/ngbe_ethdev.c
@@ -7,12 +7,17 @@ 
 #include <rte_common.h>
 #include <ethdev_pci.h>
 
+#include <rte_alarm.h>
+
 #include "ngbe_logs.h"
 #include "base/ngbe.h"
 #include "ngbe_ethdev.h"
 
 static int ngbe_dev_close(struct rte_eth_dev *dev);
 
+static void ngbe_dev_interrupt_handler(void *param);
+static void ngbe_dev_interrupt_delayed_handler(void *param);
+
 /*
  * The set of PCI devices this driver supports
  */
@@ -32,6 +37,28 @@  static const struct rte_pci_id pci_id_ngbe_map[] = {
 	{ .vendor_id = 0, /* sentinel */ },
 };
 
+static const struct eth_dev_ops ngbe_eth_dev_ops;
+
+static inline void
+ngbe_enable_intr(struct rte_eth_dev *dev)
+{
+	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
+	struct ngbe_hw *hw = ngbe_dev_hw(dev);
+
+	wr32(hw, NGBE_IENMISC, intr->mask_misc);
+	wr32(hw, NGBE_IMC(0), intr->mask & BIT_MASK32);
+	ngbe_flush(hw);
+}
+
+static void
+ngbe_disable_intr(struct ngbe_hw *hw)
+{
+	PMD_INIT_FUNC_TRACE();
+
+	wr32(hw, NGBE_IMS(0), NGBE_IMS_MASK);
+	ngbe_flush(hw);
+}
+
 /*
  * Ensure that all locks are released before first NVM or PHY access
  */
@@ -60,11 +87,15 @@  eth_ngbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused)
 {
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 	struct ngbe_hw *hw = ngbe_dev_hw(eth_dev);
+	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 	const struct rte_memzone *mz;
+	uint32_t ctrl_ext;
 	int err;
 
 	PMD_INIT_FUNC_TRACE();
 
+	eth_dev->dev_ops = &ngbe_eth_dev_ops;
+
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
 
@@ -116,6 +147,9 @@  eth_ngbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused)
 		return -EIO;
 	}
 
+	/* disable interrupt */
+	ngbe_disable_intr(hw);
+
 	/* Allocate memory for storing MAC addresses */
 	eth_dev->data->mac_addrs = rte_zmalloc("ngbe", RTE_ETHER_ADDR_LEN *
 					       hw->mac.num_rar_entries, 0);
@@ -141,6 +175,23 @@  eth_ngbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused)
 		return -ENOMEM;
 	}
 
+	ctrl_ext = rd32(hw, NGBE_PORTCTL);
+	/* let hardware know driver is loaded */
+	ctrl_ext |= NGBE_PORTCTL_DRVLOAD;
+	/* Set PF Reset Done bit so PF/VF Mail Ops can work */
+	ctrl_ext |= NGBE_PORTCTL_RSTDONE;
+	wr32(hw, NGBE_PORTCTL, ctrl_ext);
+	ngbe_flush(hw);
+
+	rte_intr_callback_register(intr_handle,
+				   ngbe_dev_interrupt_handler, eth_dev);
+
+	/* enable uio/vfio intr/eventfd mapping */
+	rte_intr_enable(intr_handle);
+
+	/* enable support intr */
+	ngbe_enable_intr(eth_dev);
+
 	return 0;
 }
 
@@ -180,11 +231,25 @@  static int eth_ngbe_pci_remove(struct rte_pci_device *pci_dev)
 
 static struct rte_pci_driver rte_ngbe_pmd = {
 	.id_table = pci_id_ngbe_map,
-	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+	.drv_flags = RTE_PCI_DRV_NEED_MAPPING |
+		     RTE_PCI_DRV_INTR_LSC,
 	.probe = eth_ngbe_pci_probe,
 	.remove = eth_ngbe_pci_remove,
 };
 
+static int
+ngbe_dev_configure(struct rte_eth_dev *dev)
+{
+	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
+
+	PMD_INIT_FUNC_TRACE();
+
+	/* set flag to update link status after init */
+	intr->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
+
+	return 0;
+}
+
 /*
  * Reset and stop device.
  */
@@ -198,6 +263,315 @@  ngbe_dev_close(struct rte_eth_dev *dev)
 	return -EINVAL;
 }
 
+static int
+ngbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	RTE_SET_USED(dev);
+
+	dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_100M |
+				ETH_LINK_SPEED_10M;
+
+	return 0;
+}
+
+/* return 0 means link status changed, -1 means not changed */
+int
+ngbe_dev_link_update_share(struct rte_eth_dev *dev,
+			    int wait_to_complete)
+{
+	struct ngbe_hw *hw = ngbe_dev_hw(dev);
+	struct rte_eth_link link;
+	u32 link_speed = NGBE_LINK_SPEED_UNKNOWN;
+	u32 lan_speed = 0;
+	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
+	bool link_up;
+	int err;
+	int wait = 1;
+
+	memset(&link, 0, sizeof(link));
+	link.link_status = ETH_LINK_DOWN;
+	link.link_speed = ETH_SPEED_NUM_NONE;
+	link.link_duplex = ETH_LINK_HALF_DUPLEX;
+	link.link_autoneg = !(dev->data->dev_conf.link_speeds &
+			~ETH_LINK_SPEED_AUTONEG);
+
+	hw->mac.get_link_status = true;
+
+	if (intr->flags & NGBE_FLAG_NEED_LINK_CONFIG)
+		return rte_eth_linkstatus_set(dev, &link);
+
+	/* check if it needs to wait to complete, if lsc interrupt is enabled */
+	if (wait_to_complete == 0 || dev->data->dev_conf.intr_conf.lsc != 0)
+		wait = 0;
+
+	err = hw->mac.check_link(hw, &link_speed, &link_up, wait);
+
+	if (err != 0) {
+		link.link_speed = ETH_SPEED_NUM_100M;
+		link.link_duplex = ETH_LINK_FULL_DUPLEX;
+		return rte_eth_linkstatus_set(dev, &link);
+	}
+
+	if (!link_up)
+		return rte_eth_linkstatus_set(dev, &link);
+
+	intr->flags &= ~NGBE_FLAG_NEED_LINK_CONFIG;
+	link.link_status = ETH_LINK_UP;
+	link.link_duplex = ETH_LINK_FULL_DUPLEX;
+
+	switch (link_speed) {
+	default:
+	case NGBE_LINK_SPEED_UNKNOWN:
+		link.link_duplex = ETH_LINK_FULL_DUPLEX;
+		link.link_speed = ETH_SPEED_NUM_100M;
+		break;
+
+	case NGBE_LINK_SPEED_10M_FULL:
+		link.link_speed = ETH_SPEED_NUM_10M;
+		lan_speed = 0;
+		break;
+
+	case NGBE_LINK_SPEED_100M_FULL:
+		link.link_speed = ETH_SPEED_NUM_100M;
+		lan_speed = 1;
+		break;
+
+	case NGBE_LINK_SPEED_1GB_FULL:
+		link.link_speed = ETH_SPEED_NUM_1G;
+		lan_speed = 2;
+		break;
+	}
+
+	if (hw->is_pf) {
+		wr32m(hw, NGBE_LAN_SPEED, NGBE_LAN_SPEED_MASK, lan_speed);
+		if (link_speed & (NGBE_LINK_SPEED_1GB_FULL |
+			NGBE_LINK_SPEED_100M_FULL | NGBE_LINK_SPEED_10M_FULL)) {
+			wr32m(hw, NGBE_MACTXCFG, NGBE_MACTXCFG_SPEED_MASK,
+				NGBE_MACTXCFG_SPEED_1G | NGBE_MACTXCFG_TE);
+		}
+	}
+
+	return rte_eth_linkstatus_set(dev, &link);
+}
+
+static int
+ngbe_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete)
+{
+	return ngbe_dev_link_update_share(dev, wait_to_complete);
+}
+
+/*
+ * It reads ICR and sets flag for the link_update.
+ *
+ * @param dev
+ *  Pointer to struct rte_eth_dev.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+static int
+ngbe_dev_interrupt_get_status(struct rte_eth_dev *dev)
+{
+	uint32_t eicr;
+	struct ngbe_hw *hw = ngbe_dev_hw(dev);
+	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
+
+	/* clear all cause mask */
+	ngbe_disable_intr(hw);
+
+	/* read-on-clear nic registers here */
+	eicr = ((u32 *)hw->isb_mem)[NGBE_ISB_MISC];
+	PMD_DRV_LOG(DEBUG, "eicr %x", eicr);
+
+	intr->flags = 0;
+
+	/* set flag for async link update */
+	if (eicr & NGBE_ICRMISC_PHY)
+		intr->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
+
+	if (eicr & NGBE_ICRMISC_VFMBX)
+		intr->flags |= NGBE_FLAG_MAILBOX;
+
+	if (eicr & NGBE_ICRMISC_LNKSEC)
+		intr->flags |= NGBE_FLAG_MACSEC;
+
+	if (eicr & NGBE_ICRMISC_GPIO)
+		intr->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
+
+	return 0;
+}
+
+/**
+ * It gets and then prints the link status.
+ *
+ * @param dev
+ *  Pointer to struct rte_eth_dev.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+static void
+ngbe_dev_link_status_print(struct rte_eth_dev *dev)
+{
+	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+	struct rte_eth_link link;
+
+	rte_eth_linkstatus_get(dev, &link);
+
+	if (link.link_status == ETH_LINK_UP) {
+		PMD_INIT_LOG(INFO, "Port %d: Link Up - speed %u Mbps - %s",
+					(int)(dev->data->port_id),
+					(unsigned int)link.link_speed,
+			link.link_duplex == ETH_LINK_FULL_DUPLEX ?
+					"full-duplex" : "half-duplex");
+	} else {
+		PMD_INIT_LOG(INFO, " Port %d: Link Down",
+				(int)(dev->data->port_id));
+	}
+	PMD_INIT_LOG(DEBUG, "PCI Address: " PCI_PRI_FMT,
+				pci_dev->addr.domain,
+				pci_dev->addr.bus,
+				pci_dev->addr.devid,
+				pci_dev->addr.function);
+}
+
+/*
+ * It executes link_update after knowing an interrupt occurred.
+ *
+ * @param dev
+ *  Pointer to struct rte_eth_dev.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+static int
+ngbe_dev_interrupt_action(struct rte_eth_dev *dev)
+{
+	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
+	int64_t timeout;
+
+	PMD_DRV_LOG(DEBUG, "intr action type %d", intr->flags);
+
+	if (intr->flags & NGBE_FLAG_NEED_LINK_UPDATE) {
+		struct rte_eth_link link;
+
+		/*get the link status before link update, for predicting later*/
+		rte_eth_linkstatus_get(dev, &link);
+
+		ngbe_dev_link_update(dev, 0);
+
+		/* likely to up */
+		if (link.link_status != ETH_LINK_UP)
+			/* handle it 1 sec later, wait it being stable */
+			timeout = NGBE_LINK_UP_CHECK_TIMEOUT;
+		/* likely to down */
+		else
+			/* handle it 4 sec later, wait it being stable */
+			timeout = NGBE_LINK_DOWN_CHECK_TIMEOUT;
+
+		ngbe_dev_link_status_print(dev);
+		if (rte_eal_alarm_set(timeout * 1000,
+				      ngbe_dev_interrupt_delayed_handler,
+				      (void *)dev) < 0) {
+			PMD_DRV_LOG(ERR, "Error setting alarm");
+		} else {
+			/* remember original mask */
+			intr->mask_misc_orig = intr->mask_misc;
+			/* only disable lsc interrupt */
+			intr->mask_misc &= ~NGBE_ICRMISC_PHY;
+
+			intr->mask_orig = intr->mask;
+			/* only disable all misc interrupts */
+			intr->mask &= ~(1ULL << NGBE_MISC_VEC_ID);
+		}
+	}
+
+	PMD_DRV_LOG(DEBUG, "enable intr immediately");
+	ngbe_enable_intr(dev);
+
+	return 0;
+}
+
+/**
+ * Interrupt handler which shall be registered for alarm callback for delayed
+ * handling specific interrupt to wait for the stable nic state. As the
+ * NIC interrupt state is not stable for ngbe after link is just down,
+ * it needs to wait 4 seconds to get the stable status.
+ *
+ * @param handle
+ *  Pointer to interrupt handle.
+ * @param param
+ *  The address of parameter (struct rte_eth_dev *) registered before.
+ *
+ * @return
+ *  void
+ */
+static void
+ngbe_dev_interrupt_delayed_handler(void *param)
+{
+	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
+	struct ngbe_hw *hw = ngbe_dev_hw(dev);
+	uint32_t eicr;
+
+	ngbe_disable_intr(hw);
+
+	eicr = ((u32 *)hw->isb_mem)[NGBE_ISB_MISC];
+
+	if (intr->flags & NGBE_FLAG_NEED_LINK_UPDATE) {
+		ngbe_dev_link_update(dev, 0);
+		intr->flags &= ~NGBE_FLAG_NEED_LINK_UPDATE;
+		ngbe_dev_link_status_print(dev);
+		rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
+					      NULL);
+	}
+
+	if (intr->flags & NGBE_FLAG_MACSEC) {
+		rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_MACSEC,
+					      NULL);
+		intr->flags &= ~NGBE_FLAG_MACSEC;
+	}
+
+	/* restore original mask */
+	intr->mask_misc = intr->mask_misc_orig;
+	intr->mask_misc_orig = 0;
+	intr->mask = intr->mask_orig;
+	intr->mask_orig = 0;
+
+	PMD_DRV_LOG(DEBUG, "enable intr in delayed handler S[%08x]", eicr);
+	ngbe_enable_intr(dev);
+}
+
+/**
+ * Interrupt handler triggered by NIC  for handling
+ * specific interrupt.
+ *
+ * @param handle
+ *  Pointer to interrupt handle.
+ * @param param
+ *  The address of parameter (struct rte_eth_dev *) registered before.
+ *
+ * @return
+ *  void
+ */
+static void
+ngbe_dev_interrupt_handler(void *param)
+{
+	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+
+	ngbe_dev_interrupt_get_status(dev);
+	ngbe_dev_interrupt_action(dev);
+}
+
+static const struct eth_dev_ops ngbe_eth_dev_ops = {
+	.dev_configure              = ngbe_dev_configure,
+	.dev_infos_get              = ngbe_dev_info_get,
+	.link_update                = ngbe_dev_link_update,
+};
+
 RTE_PMD_REGISTER_PCI(net_ngbe, rte_ngbe_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_ngbe, pci_id_ngbe_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_ngbe, "* igb_uio | uio_pci_generic | vfio-pci");
diff --git a/drivers/net/ngbe/ngbe_ethdev.h b/drivers/net/ngbe/ngbe_ethdev.h
index 87cc1cff6b..b67508a3de 100644
--- a/drivers/net/ngbe/ngbe_ethdev.h
+++ b/drivers/net/ngbe/ngbe_ethdev.h
@@ -6,11 +6,30 @@ 
 #ifndef _NGBE_ETHDEV_H_
 #define _NGBE_ETHDEV_H_
 
+/* need update link, bit flag */
+#define NGBE_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
+#define NGBE_FLAG_MAILBOX          (uint32_t)(1 << 1)
+#define NGBE_FLAG_PHY_INTERRUPT    (uint32_t)(1 << 2)
+#define NGBE_FLAG_MACSEC           (uint32_t)(1 << 3)
+#define NGBE_FLAG_NEED_LINK_CONFIG (uint32_t)(1 << 4)
+
+#define NGBE_MISC_VEC_ID               RTE_INTR_VEC_ZERO_OFFSET
+
+/* structure for interrupt relative data */
+struct ngbe_interrupt {
+	uint32_t flags;
+	uint32_t mask_misc;
+	uint32_t mask_misc_orig; /* save mask during delayed handler */
+	uint64_t mask;
+	uint64_t mask_orig; /* save mask during delayed handler */
+};
+
 /*
  * Structure to store private data for each driver instance (for each port).
  */
 struct ngbe_adapter {
 	struct ngbe_hw             hw;
+	struct ngbe_interrupt      intr;
 };
 
 static inline struct ngbe_adapter *
@@ -30,6 +49,21 @@  ngbe_dev_hw(struct rte_eth_dev *dev)
 	return hw;
 }
 
+static inline struct ngbe_interrupt *
+ngbe_dev_intr(struct rte_eth_dev *dev)
+{
+	struct ngbe_adapter *ad = ngbe_dev_adapter(dev);
+	struct ngbe_interrupt *intr = &ad->intr;
+
+	return intr;
+}
+
+int
+ngbe_dev_link_update_share(struct rte_eth_dev *dev,
+		int wait_to_complete);
+
+#define NGBE_LINK_DOWN_CHECK_TIMEOUT 4000 /* ms */
+#define NGBE_LINK_UP_CHECK_TIMEOUT   1000 /* ms */
 #define NGBE_VMDQ_NUM_UC_MAC         4096 /* Maximum nb. of UC MAC addr. */
 
 #endif /* _NGBE_ETHDEV_H_ */