[v2,04/15] bus/pci: find PCI capability

Message ID 20230821113549.3191921-5-david.marchand@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series Cleanup PCI(e) drivers |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

David Marchand Aug. 21, 2023, 11:35 a.m. UTC
  Introduce two helpers so that drivers stop reinventing the wheel when it
comes to finding capabilities in a device PCI configuration space.
Use it in existing drivers.

Note:
- base/ drivers code is left untouched, only some wrappers in cxgbe
  are touched,
- bnx2x maintained a per device cache of capabilities, this code has been
  reworked to only cache the capabilities used in this driver,

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
---
Changes since v1:
- updated commitlog,
- separated VFIO changes for using standard PCI helper in a separate
  patch,
- marked new experimental symbols with current version,
- reordered defines in rte_pci.h,

---
 drivers/bus/pci/linux/pci_vfio.c   |  74 ++++--------------
 drivers/bus/pci/pci_common.c       |  45 +++++++++++
 drivers/bus/pci/rte_bus_pci.h      |  31 ++++++++
 drivers/bus/pci/version.map        |   4 +
 drivers/crypto/virtio/virtio_pci.c |  57 +++++---------
 drivers/event/dlb2/pf/dlb2_main.c  |  42 +---------
 drivers/net/bnx2x/bnx2x.c          |  41 +++++-----
 drivers/net/cxgbe/base/adapter.h   |  28 +------
 drivers/net/gve/gve_ethdev.c       |  46 ++---------
 drivers/net/gve/gve_ethdev.h       |   4 -
 drivers/net/hns3/hns3_ethdev_vf.c  |  79 +++----------------
 drivers/net/virtio/virtio_pci.c    | 121 +++++------------------------
 lib/pci/rte_pci.h                  |  11 +++
 13 files changed, 186 insertions(+), 397 deletions(-)
  

Comments

Chenbo Xia Sept. 7, 2023, 12:43 p.m. UTC | #1
Hi David,

> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Monday, August 21, 2023 7:36 PM
> To: dev@dpdk.org
> Cc: thomas@monjalon.net; ferruh.yigit@amd.com; Xia, Chenbo
> <chenbo.xia@intel.com>; nipun.gupta@amd.com; Richardson, Bruce
> <bruce.richardson@intel.com>; Burakov, Anatoly <anatoly.burakov@intel.com>;
> Jay Zhou <jianjay.zhou@huawei.com>; McDaniel, Timothy
> <timothy.mcdaniel@intel.com>; Julien Aube <julien_dpdk@jaube.fr>; Rahul
> Lakkireddy <rahul.lakkireddy@chelsio.com>; Guo, Junfeng
> <junfeng.guo@intel.com>; Jeroen de Borst <jeroendb@google.com>; Rushil
> Gupta <rushilg@google.com>; Joshua Washington <joshwash@google.com>;
> Dongdong Liu <liudongdong3@huawei.com>; Yisen Zhuang
> <yisen.zhuang@huawei.com>; Maxime Coquelin <maxime.coquelin@redhat.com>;
> Gaetan Rivet <grive@u256.net>
> Subject: [PATCH v2 04/15] bus/pci: find PCI capability
> 
> Introduce two helpers so that drivers stop reinventing the wheel when it
> comes to finding capabilities in a device PCI configuration space.
> Use it in existing drivers.
> 
> Note:
> - base/ drivers code is left untouched, only some wrappers in cxgbe
>   are touched,
> - bnx2x maintained a per device cache of capabilities, this code has been
>   reworked to only cache the capabilities used in this driver,
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
> Changes since v1:
> - updated commitlog,
> - separated VFIO changes for using standard PCI helper in a separate
>   patch,
> - marked new experimental symbols with current version,
> - reordered defines in rte_pci.h,
> 
> ---
>  drivers/bus/pci/linux/pci_vfio.c   |  74 ++++--------------
>  drivers/bus/pci/pci_common.c       |  45 +++++++++++
>  drivers/bus/pci/rte_bus_pci.h      |  31 ++++++++
>  drivers/bus/pci/version.map        |   4 +
>  drivers/crypto/virtio/virtio_pci.c |  57 +++++---------
>  drivers/event/dlb2/pf/dlb2_main.c  |  42 +---------
>  drivers/net/bnx2x/bnx2x.c          |  41 +++++-----
>  drivers/net/cxgbe/base/adapter.h   |  28 +------
>  drivers/net/gve/gve_ethdev.c       |  46 ++---------
>  drivers/net/gve/gve_ethdev.h       |   4 -
>  drivers/net/hns3/hns3_ethdev_vf.c  |  79 +++----------------
>  drivers/net/virtio/virtio_pci.c    | 121 +++++------------------------
>  lib/pci/rte_pci.h                  |  11 +++
>  13 files changed, 186 insertions(+), 397 deletions(-)
> 
> diff --git a/drivers/bus/pci/linux/pci_vfio.c
> b/drivers/bus/pci/linux/pci_vfio.c
> index 958f8b3b52..614ed5d696 100644
> --- a/drivers/bus/pci/linux/pci_vfio.c
> +++ b/drivers/bus/pci/linux/pci_vfio.c
> @@ -110,74 +110,34 @@ static int
>  pci_vfio_get_msix_bar(const struct rte_pci_device *dev,
>  	struct pci_msix_table *msix_table)
>  {
> -	int ret;
> -	uint32_t reg;
> -	uint16_t flags;
> -	uint8_t cap_id, cap_offset;
> +	off_t cap_offset;
> 
> -	/* read PCI capability pointer from config space */
> -	ret = rte_pci_read_config(dev, &reg, sizeof(reg),
> PCI_CAPABILITY_LIST);
> -	if (ret != sizeof(reg)) {
> -		RTE_LOG(ERR, EAL,
> -			"Cannot read capability pointer from PCI config
> space!\n");
> +	cap_offset = rte_pci_find_capability(dev, PCI_CAP_ID_MSIX);

I notice in some cases we use rte_pci_has_capability_list() to check first,
then looking for specific cap, in other cases we don't use
rte_pci_has_capability_list(). Since we define this API, should we always do
the check?


> +	if (cap_offset < 0)
>  		return -1;
> -	}
> 
> -	/* we need first byte */
> -	cap_offset = reg & 0xFF;
> +	if (cap_offset != 0) {
> +		uint16_t flags;
> +		uint32_t reg;
> 
> -	while (cap_offset) {
> -
> -		/* read PCI capability ID */
> -		ret = rte_pci_read_config(dev, &reg, sizeof(reg), cap_offset);
> -		if (ret != sizeof(reg)) {
> +		/* table offset resides in the next 4 bytes */
> +		if (rte_pci_read_config(dev, &reg, sizeof(reg), cap_offset + 4)
> < 0) {
>  			RTE_LOG(ERR, EAL,
> -				"Cannot read capability ID from PCI config
> space!\n");
> +				"Cannot read MSIX table from PCI config space!\n");
>  			return -1;
>  		}
> 
> -		/* we need first byte */
> -		cap_id = reg & 0xFF;
> -
> -		/* if we haven't reached MSI-X, check next capability */
> -		if (cap_id != PCI_CAP_ID_MSIX) {
> -			ret = rte_pci_read_config(dev, &reg, sizeof(reg),
> cap_offset);
> -			if (ret != sizeof(reg)) {
> -				RTE_LOG(ERR, EAL,
> -					"Cannot read capability pointer from PCI
> config space!\n");
> -				return -1;
> -			}
> -
> -			/* we need second byte */
> -			cap_offset = (reg & 0xFF00) >> 8;
> -
> -			continue;
> +		if (rte_pci_read_config(dev, &flags, sizeof(flags), cap_offset
> + 2) < 0) {
> +			RTE_LOG(ERR, EAL,
> +				"Cannot read MSIX flags from PCI config space!\n");
> +			return -1;
>  		}
> -		/* else, read table offset */
> -		else {
> -			/* table offset resides in the next 4 bytes */
> -			ret = rte_pci_read_config(dev, &reg, sizeof(reg),
> cap_offset + 4);
> -			if (ret != sizeof(reg)) {
> -				RTE_LOG(ERR, EAL,
> -					"Cannot read table offset from PCI config
> space!\n");
> -				return -1;
> -			}
> -
> -			ret = rte_pci_read_config(dev, &flags, sizeof(flags),
> cap_offset + 2);
> -			if (ret != sizeof(flags)) {
> -				RTE_LOG(ERR, EAL,
> -					"Cannot read table flags from PCI config
> space!\n");
> -				return -1;
> -			}
> -
> -			msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
> -			msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> -			msix_table->size =
> -				16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
> 
> -			return 0;
> -		}
> +		msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
> +		msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> +		msix_table->size = 16 * (1 + (flags &
> RTE_PCI_MSIX_FLAGS_QSIZE));
>  	}
> +
>  	return 0;
>  }
> 
> diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
> index 382b0b8946..52272617eb 100644
> --- a/drivers/bus/pci/pci_common.c
> +++ b/drivers/bus/pci/pci_common.c
> @@ -813,6 +813,51 @@ rte_pci_get_iommu_class(void)
>  	return iova_mode;
>  }
> 
> +bool
> +rte_pci_has_capability_list(const struct rte_pci_device *dev)
> +{
> +	uint16_t status;
> +
> +	if (rte_pci_read_config(dev, &status, sizeof(status),
> RTE_PCI_STATUS) != sizeof(status))
> +		return false;
> +
> +	return (status & RTE_PCI_STATUS_CAP_LIST) != 0;
> +}
> +
> +off_t
> +rte_pci_find_capability(const struct rte_pci_device *dev, uint8_t cap)
> +{
> +	off_t offset;
> +	uint8_t pos;
> +	int ttl;
> +
> +	offset = RTE_PCI_CAPABILITY_LIST;
> +	ttl = (RTE_PCI_CFG_SPACE_SIZE - RTE_PCI_STD_HEADER_SIZEOF) /
> RTE_PCI_CAP_SIZEOF;
> +
> +	if (rte_pci_read_config(dev, &pos, sizeof(pos), offset) < 0)
> +		return -1;
> +
> +	while (pos && ttl--) {
> +		uint16_t ent;
> +		uint8_t id;
> +
> +		offset = pos;
> +		if (rte_pci_read_config(dev, &ent, sizeof(ent), offset) < 0)
> +			return -1;
> +
> +		id = ent & 0xff;
> +		if (id == 0xff)
> +			break;
> +
> +		if (id == cap)
> +			return offset;
> +
> +		pos = (ent >> 8);
> +	}
> +
> +	return 0;
> +}
> +
>  off_t
>  rte_pci_find_ext_capability(const struct rte_pci_device *dev, uint32_t
> cap)
>  {
> diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
> index 75d0030eae..1ed33dbf3d 100644
> --- a/drivers/bus/pci/rte_bus_pci.h
> +++ b/drivers/bus/pci/rte_bus_pci.h
> @@ -68,6 +68,37 @@ void rte_pci_unmap_device(struct rte_pci_device *dev);
>   */
>  void rte_pci_dump(FILE *f);
> 
> +/**
> + * Check whether this device has a PCI capability list.
> + *
> + *  @param dev
> + *    A pointer to rte_pci_device structure.
> + *
> + *  @return
> + *    true/false
> + */
> +__rte_experimental
> +bool rte_pci_has_capability_list(const struct rte_pci_device *dev);
> +
> +/**
> + * Find device's PCI capability.
> + *
> + *  @param dev
> + *    A pointer to rte_pci_device structure.
> + *
> + *  @param cap
> + *    Capability to be found, which can be any from
> + *    RTE_PCI_CAP_ID_*, defined in librte_pci.
> + *
> + *  @return
> + *  > 0: The offset of the next matching capability structure
> + *       within the device's PCI configuration space.
> + *  < 0: An error in PCI config space read.
> + *  = 0: Device does not support it.
> + */
> +__rte_experimental
> +off_t rte_pci_find_capability(const struct rte_pci_device *dev, uint8_t
> cap);
> +
>  /**
>   * Find device's extended PCI capability.
>   *
> diff --git a/drivers/bus/pci/version.map b/drivers/bus/pci/version.map
> index a0000f7938..2674f30235 100644
> --- a/drivers/bus/pci/version.map
> +++ b/drivers/bus/pci/version.map
> @@ -25,6 +25,10 @@ EXPERIMENTAL {
>  	# added in 23.07
>  	rte_pci_mmio_read;
>  	rte_pci_mmio_write;
> +
> +	# added in 23.11
> +	rte_pci_find_capability;
> +	rte_pci_has_capability_list;
>  };
> 
>  INTERNAL {
> diff --git a/drivers/crypto/virtio/virtio_pci.c
> b/drivers/crypto/virtio/virtio_pci.c
> index 95a43c8801..abc52b4701 100644
> --- a/drivers/crypto/virtio/virtio_pci.c
> +++ b/drivers/crypto/virtio/virtio_pci.c
> @@ -19,7 +19,6 @@
>   * we can't simply include that header here, as there is no such
>   * file for non-Linux platform.
>   */
> -#define PCI_CAPABILITY_LIST	0x34
>  #define PCI_CAP_ID_VNDR		0x09
>  #define PCI_CAP_ID_MSIX		0x11
> 
> @@ -343,8 +342,9 @@ get_cfg_addr(struct rte_pci_device *dev, struct
> virtio_pci_cap *cap)
>  static int
>  virtio_read_caps(struct rte_pci_device *dev, struct virtio_crypto_hw *hw)
>  {
> -	uint8_t pos;
>  	struct virtio_pci_cap cap;
> +	uint16_t flags;
> +	off_t pos;
>  	int ret;
> 
>  	if (rte_pci_map_device(dev)) {
> @@ -352,44 +352,26 @@ virtio_read_caps(struct rte_pci_device *dev, struct
> virtio_crypto_hw *hw)
>  		return -1;
>  	}
> 
> -	ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
> -	if (ret < 0) {
> -		VIRTIO_CRYPTO_INIT_LOG_DBG("failed to read pci capability
> list");
> -		return -1;
> +	/*
> +	 * Transitional devices would also have this capability,
> +	 * that's why we also check if msix is enabled.
> +	 */
> +	pos = rte_pci_find_capability(dev, PCI_CAP_ID_MSIX);
> +	if (pos > 0 && rte_pci_read_config(dev, &flags, sizeof(flags),
> +			pos + 2) == sizeof(flags)) {
> +		if (flags & PCI_MSIX_ENABLE)
> +			hw->use_msix = VIRTIO_MSIX_ENABLED;
> +		else
> +			hw->use_msix = VIRTIO_MSIX_DISABLED;
> +	} else {
> +		hw->use_msix = VIRTIO_MSIX_NONE;
>  	}
> 
> -	while (pos) {
> -		ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
> -		if (ret < 0) {
> -			VIRTIO_CRYPTO_INIT_LOG_ERR(
> -				"failed to read pci cap at pos: %x", pos);
> -			break;
> -		}
> -
> -		if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
> -			/* Transitional devices would also have this capability,
> -			 * that's why we also check if msix is enabled.
> -			 * 1st byte is cap ID; 2nd byte is the position of next
> -			 * cap; next two bytes are the flags.
> -			 */
> -			uint16_t flags = ((uint16_t *)&cap)[1];
> -
> -			if (flags & PCI_MSIX_ENABLE)
> -				hw->use_msix = VIRTIO_MSIX_ENABLED;
> -			else
> -				hw->use_msix = VIRTIO_MSIX_DISABLED;
> -		}
> -
> -		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
> -			VIRTIO_CRYPTO_INIT_LOG_DBG(
> -				"[%2x] skipping non VNDR cap id: %02x",
> -				pos, cap.cap_vndr);
> -			goto next;
> -		}
> -
> +	pos = rte_pci_find_capability(dev, PCI_CAP_ID_VNDR);

The logic of vendor cap init seems incorrect. Virtio devices have multiple
Vendor cap (different cfg type). But now the logic seems to only init the first
one.

> +	if (pos > 0 && rte_pci_read_config(dev, &cap, sizeof(cap), pos) ==
> sizeof(cap)) {
>  		VIRTIO_CRYPTO_INIT_LOG_DBG(
>  			"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
> -			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
> +			(unsigned int)pos, cap.cfg_type, cap.bar, cap.offset,
> cap.length);
> 
>  		switch (cap.cfg_type) {
>  		case VIRTIO_PCI_CAP_COMMON_CFG:
> @@ -411,9 +393,6 @@ virtio_read_caps(struct rte_pci_device *dev, struct
> virtio_crypto_hw *hw)
>  			hw->isr = get_cfg_addr(dev, &cap);
>  			break;
>  		}
> -
> -next:
> -		pos = cap.cap_next;
>  	}

...

> diff --git a/drivers/net/virtio/virtio_pci.c
> b/drivers/net/virtio/virtio_pci.c
> index 29eb739b04..9fd9db3e03 100644
> --- a/drivers/net/virtio/virtio_pci.c
> +++ b/drivers/net/virtio/virtio_pci.c
> @@ -20,7 +20,6 @@
>   * we can't simply include that header here, as there is no such
>   * file for non-Linux platform.
>   */
> -#define PCI_CAPABILITY_LIST	0x34
>  #define PCI_CAP_ID_VNDR		0x09
>  #define PCI_CAP_ID_MSIX		0x11
> 
> @@ -38,46 +37,16 @@ struct virtio_pci_internal
> virtio_pci_internal[RTE_MAX_ETHPORTS];
>  static enum virtio_msix_status
>  vtpci_msix_detect(struct rte_pci_device *dev)
>  {
> -	uint8_t pos;
> -	int ret;
> +	uint16_t flags;
> +	off_t pos;
> 
> -	ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
> -	if (ret != 1) {
> -		PMD_INIT_LOG(DEBUG,
> -			     "failed to read pci capability list, ret %d", ret);
> -		return VIRTIO_MSIX_NONE;
> -	}
> -
> -	while (pos) {
> -		uint8_t cap[2];
> -
> -		ret = rte_pci_read_config(dev, cap, sizeof(cap), pos);
> -		if (ret != sizeof(cap)) {
> -			PMD_INIT_LOG(DEBUG,
> -				     "failed to read pci cap at pos: %x ret %d",
> -				     pos, ret);
> -			break;
> -		}
> -
> -		if (cap[0] == PCI_CAP_ID_MSIX) {
> -			uint16_t flags;
> -
> -			ret = rte_pci_read_config(dev, &flags, sizeof(flags),
> -					pos + sizeof(cap));
> -			if (ret != sizeof(flags)) {
> -				PMD_INIT_LOG(DEBUG,
> -					     "failed to read pci cap at pos:"
> -					     " %x ret %d", pos + 2, ret);
> -				break;
> -			}
> -
> -			if (flags & PCI_MSIX_ENABLE)
> -				return VIRTIO_MSIX_ENABLED;
> -			else
> -				return VIRTIO_MSIX_DISABLED;
> -		}
> -
> -		pos = cap[1];
> +	pos = rte_pci_find_capability(dev, PCI_CAP_ID_MSIX);
> +	if (pos > 0 && rte_pci_read_config(dev, &flags, sizeof(flags),
> +			pos + 2) == sizeof(flags)) {
> +		if (flags & PCI_MSIX_ENABLE)
> +			return VIRTIO_MSIX_ENABLED;
> +		else
> +			return VIRTIO_MSIX_DISABLED;
>  	}
> 
>  	return VIRTIO_MSIX_NONE;
> @@ -623,8 +592,8 @@ static int
>  virtio_read_caps(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
>  {
>  	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
> -	uint8_t pos;
>  	struct virtio_pci_cap cap;
> +	off_t pos;
>  	int ret;
> 
>  	if (rte_pci_map_device(pci_dev)) {
> @@ -632,72 +601,25 @@ virtio_read_caps(struct rte_pci_device *pci_dev,
> struct virtio_hw *hw)
>  		return -1;
>  	}
> 
> -	ret = rte_pci_read_config(pci_dev, &pos, 1, PCI_CAPABILITY_LIST);
> -	if (ret != 1) {
> -		PMD_INIT_LOG(DEBUG,
> -			     "failed to read pci capability list, ret %d", ret);
> -		return -1;
> -	}
> -
> -	while (pos) {
> -		ret = rte_pci_read_config(pci_dev, &cap, 2, pos);
> -		if (ret != 2) {
> -			PMD_INIT_LOG(DEBUG,
> -				     "failed to read pci cap at pos: %x ret %d",
> -				     pos, ret);
> -			break;
> -		}
> -
> -		if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
> -			/* Transitional devices would also have this capability,
> -			 * that's why we also check if msix is enabled.
> -			 * 1st byte is cap ID; 2nd byte is the position of next
> -			 * cap; next two bytes are the flags.
> -			 */
> -			uint16_t flags;
> -
> -			ret = rte_pci_read_config(pci_dev, &flags, sizeof(flags),
> -					pos + 2);
> -			if (ret != sizeof(flags)) {
> -				PMD_INIT_LOG(DEBUG,
> -					     "failed to read pci cap at pos:"
> -					     " %x ret %d", pos + 2, ret);
> -				break;
> -			}
> -
> -			if (flags & PCI_MSIX_ENABLE)
> -				dev->msix_status = VIRTIO_MSIX_ENABLED;
> -			else
> -				dev->msix_status = VIRTIO_MSIX_DISABLED;
> -		}
> -
> -		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
> -			PMD_INIT_LOG(DEBUG,
> -				"[%2x] skipping non VNDR cap id: %02x",
> -				pos, cap.cap_vndr);
> -			goto next;
> -		}
> -
> -		ret = rte_pci_read_config(pci_dev, &cap, sizeof(cap), pos);
> -		if (ret != sizeof(cap)) {
> -			PMD_INIT_LOG(DEBUG,
> -				     "failed to read pci cap at pos: %x ret %d",
> -				     pos, ret);
> -			break;
> -		}
> +	/*
> +	 * Transitional devices would also have this capability,
> +	 * that's why we also check if msix is enabled.
> +	 */
> +	dev->msix_status = vtpci_msix_detect(pci_dev);
> 
> +	pos = rte_pci_find_capability(pci_dev, PCI_CAP_ID_VNDR);
> +	if (pos > 0 && rte_pci_read_config(pci_dev, &cap, sizeof(cap), pos)
> == sizeof(cap)) {
>  		PMD_INIT_LOG(DEBUG,
>  			"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
> -			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
> +			(unsigned int)pos, cap.cfg_type, cap.bar, cap.offset,
> cap.length);
> 

Same comment about the vendor cap.

Thanks,
Chenbo

>  		switch (cap.cfg_type) {
>  		case VIRTIO_PCI_CAP_COMMON_CFG:
>  			dev->common_cfg = get_cfg_addr(pci_dev, &cap);
>  			break;
>  		case VIRTIO_PCI_CAP_NOTIFY_CFG:
> -			ret = rte_pci_read_config(pci_dev,
> -					&dev->notify_off_multiplier,
> -					4, pos + sizeof(cap));
> +			ret = rte_pci_read_config(pci_dev, &dev-
> >notify_off_multiplier,
> +				4, pos + sizeof(cap));
>  			if (ret != 4)
>  				PMD_INIT_LOG(DEBUG,
>  					"failed to read notify_off_multiplier,
> ret %d",
> @@ -712,9 +634,6 @@ virtio_read_caps(struct rte_pci_device *pci_dev,
> struct virtio_hw *hw)
>  			dev->isr = get_cfg_addr(pci_dev, &cap);
>  			break;
>  		}
> -
> -next:
> -		pos = cap.cap_next;
>  	}
> 
>  	if (dev->common_cfg == NULL || dev->notify_base == NULL ||
> diff --git a/lib/pci/rte_pci.h b/lib/pci/rte_pci.h
> index aab761b918..49fd5b1d02 100644
> --- a/lib/pci/rte_pci.h
> +++ b/lib/pci/rte_pci.h
> @@ -28,13 +28,24 @@ extern "C" {
>  #define RTE_PCI_CFG_SPACE_SIZE		256
>  #define RTE_PCI_CFG_SPACE_EXP_SIZE	4096
> 
> +#define RTE_PCI_STD_HEADER_SIZEOF	64
> +
> +/* Standard register offsets in the PCI configuration space */
>  #define RTE_PCI_VENDOR_ID	0x00	/* 16 bits */
>  #define RTE_PCI_DEVICE_ID	0x02	/* 16 bits */
>  #define RTE_PCI_COMMAND		0x04	/* 16 bits */
> +#define RTE_PCI_STATUS		0x06	/* 16 bits */
> +#define RTE_PCI_CAPABILITY_LIST	0x34	/* 32 bits */
> 
>  /* PCI Command Register */
>  #define RTE_PCI_COMMAND_MASTER	0x4	/* Bus Master Enable */
> 
> +/* PCI Status Register (RTE_PCI_STATUS) */
> +#define RTE_PCI_STATUS_CAP_LIST		0x10	/* Support Capability List
> */
> +
> +/* Capability registers (RTE_PCI_CAPABILITY_LIST) */
> +#define RTE_PCI_CAP_SIZEOF		4
> +
>  /* PCI Express capability registers */
>  #define RTE_PCI_EXP_DEVCTL	8	/* Device Control */
> 
> --
> 2.41.0
  
David Marchand Sept. 14, 2023, 12:29 p.m. UTC | #2
Hello Chenbo,

On Thu, Sep 7, 2023 at 2:43 PM Xia, Chenbo <chenbo.xia@intel.com> wrote:
> > Introduce two helpers so that drivers stop reinventing the wheel when it
> > comes to finding capabilities in a device PCI configuration space.
> > Use it in existing drivers.
> >
> > Note:
> > - base/ drivers code is left untouched, only some wrappers in cxgbe
> >   are touched,
> > - bnx2x maintained a per device cache of capabilities, this code has been
> >   reworked to only cache the capabilities used in this driver,
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> > ---
> > Changes since v1:
> > - updated commitlog,
> > - separated VFIO changes for using standard PCI helper in a separate
> >   patch,
> > - marked new experimental symbols with current version,
> > - reordered defines in rte_pci.h,
> >
> > ---
> >  drivers/bus/pci/linux/pci_vfio.c   |  74 ++++--------------
> >  drivers/bus/pci/pci_common.c       |  45 +++++++++++
> >  drivers/bus/pci/rte_bus_pci.h      |  31 ++++++++
> >  drivers/bus/pci/version.map        |   4 +
> >  drivers/crypto/virtio/virtio_pci.c |  57 +++++---------
> >  drivers/event/dlb2/pf/dlb2_main.c  |  42 +---------
> >  drivers/net/bnx2x/bnx2x.c          |  41 +++++-----
> >  drivers/net/cxgbe/base/adapter.h   |  28 +------
> >  drivers/net/gve/gve_ethdev.c       |  46 ++---------
> >  drivers/net/gve/gve_ethdev.h       |   4 -
> >  drivers/net/hns3/hns3_ethdev_vf.c  |  79 +++----------------
> >  drivers/net/virtio/virtio_pci.c    | 121 +++++------------------------
> >  lib/pci/rte_pci.h                  |  11 +++
> >  13 files changed, 186 insertions(+), 397 deletions(-)
> >
> > diff --git a/drivers/bus/pci/linux/pci_vfio.c
> > b/drivers/bus/pci/linux/pci_vfio.c
> > index 958f8b3b52..614ed5d696 100644
> > --- a/drivers/bus/pci/linux/pci_vfio.c
> > +++ b/drivers/bus/pci/linux/pci_vfio.c
> > @@ -110,74 +110,34 @@ static int
> >  pci_vfio_get_msix_bar(const struct rte_pci_device *dev,
> >       struct pci_msix_table *msix_table)
> >  {
> > -     int ret;
> > -     uint32_t reg;
> > -     uint16_t flags;
> > -     uint8_t cap_id, cap_offset;
> > +     off_t cap_offset;
> >
> > -     /* read PCI capability pointer from config space */
> > -     ret = rte_pci_read_config(dev, &reg, sizeof(reg),
> > PCI_CAPABILITY_LIST);
> > -     if (ret != sizeof(reg)) {
> > -             RTE_LOG(ERR, EAL,
> > -                     "Cannot read capability pointer from PCI config
> > space!\n");
> > +     cap_offset = rte_pci_find_capability(dev, PCI_CAP_ID_MSIX);
>
> I notice in some cases we use rte_pci_has_capability_list() to check first,
> then looking for specific cap, in other cases we don't use
> rte_pci_has_capability_list(). Since we define this API, should we always do
> the check?

Hum, I am not sure of what you suggest here.

I tried to do a 1:1 conversion of what existed.
Do you mean there are places where I missed converting some
rte_pci_read_config(PCI_CAPABILITY_LIST) to
rte_pci_has_capability_list()?
If so, could you point at them?

Or are you suggesting to have this check as part of rte_pci_find_capability() ?


I'll respin a v3 soon (to fix the nasty issue you pointed out below).
A v4 may be needed depending on your replies to above questions.


>
>
> > +     if (cap_offset < 0)
> >               return -1;
> > -     }
> >
> > -     /* we need first byte */
> > -     cap_offset = reg & 0xFF;
> > +     if (cap_offset != 0) {
> > +             uint16_t flags;
> > +             uint32_t reg;
> >
> > -     while (cap_offset) {
> > -
> > -             /* read PCI capability ID */
> > -             ret = rte_pci_read_config(dev, &reg, sizeof(reg), cap_offset);
> > -             if (ret != sizeof(reg)) {
> > +             /* table offset resides in the next 4 bytes */
> > +             if (rte_pci_read_config(dev, &reg, sizeof(reg), cap_offset + 4)
> > < 0) {
> >                       RTE_LOG(ERR, EAL,
> > -                             "Cannot read capability ID from PCI config
> > space!\n");
> > +                             "Cannot read MSIX table from PCI config space!\n");
> >                       return -1;
> >               }
> >
> > -             /* we need first byte */
> > -             cap_id = reg & 0xFF;
> > -
> > -             /* if we haven't reached MSI-X, check next capability */
> > -             if (cap_id != PCI_CAP_ID_MSIX) {
> > -                     ret = rte_pci_read_config(dev, &reg, sizeof(reg),
> > cap_offset);
> > -                     if (ret != sizeof(reg)) {
> > -                             RTE_LOG(ERR, EAL,
> > -                                     "Cannot read capability pointer from PCI
> > config space!\n");
> > -                             return -1;
> > -                     }
> > -
> > -                     /* we need second byte */
> > -                     cap_offset = (reg & 0xFF00) >> 8;
> > -
> > -                     continue;
> > +             if (rte_pci_read_config(dev, &flags, sizeof(flags), cap_offset
> > + 2) < 0) {
> > +                     RTE_LOG(ERR, EAL,
> > +                             "Cannot read MSIX flags from PCI config space!\n");
> > +                     return -1;
> >               }
> > -             /* else, read table offset */
> > -             else {
> > -                     /* table offset resides in the next 4 bytes */
> > -                     ret = rte_pci_read_config(dev, &reg, sizeof(reg),
> > cap_offset + 4);
> > -                     if (ret != sizeof(reg)) {
> > -                             RTE_LOG(ERR, EAL,
> > -                                     "Cannot read table offset from PCI config
> > space!\n");
> > -                             return -1;
> > -                     }
> > -
> > -                     ret = rte_pci_read_config(dev, &flags, sizeof(flags),
> > cap_offset + 2);
> > -                     if (ret != sizeof(flags)) {
> > -                             RTE_LOG(ERR, EAL,
> > -                                     "Cannot read table flags from PCI config
> > space!\n");
> > -                             return -1;
> > -                     }
> > -
> > -                     msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
> > -                     msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> > -                     msix_table->size =
> > -                             16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
> >
> > -                     return 0;
> > -             }
> > +             msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
> > +             msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> > +             msix_table->size = 16 * (1 + (flags &
> > RTE_PCI_MSIX_FLAGS_QSIZE));
> >       }
> > +
> >       return 0;
> >  }
> >
> > diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
> > index 382b0b8946..52272617eb 100644
> > --- a/drivers/bus/pci/pci_common.c
> > +++ b/drivers/bus/pci/pci_common.c
> > @@ -813,6 +813,51 @@ rte_pci_get_iommu_class(void)
> >       return iova_mode;
> >  }
> >
> > +bool
> > +rte_pci_has_capability_list(const struct rte_pci_device *dev)
> > +{
> > +     uint16_t status;
> > +
> > +     if (rte_pci_read_config(dev, &status, sizeof(status),
> > RTE_PCI_STATUS) != sizeof(status))
> > +             return false;
> > +
> > +     return (status & RTE_PCI_STATUS_CAP_LIST) != 0;
> > +}
> > +
> > +off_t
> > +rte_pci_find_capability(const struct rte_pci_device *dev, uint8_t cap)
> > +{
> > +     off_t offset;
> > +     uint8_t pos;
> > +     int ttl;
> > +
> > +     offset = RTE_PCI_CAPABILITY_LIST;
> > +     ttl = (RTE_PCI_CFG_SPACE_SIZE - RTE_PCI_STD_HEADER_SIZEOF) /
> > RTE_PCI_CAP_SIZEOF;
> > +
> > +     if (rte_pci_read_config(dev, &pos, sizeof(pos), offset) < 0)
> > +             return -1;
> > +
> > +     while (pos && ttl--) {
> > +             uint16_t ent;
> > +             uint8_t id;
> > +
> > +             offset = pos;
> > +             if (rte_pci_read_config(dev, &ent, sizeof(ent), offset) < 0)
> > +                     return -1;
> > +
> > +             id = ent & 0xff;
> > +             if (id == 0xff)
> > +                     break;
> > +
> > +             if (id == cap)
> > +                     return offset;
> > +
> > +             pos = (ent >> 8);
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> >  off_t
> >  rte_pci_find_ext_capability(const struct rte_pci_device *dev, uint32_t
> > cap)
> >  {
> > diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
> > index 75d0030eae..1ed33dbf3d 100644
> > --- a/drivers/bus/pci/rte_bus_pci.h
> > +++ b/drivers/bus/pci/rte_bus_pci.h
> > @@ -68,6 +68,37 @@ void rte_pci_unmap_device(struct rte_pci_device *dev);
> >   */
> >  void rte_pci_dump(FILE *f);
> >
> > +/**
> > + * Check whether this device has a PCI capability list.
> > + *
> > + *  @param dev
> > + *    A pointer to rte_pci_device structure.
> > + *
> > + *  @return
> > + *    true/false
> > + */
> > +__rte_experimental
> > +bool rte_pci_has_capability_list(const struct rte_pci_device *dev);
> > +
> > +/**
> > + * Find device's PCI capability.
> > + *
> > + *  @param dev
> > + *    A pointer to rte_pci_device structure.
> > + *
> > + *  @param cap
> > + *    Capability to be found, which can be any from
> > + *    RTE_PCI_CAP_ID_*, defined in librte_pci.
> > + *
> > + *  @return
> > + *  > 0: The offset of the next matching capability structure
> > + *       within the device's PCI configuration space.
> > + *  < 0: An error in PCI config space read.
> > + *  = 0: Device does not support it.
> > + */
> > +__rte_experimental
> > +off_t rte_pci_find_capability(const struct rte_pci_device *dev, uint8_t
> > cap);
> > +
> >  /**
> >   * Find device's extended PCI capability.
> >   *
> > diff --git a/drivers/bus/pci/version.map b/drivers/bus/pci/version.map
> > index a0000f7938..2674f30235 100644
> > --- a/drivers/bus/pci/version.map
> > +++ b/drivers/bus/pci/version.map
> > @@ -25,6 +25,10 @@ EXPERIMENTAL {
> >       # added in 23.07
> >       rte_pci_mmio_read;
> >       rte_pci_mmio_write;
> > +
> > +     # added in 23.11
> > +     rte_pci_find_capability;
> > +     rte_pci_has_capability_list;
> >  };
> >
> >  INTERNAL {
> > diff --git a/drivers/crypto/virtio/virtio_pci.c
> > b/drivers/crypto/virtio/virtio_pci.c
> > index 95a43c8801..abc52b4701 100644
> > --- a/drivers/crypto/virtio/virtio_pci.c
> > +++ b/drivers/crypto/virtio/virtio_pci.c
> > @@ -19,7 +19,6 @@
> >   * we can't simply include that header here, as there is no such
> >   * file for non-Linux platform.
> >   */
> > -#define PCI_CAPABILITY_LIST  0x34
> >  #define PCI_CAP_ID_VNDR              0x09
> >  #define PCI_CAP_ID_MSIX              0x11
> >
> > @@ -343,8 +342,9 @@ get_cfg_addr(struct rte_pci_device *dev, struct
> > virtio_pci_cap *cap)
> >  static int
> >  virtio_read_caps(struct rte_pci_device *dev, struct virtio_crypto_hw *hw)
> >  {
> > -     uint8_t pos;
> >       struct virtio_pci_cap cap;
> > +     uint16_t flags;
> > +     off_t pos;
> >       int ret;
> >
> >       if (rte_pci_map_device(dev)) {
> > @@ -352,44 +352,26 @@ virtio_read_caps(struct rte_pci_device *dev, struct
> > virtio_crypto_hw *hw)
> >               return -1;
> >       }
> >
> > -     ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
> > -     if (ret < 0) {
> > -             VIRTIO_CRYPTO_INIT_LOG_DBG("failed to read pci capability
> > list");
> > -             return -1;
> > +     /*
> > +      * Transitional devices would also have this capability,
> > +      * that's why we also check if msix is enabled.
> > +      */
> > +     pos = rte_pci_find_capability(dev, PCI_CAP_ID_MSIX);
> > +     if (pos > 0 && rte_pci_read_config(dev, &flags, sizeof(flags),
> > +                     pos + 2) == sizeof(flags)) {
> > +             if (flags & PCI_MSIX_ENABLE)
> > +                     hw->use_msix = VIRTIO_MSIX_ENABLED;
> > +             else
> > +                     hw->use_msix = VIRTIO_MSIX_DISABLED;
> > +     } else {
> > +             hw->use_msix = VIRTIO_MSIX_NONE;
> >       }
> >
> > -     while (pos) {
> > -             ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
> > -             if (ret < 0) {
> > -                     VIRTIO_CRYPTO_INIT_LOG_ERR(
> > -                             "failed to read pci cap at pos: %x", pos);
> > -                     break;
> > -             }
> > -
> > -             if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
> > -                     /* Transitional devices would also have this capability,
> > -                      * that's why we also check if msix is enabled.
> > -                      * 1st byte is cap ID; 2nd byte is the position of next
> > -                      * cap; next two bytes are the flags.
> > -                      */
> > -                     uint16_t flags = ((uint16_t *)&cap)[1];
> > -
> > -                     if (flags & PCI_MSIX_ENABLE)
> > -                             hw->use_msix = VIRTIO_MSIX_ENABLED;
> > -                     else
> > -                             hw->use_msix = VIRTIO_MSIX_DISABLED;
> > -             }
> > -
> > -             if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
> > -                     VIRTIO_CRYPTO_INIT_LOG_DBG(
> > -                             "[%2x] skipping non VNDR cap id: %02x",
> > -                             pos, cap.cap_vndr);
> > -                     goto next;
> > -             }
> > -
> > +     pos = rte_pci_find_capability(dev, PCI_CAP_ID_VNDR);
>
> The logic of vendor cap init seems incorrect. Virtio devices have multiple
> Vendor cap (different cfg type). But now the logic seems to only init the first
> one.

Indeed, good catch!
It is sad that the CI did not catch this regression in virtio pmd init :-(.

I'll add a find_next_capability helper for v3.



>
> > +     if (pos > 0 && rte_pci_read_config(dev, &cap, sizeof(cap), pos) ==
> > sizeof(cap)) {
> >               VIRTIO_CRYPTO_INIT_LOG_DBG(
> >                       "[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
> > -                     pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
> > +                     (unsigned int)pos, cap.cfg_type, cap.bar, cap.offset,
> > cap.length);
> >
> >               switch (cap.cfg_type) {
> >               case VIRTIO_PCI_CAP_COMMON_CFG:
> > @@ -411,9 +393,6 @@ virtio_read_caps(struct rte_pci_device *dev, struct
> > virtio_crypto_hw *hw)
> >                       hw->isr = get_cfg_addr(dev, &cap);
> >                       break;
> >               }
> > -
> > -next:
> > -             pos = cap.cap_next;
> >       }
>
> ...
  
Chenbo Xia Sept. 19, 2023, 2:19 a.m. UTC | #3
Hi David,

> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Thursday, September 14, 2023 8:29 PM
> To: Xia, Chenbo <chenbo.xia@intel.com>; Maxime Coquelin
> <maxime.coquelin@redhat.com>
> Cc: dev@dpdk.org; thomas@monjalon.net; ferruh.yigit@amd.com;
> nipun.gupta@amd.com; Richardson, Bruce <bruce.richardson@intel.com>;
> Burakov, Anatoly <anatoly.burakov@intel.com>; Jay Zhou
> <jianjay.zhou@huawei.com>; McDaniel, Timothy <timothy.mcdaniel@intel.com>;
> Julien Aube <julien_dpdk@jaube.fr>; Rahul Lakkireddy
> <rahul.lakkireddy@chelsio.com>; Guo, Junfeng <junfeng.guo@intel.com>;
> Jeroen de Borst <jeroendb@google.com>; Rushil Gupta <rushilg@google.com>;
> Joshua Washington <joshwash@google.com>; Dongdong Liu
> <liudongdong3@huawei.com>; Yisen Zhuang <yisen.zhuang@huawei.com>; Gaetan
> Rivet <grive@u256.net>
> Subject: Re: [PATCH v2 04/15] bus/pci: find PCI capability
> 
> Hello Chenbo,
> 
> On Thu, Sep 7, 2023 at 2:43 PM Xia, Chenbo <chenbo.xia@intel.com> wrote:
> > > Introduce two helpers so that drivers stop reinventing the wheel when
> it
> > > comes to finding capabilities in a device PCI configuration space.
> > > Use it in existing drivers.
> > >
> > > Note:
> > > - base/ drivers code is left untouched, only some wrappers in cxgbe
> > >   are touched,
> > > - bnx2x maintained a per device cache of capabilities, this code has
> been
> > >   reworked to only cache the capabilities used in this driver,
> > >
> > > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > > Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> > > ---
> > > Changes since v1:
> > > - updated commitlog,
> > > - separated VFIO changes for using standard PCI helper in a separate
> > >   patch,
> > > - marked new experimental symbols with current version,
> > > - reordered defines in rte_pci.h,
> > >
> > > ---
> > >  drivers/bus/pci/linux/pci_vfio.c   |  74 ++++--------------
> > >  drivers/bus/pci/pci_common.c       |  45 +++++++++++
> > >  drivers/bus/pci/rte_bus_pci.h      |  31 ++++++++
> > >  drivers/bus/pci/version.map        |   4 +
> > >  drivers/crypto/virtio/virtio_pci.c |  57 +++++---------
> > >  drivers/event/dlb2/pf/dlb2_main.c  |  42 +---------
> > >  drivers/net/bnx2x/bnx2x.c          |  41 +++++-----
> > >  drivers/net/cxgbe/base/adapter.h   |  28 +------
> > >  drivers/net/gve/gve_ethdev.c       |  46 ++---------
> > >  drivers/net/gve/gve_ethdev.h       |   4 -
> > >  drivers/net/hns3/hns3_ethdev_vf.c  |  79 +++----------------
> > >  drivers/net/virtio/virtio_pci.c    | 121 +++++-----------------------
> -
> > >  lib/pci/rte_pci.h                  |  11 +++
> > >  13 files changed, 186 insertions(+), 397 deletions(-)
> > >
> > > diff --git a/drivers/bus/pci/linux/pci_vfio.c
> > > b/drivers/bus/pci/linux/pci_vfio.c
> > > index 958f8b3b52..614ed5d696 100644
> > > --- a/drivers/bus/pci/linux/pci_vfio.c
> > > +++ b/drivers/bus/pci/linux/pci_vfio.c
> > > @@ -110,74 +110,34 @@ static int
> > >  pci_vfio_get_msix_bar(const struct rte_pci_device *dev,
> > >       struct pci_msix_table *msix_table)
> > >  {
> > > -     int ret;
> > > -     uint32_t reg;
> > > -     uint16_t flags;
> > > -     uint8_t cap_id, cap_offset;
> > > +     off_t cap_offset;
> > >
> > > -     /* read PCI capability pointer from config space */
> > > -     ret = rte_pci_read_config(dev, &reg, sizeof(reg),
> > > PCI_CAPABILITY_LIST);
> > > -     if (ret != sizeof(reg)) {
> > > -             RTE_LOG(ERR, EAL,
> > > -                     "Cannot read capability pointer from PCI config
> > > space!\n");
> > > +     cap_offset = rte_pci_find_capability(dev, PCI_CAP_ID_MSIX);
> >
> > I notice in some cases we use rte_pci_has_capability_list() to check
> first,
> > then looking for specific cap, in other cases we don't use
> > rte_pci_has_capability_list(). Since we define this API, should we
> always do
> > the check?
> 
> Hum, I am not sure of what you suggest here.
> 
> I tried to do a 1:1 conversion of what existed.
> Do you mean there are places where I missed converting some
> rte_pci_read_config(PCI_CAPABILITY_LIST) to
> rte_pci_has_capability_list()?
> If so, could you point at them?
> 
> Or are you suggesting to have this check as part of
> rte_pci_find_capability() ?

Your conversion is correct. I was saying about the usage of
rte_pci_has_capability_list/rte_pci_find_capability, logically should we
always call rte_pci_has_capability_list first before find capability?

I don't have strong opinion on this. You can choose to keep the same or
call rte_pci_has_capability_list more.

Thanks!
Chenbo

> 
> 
> I'll respin a v3 soon (to fix the nasty issue you pointed out below).
> A v4 may be needed depending on your replies to above questions.
> 
> 
> >
> >
> > > +     if (cap_offset < 0)
> > >               return -1;
> > > -     }
> > >
> > > -     /* we need first byte */
> > > -     cap_offset = reg & 0xFF;
> > > +     if (cap_offset != 0) {
> > > +             uint16_t flags;
> > > +             uint32_t reg;
> > >
> > > -     while (cap_offset) {
> > > -
> > > -             /* read PCI capability ID */
> > > -             ret = rte_pci_read_config(dev, &reg, sizeof(reg),
> cap_offset);
> > > -             if (ret != sizeof(reg)) {
> > > +             /* table offset resides in the next 4 bytes */
> > > +             if (rte_pci_read_config(dev, &reg, sizeof(reg),
> cap_offset + 4)
> > > < 0) {
> > >                       RTE_LOG(ERR, EAL,
> > > -                             "Cannot read capability ID from PCI
> config
> > > space!\n");
> > > +                             "Cannot read MSIX table from PCI config
> space!\n");
> > >                       return -1;
> > >               }
> > >
> > > -             /* we need first byte */
> > > -             cap_id = reg & 0xFF;
> > > -
> > > -             /* if we haven't reached MSI-X, check next capability */
> > > -             if (cap_id != PCI_CAP_ID_MSIX) {
> > > -                     ret = rte_pci_read_config(dev, &reg, sizeof(reg),
> > > cap_offset);
> > > -                     if (ret != sizeof(reg)) {
> > > -                             RTE_LOG(ERR, EAL,
> > > -                                     "Cannot read capability pointer
> from PCI
> > > config space!\n");
> > > -                             return -1;
> > > -                     }
> > > -
> > > -                     /* we need second byte */
> > > -                     cap_offset = (reg & 0xFF00) >> 8;
> > > -
> > > -                     continue;
> > > +             if (rte_pci_read_config(dev, &flags, sizeof(flags),
> cap_offset
> > > + 2) < 0) {
> > > +                     RTE_LOG(ERR, EAL,
> > > +                             "Cannot read MSIX flags from PCI config
> space!\n");
> > > +                     return -1;
> > >               }
> > > -             /* else, read table offset */
> > > -             else {
> > > -                     /* table offset resides in the next 4 bytes */
> > > -                     ret = rte_pci_read_config(dev, &reg, sizeof(reg),
> > > cap_offset + 4);
> > > -                     if (ret != sizeof(reg)) {
> > > -                             RTE_LOG(ERR, EAL,
> > > -                                     "Cannot read table offset from
> PCI config
> > > space!\n");
> > > -                             return -1;
> > > -                     }
> > > -
> > > -                     ret = rte_pci_read_config(dev, &flags,
> sizeof(flags),
> > > cap_offset + 2);
> > > -                     if (ret != sizeof(flags)) {
> > > -                             RTE_LOG(ERR, EAL,
> > > -                                     "Cannot read table flags from
> PCI config
> > > space!\n");
> > > -                             return -1;
> > > -                     }
> > > -
> > > -                     msix_table->bar_index = reg &
> RTE_PCI_MSIX_TABLE_BIR;
> > > -                     msix_table->offset = reg &
> RTE_PCI_MSIX_TABLE_OFFSET;
> > > -                     msix_table->size =
> > > -                             16 * (1 + (flags &
> RTE_PCI_MSIX_FLAGS_QSIZE));
> > >
> > > -                     return 0;
> > > -             }
> > > +             msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
> > > +             msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> > > +             msix_table->size = 16 * (1 + (flags &
> > > RTE_PCI_MSIX_FLAGS_QSIZE));
> > >       }
> > > +
> > >       return 0;
> > >  }
> > >
> > > diff --git a/drivers/bus/pci/pci_common.c
> b/drivers/bus/pci/pci_common.c
> > > index 382b0b8946..52272617eb 100644
> > > --- a/drivers/bus/pci/pci_common.c
> > > +++ b/drivers/bus/pci/pci_common.c
> > > @@ -813,6 +813,51 @@ rte_pci_get_iommu_class(void)
> > >       return iova_mode;
> > >  }
> > >
> > > +bool
> > > +rte_pci_has_capability_list(const struct rte_pci_device *dev)
> > > +{
> > > +     uint16_t status;
> > > +
> > > +     if (rte_pci_read_config(dev, &status, sizeof(status),
> > > RTE_PCI_STATUS) != sizeof(status))
> > > +             return false;
> > > +
> > > +     return (status & RTE_PCI_STATUS_CAP_LIST) != 0;
> > > +}
> > > +
> > > +off_t
> > > +rte_pci_find_capability(const struct rte_pci_device *dev, uint8_t cap)
> > > +{
> > > +     off_t offset;
> > > +     uint8_t pos;
> > > +     int ttl;
> > > +
> > > +     offset = RTE_PCI_CAPABILITY_LIST;
> > > +     ttl = (RTE_PCI_CFG_SPACE_SIZE - RTE_PCI_STD_HEADER_SIZEOF) /
> > > RTE_PCI_CAP_SIZEOF;
> > > +
> > > +     if (rte_pci_read_config(dev, &pos, sizeof(pos), offset) < 0)
> > > +             return -1;
> > > +
> > > +     while (pos && ttl--) {
> > > +             uint16_t ent;
> > > +             uint8_t id;
> > > +
> > > +             offset = pos;
> > > +             if (rte_pci_read_config(dev, &ent, sizeof(ent), offset)
> < 0)
> > > +                     return -1;
> > > +
> > > +             id = ent & 0xff;
> > > +             if (id == 0xff)
> > > +                     break;
> > > +
> > > +             if (id == cap)
> > > +                     return offset;
> > > +
> > > +             pos = (ent >> 8);
> > > +     }
> > > +
> > > +     return 0;
> > > +}
> > > +
> > >  off_t
> > >  rte_pci_find_ext_capability(const struct rte_pci_device *dev,
> uint32_t
> > > cap)
> > >  {
> > > diff --git a/drivers/bus/pci/rte_bus_pci.h
> b/drivers/bus/pci/rte_bus_pci.h
> > > index 75d0030eae..1ed33dbf3d 100644
> > > --- a/drivers/bus/pci/rte_bus_pci.h
> > > +++ b/drivers/bus/pci/rte_bus_pci.h
> > > @@ -68,6 +68,37 @@ void rte_pci_unmap_device(struct rte_pci_device
> *dev);
> > >   */
> > >  void rte_pci_dump(FILE *f);
> > >
> > > +/**
> > > + * Check whether this device has a PCI capability list.
> > > + *
> > > + *  @param dev
> > > + *    A pointer to rte_pci_device structure.
> > > + *
> > > + *  @return
> > > + *    true/false
> > > + */
> > > +__rte_experimental
> > > +bool rte_pci_has_capability_list(const struct rte_pci_device *dev);
> > > +
> > > +/**
> > > + * Find device's PCI capability.
> > > + *
> > > + *  @param dev
> > > + *    A pointer to rte_pci_device structure.
> > > + *
> > > + *  @param cap
> > > + *    Capability to be found, which can be any from
> > > + *    RTE_PCI_CAP_ID_*, defined in librte_pci.
> > > + *
> > > + *  @return
> > > + *  > 0: The offset of the next matching capability structure
> > > + *       within the device's PCI configuration space.
> > > + *  < 0: An error in PCI config space read.
> > > + *  = 0: Device does not support it.
> > > + */
> > > +__rte_experimental
> > > +off_t rte_pci_find_capability(const struct rte_pci_device *dev,
> uint8_t
> > > cap);
> > > +
> > >  /**
> > >   * Find device's extended PCI capability.
> > >   *
> > > diff --git a/drivers/bus/pci/version.map b/drivers/bus/pci/version.map
> > > index a0000f7938..2674f30235 100644
> > > --- a/drivers/bus/pci/version.map
> > > +++ b/drivers/bus/pci/version.map
> > > @@ -25,6 +25,10 @@ EXPERIMENTAL {
> > >       # added in 23.07
> > >       rte_pci_mmio_read;
> > >       rte_pci_mmio_write;
> > > +
> > > +     # added in 23.11
> > > +     rte_pci_find_capability;
> > > +     rte_pci_has_capability_list;
> > >  };
> > >
> > >  INTERNAL {
> > > diff --git a/drivers/crypto/virtio/virtio_pci.c
> > > b/drivers/crypto/virtio/virtio_pci.c
> > > index 95a43c8801..abc52b4701 100644
> > > --- a/drivers/crypto/virtio/virtio_pci.c
> > > +++ b/drivers/crypto/virtio/virtio_pci.c
> > > @@ -19,7 +19,6 @@
> > >   * we can't simply include that header here, as there is no such
> > >   * file for non-Linux platform.
> > >   */
> > > -#define PCI_CAPABILITY_LIST  0x34
> > >  #define PCI_CAP_ID_VNDR              0x09
> > >  #define PCI_CAP_ID_MSIX              0x11
> > >
> > > @@ -343,8 +342,9 @@ get_cfg_addr(struct rte_pci_device *dev, struct
> > > virtio_pci_cap *cap)
> > >  static int
> > >  virtio_read_caps(struct rte_pci_device *dev, struct virtio_crypto_hw
> *hw)
> > >  {
> > > -     uint8_t pos;
> > >       struct virtio_pci_cap cap;
> > > +     uint16_t flags;
> > > +     off_t pos;
> > >       int ret;
> > >
> > >       if (rte_pci_map_device(dev)) {
> > > @@ -352,44 +352,26 @@ virtio_read_caps(struct rte_pci_device *dev,
> struct
> > > virtio_crypto_hw *hw)
> > >               return -1;
> > >       }
> > >
> > > -     ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
> > > -     if (ret < 0) {
> > > -             VIRTIO_CRYPTO_INIT_LOG_DBG("failed to read pci
> capability
> > > list");
> > > -             return -1;
> > > +     /*
> > > +      * Transitional devices would also have this capability,
> > > +      * that's why we also check if msix is enabled.
> > > +      */
> > > +     pos = rte_pci_find_capability(dev, PCI_CAP_ID_MSIX);
> > > +     if (pos > 0 && rte_pci_read_config(dev, &flags, sizeof(flags),
> > > +                     pos + 2) == sizeof(flags)) {
> > > +             if (flags & PCI_MSIX_ENABLE)
> > > +                     hw->use_msix = VIRTIO_MSIX_ENABLED;
> > > +             else
> > > +                     hw->use_msix = VIRTIO_MSIX_DISABLED;
> > > +     } else {
> > > +             hw->use_msix = VIRTIO_MSIX_NONE;
> > >       }
> > >
> > > -     while (pos) {
> > > -             ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
> > > -             if (ret < 0) {
> > > -                     VIRTIO_CRYPTO_INIT_LOG_ERR(
> > > -                             "failed to read pci cap at pos: %x",
> pos);
> > > -                     break;
> > > -             }
> > > -
> > > -             if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
> > > -                     /* Transitional devices would also have this
> capability,
> > > -                      * that's why we also check if msix is enabled.
> > > -                      * 1st byte is cap ID; 2nd byte is the position
> of next
> > > -                      * cap; next two bytes are the flags.
> > > -                      */
> > > -                     uint16_t flags = ((uint16_t *)&cap)[1];
> > > -
> > > -                     if (flags & PCI_MSIX_ENABLE)
> > > -                             hw->use_msix = VIRTIO_MSIX_ENABLED;
> > > -                     else
> > > -                             hw->use_msix = VIRTIO_MSIX_DISABLED;
> > > -             }
> > > -
> > > -             if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
> > > -                     VIRTIO_CRYPTO_INIT_LOG_DBG(
> > > -                             "[%2x] skipping non VNDR cap id: %02x",
> > > -                             pos, cap.cap_vndr);
> > > -                     goto next;
> > > -             }
> > > -
> > > +     pos = rte_pci_find_capability(dev, PCI_CAP_ID_VNDR);
> >
> > The logic of vendor cap init seems incorrect. Virtio devices have
> multiple
> > Vendor cap (different cfg type). But now the logic seems to only init
> the first
> > one.
> 
> Indeed, good catch!
> It is sad that the CI did not catch this regression in virtio pmd init :-(.
> 
> I'll add a find_next_capability helper for v3.
> 
> 
> 
> >
> > > +     if (pos > 0 && rte_pci_read_config(dev, &cap, sizeof(cap), pos)
> ==
> > > sizeof(cap)) {
> > >               VIRTIO_CRYPTO_INIT_LOG_DBG(
> > >                       "[%2x] cfg type: %u, bar: %u, offset: %04x,
> len: %u",
> > > -                     pos, cap.cfg_type, cap.bar, cap.offset,
> cap.length);
> > > +                     (unsigned int)pos, cap.cfg_type, cap.bar,
> cap.offset,
> > > cap.length);
> > >
> > >               switch (cap.cfg_type) {
> > >               case VIRTIO_PCI_CAP_COMMON_CFG:
> > > @@ -411,9 +393,6 @@ virtio_read_caps(struct rte_pci_device *dev,
> struct
> > > virtio_crypto_hw *hw)
> > >                       hw->isr = get_cfg_addr(dev, &cap);
> > >                       break;
> > >               }
> > > -
> > > -next:
> > > -             pos = cap.cap_next;
> > >       }
> >
> > ...
> 
> 
> --
> David Marchand
  
David Marchand Sept. 19, 2023, 9 a.m. UTC | #4
On Tue, Sep 19, 2023 at 4:19 AM Xia, Chenbo <chenbo.xia@intel.com> wrote:
>
> Hi David,
>
> > -----Original Message-----
> > From: David Marchand <david.marchand@redhat.com>
> > Sent: Thursday, September 14, 2023 8:29 PM
> > To: Xia, Chenbo <chenbo.xia@intel.com>; Maxime Coquelin
> > <maxime.coquelin@redhat.com>
> > Cc: dev@dpdk.org; thomas@monjalon.net; ferruh.yigit@amd.com;
> > nipun.gupta@amd.com; Richardson, Bruce <bruce.richardson@intel.com>;
> > Burakov, Anatoly <anatoly.burakov@intel.com>; Jay Zhou
> > <jianjay.zhou@huawei.com>; McDaniel, Timothy <timothy.mcdaniel@intel.com>;
> > Julien Aube <julien_dpdk@jaube.fr>; Rahul Lakkireddy
> > <rahul.lakkireddy@chelsio.com>; Guo, Junfeng <junfeng.guo@intel.com>;
> > Jeroen de Borst <jeroendb@google.com>; Rushil Gupta <rushilg@google.com>;
> > Joshua Washington <joshwash@google.com>; Dongdong Liu
> > <liudongdong3@huawei.com>; Yisen Zhuang <yisen.zhuang@huawei.com>; Gaetan
> > Rivet <grive@u256.net>
> > Subject: Re: [PATCH v2 04/15] bus/pci: find PCI capability
> >
> > Hello Chenbo,
> >
> > On Thu, Sep 7, 2023 at 2:43 PM Xia, Chenbo <chenbo.xia@intel.com> wrote:
> > > > Introduce two helpers so that drivers stop reinventing the wheel when
> > it
> > > > comes to finding capabilities in a device PCI configuration space.
> > > > Use it in existing drivers.
> > > >
> > > > Note:
> > > > - base/ drivers code is left untouched, only some wrappers in cxgbe
> > > >   are touched,
> > > > - bnx2x maintained a per device cache of capabilities, this code has
> > been
> > > >   reworked to only cache the capabilities used in this driver,
> > > >
> > > > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > > > Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> > > > ---
> > > > Changes since v1:
> > > > - updated commitlog,
> > > > - separated VFIO changes for using standard PCI helper in a separate
> > > >   patch,
> > > > - marked new experimental symbols with current version,
> > > > - reordered defines in rte_pci.h,
> > > >
> > > > ---
> > > >  drivers/bus/pci/linux/pci_vfio.c   |  74 ++++--------------
> > > >  drivers/bus/pci/pci_common.c       |  45 +++++++++++
> > > >  drivers/bus/pci/rte_bus_pci.h      |  31 ++++++++
> > > >  drivers/bus/pci/version.map        |   4 +
> > > >  drivers/crypto/virtio/virtio_pci.c |  57 +++++---------
> > > >  drivers/event/dlb2/pf/dlb2_main.c  |  42 +---------
> > > >  drivers/net/bnx2x/bnx2x.c          |  41 +++++-----
> > > >  drivers/net/cxgbe/base/adapter.h   |  28 +------
> > > >  drivers/net/gve/gve_ethdev.c       |  46 ++---------
> > > >  drivers/net/gve/gve_ethdev.h       |   4 -
> > > >  drivers/net/hns3/hns3_ethdev_vf.c  |  79 +++----------------
> > > >  drivers/net/virtio/virtio_pci.c    | 121 +++++-----------------------
> > -
> > > >  lib/pci/rte_pci.h                  |  11 +++
> > > >  13 files changed, 186 insertions(+), 397 deletions(-)
> > > >
> > > > diff --git a/drivers/bus/pci/linux/pci_vfio.c
> > > > b/drivers/bus/pci/linux/pci_vfio.c
> > > > index 958f8b3b52..614ed5d696 100644
> > > > --- a/drivers/bus/pci/linux/pci_vfio.c
> > > > +++ b/drivers/bus/pci/linux/pci_vfio.c
> > > > @@ -110,74 +110,34 @@ static int
> > > >  pci_vfio_get_msix_bar(const struct rte_pci_device *dev,
> > > >       struct pci_msix_table *msix_table)
> > > >  {
> > > > -     int ret;
> > > > -     uint32_t reg;
> > > > -     uint16_t flags;
> > > > -     uint8_t cap_id, cap_offset;
> > > > +     off_t cap_offset;
> > > >
> > > > -     /* read PCI capability pointer from config space */
> > > > -     ret = rte_pci_read_config(dev, &reg, sizeof(reg),
> > > > PCI_CAPABILITY_LIST);
> > > > -     if (ret != sizeof(reg)) {
> > > > -             RTE_LOG(ERR, EAL,
> > > > -                     "Cannot read capability pointer from PCI config
> > > > space!\n");
> > > > +     cap_offset = rte_pci_find_capability(dev, PCI_CAP_ID_MSIX);
> > >
> > > I notice in some cases we use rte_pci_has_capability_list() to check
> > first,
> > > then looking for specific cap, in other cases we don't use
> > > rte_pci_has_capability_list(). Since we define this API, should we
> > always do
> > > the check?
> >
> > Hum, I am not sure of what you suggest here.
> >
> > I tried to do a 1:1 conversion of what existed.
> > Do you mean there are places where I missed converting some
> > rte_pci_read_config(PCI_CAPABILITY_LIST) to
> > rte_pci_has_capability_list()?
> > If so, could you point at them?
> >
> > Or are you suggesting to have this check as part of
> > rte_pci_find_capability() ?
>
> Your conversion is correct. I was saying about the usage of
> rte_pci_has_capability_list/rte_pci_find_capability, logically should we
> always call rte_pci_has_capability_list first before find capability?
>
> I don't have strong opinion on this. You can choose to keep the same or
> call rte_pci_has_capability_list more.

I prefer to keep it as is (the series is already big enough).
We can still add more checks in the future.

Thanks Chenbo.
  

Patch

diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index 958f8b3b52..614ed5d696 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -110,74 +110,34 @@  static int
 pci_vfio_get_msix_bar(const struct rte_pci_device *dev,
 	struct pci_msix_table *msix_table)
 {
-	int ret;
-	uint32_t reg;
-	uint16_t flags;
-	uint8_t cap_id, cap_offset;
+	off_t cap_offset;
 
-	/* read PCI capability pointer from config space */
-	ret = rte_pci_read_config(dev, &reg, sizeof(reg), PCI_CAPABILITY_LIST);
-	if (ret != sizeof(reg)) {
-		RTE_LOG(ERR, EAL,
-			"Cannot read capability pointer from PCI config space!\n");
+	cap_offset = rte_pci_find_capability(dev, PCI_CAP_ID_MSIX);
+	if (cap_offset < 0)
 		return -1;
-	}
 
-	/* we need first byte */
-	cap_offset = reg & 0xFF;
+	if (cap_offset != 0) {
+		uint16_t flags;
+		uint32_t reg;
 
-	while (cap_offset) {
-
-		/* read PCI capability ID */
-		ret = rte_pci_read_config(dev, &reg, sizeof(reg), cap_offset);
-		if (ret != sizeof(reg)) {
+		/* table offset resides in the next 4 bytes */
+		if (rte_pci_read_config(dev, &reg, sizeof(reg), cap_offset + 4) < 0) {
 			RTE_LOG(ERR, EAL,
-				"Cannot read capability ID from PCI config space!\n");
+				"Cannot read MSIX table from PCI config space!\n");
 			return -1;
 		}
 
-		/* we need first byte */
-		cap_id = reg & 0xFF;
-
-		/* if we haven't reached MSI-X, check next capability */
-		if (cap_id != PCI_CAP_ID_MSIX) {
-			ret = rte_pci_read_config(dev, &reg, sizeof(reg), cap_offset);
-			if (ret != sizeof(reg)) {
-				RTE_LOG(ERR, EAL,
-					"Cannot read capability pointer from PCI config space!\n");
-				return -1;
-			}
-
-			/* we need second byte */
-			cap_offset = (reg & 0xFF00) >> 8;
-
-			continue;
+		if (rte_pci_read_config(dev, &flags, sizeof(flags), cap_offset + 2) < 0) {
+			RTE_LOG(ERR, EAL,
+				"Cannot read MSIX flags from PCI config space!\n");
+			return -1;
 		}
-		/* else, read table offset */
-		else {
-			/* table offset resides in the next 4 bytes */
-			ret = rte_pci_read_config(dev, &reg, sizeof(reg), cap_offset + 4);
-			if (ret != sizeof(reg)) {
-				RTE_LOG(ERR, EAL,
-					"Cannot read table offset from PCI config space!\n");
-				return -1;
-			}
-
-			ret = rte_pci_read_config(dev, &flags, sizeof(flags), cap_offset + 2);
-			if (ret != sizeof(flags)) {
-				RTE_LOG(ERR, EAL,
-					"Cannot read table flags from PCI config space!\n");
-				return -1;
-			}
-
-			msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
-			msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
-			msix_table->size =
-				16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
 
-			return 0;
-		}
+		msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
+		msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
+		msix_table->size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
 	}
+
 	return 0;
 }
 
diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index 382b0b8946..52272617eb 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -813,6 +813,51 @@  rte_pci_get_iommu_class(void)
 	return iova_mode;
 }
 
+bool
+rte_pci_has_capability_list(const struct rte_pci_device *dev)
+{
+	uint16_t status;
+
+	if (rte_pci_read_config(dev, &status, sizeof(status), RTE_PCI_STATUS) != sizeof(status))
+		return false;
+
+	return (status & RTE_PCI_STATUS_CAP_LIST) != 0;
+}
+
+off_t
+rte_pci_find_capability(const struct rte_pci_device *dev, uint8_t cap)
+{
+	off_t offset;
+	uint8_t pos;
+	int ttl;
+
+	offset = RTE_PCI_CAPABILITY_LIST;
+	ttl = (RTE_PCI_CFG_SPACE_SIZE - RTE_PCI_STD_HEADER_SIZEOF) / RTE_PCI_CAP_SIZEOF;
+
+	if (rte_pci_read_config(dev, &pos, sizeof(pos), offset) < 0)
+		return -1;
+
+	while (pos && ttl--) {
+		uint16_t ent;
+		uint8_t id;
+
+		offset = pos;
+		if (rte_pci_read_config(dev, &ent, sizeof(ent), offset) < 0)
+			return -1;
+
+		id = ent & 0xff;
+		if (id == 0xff)
+			break;
+
+		if (id == cap)
+			return offset;
+
+		pos = (ent >> 8);
+	}
+
+	return 0;
+}
+
 off_t
 rte_pci_find_ext_capability(const struct rte_pci_device *dev, uint32_t cap)
 {
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index 75d0030eae..1ed33dbf3d 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -68,6 +68,37 @@  void rte_pci_unmap_device(struct rte_pci_device *dev);
  */
 void rte_pci_dump(FILE *f);
 
+/**
+ * Check whether this device has a PCI capability list.
+ *
+ *  @param dev
+ *    A pointer to rte_pci_device structure.
+ *
+ *  @return
+ *    true/false
+ */
+__rte_experimental
+bool rte_pci_has_capability_list(const struct rte_pci_device *dev);
+
+/**
+ * Find device's PCI capability.
+ *
+ *  @param dev
+ *    A pointer to rte_pci_device structure.
+ *
+ *  @param cap
+ *    Capability to be found, which can be any from
+ *    RTE_PCI_CAP_ID_*, defined in librte_pci.
+ *
+ *  @return
+ *  > 0: The offset of the next matching capability structure
+ *       within the device's PCI configuration space.
+ *  < 0: An error in PCI config space read.
+ *  = 0: Device does not support it.
+ */
+__rte_experimental
+off_t rte_pci_find_capability(const struct rte_pci_device *dev, uint8_t cap);
+
 /**
  * Find device's extended PCI capability.
  *
diff --git a/drivers/bus/pci/version.map b/drivers/bus/pci/version.map
index a0000f7938..2674f30235 100644
--- a/drivers/bus/pci/version.map
+++ b/drivers/bus/pci/version.map
@@ -25,6 +25,10 @@  EXPERIMENTAL {
 	# added in 23.07
 	rte_pci_mmio_read;
 	rte_pci_mmio_write;
+
+	# added in 23.11
+	rte_pci_find_capability;
+	rte_pci_has_capability_list;
 };
 
 INTERNAL {
diff --git a/drivers/crypto/virtio/virtio_pci.c b/drivers/crypto/virtio/virtio_pci.c
index 95a43c8801..abc52b4701 100644
--- a/drivers/crypto/virtio/virtio_pci.c
+++ b/drivers/crypto/virtio/virtio_pci.c
@@ -19,7 +19,6 @@ 
  * we can't simply include that header here, as there is no such
  * file for non-Linux platform.
  */
-#define PCI_CAPABILITY_LIST	0x34
 #define PCI_CAP_ID_VNDR		0x09
 #define PCI_CAP_ID_MSIX		0x11
 
@@ -343,8 +342,9 @@  get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
 static int
 virtio_read_caps(struct rte_pci_device *dev, struct virtio_crypto_hw *hw)
 {
-	uint8_t pos;
 	struct virtio_pci_cap cap;
+	uint16_t flags;
+	off_t pos;
 	int ret;
 
 	if (rte_pci_map_device(dev)) {
@@ -352,44 +352,26 @@  virtio_read_caps(struct rte_pci_device *dev, struct virtio_crypto_hw *hw)
 		return -1;
 	}
 
-	ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
-	if (ret < 0) {
-		VIRTIO_CRYPTO_INIT_LOG_DBG("failed to read pci capability list");
-		return -1;
+	/*
+	 * Transitional devices would also have this capability,
+	 * that's why we also check if msix is enabled.
+	 */
+	pos = rte_pci_find_capability(dev, PCI_CAP_ID_MSIX);
+	if (pos > 0 && rte_pci_read_config(dev, &flags, sizeof(flags),
+			pos + 2) == sizeof(flags)) {
+		if (flags & PCI_MSIX_ENABLE)
+			hw->use_msix = VIRTIO_MSIX_ENABLED;
+		else
+			hw->use_msix = VIRTIO_MSIX_DISABLED;
+	} else {
+		hw->use_msix = VIRTIO_MSIX_NONE;
 	}
 
-	while (pos) {
-		ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
-		if (ret < 0) {
-			VIRTIO_CRYPTO_INIT_LOG_ERR(
-				"failed to read pci cap at pos: %x", pos);
-			break;
-		}
-
-		if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
-			/* Transitional devices would also have this capability,
-			 * that's why we also check if msix is enabled.
-			 * 1st byte is cap ID; 2nd byte is the position of next
-			 * cap; next two bytes are the flags.
-			 */
-			uint16_t flags = ((uint16_t *)&cap)[1];
-
-			if (flags & PCI_MSIX_ENABLE)
-				hw->use_msix = VIRTIO_MSIX_ENABLED;
-			else
-				hw->use_msix = VIRTIO_MSIX_DISABLED;
-		}
-
-		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
-			VIRTIO_CRYPTO_INIT_LOG_DBG(
-				"[%2x] skipping non VNDR cap id: %02x",
-				pos, cap.cap_vndr);
-			goto next;
-		}
-
+	pos = rte_pci_find_capability(dev, PCI_CAP_ID_VNDR);
+	if (pos > 0 && rte_pci_read_config(dev, &cap, sizeof(cap), pos) == sizeof(cap)) {
 		VIRTIO_CRYPTO_INIT_LOG_DBG(
 			"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
-			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
+			(unsigned int)pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
 
 		switch (cap.cfg_type) {
 		case VIRTIO_PCI_CAP_COMMON_CFG:
@@ -411,9 +393,6 @@  virtio_read_caps(struct rte_pci_device *dev, struct virtio_crypto_hw *hw)
 			hw->isr = get_cfg_addr(dev, &cap);
 			break;
 		}
-
-next:
-		pos = cap.cap_next;
 	}
 
 	if (hw->common_cfg == NULL || hw->notify_base == NULL ||
diff --git a/drivers/event/dlb2/pf/dlb2_main.c b/drivers/event/dlb2/pf/dlb2_main.c
index 717aa4fc08..40e5cb594f 100644
--- a/drivers/event/dlb2/pf/dlb2_main.c
+++ b/drivers/event/dlb2/pf/dlb2_main.c
@@ -27,10 +27,6 @@ 
 #define NO_OWNER_VF 0	/* PF ONLY! */
 #define NOT_VF_REQ false /* PF ONLY! */
 
-#define DLB2_PCI_CAP_POINTER 0x34
-#define DLB2_PCI_CAP_NEXT(hdr) (((hdr) >> 8) & 0xFC)
-#define DLB2_PCI_CAP_ID(hdr) ((hdr) & 0xFF)
-
 #define DLB2_PCI_LNKCTL 16
 #define DLB2_PCI_SLTCTL 24
 #define DLB2_PCI_RTCTL 28
@@ -65,35 +61,6 @@ 
 #define DLB2_PCI_ACS_UF                  0x10
 #define DLB2_PCI_ACS_EC                  0x20
 
-static int dlb2_pci_find_capability(struct rte_pci_device *pdev, uint32_t id)
-{
-	uint8_t pos;
-	int ret;
-	uint16_t hdr;
-
-	ret = rte_pci_read_config(pdev, &pos, 1, DLB2_PCI_CAP_POINTER);
-	pos &= 0xFC;
-
-	if (ret != 1)
-		return -1;
-
-	while (pos > 0x3F) {
-		ret = rte_pci_read_config(pdev, &hdr, 2, pos);
-		if (ret != 2)
-			return -1;
-
-		if (DLB2_PCI_CAP_ID(hdr) == id)
-			return pos;
-
-		if (DLB2_PCI_CAP_ID(hdr) == 0xFF)
-			return -1;
-
-		pos = DLB2_PCI_CAP_NEXT(hdr);
-	}
-
-	return -1;
-}
-
 static int
 dlb2_pf_init_driver_state(struct dlb2_dev *dlb2_dev)
 {
@@ -258,9 +225,9 @@  dlb2_pf_reset(struct dlb2_dev *dlb2_dev)
 	uint32_t pri_reqs_dword;
 	uint16_t pri_ctrl_word;
 
-	int pcie_cap_offset;
+	off_t pcie_cap_offset;
 	int pri_cap_offset;
-	int msix_cap_offset;
+	off_t msix_cap_offset;
 	int err_cap_offset;
 	int acs_cap_offset;
 	int wait_count;
@@ -277,7 +244,7 @@  dlb2_pf_reset(struct dlb2_dev *dlb2_dev)
 			return ret;
 	}
 
-	pcie_cap_offset = dlb2_pci_find_capability(pdev, DLB2_PCI_CAP_ID_EXP);
+	pcie_cap_offset = rte_pci_find_capability(pdev, DLB2_PCI_CAP_ID_EXP);
 
 	if (pcie_cap_offset < 0) {
 		DLB2_LOG_ERR("[%s()] failed to find the pcie capability\n",
@@ -516,8 +483,7 @@  dlb2_pf_reset(struct dlb2_dev *dlb2_dev)
 		}
 	}
 
-	msix_cap_offset = dlb2_pci_find_capability(pdev,
-						   DLB2_PCI_CAP_ID_MSIX);
+	msix_cap_offset = rte_pci_find_capability(pdev, DLB2_PCI_CAP_ID_MSIX);
 	if (msix_cap_offset >= 0) {
 		off = msix_cap_offset + DLB2_PCI_MSIX_FLAGS;
 		if (rte_pci_read_config(pdev, &cmd, 2, off) == 2) {
diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index 29c16bb207..06f2949885 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -9586,14 +9586,17 @@  static void bnx2x_init_multi_cos(struct bnx2x_softc *sc)
 	}
 }
 
+static uint8_t bnx2x_pci_capabilities[] = {
+	PCIY_EXPRESS,
+	PCIY_PMG,
+	PCIY_MSI,
+	PCIY_MSIX,
+};
+
 static int bnx2x_pci_get_caps(struct bnx2x_softc *sc)
 {
-	struct {
-		uint8_t id;
-		uint8_t next;
-	} pci_cap;
-	uint16_t status;
 	struct bnx2x_pci_cap *cap;
+	unsigned int i;
 
 	cap = sc->pci_caps = rte_zmalloc("caps", sizeof(struct bnx2x_pci_cap),
 					 RTE_CACHE_LINE_SIZE);
@@ -9602,29 +9605,21 @@  static int bnx2x_pci_get_caps(struct bnx2x_softc *sc)
 		return -ENOMEM;
 	}
 
-#ifndef RTE_EXEC_ENV_FREEBSD
-	pci_read(sc, PCI_STATUS, &status, 2);
-	if (!(status & PCI_STATUS_CAP_LIST)) {
-#else
-	pci_read(sc, PCIR_STATUS, &status, 2);
-	if (!(status & PCIM_STATUS_CAPPRESENT)) {
-#endif
+	if (!rte_pci_has_capability_list(sc->pci_dev)) {
 		PMD_DRV_LOG(NOTICE, sc, "PCIe capability reading failed");
 		return -1;
 	}
 
-#ifndef RTE_EXEC_ENV_FREEBSD
-	pci_read(sc, PCI_CAPABILITY_LIST, &pci_cap.next, 1);
-#else
-	pci_read(sc, PCIR_CAP_PTR, &pci_cap.next, 1);
-#endif
-	while (pci_cap.next) {
-		cap->addr = pci_cap.next & ~3;
-		pci_read(sc, pci_cap.next & ~3, &pci_cap, 2);
-		if (pci_cap.id == 0xff)
-			break;
-		cap->id = pci_cap.id;
+	for (i = 0; i < RTE_DIM(bnx2x_pci_capabilities); i++) {
+		off_t pos = rte_pci_find_capability(sc->pci_dev,
+			bnx2x_pci_capabilities[i]);
+
+		if (pos <= 0)
+			continue;
+
+		cap->id = bnx2x_pci_capabilities[i];
 		cap->type = BNX2X_PCI_CAP;
+		cap->addr = pos;
 		cap->next = rte_zmalloc("pci_cap",
 					sizeof(struct bnx2x_pci_cap),
 					RTE_CACHE_LINE_SIZE);
diff --git a/drivers/net/cxgbe/base/adapter.h b/drivers/net/cxgbe/base/adapter.h
index 8f2ffa0eeb..00d7591ea4 100644
--- a/drivers/net/cxgbe/base/adapter.h
+++ b/drivers/net/cxgbe/base/adapter.h
@@ -511,13 +511,8 @@  static inline void t4_write_reg64(struct adapter *adapter, u32 reg_addr,
 	CXGBE_WRITE_REG64(adapter, reg_addr, val);
 }
 
-#define PCI_STATUS              0x06    /* 16 bits */
-#define PCI_STATUS_CAP_LIST     0x10    /* Support Capability List */
-#define PCI_CAPABILITY_LIST     0x34
 /* Offset of first capability list entry */
 #define PCI_CAP_ID_EXP          0x10    /* PCI Express */
-#define PCI_CAP_LIST_ID         0       /* Capability ID */
-#define PCI_CAP_LIST_NEXT       1       /* Next capability in the list */
 #define PCI_EXP_DEVCTL          0x0008  /* Device control */
 #define PCI_EXP_DEVCTL2         40      /* Device Control 2 */
 #define PCI_EXP_DEVCTL_EXT_TAG  0x0100  /* Extended Tag Field Enable */
@@ -620,31 +615,12 @@  static inline void t4_os_pci_read_cfg(struct adapter *adapter, size_t addr,
  */
 static inline int t4_os_find_pci_capability(struct adapter *adapter, int cap)
 {
-	u16 status;
-	int ttl = 48;
-	u8 pos = 0;
-	u8 id = 0;
-
-	t4_os_pci_read_cfg2(adapter, PCI_STATUS, &status);
-	if (!(status & PCI_STATUS_CAP_LIST)) {
+	if (!rte_pci_has_capability_list(adapter->pdev)) {
 		dev_err(adapter, "PCIe capability reading failed\n");
 		return -1;
 	}
 
-	t4_os_pci_read_cfg(adapter, PCI_CAPABILITY_LIST, &pos);
-	while (ttl-- && pos >= 0x40) {
-		pos &= ~3;
-		t4_os_pci_read_cfg(adapter, (pos + PCI_CAP_LIST_ID), &id);
-
-		if (id == 0xff)
-			break;
-
-		if (id == cap)
-			return (int)pos;
-
-		t4_os_pci_read_cfg(adapter, (pos + PCI_CAP_LIST_NEXT), &pos);
-	}
-	return 0;
+	return rte_pci_find_capability(adapter->pdev, cap);
 }
 
 /**
diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c
index aa75abe102..c276b9e68e 100644
--- a/drivers/net/gve/gve_ethdev.c
+++ b/drivers/net/gve/gve_ethdev.c
@@ -606,53 +606,17 @@  gve_teardown_device_resources(struct gve_priv *priv)
 	gve_clear_device_resources_ok(priv);
 }
 
-static uint8_t
-pci_dev_find_capability(struct rte_pci_device *pdev, int cap)
-{
-	uint8_t pos, id;
-	uint16_t ent;
-	int loops;
-	int ret;
-
-	ret = rte_pci_read_config(pdev, &pos, sizeof(pos), PCI_CAPABILITY_LIST);
-	if (ret != sizeof(pos))
-		return 0;
-
-	loops = (PCI_CFG_SPACE_SIZE - PCI_STD_HEADER_SIZEOF) / PCI_CAP_SIZEOF;
-
-	while (pos && loops--) {
-		ret = rte_pci_read_config(pdev, &ent, sizeof(ent), pos);
-		if (ret != sizeof(ent))
-			return 0;
-
-		id = ent & 0xff;
-		if (id == 0xff)
-			break;
-
-		if (id == cap)
-			return pos;
-
-		pos = (ent >> 8);
-	}
-
-	return 0;
-}
-
 static int
 pci_dev_msix_vec_count(struct rte_pci_device *pdev)
 {
-	uint8_t msix_cap = pci_dev_find_capability(pdev, PCI_CAP_ID_MSIX);
+	off_t msix_pos = rte_pci_find_capability(pdev, PCI_CAP_ID_MSIX);
 	uint16_t control;
-	int ret;
 
-	if (!msix_cap)
-		return 0;
-
-	ret = rte_pci_read_config(pdev, &control, sizeof(control), msix_cap + PCI_MSIX_FLAGS);
-	if (ret != sizeof(control))
-		return 0;
+	if (msix_pos > 0 && rte_pci_read_config(pdev, &control, sizeof(control),
+			msix_pos + PCI_MSIX_FLAGS) == sizeof(control))
+		return (control & PCI_MSIX_FLAGS_QSIZE) + 1;
 
-	return (control & PCI_MSIX_FLAGS_QSIZE) + 1;
+	return 0;
 }
 
 static int
diff --git a/drivers/net/gve/gve_ethdev.h b/drivers/net/gve/gve_ethdev.h
index c9bcfa553c..8759b1c76e 100644
--- a/drivers/net/gve/gve_ethdev.h
+++ b/drivers/net/gve/gve_ethdev.h
@@ -19,10 +19,6 @@ 
  * we can't simply include that header here, as there is no such
  * file for non-Linux platform.
  */
-#define PCI_CFG_SPACE_SIZE	256
-#define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
-#define PCI_STD_HEADER_SIZEOF	64
-#define PCI_CAP_SIZEOF		4
 #define PCI_CAP_ID_MSIX		0x11	/* MSI-X */
 #define PCI_MSIX_FLAGS		2	/* Message Control */
 #define PCI_MSIX_FLAGS_QSIZE	0x07FF	/* Table size */
diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c
index 7b3c5dc168..b731850b01 100644
--- a/drivers/net/hns3/hns3_ethdev_vf.c
+++ b/drivers/net/hns3/hns3_ethdev_vf.c
@@ -49,80 +49,24 @@  static int hns3vf_remove_mc_mac_addr(struct hns3_hw *hw,
 static int hns3vf_dev_link_update(struct rte_eth_dev *eth_dev,
 				   __rte_unused int wait_to_complete);
 
-/**
- * hns3vf_find_pci_capability - lookup a capability in the PCI capability list
- * @cap: the capability
- *
- * Return the address of the given capability within the PCI capability list.
- */
 static int
-hns3vf_find_pci_capability(const struct rte_pci_device *device, int cap)
+hns3vf_enable_msix(const struct rte_pci_device *device, bool op)
 {
-#define MAX_PCIE_CAPABILITY 48
-	uint16_t status;
-	uint8_t pos;
-	uint8_t id;
-	int ttl;
+	uint16_t control;
+	off_t pos;
 	int ret;
 
-	ret = rte_pci_read_config(device, &status, sizeof(status), PCI_STATUS);
-	if (ret < 0) {
-		PMD_INIT_LOG(ERR, "Failed to read PCI offset 0x%x", PCI_STATUS);
-		return 0;
-	}
-
-	if (!(status & PCI_STATUS_CAP_LIST))
-		return 0;
-
-	ttl = MAX_PCIE_CAPABILITY;
-	ret = rte_pci_read_config(device, &pos, sizeof(pos),
-				  PCI_CAPABILITY_LIST);
-	if (ret < 0) {
-		PMD_INIT_LOG(ERR, "Failed to read PCI offset 0x%x",
-			     PCI_CAPABILITY_LIST);
+	if (!rte_pci_has_capability_list(device)) {
+		PMD_INIT_LOG(ERR, "Failed to read PCI capability list");
 		return 0;
 	}
 
-	while (ttl-- && pos >= PCI_STD_HEADER_SIZEOF) {
-		ret = rte_pci_read_config(device, &id, sizeof(id),
-					  (pos + PCI_CAP_LIST_ID));
-		if (ret < 0) {
-			PMD_INIT_LOG(ERR, "Failed to read PCI offset 0x%x",
-				     (pos + PCI_CAP_LIST_ID));
-			break;
-		}
-
-		if (id == 0xFF)
-			break;
-
-		if (id == cap)
-			return (int)pos;
-
-		ret = rte_pci_read_config(device, &pos, sizeof(pos),
-					  (pos + PCI_CAP_LIST_NEXT));
-		if (ret < 0) {
-			PMD_INIT_LOG(ERR, "Failed to read PCI offset 0x%x",
-				     (pos + PCI_CAP_LIST_NEXT));
-			break;
-		}
-	}
-	return 0;
-}
-
-static int
-hns3vf_enable_msix(const struct rte_pci_device *device, bool op)
-{
-	uint16_t control;
-	int pos;
-	int ret;
-
-	pos = hns3vf_find_pci_capability(device, PCI_CAP_ID_MSIX);
-	if (pos) {
+	pos = rte_pci_find_capability(device, PCI_CAP_ID_MSIX);
+	if (pos > 0) {
 		ret = rte_pci_read_config(device, &control, sizeof(control),
-					  (pos + PCI_MSIX_FLAGS));
+			pos + PCI_MSIX_FLAGS);
 		if (ret < 0) {
-			PMD_INIT_LOG(ERR, "Failed to read PCI offset 0x%x",
-				     (pos + PCI_MSIX_FLAGS));
+			PMD_INIT_LOG(ERR, "Failed to read MSIX flags");
 			return -ENXIO;
 		}
 
@@ -131,10 +75,9 @@  hns3vf_enable_msix(const struct rte_pci_device *device, bool op)
 		else
 			control &= ~PCI_MSIX_FLAGS_ENABLE;
 		ret = rte_pci_write_config(device, &control, sizeof(control),
-					   (pos + PCI_MSIX_FLAGS));
+			pos + PCI_MSIX_FLAGS);
 		if (ret < 0) {
-			PMD_INIT_LOG(ERR, "failed to write PCI offset 0x%x",
-				     (pos + PCI_MSIX_FLAGS));
+			PMD_INIT_LOG(ERR, "failed to write MSIX flags");
 			return -ENXIO;
 		}
 
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 29eb739b04..9fd9db3e03 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -20,7 +20,6 @@ 
  * we can't simply include that header here, as there is no such
  * file for non-Linux platform.
  */
-#define PCI_CAPABILITY_LIST	0x34
 #define PCI_CAP_ID_VNDR		0x09
 #define PCI_CAP_ID_MSIX		0x11
 
@@ -38,46 +37,16 @@  struct virtio_pci_internal virtio_pci_internal[RTE_MAX_ETHPORTS];
 static enum virtio_msix_status
 vtpci_msix_detect(struct rte_pci_device *dev)
 {
-	uint8_t pos;
-	int ret;
+	uint16_t flags;
+	off_t pos;
 
-	ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
-	if (ret != 1) {
-		PMD_INIT_LOG(DEBUG,
-			     "failed to read pci capability list, ret %d", ret);
-		return VIRTIO_MSIX_NONE;
-	}
-
-	while (pos) {
-		uint8_t cap[2];
-
-		ret = rte_pci_read_config(dev, cap, sizeof(cap), pos);
-		if (ret != sizeof(cap)) {
-			PMD_INIT_LOG(DEBUG,
-				     "failed to read pci cap at pos: %x ret %d",
-				     pos, ret);
-			break;
-		}
-
-		if (cap[0] == PCI_CAP_ID_MSIX) {
-			uint16_t flags;
-
-			ret = rte_pci_read_config(dev, &flags, sizeof(flags),
-					pos + sizeof(cap));
-			if (ret != sizeof(flags)) {
-				PMD_INIT_LOG(DEBUG,
-					     "failed to read pci cap at pos:"
-					     " %x ret %d", pos + 2, ret);
-				break;
-			}
-
-			if (flags & PCI_MSIX_ENABLE)
-				return VIRTIO_MSIX_ENABLED;
-			else
-				return VIRTIO_MSIX_DISABLED;
-		}
-
-		pos = cap[1];
+	pos = rte_pci_find_capability(dev, PCI_CAP_ID_MSIX);
+	if (pos > 0 && rte_pci_read_config(dev, &flags, sizeof(flags),
+			pos + 2) == sizeof(flags)) {
+		if (flags & PCI_MSIX_ENABLE)
+			return VIRTIO_MSIX_ENABLED;
+		else
+			return VIRTIO_MSIX_DISABLED;
 	}
 
 	return VIRTIO_MSIX_NONE;
@@ -623,8 +592,8 @@  static int
 virtio_read_caps(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
 {
 	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
-	uint8_t pos;
 	struct virtio_pci_cap cap;
+	off_t pos;
 	int ret;
 
 	if (rte_pci_map_device(pci_dev)) {
@@ -632,72 +601,25 @@  virtio_read_caps(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
 		return -1;
 	}
 
-	ret = rte_pci_read_config(pci_dev, &pos, 1, PCI_CAPABILITY_LIST);
-	if (ret != 1) {
-		PMD_INIT_LOG(DEBUG,
-			     "failed to read pci capability list, ret %d", ret);
-		return -1;
-	}
-
-	while (pos) {
-		ret = rte_pci_read_config(pci_dev, &cap, 2, pos);
-		if (ret != 2) {
-			PMD_INIT_LOG(DEBUG,
-				     "failed to read pci cap at pos: %x ret %d",
-				     pos, ret);
-			break;
-		}
-
-		if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
-			/* Transitional devices would also have this capability,
-			 * that's why we also check if msix is enabled.
-			 * 1st byte is cap ID; 2nd byte is the position of next
-			 * cap; next two bytes are the flags.
-			 */
-			uint16_t flags;
-
-			ret = rte_pci_read_config(pci_dev, &flags, sizeof(flags),
-					pos + 2);
-			if (ret != sizeof(flags)) {
-				PMD_INIT_LOG(DEBUG,
-					     "failed to read pci cap at pos:"
-					     " %x ret %d", pos + 2, ret);
-				break;
-			}
-
-			if (flags & PCI_MSIX_ENABLE)
-				dev->msix_status = VIRTIO_MSIX_ENABLED;
-			else
-				dev->msix_status = VIRTIO_MSIX_DISABLED;
-		}
-
-		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
-			PMD_INIT_LOG(DEBUG,
-				"[%2x] skipping non VNDR cap id: %02x",
-				pos, cap.cap_vndr);
-			goto next;
-		}
-
-		ret = rte_pci_read_config(pci_dev, &cap, sizeof(cap), pos);
-		if (ret != sizeof(cap)) {
-			PMD_INIT_LOG(DEBUG,
-				     "failed to read pci cap at pos: %x ret %d",
-				     pos, ret);
-			break;
-		}
+	/*
+	 * Transitional devices would also have this capability,
+	 * that's why we also check if msix is enabled.
+	 */
+	dev->msix_status = vtpci_msix_detect(pci_dev);
 
+	pos = rte_pci_find_capability(pci_dev, PCI_CAP_ID_VNDR);
+	if (pos > 0 && rte_pci_read_config(pci_dev, &cap, sizeof(cap), pos) == sizeof(cap)) {
 		PMD_INIT_LOG(DEBUG,
 			"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
-			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
+			(unsigned int)pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
 
 		switch (cap.cfg_type) {
 		case VIRTIO_PCI_CAP_COMMON_CFG:
 			dev->common_cfg = get_cfg_addr(pci_dev, &cap);
 			break;
 		case VIRTIO_PCI_CAP_NOTIFY_CFG:
-			ret = rte_pci_read_config(pci_dev,
-					&dev->notify_off_multiplier,
-					4, pos + sizeof(cap));
+			ret = rte_pci_read_config(pci_dev, &dev->notify_off_multiplier,
+				4, pos + sizeof(cap));
 			if (ret != 4)
 				PMD_INIT_LOG(DEBUG,
 					"failed to read notify_off_multiplier, ret %d",
@@ -712,9 +634,6 @@  virtio_read_caps(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
 			dev->isr = get_cfg_addr(pci_dev, &cap);
 			break;
 		}
-
-next:
-		pos = cap.cap_next;
 	}
 
 	if (dev->common_cfg == NULL || dev->notify_base == NULL ||
diff --git a/lib/pci/rte_pci.h b/lib/pci/rte_pci.h
index aab761b918..49fd5b1d02 100644
--- a/lib/pci/rte_pci.h
+++ b/lib/pci/rte_pci.h
@@ -28,13 +28,24 @@  extern "C" {
 #define RTE_PCI_CFG_SPACE_SIZE		256
 #define RTE_PCI_CFG_SPACE_EXP_SIZE	4096
 
+#define RTE_PCI_STD_HEADER_SIZEOF	64
+
+/* Standard register offsets in the PCI configuration space */
 #define RTE_PCI_VENDOR_ID	0x00	/* 16 bits */
 #define RTE_PCI_DEVICE_ID	0x02	/* 16 bits */
 #define RTE_PCI_COMMAND		0x04	/* 16 bits */
+#define RTE_PCI_STATUS		0x06	/* 16 bits */
+#define RTE_PCI_CAPABILITY_LIST	0x34	/* 32 bits */
 
 /* PCI Command Register */
 #define RTE_PCI_COMMAND_MASTER	0x4	/* Bus Master Enable */
 
+/* PCI Status Register (RTE_PCI_STATUS) */
+#define RTE_PCI_STATUS_CAP_LIST		0x10	/* Support Capability List */
+
+/* Capability registers (RTE_PCI_CAPABILITY_LIST) */
+#define RTE_PCI_CAP_SIZEOF		4
+
 /* PCI Express capability registers */
 #define RTE_PCI_EXP_DEVCTL	8	/* Device Control */