[v2,3/3] bus/pci: only consider usable devices to select IOVA mode

Message ID 1560505157-9769-4-git-send-email-david.marchand@redhat.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Headers
Series Improve automatic selection of IOVA mode |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

David Marchand June 14, 2019, 9:39 a.m. UTC
  From: Ben Walker <benjamin.walker@intel.com>

When selecting the preferred IOVA mode of the pci bus, the current
heuristic ("are devices bound?", "are devices bound to UIO?", "are pmd
drivers supporting IOVA as VA?", etc.) should honor the device
white/blacklist so that an unwanted device does not impact the decision.

There is no reason to consider a device which has no driver available.

This applies to all OSes, so implement this in common code, then call an
OS-specific callback.

On Linux side:
- the VFIO special considerations should be evaluated only if VFIO
  support is built,
- there is no strong requirement on using VA rather than PA if a driver
  supports VA, so default to DC in such a case.

Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 drivers/bus/pci/bsd/pci.c    |   9 +-
 drivers/bus/pci/linux/pci.c  | 191 ++++++++++++-------------------------------
 drivers/bus/pci/pci_common.c |  65 +++++++++++++++
 drivers/bus/pci/private.h    |   8 ++
 4 files changed, 131 insertions(+), 142 deletions(-)
  

Comments

Burakov, Anatoly July 3, 2019, 10:45 a.m. UTC | #1
On 14-Jun-19 10:39 AM, David Marchand wrote:
> From: Ben Walker <benjamin.walker@intel.com>
> 
> When selecting the preferred IOVA mode of the pci bus, the current
> heuristic ("are devices bound?", "are devices bound to UIO?", "are pmd
> drivers supporting IOVA as VA?" etc..) should honor the device
> white/blacklist so that an unwanted device does not impact the decision.
> 
> There is no reason to consider a device which has no driver available.
> 
> This applies to all OS, so implements this in common code then call a
> OS specific callback.
> 
> On Linux side:
> - the VFIO special considerations should be evaluated only if VFIO
>    support is built,
> - there is no strong requirement on using VA rather than PA if a driver
>    supports VA, so defaulting to DC in such a case.
> 
> Signed-off-by: Ben Walker <benjamin.walker@intel.com>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---

<snip>

> +		     const struct rte_pci_device *pdev)
>   {
> -	struct rte_pci_device *dev = NULL;
> -	struct rte_pci_driver *drv = NULL;
> +	enum rte_iova_mode iova_mode = RTE_IOVA_DC;
> +	static int iommu_no_va = -1;
>   
> -	FOREACH_DRIVER_ON_PCIBUS(drv) {
> -		FOREACH_DEVICE_ON_PCIBUS(dev) {
> -			if (!rte_pci_match(drv, dev))
> -				continue;
> -			/*
> -			 * just one PCI device needs to be checked out because
> -			 * the IOMMU hardware is the same for all of them.
> -			 */
> -			return pci_one_device_iommu_support_va(dev);
> +	switch (pdev->kdrv) {
> +	case RTE_KDRV_VFIO: {
> +#ifdef VFIO_PRESENT
> +		static int is_vfio_noiommu_enabled = -1;
> +
> +		if (is_vfio_noiommu_enabled == -1) {
> +			if (rte_vfio_noiommu_is_enabled() == 1)
> +				is_vfio_noiommu_enabled = 1;
> +			else
> +				is_vfio_noiommu_enabled = 0;
> +		}
> +		if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0) {
> +			iova_mode = RTE_IOVA_PA;
> +		} else if (is_vfio_noiommu_enabled != 0) {
> +			RTE_LOG(DEBUG, EAL, "Forcing to 'PA', vfio-noiommu mode configured\n");
> +			iova_mode = RTE_IOVA_PA;
>   		}
> +#endif
> +		break;

I'm not too well-versed in bus code, so please excuse my ignorance of 
this codebase.

It seems that we would be ignoring drv_flags in case VFIO wasn't 
compiled - if the driver has no RTE_PCI_DRV_IOVA_AS_VA flag, i'm pretty 
sure we can set IOVA mode to PA without caring about VFIO at all. I 
think it would be better to have something like this:

if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0) {
	iova_mode = RTE_IOVA_PA;
	break; // early exit
}
#ifdef VFIO_PRESENT
static int is_vfio_noiommu_enabled = -1;

if (is_vfio_noiommu_enabled == -1) {
	if (rte_vfio_noiommu_is_enabled() == 1)
		is_vfio_noiommu_enabled = 1;
	else
		is_vfio_noiommu_enabled = 0;
}
if (is_vfio_noiommu_enabled != 0) {
	iova_mode = RTE_IOVA_PA;
}
#endif
break;


In fact, could we not check if devices support both flags, and do an 
early exit in case they don't?

Something like this, for example:

if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0) {
	return RTE_IOVA_PA; // early exit - device only wants PA
}
// device supports both PA and VA modes, so do more checks
switch (pdev->kdrv) {
...
}

Unless i'm missing something, this would look much simpler and easier to 
understand.
  
David Marchand July 4, 2019, 9:18 a.m. UTC | #2
On Wed, Jul 3, 2019 at 12:45 PM Burakov, Anatoly <anatoly.burakov@intel.com>
wrote:

> On 14-Jun-19 10:39 AM, David Marchand wrote:
> > From: Ben Walker <benjamin.walker@intel.com>
> >
> > When selecting the preferred IOVA mode of the pci bus, the current
> > heuristic ("are devices bound?", "are devices bound to UIO?", "are pmd
> > drivers supporting IOVA as VA?" etc..) should honor the device
> > white/blacklist so that an unwanted device does not impact the decision.
> >
> > There is no reason to consider a device which has no driver available.
> >
> > This applies to all OS, so implements this in common code then call a
> > OS specific callback.
> >
> > On Linux side:
> > - the VFIO special considerations should be evaluated only if VFIO
> >    support is built,
> > - there is no strong requirement on using VA rather than PA if a driver
> >    supports VA, so defaulting to DC in such a case.
> >
> > Signed-off-by: Ben Walker <benjamin.walker@intel.com>
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > ---
>
> <snip>
>
> > +                  const struct rte_pci_device *pdev)
> >   {
> > -     struct rte_pci_device *dev = NULL;
> > -     struct rte_pci_driver *drv = NULL;
> > +     enum rte_iova_mode iova_mode = RTE_IOVA_DC;
> > +     static int iommu_no_va = -1;
> >
> > -     FOREACH_DRIVER_ON_PCIBUS(drv) {
> > -             FOREACH_DEVICE_ON_PCIBUS(dev) {
> > -                     if (!rte_pci_match(drv, dev))
> > -                             continue;
> > -                     /*
> > -                      * just one PCI device needs to be checked out
> because
> > -                      * the IOMMU hardware is the same for all of them.
> > -                      */
> > -                     return pci_one_device_iommu_support_va(dev);
> > +     switch (pdev->kdrv) {
> > +     case RTE_KDRV_VFIO: {
> > +#ifdef VFIO_PRESENT
> > +             static int is_vfio_noiommu_enabled = -1;
> > +
> > +             if (is_vfio_noiommu_enabled == -1) {
> > +                     if (rte_vfio_noiommu_is_enabled() == 1)
> > +                             is_vfio_noiommu_enabled = 1;
> > +                     else
> > +                             is_vfio_noiommu_enabled = 0;
> > +             }
> > +             if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0) {
> > +                     iova_mode = RTE_IOVA_PA;
> > +             } else if (is_vfio_noiommu_enabled != 0) {
> > +                     RTE_LOG(DEBUG, EAL, "Forcing to 'PA', vfio-noiommu
> mode configured\n");
> > +                     iova_mode = RTE_IOVA_PA;
> >               }
> > +#endif
> > +             break;
>
> I'm not too well-versed in bus code, so please excuse my ignorance of
> this codebase.
>
> It seems that we would be ignoring drv_flags in case VFIO wasn't
> compiled - if the driver has no RTE_PCI_DRV_IOVA_AS_VA flag, i'm pretty
> sure we can set IOVA mode to PA without caring about VFIO at all. I
> think it would be better to have something like this:
>
> if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0) {
>         iova_mode = RTE_IOVA_PA;
>         break; // early exit
> }
>

If the device is bound to VFIO, but the dpdk binary has no vfio support, we
don't need to consider this device in the decision.
Did I miss something in what you suggest?
  
Burakov, Anatoly July 4, 2019, 10:43 a.m. UTC | #3
On 04-Jul-19 10:18 AM, David Marchand wrote:
> 
> 
> On Wed, Jul 3, 2019 at 12:45 PM Burakov, Anatoly 
> <anatoly.burakov@intel.com <mailto:anatoly.burakov@intel.com>> wrote:
> 
>     On 14-Jun-19 10:39 AM, David Marchand wrote:
>      > From: Ben Walker <benjamin.walker@intel.com
>     <mailto:benjamin.walker@intel.com>>
>      >
>      > When selecting the preferred IOVA mode of the pci bus, the current
>      > heuristic ("are devices bound?", "are devices bound to UIO?",
>     "are pmd
>      > drivers supporting IOVA as VA?" etc..) should honor the device
>      > white/blacklist so that an unwanted device does not impact the
>     decision.
>      >
>      > There is no reason to consider a device which has no driver
>     available.
>      >
>      > This applies to all OS, so implements this in common code then call a
>      > OS specific callback.
>      >
>      > On Linux side:
>      > - the VFIO special considerations should be evaluated only if VFIO
>      >    support is built,
>      > - there is no strong requirement on using VA rather than PA if a
>     driver
>      >    supports VA, so defaulting to DC in such a case.
>      >
>      > Signed-off-by: Ben Walker <benjamin.walker@intel.com
>     <mailto:benjamin.walker@intel.com>>
>      > Signed-off-by: David Marchand <david.marchand@redhat.com
>     <mailto:david.marchand@redhat.com>>
>      > ---
> 
>     <snip>
> 
>      > +                  const struct rte_pci_device *pdev)
>      >   {
>      > -     struct rte_pci_device *dev = NULL;
>      > -     struct rte_pci_driver *drv = NULL;
>      > +     enum rte_iova_mode iova_mode = RTE_IOVA_DC;
>      > +     static int iommu_no_va = -1;
>      >
>      > -     FOREACH_DRIVER_ON_PCIBUS(drv) {
>      > -             FOREACH_DEVICE_ON_PCIBUS(dev) {
>      > -                     if (!rte_pci_match(drv, dev))
>      > -                             continue;
>      > -                     /*
>      > -                      * just one PCI device needs to be checked
>     out because
>      > -                      * the IOMMU hardware is the same for all
>     of them.
>      > -                      */
>      > -                     return pci_one_device_iommu_support_va(dev);
>      > +     switch (pdev->kdrv) {
>      > +     case RTE_KDRV_VFIO: {
>      > +#ifdef VFIO_PRESENT
>      > +             static int is_vfio_noiommu_enabled = -1;
>      > +
>      > +             if (is_vfio_noiommu_enabled == -1) {
>      > +                     if (rte_vfio_noiommu_is_enabled() == 1)
>      > +                             is_vfio_noiommu_enabled = 1;
>      > +                     else
>      > +                             is_vfio_noiommu_enabled = 0;
>      > +             }
>      > +             if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0) {
>      > +                     iova_mode = RTE_IOVA_PA;
>      > +             } else if (is_vfio_noiommu_enabled != 0) {
>      > +                     RTE_LOG(DEBUG, EAL, "Forcing to 'PA',
>     vfio-noiommu mode configured\n");
>      > +                     iova_mode = RTE_IOVA_PA;
>      >               }
>      > +#endif
>      > +             break;
> 
>     I'm not too well-versed in bus code, so please excuse my ignorance of
>     this codebase.
> 
>     It seems that we would be ignoring drv_flags in case VFIO wasn't
>     compiled - if the driver has no RTE_PCI_DRV_IOVA_AS_VA flag, i'm pretty
>     sure we can set IOVA mode to PA without caring about VFIO at all. I
>     think it would be better to have something like this:
> 
>     if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0) {
>              iova_mode = RTE_IOVA_PA;
>              break; // early exit
>     }
> 
> 
> If the device is bound to VFIO, but the dpdk binary has no vfio support, 
> we don't need to consider this device in the decision.
> Did I miss something in what you suggest?
> 

Yep, you're correct :)

Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com>

> 
> -- 
> David Marchand
  
David Marchand July 4, 2019, 10:47 a.m. UTC | #4
On Thu, Jul 4, 2019 at 12:44 PM Burakov, Anatoly <anatoly.burakov@intel.com>
wrote:

> On 04-Jul-19 10:18 AM, David Marchand wrote:
> >
> >
> > On Wed, Jul 3, 2019 at 12:45 PM Burakov, Anatoly
> > <anatoly.burakov@intel.com <mailto:anatoly.burakov@intel.com>> wrote:
> >
> >     On 14-Jun-19 10:39 AM, David Marchand wrote:
> >      > From: Ben Walker <benjamin.walker@intel.com
> >     <mailto:benjamin.walker@intel.com>>
> >      >
> >      > When selecting the preferred IOVA mode of the pci bus, the current
> >      > heuristic ("are devices bound?", "are devices bound to UIO?",
> >     "are pmd
> >      > drivers supporting IOVA as VA?" etc..) should honor the device
> >      > white/blacklist so that an unwanted device does not impact the
> >     decision.
> >      >
> >      > There is no reason to consider a device which has no driver
> >     available.
> >      >
> >      > This applies to all OS, so implements this in common code then
> call a
> >      > OS specific callback.
> >      >
> >      > On Linux side:
> >      > - the VFIO special considerations should be evaluated only if VFIO
> >      >    support is built,
> >      > - there is no strong requirement on using VA rather than PA if a
> >     driver
> >      >    supports VA, so defaulting to DC in such a case.
> >      >
> >      > Signed-off-by: Ben Walker <benjamin.walker@intel.com
> >     <mailto:benjamin.walker@intel.com>>
> >      > Signed-off-by: David Marchand <david.marchand@redhat.com
> >     <mailto:david.marchand@redhat.com>>
> >      > ---
> >
> >     <snip>
> >
> >      > +                  const struct rte_pci_device *pdev)
> >      >   {
> >      > -     struct rte_pci_device *dev = NULL;
> >      > -     struct rte_pci_driver *drv = NULL;
> >      > +     enum rte_iova_mode iova_mode = RTE_IOVA_DC;
> >      > +     static int iommu_no_va = -1;
> >      >
> >      > -     FOREACH_DRIVER_ON_PCIBUS(drv) {
> >      > -             FOREACH_DEVICE_ON_PCIBUS(dev) {
> >      > -                     if (!rte_pci_match(drv, dev))
> >      > -                             continue;
> >      > -                     /*
> >      > -                      * just one PCI device needs to be checked
> >     out because
> >      > -                      * the IOMMU hardware is the same for all
> >     of them.
> >      > -                      */
> >      > -                     return pci_one_device_iommu_support_va(dev);
> >      > +     switch (pdev->kdrv) {
> >      > +     case RTE_KDRV_VFIO: {
> >      > +#ifdef VFIO_PRESENT
> >      > +             static int is_vfio_noiommu_enabled = -1;
> >      > +
> >      > +             if (is_vfio_noiommu_enabled == -1) {
> >      > +                     if (rte_vfio_noiommu_is_enabled() == 1)
> >      > +                             is_vfio_noiommu_enabled = 1;
> >      > +                     else
> >      > +                             is_vfio_noiommu_enabled = 0;
> >      > +             }
> >      > +             if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) ==
> 0) {
> >      > +                     iova_mode = RTE_IOVA_PA;
> >      > +             } else if (is_vfio_noiommu_enabled != 0) {
> >      > +                     RTE_LOG(DEBUG, EAL, "Forcing to 'PA',
> >     vfio-noiommu mode configured\n");
> >      > +                     iova_mode = RTE_IOVA_PA;
> >      >               }
> >      > +#endif
> >      > +             break;
> >
> >     I'm not too well-versed in bus code, so please excuse my ignorance of
> >     this codebase.
> >
> >     It seems that we would be ignoring drv_flags in case VFIO wasn't
> >     compiled - if the driver has no RTE_PCI_DRV_IOVA_AS_VA flag, i'm
> pretty
> >     sure we can set IOVA mode to PA without caring about VFIO at all. I
> >     think it would be better to have something like this:
> >
> >     if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0) {
> >              iova_mode = RTE_IOVA_PA;
> >              break; // early exit
> >     }
> >
> >
> > If the device is bound to VFIO, but the dpdk binary has no vfio support,
> > we don't need to consider this device in the decision.
> > Did I miss something in what you suggest?
> >
>
> Yep, you're correct :)
>
> Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com>
>

Cool, thanks Anatoly!
  
Stephen Hemminger July 4, 2019, 5:14 p.m. UTC | #5
On Fri, 14 Jun 2019 11:39:17 +0200
David Marchand <david.marchand@redhat.com> wrote:

>  	/* Supports only RTE_KDRV_NIC_UIO */
> +	if (pdev->kdrv != RTE_KDRV_NIC_UIO)
> +		RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? Defaulting to IOVA as 'PA'\n");

Maybe NOTICE level, rather than DEBUG which is usually suppressed.

> +		} else if (is_vfio_noiommu_enabled != 0) {
> +			RTE_LOG(DEBUG, EAL, "Forcing to 'PA', vfio-noiommu mode configured\n");
ditto
  
David Marchand July 5, 2019, 7:58 a.m. UTC | #6
On Thu, Jul 4, 2019 at 7:14 PM Stephen Hemminger <stephen@networkplumber.org>
wrote:

> On Fri, 14 Jun 2019 11:39:17 +0200
> David Marchand <david.marchand@redhat.com> wrote:
>
> >       /* Supports only RTE_KDRV_NIC_UIO */
> > +     if (pdev->kdrv != RTE_KDRV_NIC_UIO)
> > +             RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? Defaulting
> to IOVA as 'PA'\n");
>
> Maybe NOTICE level, rather than DEBUG which is usually suppressed.
>

What do you mean by "suppressed" ?

We had a hardwired log level some time ago, but it has been removed and all
logs are in the binaries now.
https://git.dpdk.org/dpdk/commit/?id=5d8f0baf69ea


> > +             } else if (is_vfio_noiommu_enabled != 0) {
> > +                     RTE_LOG(DEBUG, EAL, "Forcing to 'PA', vfio-noiommu
> mode configured\n");
> ditto
>
>
  
Thomas Monjalon July 5, 2019, 8:26 a.m. UTC | #7
04/07/2019 19:14, Stephen Hemminger:
> On Fri, 14 Jun 2019 11:39:17 +0200
> David Marchand <david.marchand@redhat.com> wrote:
> 
> >  	/* Supports only RTE_KDRV_NIC_UIO */
> > +	if (pdev->kdrv != RTE_KDRV_NIC_UIO)
> > +		RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? Defaulting to IOVA as 'PA'\n");
> 
> Maybe NOTICE level, rather than DEBUG which is usually suppressed.

DEBUG may be enough here, as it is not something unexpected.
The IOVA choice will be printed in INFO level:
       RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
               rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
  
Stephen Hemminger July 5, 2019, 4:27 p.m. UTC | #8
On Fri, 5 Jul 2019 09:58:44 +0200
David Marchand <david.marchand@redhat.com> wrote:

> On Thu, Jul 4, 2019 at 7:14 PM Stephen Hemminger <stephen@networkplumber.org>
> wrote:
> 
> > On Fri, 14 Jun 2019 11:39:17 +0200
> > David Marchand <david.marchand@redhat.com> wrote:
> >  
> > >       /* Supports only RTE_KDRV_NIC_UIO */
> > > +     if (pdev->kdrv != RTE_KDRV_NIC_UIO)
> > > +             RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? Defaulting  
> > to IOVA as 'PA'\n");
> >
> > Maybe NOTICE level, rather than DEBUG which is usually suppressed.
> >  
> 
> What do you mean by "suppressed" ?
> 
> We had a hardwired log level some time ago, but it has been removed and all
> logs are in the binaries now.
> https://git.dpdk.org/dpdk/commit/?id=5d8f0baf69ea

Unless user increases the log level, the default is to not print debug logs.
  

Patch

diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c
index c7b90cb..a2de709 100644
--- a/drivers/bus/pci/bsd/pci.c
+++ b/drivers/bus/pci/bsd/pci.c
@@ -376,13 +376,14 @@ 
 	return -1;
 }
 
-/*
- * Get iommu class of PCI devices on the bus.
- */
 enum rte_iova_mode
-rte_pci_get_iommu_class(void)
+pci_device_iova_mode(const struct rte_pci_driver *pdrv __rte_unused,
+		     const struct rte_pci_device *pdev)
 {
 	/* Supports only RTE_KDRV_NIC_UIO */
+	if (pdev->kdrv != RTE_KDRV_NIC_UIO)
+		RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? Defaulting to IOVA as 'PA'\n");
+
 	return RTE_IOVA_PA;
 }
 
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index b931cf9..33c8ea7 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -500,95 +500,14 @@ 
 	return -1;
 }
 
-/*
- * Is pci device bound to any kdrv
- */
-static inline int
-pci_one_device_is_bound(void)
-{
-	struct rte_pci_device *dev = NULL;
-	int ret = 0;
-
-	FOREACH_DEVICE_ON_PCIBUS(dev) {
-		if (dev->kdrv == RTE_KDRV_UNKNOWN ||
-		    dev->kdrv == RTE_KDRV_NONE) {
-			continue;
-		} else {
-			ret = 1;
-			break;
-		}
-	}
-	return ret;
-}
-
-/*
- * Any one of the device bound to uio
- */
-static inline int
-pci_one_device_bound_uio(void)
-{
-	struct rte_pci_device *dev = NULL;
-	struct rte_devargs *devargs;
-	int need_check;
-
-	FOREACH_DEVICE_ON_PCIBUS(dev) {
-		devargs = dev->device.devargs;
-
-		need_check = 0;
-		switch (rte_pci_bus.bus.conf.scan_mode) {
-		case RTE_BUS_SCAN_WHITELIST:
-			if (devargs && devargs->policy == RTE_DEV_WHITELISTED)
-				need_check = 1;
-			break;
-		case RTE_BUS_SCAN_UNDEFINED:
-		case RTE_BUS_SCAN_BLACKLIST:
-			if (devargs == NULL ||
-			    devargs->policy != RTE_DEV_BLACKLISTED)
-				need_check = 1;
-			break;
-		}
-
-		if (!need_check)
-			continue;
-
-		if (dev->kdrv == RTE_KDRV_IGB_UIO ||
-		   dev->kdrv == RTE_KDRV_UIO_GENERIC) {
-			return 1;
-		}
-	}
-	return 0;
-}
-
-/*
- * Any one of the device has iova as va
- */
-static inline int
-pci_one_device_has_iova_va(void)
-{
-	struct rte_pci_device *dev = NULL;
-	struct rte_pci_driver *drv = NULL;
-
-	FOREACH_DRIVER_ON_PCIBUS(drv) {
-		if (drv && drv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) {
-			FOREACH_DEVICE_ON_PCIBUS(dev) {
-				if ((dev->kdrv == RTE_KDRV_VFIO ||
-				     dev->kdrv == RTE_KDRV_NIC_MLX) &&
-				    rte_pci_match(drv, dev))
-					return 1;
-			}
-		}
-	}
-	return 0;
-}
-
 #if defined(RTE_ARCH_X86)
 static bool
-pci_one_device_iommu_support_va(struct rte_pci_device *dev)
+pci_one_device_iommu_support_va(const struct rte_pci_device *dev)
 {
 #define VTD_CAP_MGAW_SHIFT	16
 #define VTD_CAP_MGAW_MASK	(0x3fULL << VTD_CAP_MGAW_SHIFT)
 #define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
-	struct rte_pci_addr *addr = &dev->addr;
+	const struct rte_pci_addr *addr = &dev->addr;
 	char filename[PATH_MAX];
 	FILE *fp;
 	uint64_t mgaw, vtd_cap_reg = 0;
@@ -632,80 +551,76 @@ 
 }
 #elif defined(RTE_ARCH_PPC_64)
 static bool
-pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev)
+pci_one_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev)
 {
 	return false;
 }
 #else
 static bool
-pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev)
+pci_one_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev)
 {
 	return true;
 }
 #endif
 
-/*
- * All devices IOMMUs support VA as IOVA
- */
-static bool
-pci_devices_iommu_support_va(void)
+enum rte_iova_mode
+pci_device_iova_mode(const struct rte_pci_driver *pdrv,
+		     const struct rte_pci_device *pdev)
 {
-	struct rte_pci_device *dev = NULL;
-	struct rte_pci_driver *drv = NULL;
+	enum rte_iova_mode iova_mode = RTE_IOVA_DC;
+	static int iommu_no_va = -1;
 
-	FOREACH_DRIVER_ON_PCIBUS(drv) {
-		FOREACH_DEVICE_ON_PCIBUS(dev) {
-			if (!rte_pci_match(drv, dev))
-				continue;
-			/*
-			 * just one PCI device needs to be checked out because
-			 * the IOMMU hardware is the same for all of them.
-			 */
-			return pci_one_device_iommu_support_va(dev);
+	switch (pdev->kdrv) {
+	case RTE_KDRV_VFIO: {
+#ifdef VFIO_PRESENT
+		static int is_vfio_noiommu_enabled = -1;
+
+		if (is_vfio_noiommu_enabled == -1) {
+			if (rte_vfio_noiommu_is_enabled() == 1)
+				is_vfio_noiommu_enabled = 1;
+			else
+				is_vfio_noiommu_enabled = 0;
+		}
+		if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0) {
+			iova_mode = RTE_IOVA_PA;
+		} else if (is_vfio_noiommu_enabled != 0) {
+			RTE_LOG(DEBUG, EAL, "Forcing to 'PA', vfio-noiommu mode configured\n");
+			iova_mode = RTE_IOVA_PA;
 		}
+#endif
+		break;
 	}
-	return true;
-}
 
-/*
- * Get iommu class of PCI devices on the bus.
- */
-enum rte_iova_mode
-rte_pci_get_iommu_class(void)
-{
-	bool is_bound;
-	bool is_vfio_noiommu_enabled = true;
-	bool has_iova_va;
-	bool is_bound_uio;
-	bool iommu_no_va;
-
-	is_bound = pci_one_device_is_bound();
-	if (!is_bound)
-		return RTE_IOVA_DC;
-
-	has_iova_va = pci_one_device_has_iova_va();
-	is_bound_uio = pci_one_device_bound_uio();
-	iommu_no_va = !pci_devices_iommu_support_va();
-#ifdef VFIO_PRESENT
-	is_vfio_noiommu_enabled = rte_vfio_noiommu_is_enabled() == true ?
-					true : false;
-#endif
+	case RTE_KDRV_NIC_MLX:
+		if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0)
+			iova_mode = RTE_IOVA_PA;
+		break;
 
-	if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled &&
-			!iommu_no_va)
-		return RTE_IOVA_VA;
+	case RTE_KDRV_IGB_UIO:
+	case RTE_KDRV_UIO_GENERIC:
+		iova_mode = RTE_IOVA_PA;
+		break;
 
-	if (has_iova_va) {
-		RTE_LOG(WARNING, EAL, "Some devices want iova as va but pa will be used because.. ");
-		if (is_vfio_noiommu_enabled)
-			RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
-		if (is_bound_uio)
-			RTE_LOG(WARNING, EAL, "few device bound to UIO\n");
-		if (iommu_no_va)
-			RTE_LOG(WARNING, EAL, "IOMMU does not support IOVA as VA\n");
+	default:
+		RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? Defaulting to IOVA as 'PA'\n");
+		iova_mode = RTE_IOVA_PA;
+		break;
 	}
 
-	return RTE_IOVA_PA;
+	if (iova_mode != RTE_IOVA_PA) {
+		/*
+		 * We can check this only once, because the IOMMU hardware is
+		 * the same for all of them.
+		 */
+		if (iommu_no_va == -1)
+			iommu_no_va = pci_one_device_iommu_support_va(pdev)
+					? 0 : 1;
+		if (iommu_no_va != 0) {
+			RTE_LOG(DEBUG, EAL, "Forcing to 'PA', IOMMU does not support IOVA as 'VA'\n");
+			iova_mode = RTE_IOVA_PA;
+		}
+	}
+	return iova_mode;
 }
 
 /* Read PCI config space. */
diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index 704b9d7..d2af472 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -574,6 +574,71 @@  static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
 	return -1;
 }
 
+static bool
+pci_ignore_device(const struct rte_pci_device *dev)
+{
+	struct rte_devargs *devargs = dev->device.devargs;
+
+	switch (rte_pci_bus.bus.conf.scan_mode) {
+	case RTE_BUS_SCAN_WHITELIST:
+		if (devargs && devargs->policy == RTE_DEV_WHITELISTED)
+			return false;
+		break;
+	case RTE_BUS_SCAN_UNDEFINED:
+	case RTE_BUS_SCAN_BLACKLIST:
+		if (devargs == NULL ||
+		    devargs->policy != RTE_DEV_BLACKLISTED)
+			return false;
+		break;
+	}
+	return true;
+}
+
+enum rte_iova_mode
+rte_pci_get_iommu_class(void)
+{
+	enum rte_iova_mode iova_mode = RTE_IOVA_DC;
+	const struct rte_pci_device *dev;
+	const struct rte_pci_driver *drv;
+	bool devices_want_va = false;
+	bool devices_want_pa = false;
+
+	FOREACH_DEVICE_ON_PCIBUS(dev) {
+		if (pci_ignore_device(dev))
+			continue;
+		if (dev->kdrv == RTE_KDRV_UNKNOWN ||
+		    dev->kdrv == RTE_KDRV_NONE)
+			continue;
+		FOREACH_DRIVER_ON_PCIBUS(drv) {
+			enum rte_iova_mode dev_iova_mode;
+
+			if (!rte_pci_match(drv, dev))
+				continue;
+
+			dev_iova_mode = pci_device_iova_mode(drv, dev);
+			RTE_LOG(DEBUG, EAL, "PCI driver %s for device "
+				PCI_PRI_FMT " wants IOVA as '%s'\n",
+				drv->driver.name,
+				dev->addr.domain, dev->addr.bus,
+				dev->addr.devid, dev->addr.function,
+				dev_iova_mode == RTE_IOVA_DC ? "DC" :
+				(dev_iova_mode == RTE_IOVA_PA ? "PA" : "VA"));
+			if (dev_iova_mode == RTE_IOVA_PA)
+				devices_want_pa = true;
+			else if (dev_iova_mode == RTE_IOVA_VA)
+				devices_want_va = true;
+		}
+	}
+	if (devices_want_pa) {
+		iova_mode = RTE_IOVA_PA;
+		if (devices_want_va)
+			RTE_LOG(WARNING, EAL, "Some devices want 'VA' but forcing 'PA' because other devices want it\n");
+	} else if (devices_want_va) {
+		iova_mode = RTE_IOVA_VA;
+	}
+	return iova_mode;
+}
+
 struct rte_pci_bus rte_pci_bus = {
 	.bus = {
 		.scan = rte_pci_scan,
diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h
index 13c3324..8a55240 100644
--- a/drivers/bus/pci/private.h
+++ b/drivers/bus/pci/private.h
@@ -173,6 +173,14 @@  int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
 	      const struct rte_pci_device *pci_dev);
 
 /**
+ * OS specific callback for rte_pci_get_iommu_class
+ *
+ */
+enum rte_iova_mode
+pci_device_iova_mode(const struct rte_pci_driver *pci_drv,
+		     const struct rte_pci_device *pci_dev);
+
+/**
  * Get iommu class of PCI devices on the bus.
  * And return their preferred iova mapping mode.
  *