[dpdk-dev,v3,04/11] linuxapp/eal_pci: get iommu class

Message ID 20170711061631.5018-5-santosh.shukla@caviumnetworks.com (mailing list archive)
State Superseded, archived
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Santosh Shukla July 11, 2017, 6:16 a.m. UTC
  Get the iommu class of the PCI devices on the bus and return the preferred
iova mapping mode for that bus.

Algorithm for iova scheme selection for PCI bus:
0. Look for a device attached to vfio kdrv whose driver has .drv_flags set
to RTE_PCI_DRV_NEED_IOVA_VA.
1. Look for any device attached to UIO class of driver.
2. Check for vfio-noiommu mode enabled.

If 1) and 2) are both false and 0) is true, then select
iova=va as the mapping scheme. Otherwise use the default
mapping scheme (iova_pa).

Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
v1 --> v2:
- Removed Linux version check in vfio_noiommu func. Refer [1].
- Extending autodetection logic for _iommu_class.
Refer [2].

[1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html
[2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html

 lib/librte_eal/linuxapp/eal/eal_pci.c           | 66 +++++++++++++++++++++++++
 lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++++
 lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
 4 files changed, 90 insertions(+)
  

Comments

Maxime Coquelin July 11, 2017, 9:23 a.m. UTC | #1
On 07/11/2017 08:16 AM, Santosh Shukla wrote:
> Get iommu class of PCI device on the bus and returns preferred iova
> mapping mode for that bus.
> 
> Algorithm for iova scheme selection for PCI bus:
> 0. Look for device attached to vfio kdrv and has .drv_flag set
> to RTE_PCI_DRV_NEED_IOVA_VA.
> 1. Look for any device attached to UIO class of driver.
> 2. Check for vfio-noiommu mode enabled.
> 
> If 1) & 2) is false and 0) is true then select
> mapping scheme as iova=va. Otherwise use default
> mapping scheme (iova_pa).
> 
> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> ---
> v1 --> v2:
> - Removed Linux version check in vfio_noiommu func. Refer [1].
> - Extending autodetction logic for _iommu_class.
> Refer [2].
> 
> [1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html
> [2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html
> 
>   lib/librte_eal/linuxapp/eal/eal_pci.c           | 66 +++++++++++++++++++++++++
>   lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++++
>   lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
>   lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
>   4 files changed, 90 insertions(+)
> 
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
> index 7d9e1a99b..573caa000 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
> @@ -45,6 +45,7 @@
>   #include "eal_filesystem.h"
>   #include "eal_private.h"
>   #include "eal_pci_init.h"
> +#include "eal_vfio.h"
>   
>   /**
>    * @file
> @@ -488,6 +489,71 @@ rte_pci_scan(void)
>   	return -1;
>   }
>   
> +/*
> + * Any one of the device bound to uio
> + */
> +static inline int
> +pci_device_bound_uio(void)
> +{
> +	struct rte_pci_device *dev = NULL;
> +
> +	FOREACH_DEVICE_ON_PCIBUS(dev) {
> +		if (dev->kdrv == RTE_KDRV_IGB_UIO ||
> +		   dev->kdrv == RTE_KDRV_UIO_GENERIC) {
> +			return 1;
> +		}
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Any one of the device has iova as va
> + */
> +static inline int
> +pci_device_has_iova_va(void)
> +{
> +	struct rte_pci_device *dev = NULL;
> +	struct rte_pci_driver *drv = NULL;
> +
> +	FOREACH_DRIVER_ON_PCIBUS(drv) {
> +		if (drv && drv->drv_flags & RTE_PCI_DRV_NEED_IOVA_VA) {
> +			FOREACH_DEVICE_ON_PCIBUS(dev) {
> +				if (dev->kdrv == RTE_KDRV_VFIO &&
> +				    rte_pci_match(drv, dev))
> +					return 1;
> +			}
> +		}
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Get iommu class of PCI devices on the bus.
> + */
> +enum rte_iova_mode
> +rte_pci_get_iommu_class(void)
> +{
> +	bool is_vfio_noiommu_enabled;
> +	bool has_iova_va;
> +	bool is_bound_uio;
> +
> +	has_iova_va = pci_device_has_iova_va();
> +	is_bound_uio = pci_device_bound_uio();
> +	is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 : 0;
> +
> +	if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
> +		return RTE_IOVA_VA;
> +
> +	if (has_iova_va) {
> +		if (is_vfio_noiommu_enabled)
> +			RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
> +		if (is_bound_uio)
> +			RTE_LOG(WARNING, EAL, "Some device attached to UIO\n");

It may be worth having a more verbose warning for users not familiar with the
feature, e.g. stating that some devices want VA but PA will be used
because of vfio-noiommu or UIO.

Maxime
  
Santosh Shukla July 11, 2017, 10:43 a.m. UTC | #2
On Tuesday 11 July 2017 02:53 PM, Maxime Coquelin wrote:

>
> On 07/11/2017 08:16 AM, Santosh Shukla wrote:
>> Get iommu class of PCI device on the bus and returns preferred iova
>> mapping mode for that bus.
>>
>> Algorithm for iova scheme selection for PCI bus:
>> 0. Look for device attached to vfio kdrv and has .drv_flag set
>> to RTE_PCI_DRV_NEED_IOVA_VA.
>> 1. Look for any device attached to UIO class of driver.
>> 2. Check for vfio-noiommu mode enabled.
>>
>> If 1) & 2) is false and 0) is true then select
>> mapping scheme as iova=va. Otherwise use default
>> mapping scheme (iova_pa).
>>
>> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
>> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
>> ---
>> v1 --> v2:
>> - Removed Linux version check in vfio_noiommu func. Refer [1].
>> - Extending autodetction logic for _iommu_class.
>> Refer [2].
>>
>> [1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html
>> [2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html
>>
>>   lib/librte_eal/linuxapp/eal/eal_pci.c           | 66 +++++++++++++++++++++++++
>>   lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++++
>>   lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
>>   lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
>>   4 files changed, 90 insertions(+)
>>
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
>> index 7d9e1a99b..573caa000 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>> @@ -45,6 +45,7 @@
>>   #include "eal_filesystem.h"
>>   #include "eal_private.h"
>>   #include "eal_pci_init.h"
>> +#include "eal_vfio.h"
>>     /**
>>    * @file
>> @@ -488,6 +489,71 @@ rte_pci_scan(void)
>>       return -1;
>>   }
>>   +/*
>> + * Any one of the device bound to uio
>> + */
>> +static inline int
>> +pci_device_bound_uio(void)
>> +{
>> +    struct rte_pci_device *dev = NULL;
>> +
>> +    FOREACH_DEVICE_ON_PCIBUS(dev) {
>> +        if (dev->kdrv == RTE_KDRV_IGB_UIO ||
>> +           dev->kdrv == RTE_KDRV_UIO_GENERIC) {
>> +            return 1;
>> +        }
>> +    }
>> +    return 0;
>> +}
>> +
>> +/*
>> + * Any one of the device has iova as va
>> + */
>> +static inline int
>> +pci_device_has_iova_va(void)
>> +{
>> +    struct rte_pci_device *dev = NULL;
>> +    struct rte_pci_driver *drv = NULL;
>> +
>> +    FOREACH_DRIVER_ON_PCIBUS(drv) {
>> +        if (drv && drv->drv_flags & RTE_PCI_DRV_NEED_IOVA_VA) {
>> +            FOREACH_DEVICE_ON_PCIBUS(dev) {
>> +                if (dev->kdrv == RTE_KDRV_VFIO &&
>> +                    rte_pci_match(drv, dev))
>> +                    return 1;
>> +            }
>> +        }
>> +    }
>> +    return 0;
>> +}
>> +
>> +/*
>> + * Get iommu class of PCI devices on the bus.
>> + */
>> +enum rte_iova_mode
>> +rte_pci_get_iommu_class(void)
>> +{
>> +    bool is_vfio_noiommu_enabled;
>> +    bool has_iova_va;
>> +    bool is_bound_uio;
>> +
>> +    has_iova_va = pci_device_has_iova_va();
>> +    is_bound_uio = pci_device_bound_uio();
>> +    is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 : 0;
>> +
>> +    if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
>> +        return RTE_IOVA_VA;
>> +
>> +    if (has_iova_va) {
>> +        if (is_vfio_noiommu_enabled)
>> +            RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
>> +        if (is_bound_uio)
>> +            RTE_LOG(WARNING, EAL, "Some device attached to UIO\n");
>
> Maybe worth having more verbose warning for user not familiar with the
> feature. Like stating that some devices want VA but PA will be used
> because of vfio-noiommu or UIO.
>
Yes. in v4.
Thanks.

> Maxime
  
Sergio Gonzalez Monroy July 12, 2017, 8:20 a.m. UTC | #3
On 11/07/2017 07:16, Santosh Shukla wrote:
> Get iommu class of PCI device on the bus and returns preferred iova
> mapping mode for that bus.
>
> Algorithm for iova scheme selection for PCI bus:
> 0. Look for device attached to vfio kdrv and has .drv_flag set
> to RTE_PCI_DRV_NEED_IOVA_VA.
> 1. Look for any device attached to UIO class of driver.
> 2. Check for vfio-noiommu mode enabled.
>
> If 1) & 2) is false and 0) is true then select
> mapping scheme as iova=va. Otherwise use default
> mapping scheme (iova_pa).
>
> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> ---
> v1 --> v2:
> - Removed Linux version check in vfio_noiommu func. Refer [1].
> - Extending autodetction logic for _iommu_class.
> Refer [2].
>
> [1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html
> [2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html

Just wondering how it all works with device hotplug.
Correct me if I am wrong but if EAL decides to use IOVA_AS_VA scheme,
if we were to attach a device that needs IOVA_AS_PA, it will not work 
and should fail to attach, right?

Thanks,
Sergio

>   lib/librte_eal/linuxapp/eal/eal_pci.c           | 66 +++++++++++++++++++++++++
>   lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++++
>   lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
>   lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
>   4 files changed, 90 insertions(+)
>
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
> index 7d9e1a99b..573caa000 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
> @@ -45,6 +45,7 @@
>   #include "eal_filesystem.h"
>   #include "eal_private.h"
>   #include "eal_pci_init.h"
> +#include "eal_vfio.h"
>   
>   /**
>    * @file
> @@ -488,6 +489,71 @@ rte_pci_scan(void)
>   	return -1;
>   }
>   
> +/*
> + * Any one of the device bound to uio
> + */
> +static inline int
> +pci_device_bound_uio(void)
> +{
> +	struct rte_pci_device *dev = NULL;
> +
> +	FOREACH_DEVICE_ON_PCIBUS(dev) {
> +		if (dev->kdrv == RTE_KDRV_IGB_UIO ||
> +		   dev->kdrv == RTE_KDRV_UIO_GENERIC) {
> +			return 1;
> +		}
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Any one of the device has iova as va
> + */
> +static inline int
> +pci_device_has_iova_va(void)
> +{
> +	struct rte_pci_device *dev = NULL;
> +	struct rte_pci_driver *drv = NULL;
> +
> +	FOREACH_DRIVER_ON_PCIBUS(drv) {
> +		if (drv && drv->drv_flags & RTE_PCI_DRV_NEED_IOVA_VA) {
> +			FOREACH_DEVICE_ON_PCIBUS(dev) {
> +				if (dev->kdrv == RTE_KDRV_VFIO &&
> +				    rte_pci_match(drv, dev))
> +					return 1;
> +			}
> +		}
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Get iommu class of PCI devices on the bus.
> + */
> +enum rte_iova_mode
> +rte_pci_get_iommu_class(void)
> +{
> +	bool is_vfio_noiommu_enabled;
> +	bool has_iova_va;
> +	bool is_bound_uio;
> +
> +	has_iova_va = pci_device_has_iova_va();
> +	is_bound_uio = pci_device_bound_uio();
> +	is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 : 0;
> +
> +	if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
> +		return RTE_IOVA_VA;
> +
> +	if (has_iova_va) {
> +		if (is_vfio_noiommu_enabled)
> +			RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
> +		if (is_bound_uio)
> +			RTE_LOG(WARNING, EAL, "Some device attached to UIO\n");
> +	}
> +
> +	return RTE_IOVA_PA;
> +}
> +
>   /* Read PCI config space. */
>   int rte_pci_read_config(const struct rte_pci_device *device,
>   		void *buf, size_t len, off_t offset)
> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
> index 946df7e31..c8a97b7e7 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
> @@ -816,4 +816,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
>   	return 0;
>   }
>   
> +int
> +vfio_noiommu_is_enabled(void)
> +{
> +	int fd, ret, cnt __rte_unused;
> +	char c;
> +
> +	ret = -1;
> +	fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
> +	if (fd < 0)
> +		return -1;
> +
> +	cnt = read(fd, &c, 1);
> +	if (c == 'Y')
> +		ret = 1;
> +
> +	close(fd);
> +	return ret;
> +}
> +
>   #endif
> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
> index 5ff63e5d7..26ea8e119 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
> @@ -150,6 +150,8 @@ struct vfio_config {
>   #define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
>   #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
>   #define VFIO_GET_REGION_IDX(x) (x >> 40)
> +#define VFIO_NOIOMMU_MODE      \
> +	"/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
>   
>   /* DMA mapping function prototype.
>    * Takes VFIO container fd as a parameter.
> @@ -210,6 +212,8 @@ int pci_vfio_is_enabled(void);
>   
>   int vfio_mp_sync_setup(void);
>   
> +int vfio_noiommu_is_enabled(void);
> +
>   #define SOCKET_REQ_CONTAINER 0x100
>   #define SOCKET_REQ_GROUP 0x200
>   #define SOCKET_CLR_GROUP 0x300
> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> index c91dd44c4..044f89c7c 100644
> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> @@ -206,6 +206,7 @@ DPDK_17.08 {
>   	rte_bus_find_by_device;
>   	rte_bus_find_by_name;
>   	rte_pci_match;
> +	rte_pci_get_iommu_class;
>   
>   } DPDK_17.05;
>
  
Santosh Shukla July 13, 2017, 8:23 a.m. UTC | #4
Hi Sergio,

On Wednesday 12 July 2017 01:50 PM, Sergio Gonzalez Monroy wrote:

> On 11/07/2017 07:16, Santosh Shukla wrote:
>> Get iommu class of PCI device on the bus and returns preferred iova
>> mapping mode for that bus.
>>
>> Algorithm for iova scheme selection for PCI bus:
>> 0. Look for device attached to vfio kdrv and has .drv_flag set
>> to RTE_PCI_DRV_NEED_IOVA_VA.
>> 1. Look for any device attached to UIO class of driver.
>> 2. Check for vfio-noiommu mode enabled.
>>
>> If 1) & 2) is false and 0) is true then select
>> mapping scheme as iova=va. Otherwise use default
>> mapping scheme (iova_pa).
>>
>> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
>> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
>> ---
>> v1 --> v2:
>> - Removed Linux version check in vfio_noiommu func. Refer [1].
>> - Extending autodetction logic for _iommu_class.
>> Refer [2].
>>
>> [1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html
>> [2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html
>
> Just wondering how it all works with device hotplug.
> Correct me if I am wrong but if EAL decides to use IOVA_AS_VA scheme,
> if we were to attach a device that needs IOVA_AS_PA, it will not work and should fail to attach, right?
>
It will work for the igb_uio case, and won't work for the vfio-noiommu hotplug case (invalid case).

Yes, we can dictate iova awareness to hotplug/unplug area.

> Thanks,
> Sergio
>
>>   lib/librte_eal/linuxapp/eal/eal_pci.c           | 66 +++++++++++++++++++++++++
>>   lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++++
>>   lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
>>   lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
>>   4 files changed, 90 insertions(+)
>>
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
>> index 7d9e1a99b..573caa000 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>> @@ -45,6 +45,7 @@
>>   #include "eal_filesystem.h"
>>   #include "eal_private.h"
>>   #include "eal_pci_init.h"
>> +#include "eal_vfio.h"
>>     /**
>>    * @file
>> @@ -488,6 +489,71 @@ rte_pci_scan(void)
>>       return -1;
>>   }
>>   +/*
>> + * Any one of the device bound to uio
>> + */
>> +static inline int
>> +pci_device_bound_uio(void)
>> +{
>> +    struct rte_pci_device *dev = NULL;
>> +
>> +    FOREACH_DEVICE_ON_PCIBUS(dev) {
>> +        if (dev->kdrv == RTE_KDRV_IGB_UIO ||
>> +           dev->kdrv == RTE_KDRV_UIO_GENERIC) {
>> +            return 1;
>> +        }
>> +    }
>> +    return 0;
>> +}
>> +
>> +/*
>> + * Any one of the device has iova as va
>> + */
>> +static inline int
>> +pci_device_has_iova_va(void)
>> +{
>> +    struct rte_pci_device *dev = NULL;
>> +    struct rte_pci_driver *drv = NULL;
>> +
>> +    FOREACH_DRIVER_ON_PCIBUS(drv) {
>> +        if (drv && drv->drv_flags & RTE_PCI_DRV_NEED_IOVA_VA) {
>> +            FOREACH_DEVICE_ON_PCIBUS(dev) {
>> +                if (dev->kdrv == RTE_KDRV_VFIO &&
>> +                    rte_pci_match(drv, dev))
>> +                    return 1;
>> +            }
>> +        }
>> +    }
>> +    return 0;
>> +}
>> +
>> +/*
>> + * Get iommu class of PCI devices on the bus.
>> + */
>> +enum rte_iova_mode
>> +rte_pci_get_iommu_class(void)
>> +{
>> +    bool is_vfio_noiommu_enabled;
>> +    bool has_iova_va;
>> +    bool is_bound_uio;
>> +
>> +    has_iova_va = pci_device_has_iova_va();
>> +    is_bound_uio = pci_device_bound_uio();
>> +    is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 : 0;
>> +
>> +    if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
>> +        return RTE_IOVA_VA;
>> +
>> +    if (has_iova_va) {
>> +        if (is_vfio_noiommu_enabled)
>> +            RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
>> +        if (is_bound_uio)
>> +            RTE_LOG(WARNING, EAL, "Some device attached to UIO\n");
>> +    }
>> +
>> +    return RTE_IOVA_PA;
>> +}
>> +
>>   /* Read PCI config space. */
>>   int rte_pci_read_config(const struct rte_pci_device *device,
>>           void *buf, size_t len, off_t offset)
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
>> index 946df7e31..c8a97b7e7 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
>> @@ -816,4 +816,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
>>       return 0;
>>   }
>>   +int
>> +vfio_noiommu_is_enabled(void)
>> +{
>> +    int fd, ret, cnt __rte_unused;
>> +    char c;
>> +
>> +    ret = -1;
>> +    fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
>> +    if (fd < 0)
>> +        return -1;
>> +
>> +    cnt = read(fd, &c, 1);
>> +    if (c == 'Y')
>> +        ret = 1;
>> +
>> +    close(fd);
>> +    return ret;
>> +}
>> +
>>   #endif
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
>> index 5ff63e5d7..26ea8e119 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
>> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
>> @@ -150,6 +150,8 @@ struct vfio_config {
>>   #define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
>>   #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
>>   #define VFIO_GET_REGION_IDX(x) (x >> 40)
>> +#define VFIO_NOIOMMU_MODE      \
>> +    "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
>>     /* DMA mapping function prototype.
>>    * Takes VFIO container fd as a parameter.
>> @@ -210,6 +212,8 @@ int pci_vfio_is_enabled(void);
>>     int vfio_mp_sync_setup(void);
>>   +int vfio_noiommu_is_enabled(void);
>> +
>>   #define SOCKET_REQ_CONTAINER 0x100
>>   #define SOCKET_REQ_GROUP 0x200
>>   #define SOCKET_CLR_GROUP 0x300
>> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>> index c91dd44c4..044f89c7c 100644
>> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>> @@ -206,6 +206,7 @@ DPDK_17.08 {
>>       rte_bus_find_by_device;
>>       rte_bus_find_by_name;
>>       rte_pci_match;
>> +    rte_pci_get_iommu_class;
>>     } DPDK_17.05;
>>   
>
>
  
Hemant Agrawal July 14, 2017, 7:39 a.m. UTC | #5
On 7/11/2017 11:46 AM, Santosh Shukla wrote:
> Get iommu class of PCI device on the bus and returns preferred iova
> mapping mode for that bus.
>
> Algorithm for iova scheme selection for PCI bus:
> 0. Look for device attached to vfio kdrv and has .drv_flag set
> to RTE_PCI_DRV_NEED_IOVA_VA.
> 1. Look for any device attached to UIO class of driver.
> 2. Check for vfio-noiommu mode enabled.
>
> If 1) & 2) is false and 0) is true then select
> mapping scheme as iova=va. Otherwise use default
> mapping scheme (iova_pa).
>
> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> ---
> v1 --> v2:
> - Removed Linux version check in vfio_noiommu func. Refer [1].
> - Extending autodetction logic for _iommu_class.
> Refer [2].
>
> [1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html
> [2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html
>
>  lib/librte_eal/linuxapp/eal/eal_pci.c           | 66 +++++++++++++++++++++++++
>  lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++++
>  lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
>  4 files changed, 90 insertions(+)
>
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
> index 7d9e1a99b..573caa000 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
> @@ -45,6 +45,7 @@
>  #include "eal_filesystem.h"
>  #include "eal_private.h"
>  #include "eal_pci_init.h"
> +#include "eal_vfio.h"
>
>  /**
>   * @file
> @@ -488,6 +489,71 @@ rte_pci_scan(void)
>  	return -1;
>  }
>
> +/*
> + * Any one of the device bound to uio
> + */
> +static inline int
> +pci_device_bound_uio(void)
> +{
> +	struct rte_pci_device *dev = NULL;
> +
> +	FOREACH_DEVICE_ON_PCIBUS(dev) {
> +		if (dev->kdrv == RTE_KDRV_IGB_UIO ||
> +		   dev->kdrv == RTE_KDRV_UIO_GENERIC) {
> +			return 1;
> +		}
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Any one of the device has iova as va
> + */
> +static inline int
> +pci_device_has_iova_va(void)
> +{
> +	struct rte_pci_device *dev = NULL;
> +	struct rte_pci_driver *drv = NULL;
> +
> +	FOREACH_DRIVER_ON_PCIBUS(drv) {
> +		if (drv && drv->drv_flags & RTE_PCI_DRV_NEED_IOVA_VA) {
> +			FOREACH_DEVICE_ON_PCIBUS(dev) {
> +				if (dev->kdrv == RTE_KDRV_VFIO &&
> +				    rte_pci_match(drv, dev))
> +					return 1;
> +			}
> +		}
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Get iommu class of PCI devices on the bus.
> + */
> +enum rte_iova_mode
> +rte_pci_get_iommu_class(void)
> +{
> +	bool is_vfio_noiommu_enabled;
> +	bool has_iova_va;
> +	bool is_bound_uio;
> +
> +	has_iova_va = pci_device_has_iova_va();
> +	is_bound_uio = pci_device_bound_uio();
> +	is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 : 0;
> +
> +	if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
> +		return RTE_IOVA_VA;
> +

PCI is generally present on all platforms, including dpaa2.
In such cases there may not be any PCI device found or available for DPDK
use. The PCI bus will still return RTE_IOVA_PA, which will set the
system mode to PA.

> +	if (has_iova_va) {
> +		if (is_vfio_noiommu_enabled)
> +			RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
> +		if (is_bound_uio)
> +			RTE_LOG(WARNING, EAL, "Some device attached to UIO\n");
> +	}
> +
> +	return RTE_IOVA_PA;
> +}
> +
>  /* Read PCI config space. */
>  int rte_pci_read_config(const struct rte_pci_device *device,
>  		void *buf, size_t len, off_t offset)
> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
> index 946df7e31..c8a97b7e7 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
> @@ -816,4 +816,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
>  	return 0;
>  }
>
> +int
> +vfio_noiommu_is_enabled(void)
> +{
> +	int fd, ret, cnt __rte_unused;
> +	char c;
> +
> +	ret = -1;
> +	fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
> +	if (fd < 0)
> +		return -1;
> +
> +	cnt = read(fd, &c, 1);
> +	if (c == 'Y')
> +		ret = 1;
> +
> +	close(fd);
> +	return ret;
> +}
> +
>  #endif
> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
> index 5ff63e5d7..26ea8e119 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
> @@ -150,6 +150,8 @@ struct vfio_config {
>  #define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
>  #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
>  #define VFIO_GET_REGION_IDX(x) (x >> 40)
> +#define VFIO_NOIOMMU_MODE      \
> +	"/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
>
>  /* DMA mapping function prototype.
>   * Takes VFIO container fd as a parameter.
> @@ -210,6 +212,8 @@ int pci_vfio_is_enabled(void);
>
>  int vfio_mp_sync_setup(void);
>
> +int vfio_noiommu_is_enabled(void);
> +
>  #define SOCKET_REQ_CONTAINER 0x100
>  #define SOCKET_REQ_GROUP 0x200
>  #define SOCKET_CLR_GROUP 0x300
> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> index c91dd44c4..044f89c7c 100644
> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> @@ -206,6 +206,7 @@ DPDK_17.08 {
>  	rte_bus_find_by_device;
>  	rte_bus_find_by_name;
>  	rte_pci_match;
> +	rte_pci_get_iommu_class;
>
>  } DPDK_17.05;
>
>
  
Sergio Gonzalez Monroy July 14, 2017, 7:43 a.m. UTC | #6
On 13/07/2017 09:23, santosh wrote:
> Hi Sergio,
>
> On Wednesday 12 July 2017 01:50 PM, Sergio Gonzalez Monroy wrote:
>
>> On 11/07/2017 07:16, Santosh Shukla wrote:
>>> Get iommu class of PCI device on the bus and returns preferred iova
>>> mapping mode for that bus.
>>>
>>> Algorithm for iova scheme selection for PCI bus:
>>> 0. Look for device attached to vfio kdrv and has .drv_flag set
>>> to RTE_PCI_DRV_NEED_IOVA_VA.
>>> 1. Look for any device attached to UIO class of driver.
>>> 2. Check for vfio-noiommu mode enabled.
>>>
>>> If 1) & 2) is false and 0) is true then select
>>> mapping scheme as iova=va. Otherwise use default
>>> mapping scheme (iova_pa).
>>>
>>> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
>>> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
>>> ---
>>> v1 --> v2:
>>> - Removed Linux version check in vfio_noiommu func. Refer [1].
>>> - Extending autodetction logic for _iommu_class.
>>> Refer [2].
>>>
>>> [1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html
>>> [2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html
>> Just wondering how it all works with device hotplug.
>> Correct me if I am wrong but if EAL decides to use IOVA_AS_VA scheme,
>> if we were to attach a device that needs IOVA_AS_PA, it will not work and should fail to attach, right?
>>
> It will work for igb_uio case, and won't work for vfio-noiommu hotplug case(Invalid case).

Why are those two cases (igb_uio, vfio-noiommu) different? Do they not
have the same requirements, i.e. need the IOVA_PA scheme?

Thanks,
Sergio

> Yes, we can dictate iova awareness to hotplug/unplug area.
>
>> Thanks,
>> Sergio
>>
>>>    lib/librte_eal/linuxapp/eal/eal_pci.c           | 66 +++++++++++++++++++++++++
>>>    lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++++
>>>    lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
>>>    lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
>>>    4 files changed, 90 insertions(+)
>>>
>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>> index 7d9e1a99b..573caa000 100644
>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>> @@ -45,6 +45,7 @@
>>>    #include "eal_filesystem.h"
>>>    #include "eal_private.h"
>>>    #include "eal_pci_init.h"
>>> +#include "eal_vfio.h"
>>>      /**
>>>     * @file
>>> @@ -488,6 +489,71 @@ rte_pci_scan(void)
>>>        return -1;
>>>    }
>>>    +/*
>>> + * Any one of the device bound to uio
>>> + */
>>> +static inline int
>>> +pci_device_bound_uio(void)
>>> +{
>>> +    struct rte_pci_device *dev = NULL;
>>> +
>>> +    FOREACH_DEVICE_ON_PCIBUS(dev) {
>>> +        if (dev->kdrv == RTE_KDRV_IGB_UIO ||
>>> +           dev->kdrv == RTE_KDRV_UIO_GENERIC) {
>>> +            return 1;
>>> +        }
>>> +    }
>>> +    return 0;
>>> +}
>>> +
>>> +/*
>>> + * Any one of the device has iova as va
>>> + */
>>> +static inline int
>>> +pci_device_has_iova_va(void)
>>> +{
>>> +    struct rte_pci_device *dev = NULL;
>>> +    struct rte_pci_driver *drv = NULL;
>>> +
>>> +    FOREACH_DRIVER_ON_PCIBUS(drv) {
>>> +        if (drv && drv->drv_flags & RTE_PCI_DRV_NEED_IOVA_VA) {
>>> +            FOREACH_DEVICE_ON_PCIBUS(dev) {
>>> +                if (dev->kdrv == RTE_KDRV_VFIO &&
>>> +                    rte_pci_match(drv, dev))
>>> +                    return 1;
>>> +            }
>>> +        }
>>> +    }
>>> +    return 0;
>>> +}
>>> +
>>> +/*
>>> + * Get iommu class of PCI devices on the bus.
>>> + */
>>> +enum rte_iova_mode
>>> +rte_pci_get_iommu_class(void)
>>> +{
>>> +    bool is_vfio_noiommu_enabled;
>>> +    bool has_iova_va;
>>> +    bool is_bound_uio;
>>> +
>>> +    has_iova_va = pci_device_has_iova_va();
>>> +    is_bound_uio = pci_device_bound_uio();
>>> +    is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 : 0;
>>> +
>>> +    if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
>>> +        return RTE_IOVA_VA;
>>> +
>>> +    if (has_iova_va) {
>>> +        if (is_vfio_noiommu_enabled)
>>> +            RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
>>> +        if (is_bound_uio)
>>> +            RTE_LOG(WARNING, EAL, "Some device attached to UIO\n");
>>> +    }
>>> +
>>> +    return RTE_IOVA_PA;
>>> +}
>>> +
>>>    /* Read PCI config space. */
>>>    int rte_pci_read_config(const struct rte_pci_device *device,
>>>            void *buf, size_t len, off_t offset)
>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
>>> index 946df7e31..c8a97b7e7 100644
>>> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
>>> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
>>> @@ -816,4 +816,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
>>>        return 0;
>>>    }
>>>    +int
>>> +vfio_noiommu_is_enabled(void)
>>> +{
>>> +    int fd, ret, cnt __rte_unused;
>>> +    char c;
>>> +
>>> +    ret = -1;
>>> +    fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
>>> +    if (fd < 0)
>>> +        return -1;
>>> +
>>> +    cnt = read(fd, &c, 1);
>>> +    if (c == 'Y')
>>> +        ret = 1;
>>> +
>>> +    close(fd);
>>> +    return ret;
>>> +}
>>> +
>>>    #endif
>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
>>> index 5ff63e5d7..26ea8e119 100644
>>> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
>>> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
>>> @@ -150,6 +150,8 @@ struct vfio_config {
>>>    #define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
>>>    #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
>>>    #define VFIO_GET_REGION_IDX(x) (x >> 40)
>>> +#define VFIO_NOIOMMU_MODE      \
>>> +    "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
>>>      /* DMA mapping function prototype.
>>>     * Takes VFIO container fd as a parameter.
>>> @@ -210,6 +212,8 @@ int pci_vfio_is_enabled(void);
>>>      int vfio_mp_sync_setup(void);
>>>    +int vfio_noiommu_is_enabled(void);
>>> +
>>>    #define SOCKET_REQ_CONTAINER 0x100
>>>    #define SOCKET_REQ_GROUP 0x200
>>>    #define SOCKET_CLR_GROUP 0x300
>>> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>> index c91dd44c4..044f89c7c 100644
>>> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>> @@ -206,6 +206,7 @@ DPDK_17.08 {
>>>        rte_bus_find_by_device;
>>>        rte_bus_find_by_name;
>>>        rte_pci_match;
>>> +    rte_pci_get_iommu_class;
>>>      } DPDK_17.05;
>>>    
>>
  
Santosh Shukla July 14, 2017, 7:55 a.m. UTC | #7
On Friday 14 July 2017 01:09 PM, Hemant Agrawal wrote:

> On 7/11/2017 11:46 AM, Santosh Shukla wrote:
>> Get iommu class of PCI device on the bus and returns preferred iova
>> mapping mode for that bus.
>>
>> Algorithm for iova scheme selection for PCI bus:
>> 0. Look for device attached to vfio kdrv and has .drv_flag set
>> to RTE_PCI_DRV_NEED_IOVA_VA.
>> 1. Look for any device attached to UIO class of driver.
>> 2. Check for vfio-noiommu mode enabled.
>>
>> If 1) & 2) is false and 0) is true then select
>> mapping scheme as iova=va. Otherwise use default
>> mapping scheme (iova_pa).
>>
>> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
>> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
>> ---
>> v1 --> v2:
>> - Removed Linux version check in vfio_noiommu func. Refer [1].
>> - Extending autodetction logic for _iommu_class.
>> Refer [2].
>>
>> [1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html
>> [2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html
>>
>>  lib/librte_eal/linuxapp/eal/eal_pci.c           | 66 +++++++++++++++++++++++++
>>  lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++++
>>  lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
>>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
>>  4 files changed, 90 insertions(+)
>>
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
>> index 7d9e1a99b..573caa000 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>> @@ -45,6 +45,7 @@
>>  #include "eal_filesystem.h"
>>  #include "eal_private.h"
>>  #include "eal_pci_init.h"
>> +#include "eal_vfio.h"
>>
>>  /**
>>   * @file
>> @@ -488,6 +489,71 @@ rte_pci_scan(void)
>>      return -1;
>>  }
>>
>> +/*
>> + * Any one of the device bound to uio
>> + */
>> +static inline int
>> +pci_device_bound_uio(void)
>> +{
>> +    struct rte_pci_device *dev = NULL;
>> +
>> +    FOREACH_DEVICE_ON_PCIBUS(dev) {
>> +        if (dev->kdrv == RTE_KDRV_IGB_UIO ||
>> +           dev->kdrv == RTE_KDRV_UIO_GENERIC) {
>> +            return 1;
>> +        }
>> +    }
>> +    return 0;
>> +}
>> +
>> +/*
>> + * Any one of the device has iova as va
>> + */
>> +static inline int
>> +pci_device_has_iova_va(void)
>> +{
>> +    struct rte_pci_device *dev = NULL;
>> +    struct rte_pci_driver *drv = NULL;
>> +
>> +    FOREACH_DRIVER_ON_PCIBUS(drv) {
>> +        if (drv && drv->drv_flags & RTE_PCI_DRV_NEED_IOVA_VA) {
>> +            FOREACH_DEVICE_ON_PCIBUS(dev) {
>> +                if (dev->kdrv == RTE_KDRV_VFIO &&
>> +                    rte_pci_match(drv, dev))
>> +                    return 1;
>> +            }
>> +        }
>> +    }
>> +    return 0;
>> +}
>> +
>> +/*
>> + * Get iommu class of PCI devices on the bus.
>> + */
>> +enum rte_iova_mode
>> +rte_pci_get_iommu_class(void)
>> +{
>> +    bool is_vfio_noiommu_enabled;
>> +    bool has_iova_va;
>> +    bool is_bound_uio;
>> +
>> +    has_iova_va = pci_device_has_iova_va();
>> +    is_bound_uio = pci_device_bound_uio();
>> +    is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 : 0;
>> +
>> +    if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
>> +        return RTE_IOVA_VA;
>> +
>
> PCI is generally present in all platform including dpaa2.
> There may not be any device found or available for dpdk usages in such cases. The PCI bus will still return RTE_IOVA_PA, which will make the system mode as PA.
>
That's the expected behavior. And the implementation makes sure
that the PCI bus returns the default mode (aka _PA) if no PCI device is found.

Isn't code taking care of same?

Let me walk through the code:

has_iova_va = 0 (if there is no pci device then pci_device_has_iova_va() will return 0).

And if (has_iova_va && ...) will fail, therefore rte_pci_get_iommu_class() returns RTE_IOVA_PA mode,
which is the default mode. Right?

>> +    if (has_iova_va) {
>> +        if (is_vfio_noiommu_enabled)
>> +            RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
>> +        if (is_bound_uio)
>> +            RTE_LOG(WARNING, EAL, "Some device attached to UIO\n");
>> +    }
>> +
>> +    return RTE_IOVA_PA;
>> +}
>> +
>>  /* Read PCI config space. */
>>  int rte_pci_read_config(const struct rte_pci_device *device,
>>          void *buf, size_t len, off_t offset)
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
>> index 946df7e31..c8a97b7e7 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
>> @@ -816,4 +816,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
>>      return 0;
>>  }
>>
>> +int
>> +vfio_noiommu_is_enabled(void)
>> +{
>> +    int fd, ret, cnt __rte_unused;
>> +    char c;
>> +
>> +    ret = -1;
>> +    fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
>> +    if (fd < 0)
>> +        return -1;
>> +
>> +    cnt = read(fd, &c, 1);
>> +    if (c == 'Y')
>> +        ret = 1;
>> +
>> +    close(fd);
>> +    return ret;
>> +}
>> +
>>  #endif
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
>> index 5ff63e5d7..26ea8e119 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
>> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
>> @@ -150,6 +150,8 @@ struct vfio_config {
>>  #define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
>>  #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
>>  #define VFIO_GET_REGION_IDX(x) (x >> 40)
>> +#define VFIO_NOIOMMU_MODE      \
>> +    "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
>>
>>  /* DMA mapping function prototype.
>>   * Takes VFIO container fd as a parameter.
>> @@ -210,6 +212,8 @@ int pci_vfio_is_enabled(void);
>>
>>  int vfio_mp_sync_setup(void);
>>
>> +int vfio_noiommu_is_enabled(void);
>> +
>>  #define SOCKET_REQ_CONTAINER 0x100
>>  #define SOCKET_REQ_GROUP 0x200
>>  #define SOCKET_CLR_GROUP 0x300
>> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>> index c91dd44c4..044f89c7c 100644
>> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>> @@ -206,6 +206,7 @@ DPDK_17.08 {
>>      rte_bus_find_by_device;
>>      rte_bus_find_by_name;
>>      rte_pci_match;
>> +    rte_pci_get_iommu_class;
>>
>>  } DPDK_17.05;
>>
>>
>
>
  
Hemant Agrawal July 14, 2017, 8:06 a.m. UTC | #8
On 7/14/2017 1:25 PM, santosh wrote:
> On Friday 14 July 2017 01:09 PM, Hemant Agrawal wrote:
>
>> On 7/11/2017 11:46 AM, Santosh Shukla wrote:
>>> Get iommu class of PCI device on the bus and returns preferred iova
>>> mapping mode for that bus.
>>>
>>> Algorithm for iova scheme selection for PCI bus:
>>> 0. Look for device attached to vfio kdrv and has .drv_flag set
>>> to RTE_PCI_DRV_NEED_IOVA_VA.
>>> 1. Look for any device attached to UIO class of driver.
>>> 2. Check for vfio-noiommu mode enabled.
>>>
>>> If 1) & 2) is false and 0) is true then select
>>> mapping scheme as iova=va. Otherwise use default
>>> mapping scheme (iova_pa).
>>>
>>> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
>>> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
>>> ---
>>> v1 --> v2:
>>> - Removed Linux version check in vfio_noiommu func. Refer [1].
>>> - Extending autodetction logic for _iommu_class.
>>> Refer [2].
>>>
>>> [1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html
>>> [2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html
>>>
>>>  lib/librte_eal/linuxapp/eal/eal_pci.c           | 66 +++++++++++++++++++++++++
>>>  lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++++
>>>  lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
>>>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
>>>  4 files changed, 90 insertions(+)
>>>
>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>> index 7d9e1a99b..573caa000 100644
>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>> @@ -45,6 +45,7 @@
>>>  #include "eal_filesystem.h"
>>>  #include "eal_private.h"
>>>  #include "eal_pci_init.h"
>>> +#include "eal_vfio.h"
>>>
>>>  /**
>>>   * @file
>>> @@ -488,6 +489,71 @@ rte_pci_scan(void)
>>>      return -1;
>>>  }
>>>
>>> +/*
>>> + * Any one of the device bound to uio
>>> + */
>>> +static inline int
>>> +pci_device_bound_uio(void)
>>> +{
>>> +    struct rte_pci_device *dev = NULL;
>>> +
>>> +    FOREACH_DEVICE_ON_PCIBUS(dev) {
>>> +        if (dev->kdrv == RTE_KDRV_IGB_UIO ||
>>> +           dev->kdrv == RTE_KDRV_UIO_GENERIC) {
>>> +            return 1;
>>> +        }
>>> +    }
>>> +    return 0;
>>> +}
>>> +
>>> +/*
>>> + * Any one of the device has iova as va
>>> + */
>>> +static inline int
>>> +pci_device_has_iova_va(void)
>>> +{
>>> +    struct rte_pci_device *dev = NULL;
>>> +    struct rte_pci_driver *drv = NULL;
>>> +
>>> +    FOREACH_DRIVER_ON_PCIBUS(drv) {
>>> +        if (drv && drv->drv_flags & RTE_PCI_DRV_NEED_IOVA_VA) {
>>> +            FOREACH_DEVICE_ON_PCIBUS(dev) {
>>> +                if (dev->kdrv == RTE_KDRV_VFIO &&
>>> +                    rte_pci_match(drv, dev))
>>> +                    return 1;
>>> +            }
>>> +        }
>>> +    }
>>> +    return 0;
>>> +}
>>> +
>>> +/*
>>> + * Get iommu class of PCI devices on the bus.
>>> + */
>>> +enum rte_iova_mode
>>> +rte_pci_get_iommu_class(void)
>>> +{
>>> +    bool is_vfio_noiommu_enabled;
>>> +    bool has_iova_va;
>>> +    bool is_bound_uio;
>>> +
>>> +    has_iova_va = pci_device_has_iova_va();
>>> +    is_bound_uio = pci_device_bound_uio();
>>> +    is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 : 0;
>>> +
>>> +    if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
>>> +        return RTE_IOVA_VA;
>>> +
>>
>> PCI is generally present in all platform including dpaa2.
>> There may not be any device found or available for dpdk usages in such cases. The PCI bus will still return RTE_IOVA_PA, which will make the system mode as PA.
>>
> That's the expected behavior. And implementation makes sure
> that PCI_bus return default mode aka _PA if no-pci device found.
>
> Isn't code taking care of same?
>

I have attached a PCI device to the board. But it is being managed by 
kernel only.

EAL: PCI device 0000:01:00.0 on NUMA socket 0
EAL:   probe driver: 8086:10d3 net_e1000_em
EAL:   Not managed by a supported kernel driver, skipped

So, there are devices in the PCI list. But none of them is probed or 
being used by dpdk.


> Let me walk through the code:
>
> has_iova_va = 0 (if no pci device then pci_device_has_iov_va() will return 0).
>
> And if (has_iova_va & ,,,) will fail therefore rte_pci_get_iommu_class() retuns RTE_IOVA_PA mode.
> which is default mode. Right?
>
This will create an issue for the 2nd bus, which is a VA bus. The combined 
mode will become '3', so the system mode will be PA.

>>> +    if (has_iova_va) {
>>> +        if (is_vfio_noiommu_enabled)
>>> +            RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
>>> +        if (is_bound_uio)
>>> +            RTE_LOG(WARNING, EAL, "Some device attached to UIO\n");
>>> +    }
>>> +
>>> +    return RTE_IOVA_PA;
>>> +}
>>> +
>>>  /* Read PCI config space. */
>>>  int rte_pci_read_config(const struct rte_pci_device *device,
>>>          void *buf, size_t len, off_t offset)
>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
>>> index 946df7e31..c8a97b7e7 100644
>>> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
>>> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
>>> @@ -816,4 +816,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
>>>      return 0;
>>>  }
>>>
>>> +int
>>> +vfio_noiommu_is_enabled(void)
>>> +{
>>> +    int fd, ret, cnt __rte_unused;
>>> +    char c;
>>> +
>>> +    ret = -1;
>>> +    fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
>>> +    if (fd < 0)
>>> +        return -1;
>>> +
>>> +    cnt = read(fd, &c, 1);
>>> +    if (c == 'Y')
>>> +        ret = 1;
>>> +
>>> +    close(fd);
>>> +    return ret;
>>> +}
>>> +
>>>  #endif
>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
>>> index 5ff63e5d7..26ea8e119 100644
>>> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
>>> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
>>> @@ -150,6 +150,8 @@ struct vfio_config {
>>>  #define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
>>>  #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
>>>  #define VFIO_GET_REGION_IDX(x) (x >> 40)
>>> +#define VFIO_NOIOMMU_MODE      \
>>> +    "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
>>>
>>>  /* DMA mapping function prototype.
>>>   * Takes VFIO container fd as a parameter.
>>> @@ -210,6 +212,8 @@ int pci_vfio_is_enabled(void);
>>>
>>>  int vfio_mp_sync_setup(void);
>>>
>>> +int vfio_noiommu_is_enabled(void);
>>> +
>>>  #define SOCKET_REQ_CONTAINER 0x100
>>>  #define SOCKET_REQ_GROUP 0x200
>>>  #define SOCKET_CLR_GROUP 0x300
>>> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>> index c91dd44c4..044f89c7c 100644
>>> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>> @@ -206,6 +206,7 @@ DPDK_17.08 {
>>>      rte_bus_find_by_device;
>>>      rte_bus_find_by_name;
>>>      rte_pci_match;
>>> +    rte_pci_get_iommu_class;
>>>
>>>  } DPDK_17.05;
>>>
>>>
>>
>>
>
>
  
Santosh Shukla July 14, 2017, 8:11 a.m. UTC | #9
Hi Sergio,

On Friday 14 July 2017 01:13 PM, Sergio Gonzalez Monroy wrote:

> On 13/07/2017 09:23, santosh wrote:
>> Hi Sergio,
>>
>> On Wednesday 12 July 2017 01:50 PM, Sergio Gonzalez Monroy wrote:
>>
>>> On 11/07/2017 07:16, Santosh Shukla wrote:
>>>> Get iommu class of PCI device on the bus and returns preferred iova
>>>> mapping mode for that bus.
>>>>
>>>> Algorithm for iova scheme selection for PCI bus:
>>>> 0. Look for device attached to vfio kdrv and has .drv_flag set
>>>> to RTE_PCI_DRV_NEED_IOVA_VA.
>>>> 1. Look for any device attached to UIO class of driver.
>>>> 2. Check for vfio-noiommu mode enabled.
>>>>
>>>> If 1) & 2) is false and 0) is true then select
>>>> mapping scheme as iova=va. Otherwise use default
>>>> mapping scheme (iova_pa).
>>>>
>>>> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
>>>> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
>>>> ---
>>>> v1 --> v2:
>>>> - Removed Linux version check in vfio_noiommu func. Refer [1].
>>>> - Extending autodetction logic for _iommu_class.
>>>> Refer [2].
>>>>
>>>> [1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html
>>>> [2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html
>>> Just wondering how it all works with device hotplug.
>>> Correct me if I am wrong but if EAL decides to use IOVA_AS_VA scheme,
>>> if we were to attach a device that needs IOVA_AS_PA, it will not work and should fail to attach, right?
>>>
>> It will work for igb_uio case, and won't work for vfio-noiommu hotplug case(Invalid case).
>
> Why are those two cases (igb_uio, vfio-noiommu) different? do they not have the same requirements, ie. need IOVA_PA sheme?
>
Behavior remains same.

For the vfio-noiommu case in the context of hot-plugging - the rest of the VFIO(/iommu) devices will be functionally
affected; that's why I mentioned it as an invalid case.

> Thanks,
> Sergio
>
>> Yes, we can dictate iova awareness to hotplug/unplug area.
>>
>>> Thanks,
>>> Sergio
>>>
>>>>    lib/librte_eal/linuxapp/eal/eal_pci.c           | 66 +++++++++++++++++++++++++
>>>>    lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++++
>>>>    lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
>>>>    lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
>>>>    4 files changed, 90 insertions(+)
>>>>
>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>> index 7d9e1a99b..573caa000 100644
>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>> @@ -45,6 +45,7 @@
>>>>    #include "eal_filesystem.h"
>>>>    #include "eal_private.h"
>>>>    #include "eal_pci_init.h"
>>>> +#include "eal_vfio.h"
>>>>      /**
>>>>     * @file
>>>> @@ -488,6 +489,71 @@ rte_pci_scan(void)
>>>>        return -1;
>>>>    }
>>>>    +/*
>>>> + * Any one of the device bound to uio
>>>> + */
>>>> +static inline int
>>>> +pci_device_bound_uio(void)
>>>> +{
>>>> +    struct rte_pci_device *dev = NULL;
>>>> +
>>>> +    FOREACH_DEVICE_ON_PCIBUS(dev) {
>>>> +        if (dev->kdrv == RTE_KDRV_IGB_UIO ||
>>>> +           dev->kdrv == RTE_KDRV_UIO_GENERIC) {
>>>> +            return 1;
>>>> +        }
>>>> +    }
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +/*
>>>> + * Any one of the device has iova as va
>>>> + */
>>>> +static inline int
>>>> +pci_device_has_iova_va(void)
>>>> +{
>>>> +    struct rte_pci_device *dev = NULL;
>>>> +    struct rte_pci_driver *drv = NULL;
>>>> +
>>>> +    FOREACH_DRIVER_ON_PCIBUS(drv) {
>>>> +        if (drv && drv->drv_flags & RTE_PCI_DRV_NEED_IOVA_VA) {
>>>> +            FOREACH_DEVICE_ON_PCIBUS(dev) {
>>>> +                if (dev->kdrv == RTE_KDRV_VFIO &&
>>>> +                    rte_pci_match(drv, dev))
>>>> +                    return 1;
>>>> +            }
>>>> +        }
>>>> +    }
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +/*
>>>> + * Get iommu class of PCI devices on the bus.
>>>> + */
>>>> +enum rte_iova_mode
>>>> +rte_pci_get_iommu_class(void)
>>>> +{
>>>> +    bool is_vfio_noiommu_enabled;
>>>> +    bool has_iova_va;
>>>> +    bool is_bound_uio;
>>>> +
>>>> +    has_iova_va = pci_device_has_iova_va();
>>>> +    is_bound_uio = pci_device_bound_uio();
>>>> +    is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 : 0;
>>>> +
>>>> +    if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
>>>> +        return RTE_IOVA_VA;
>>>> +
>>>> +    if (has_iova_va) {
>>>> +        if (is_vfio_noiommu_enabled)
>>>> +            RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
>>>> +        if (is_bound_uio)
>>>> +            RTE_LOG(WARNING, EAL, "Some device attached to UIO\n");
>>>> +    }
>>>> +
>>>> +    return RTE_IOVA_PA;
>>>> +}
>>>> +
>>>>    /* Read PCI config space. */
>>>>    int rte_pci_read_config(const struct rte_pci_device *device,
>>>>            void *buf, size_t len, off_t offset)
>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
>>>> index 946df7e31..c8a97b7e7 100644
>>>> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
>>>> @@ -816,4 +816,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
>>>>        return 0;
>>>>    }
>>>>    +int
>>>> +vfio_noiommu_is_enabled(void)
>>>> +{
>>>> +    int fd, ret, cnt __rte_unused;
>>>> +    char c;
>>>> +
>>>> +    ret = -1;
>>>> +    fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
>>>> +    if (fd < 0)
>>>> +        return -1;
>>>> +
>>>> +    cnt = read(fd, &c, 1);
>>>> +    if (c == 'Y')
>>>> +        ret = 1;
>>>> +
>>>> +    close(fd);
>>>> +    return ret;
>>>> +}
>>>> +
>>>>    #endif
>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
>>>> index 5ff63e5d7..26ea8e119 100644
>>>> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
>>>> @@ -150,6 +150,8 @@ struct vfio_config {
>>>>    #define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
>>>>    #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
>>>>    #define VFIO_GET_REGION_IDX(x) (x >> 40)
>>>> +#define VFIO_NOIOMMU_MODE      \
>>>> +    "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
>>>>      /* DMA mapping function prototype.
>>>>     * Takes VFIO container fd as a parameter.
>>>> @@ -210,6 +212,8 @@ int pci_vfio_is_enabled(void);
>>>>      int vfio_mp_sync_setup(void);
>>>>    +int vfio_noiommu_is_enabled(void);
>>>> +
>>>>    #define SOCKET_REQ_CONTAINER 0x100
>>>>    #define SOCKET_REQ_GROUP 0x200
>>>>    #define SOCKET_CLR_GROUP 0x300
>>>> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>>> index c91dd44c4..044f89c7c 100644
>>>> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>>> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>>> @@ -206,6 +206,7 @@ DPDK_17.08 {
>>>>        rte_bus_find_by_device;
>>>>        rte_bus_find_by_name;
>>>>        rte_pci_match;
>>>> +    rte_pci_get_iommu_class;
>>>>      } DPDK_17.05;
>>>>    
>>>
>
  
Santosh Shukla July 14, 2017, 8:46 a.m. UTC | #10
On Friday 14 July 2017 01:36 PM, Hemant Agrawal wrote:

> On 7/14/2017 1:25 PM, santosh wrote:
>> On Friday 14 July 2017 01:09 PM, Hemant Agrawal wrote:
>>
>>> On 7/11/2017 11:46 AM, Santosh Shukla wrote:
>>>> Get iommu class of PCI device on the bus and returns preferred iova
>>>> mapping mode for that bus.
>>>>
>>>> Algorithm for iova scheme selection for PCI bus:
>>>> 0. Look for device attached to vfio kdrv and has .drv_flag set
>>>> to RTE_PCI_DRV_NEED_IOVA_VA.
>>>> 1. Look for any device attached to UIO class of driver.
>>>> 2. Check for vfio-noiommu mode enabled.
>>>>
>>>> If 1) & 2) is false and 0) is true then select
>>>> mapping scheme as iova=va. Otherwise use default
>>>> mapping scheme (iova_pa).
>>>>
>>>> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
>>>> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
>>>> ---
>>>> v1 --> v2:
>>>> - Removed Linux version check in vfio_noiommu func. Refer [1].
>>>> - Extending autodetction logic for _iommu_class.
>>>> Refer [2].
>>>>
>>>> [1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html
>>>> [2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html
>>>>
>>>>  lib/librte_eal/linuxapp/eal/eal_pci.c           | 66 +++++++++++++++++++++++++
>>>>  lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++++
>>>>  lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
>>>>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
>>>>  4 files changed, 90 insertions(+)
>>>>
>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>> index 7d9e1a99b..573caa000 100644
>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>> @@ -45,6 +45,7 @@
>>>>  #include "eal_filesystem.h"
>>>>  #include "eal_private.h"
>>>>  #include "eal_pci_init.h"
>>>> +#include "eal_vfio.h"
>>>>
>>>>  /**
>>>>   * @file
>>>> @@ -488,6 +489,71 @@ rte_pci_scan(void)
>>>>      return -1;
>>>>  }
>>>>
>>>> +/*
>>>> + * Any one of the device bound to uio
>>>> + */
>>>> +static inline int
>>>> +pci_device_bound_uio(void)
>>>> +{
>>>> +    struct rte_pci_device *dev = NULL;
>>>> +
>>>> +    FOREACH_DEVICE_ON_PCIBUS(dev) {
>>>> +        if (dev->kdrv == RTE_KDRV_IGB_UIO ||
>>>> +           dev->kdrv == RTE_KDRV_UIO_GENERIC) {
>>>> +            return 1;
>>>> +        }
>>>> +    }
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +/*
>>>> + * Any one of the device has iova as va
>>>> + */
>>>> +static inline int
>>>> +pci_device_has_iova_va(void)
>>>> +{
>>>> +    struct rte_pci_device *dev = NULL;
>>>> +    struct rte_pci_driver *drv = NULL;
>>>> +
>>>> +    FOREACH_DRIVER_ON_PCIBUS(drv) {
>>>> +        if (drv && drv->drv_flags & RTE_PCI_DRV_NEED_IOVA_VA) {
>>>> +            FOREACH_DEVICE_ON_PCIBUS(dev) {
>>>> +                if (dev->kdrv == RTE_KDRV_VFIO &&
>>>> +                    rte_pci_match(drv, dev))
>>>> +                    return 1;
>>>> +            }
>>>> +        }
>>>> +    }
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +/*
>>>> + * Get iommu class of PCI devices on the bus.
>>>> + */
>>>> +enum rte_iova_mode
>>>> +rte_pci_get_iommu_class(void)
>>>> +{
>>>> +    bool is_vfio_noiommu_enabled;
>>>> +    bool has_iova_va;
>>>> +    bool is_bound_uio;
>>>> +
>>>> +    has_iova_va = pci_device_has_iova_va();
>>>> +    is_bound_uio = pci_device_bound_uio();
>>>> +    is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 : 0;
>>>> +
>>>> +    if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
>>>> +        return RTE_IOVA_VA;
>>>> +
>>>
>>> PCI is generally present in all platform including dpaa2.
>>> There may not be any device found or available for dpdk usages in such cases. The PCI bus will still return RTE_IOVA_PA, which will make the system mode as PA.
>>>
>> That's the expected behavior. And implementation makes sure
>> that PCI_bus return default mode aka _PA if no-pci device found.
>>
>> Isn't code taking care of same?
>>
>
> I have attached a PCI device to the board. But it is being managed by kernel only.
>
> EAL: PCI device 0000:01:00.0 on NUMA socket 0
> EAL:   probe driver: 8086:10d3 net_e1000_em
> EAL:   Not managed by a supported kernel driver, skipped
>
> So, there are devices in the PCI list. But none of them is probed or being used by dpdk.
>
>
Therefore the _pci_get_iommu_class scan result would be _PA, as no device is bound to dpdk.

>> Let me walk through the code:
>>
>> has_iova_va = 0 (if no pci device then pci_device_has_iov_va() will return 0).
>>
>> And if (has_iova_va & ,,,) will fail therefore rte_pci_get_iommu_class() retuns RTE_IOVA_PA mode.
>> which is default mode. Right?
>>
> This will create issue for the 2nd bus, which is a VA bus. The combined mode will becomes '3', so the system mode will be PA.
>
Yes, if both modes are detected on two different buses
then the policy is to use the default iova mapping mode across the buses (which is _pa).

Are you operating in two different modes, like _pa for the PCI bus and _va for the fslmc bus in dpaa2?

>>>> +    if (has_iova_va) {
>>>> +        if (is_vfio_noiommu_enabled)
>>>> +            RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
>>>> +        if (is_bound_uio)
>>>> +            RTE_LOG(WARNING, EAL, "Some device attached to UIO\n");
>>>> +    }
>>>> +
>>>> +    return RTE_IOVA_PA;
>>>> +}
>>>> +
>>>>  /* Read PCI config space. */
>>>>  int rte_pci_read_config(const struct rte_pci_device *device,
>>>>          void *buf, size_t len, off_t offset)
>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
>>>> index 946df7e31..c8a97b7e7 100644
>>>> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
>>>> @@ -816,4 +816,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
>>>>      return 0;
>>>>  }
>>>>
>>>> +int
>>>> +vfio_noiommu_is_enabled(void)
>>>> +{
>>>> +    int fd, ret, cnt __rte_unused;
>>>> +    char c;
>>>> +
>>>> +    ret = -1;
>>>> +    fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
>>>> +    if (fd < 0)
>>>> +        return -1;
>>>> +
>>>> +    cnt = read(fd, &c, 1);
>>>> +    if (c == 'Y')
>>>> +        ret = 1;
>>>> +
>>>> +    close(fd);
>>>> +    return ret;
>>>> +}
>>>> +
>>>>  #endif
>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
>>>> index 5ff63e5d7..26ea8e119 100644
>>>> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
>>>> @@ -150,6 +150,8 @@ struct vfio_config {
>>>>  #define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
>>>>  #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
>>>>  #define VFIO_GET_REGION_IDX(x) (x >> 40)
>>>> +#define VFIO_NOIOMMU_MODE      \
>>>> +    "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
>>>>
>>>>  /* DMA mapping function prototype.
>>>>   * Takes VFIO container fd as a parameter.
>>>> @@ -210,6 +212,8 @@ int pci_vfio_is_enabled(void);
>>>>
>>>>  int vfio_mp_sync_setup(void);
>>>>
>>>> +int vfio_noiommu_is_enabled(void);
>>>> +
>>>>  #define SOCKET_REQ_CONTAINER 0x100
>>>>  #define SOCKET_REQ_GROUP 0x200
>>>>  #define SOCKET_CLR_GROUP 0x300
>>>> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>>> index c91dd44c4..044f89c7c 100644
>>>> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>>> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>>> @@ -206,6 +206,7 @@ DPDK_17.08 {
>>>>      rte_bus_find_by_device;
>>>>      rte_bus_find_by_name;
>>>>      rte_pci_match;
>>>> +    rte_pci_get_iommu_class;
>>>>
>>>>  } DPDK_17.05;
>>>>
>>>>
>>>
>>>
>>
>>
>
>
  
Santosh Shukla July 14, 2017, 9:13 a.m. UTC | #11
On Friday 14 July 2017 02:16 PM, santosh wrote:

> On Friday 14 July 2017 01:36 PM, Hemant Agrawal wrote:
>
>> On 7/14/2017 1:25 PM, santosh wrote:
>>> On Friday 14 July 2017 01:09 PM, Hemant Agrawal wrote:
>>>
>>>> On 7/11/2017 11:46 AM, Santosh Shukla wrote:
>>>>> Get iommu class of PCI device on the bus and returns preferred iova
>>>>> mapping mode for that bus.
>>>>>
>>>>> Algorithm for iova scheme selection for PCI bus:
>>>>> 0. Look for device attached to vfio kdrv and has .drv_flag set
>>>>> to RTE_PCI_DRV_NEED_IOVA_VA.
>>>>> 1. Look for any device attached to UIO class of driver.
>>>>> 2. Check for vfio-noiommu mode enabled.
>>>>>
>>>>> If 1) & 2) is false and 0) is true then select
>>>>> mapping scheme as iova=va. Otherwise use default
>>>>> mapping scheme (iova_pa).
>>>>>
>>>>> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
>>>>> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
>>>>> ---
>>>>> v1 --> v2:
>>>>> - Removed Linux version check in vfio_noiommu func. Refer [1].
>>>>> - Extending autodetction logic for _iommu_class.
>>>>> Refer [2].
>>>>>
>>>>> [1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html
>>>>> [2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html
>>>>>
>>>>>  lib/librte_eal/linuxapp/eal/eal_pci.c           | 66 +++++++++++++++++++++++++
>>>>>  lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++++
>>>>>  lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
>>>>>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
>>>>>  4 files changed, 90 insertions(+)
>>>>>
>>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>> index 7d9e1a99b..573caa000 100644
>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>> @@ -45,6 +45,7 @@
>>>>>  #include "eal_filesystem.h"
>>>>>  #include "eal_private.h"
>>>>>  #include "eal_pci_init.h"
>>>>> +#include "eal_vfio.h"
>>>>>
>>>>>  /**
>>>>>   * @file
>>>>> @@ -488,6 +489,71 @@ rte_pci_scan(void)
>>>>>      return -1;
>>>>>  }
>>>>>
>>>>> +/*
>>>>> + * Any one of the device bound to uio
>>>>> + */
>>>>> +static inline int
>>>>> +pci_device_bound_uio(void)
>>>>> +{
>>>>> +    struct rte_pci_device *dev = NULL;
>>>>> +
>>>>> +    FOREACH_DEVICE_ON_PCIBUS(dev) {
>>>>> +        if (dev->kdrv == RTE_KDRV_IGB_UIO ||
>>>>> +           dev->kdrv == RTE_KDRV_UIO_GENERIC) {
>>>>> +            return 1;
>>>>> +        }
>>>>> +    }
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +/*
>>>>> + * Any one of the device has iova as va
>>>>> + */
>>>>> +static inline int
>>>>> +pci_device_has_iova_va(void)
>>>>> +{
>>>>> +    struct rte_pci_device *dev = NULL;
>>>>> +    struct rte_pci_driver *drv = NULL;
>>>>> +
>>>>> +    FOREACH_DRIVER_ON_PCIBUS(drv) {
>>>>> +        if (drv && drv->drv_flags & RTE_PCI_DRV_NEED_IOVA_VA) {
>>>>> +            FOREACH_DEVICE_ON_PCIBUS(dev) {
>>>>> +                if (dev->kdrv == RTE_KDRV_VFIO &&
>>>>> +                    rte_pci_match(drv, dev))
>>>>> +                    return 1;
>>>>> +            }
>>>>> +        }
>>>>> +    }
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +/*
>>>>> + * Get iommu class of PCI devices on the bus.
>>>>> + */
>>>>> +enum rte_iova_mode
>>>>> +rte_pci_get_iommu_class(void)
>>>>> +{
>>>>> +    bool is_vfio_noiommu_enabled;
>>>>> +    bool has_iova_va;
>>>>> +    bool is_bound_uio;
>>>>> +
>>>>> +    has_iova_va = pci_device_has_iova_va();
>>>>> +    is_bound_uio = pci_device_bound_uio();
>>>>> +    is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 : 0;
>>>>> +
>>>>> +    if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
>>>>> +        return RTE_IOVA_VA;
>>>>> +
>>>> PCI is generally present in all platform including dpaa2.
>>>> There may not be any device found or available for dpdk usages in such cases. The PCI bus will still return RTE_IOVA_PA, which will make the system mode as PA.
>>>>
>>> That's the expected behavior. And implementation makes sure
>>> that PCI_bus return default mode aka _PA if no-pci device found.
>>>
>>> Isn't code taking care of same?
>>>
>> I have attached a PCI device to the board. But it is being managed by kernel only.
>>
>> EAL: PCI device 0000:01:00.0 on NUMA socket 0
>> EAL:   probe driver: 8086:10d3 net_e1000_em
>> EAL:   Not managed by a supported kernel driver, skipped
>>
>> So, there are devices in the PCI list. But none of them is probed or being used by dpdk.
>>
>>
> Therefore _pci_get_iommu_class scan result would be _PA, As no device bound to dpdk.
>
>>> Let me walk through the code:
>>>
>>> has_iova_va = 0 (if no pci device then pci_device_has_iov_va() will return 0).
>>>
>>> And if (has_iova_va & ,,,) will fail therefore rte_pci_get_iommu_class() retuns RTE_IOVA_PA mode.
>>> which is default mode. Right?
>>>
>> This will create issue for the 2nd bus, which is a VA bus. The combined mode will becomes '3', so the system mode will be PA.
>>
> Yes, If both modes detected at two different bus 
> then policy is to use default iova mapping mode across the buses(which is _pa).
>
> Are you operating on two different mode like _pa for PCI-bus and _va for fslmc bus in dpaa2? 

Does the vfio kernel infrastructure for dpaa2 allow a case like the one below:
0) Use PCI vfio(/iommu) mode and map vfio.dma_map to RTE_IOVA_PA
AND
1) Use platform/fslmc vfio-platform mode and map vfio.dma_map to RTE_IOVA_VA?

Does dpaa2 support this?

(Speculating) Let's say the dpaa2 platform supports the above case;
then will you see any issue if both buses use the default iova_mapping (_pa),
like dpdk pci does currently?
  

Patch

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 7d9e1a99b..573caa000 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -45,6 +45,7 @@ 
 #include "eal_filesystem.h"
 #include "eal_private.h"
 #include "eal_pci_init.h"
+#include "eal_vfio.h"
 
 /**
  * @file
@@ -488,6 +489,71 @@  rte_pci_scan(void)
 	return -1;
 }
 
+/*
+ * Any one of the device bound to uio
+ */
+static inline int
+pci_device_bound_uio(void)
+{
+	struct rte_pci_device *dev = NULL;
+
+	FOREACH_DEVICE_ON_PCIBUS(dev) {
+		if (dev->kdrv == RTE_KDRV_IGB_UIO ||
+		   dev->kdrv == RTE_KDRV_UIO_GENERIC) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Any one of the device has iova as va
+ */
+static inline int
+pci_device_has_iova_va(void)
+{
+	struct rte_pci_device *dev = NULL;
+	struct rte_pci_driver *drv = NULL;
+
+	FOREACH_DRIVER_ON_PCIBUS(drv) {
+		if (drv && drv->drv_flags & RTE_PCI_DRV_NEED_IOVA_VA) {
+			FOREACH_DEVICE_ON_PCIBUS(dev) {
+				if (dev->kdrv == RTE_KDRV_VFIO &&
+				    rte_pci_match(drv, dev))
+					return 1;
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Get iommu class of PCI devices on the bus.
+ */
+enum rte_iova_mode
+rte_pci_get_iommu_class(void)
+{
+	bool is_vfio_noiommu_enabled;
+	bool has_iova_va;
+	bool is_bound_uio;
+
+	has_iova_va = pci_device_has_iova_va();
+	is_bound_uio = pci_device_bound_uio();
+	is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 : 0;
+
+	if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
+		return RTE_IOVA_VA;
+
+	if (has_iova_va) {
+		if (is_vfio_noiommu_enabled)
+			RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
+		if (is_bound_uio)
+			RTE_LOG(WARNING, EAL, "Some device attached to UIO\n");
+	}
+
+	return RTE_IOVA_PA;
+}
+
 /* Read PCI config space. */
 int rte_pci_read_config(const struct rte_pci_device *device,
 		void *buf, size_t len, off_t offset)
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 946df7e31..c8a97b7e7 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -816,4 +816,23 @@  vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
 	return 0;
 }
 
+int
+vfio_noiommu_is_enabled(void)
+{
+	int fd, ret, cnt __rte_unused;
+	char c;
+
+	ret = -1;
+	fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	cnt = read(fd, &c, 1);
+	if (c == 'Y')
+		ret = 1;
+
+	close(fd);
+	return ret;
+}
+
 #endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 5ff63e5d7..26ea8e119 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -150,6 +150,8 @@  struct vfio_config {
 #define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
 #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
 #define VFIO_GET_REGION_IDX(x) (x >> 40)
+#define VFIO_NOIOMMU_MODE      \
+	"/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
 
 /* DMA mapping function prototype.
  * Takes VFIO container fd as a parameter.
@@ -210,6 +212,8 @@  int pci_vfio_is_enabled(void);
 
 int vfio_mp_sync_setup(void);
 
+int vfio_noiommu_is_enabled(void);
+
 #define SOCKET_REQ_CONTAINER 0x100
 #define SOCKET_REQ_GROUP 0x200
 #define SOCKET_CLR_GROUP 0x300
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index c91dd44c4..044f89c7c 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -206,6 +206,7 @@  DPDK_17.08 {
 	rte_bus_find_by_device;
 	rte_bus_find_by_name;
 	rte_pci_match;
+	rte_pci_get_iommu_class;
 
 } DPDK_17.05;