[v2] pci: read amd iommu virtual address width

Message ID 20220914134950.3675770-1-mpiszczek@ddn.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series [v2] pci: read amd iommu virtual address width |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/Intel-compilation fail Compilation issues
ci/intel-Testing success Testing PASS

Commit Message

Michael Piszczek Sept. 14, 2022, 1:49 p.m. UTC
  Add code to read the virtual address width for AMD processors.

Signed-off-by: Michael Piszczek <mpiszczek@ddn.com>
---
 drivers/bus/pci/linux/pci.c | 43 +++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
  

Comments

David Marchand Oct. 3, 2022, 7:48 a.m. UTC | #1
On Thu, Sep 15, 2022 at 8:40 AM Michael Piszczek <mpiszczek@ddn.com> wrote:
>
> Add code to read the virtual address width for AMD processors.
>
> Signed-off-by: Michael Piszczek <mpiszczek@ddn.com>
> ---
>  drivers/bus/pci/linux/pci.c | 43 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 43 insertions(+)
>
> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> index e521459870..0c6d79ca74 100644
> --- a/drivers/bus/pci/linux/pci.c
> +++ b/drivers/bus/pci/linux/pci.c
> @@ -4,6 +4,7 @@
>
>  #include <string.h>
>  #include <dirent.h>
> +#include <sys/stat.h>
>
>  #include <rte_log.h>
>  #include <rte_bus.h>
> @@ -492,6 +493,38 @@ rte_pci_scan(void)
>  }
>
>  #if defined(RTE_ARCH_X86)
> +
> +static uint64_t
> +pci_device_amd_iommu_support_va(const struct rte_pci_addr *addr)
> +{
> +#define RD_AMD_CAP_VASIZE_SHIFT 15
> +#define RD_AMD_CAP_VASIZE_MASK (0x7F << RD_AMD_CAP_VASIZE_SHIFT)
> +
> +       char filename[PATH_MAX];
> +       FILE *fp;
> +       uint64_t amd_cap_reg = 0;
> +
> +       snprintf(filename, sizeof(filename),
> +               "%s/" PCI_PRI_FMT "/iommu/amd-iommu/cap",
> +               rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
> +               addr->function);
> +
> +       fp = fopen(filename, "r");
> +       if (fp == NULL)
> +               return 0;
> +
> +       /* We have an Amd IOMMU */
> +       if (fscanf(fp, "%" PRIx64, &amd_cap_reg) != 1) {
> +               RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename);
> +               fclose(fp);
> +               return 0;
> +       }
> +
> +       fclose(fp);
> +
> +       return ((amd_cap_reg & RD_AMD_CAP_VASIZE_MASK) >> RD_AMD_CAP_VASIZE_SHIFT) + 1;
> +}
> +
>  bool
>  pci_device_iommu_support_va(const struct rte_pci_device *dev)
>  {
> @@ -501,6 +534,16 @@ pci_device_iommu_support_va(const struct rte_pci_device *dev)
>         char filename[PATH_MAX];
>         FILE *fp;
>         uint64_t mgaw, vtd_cap_reg = 0;
> +       struct stat s;
> +
> +       if (stat("/sys/class/iommu/ivhd2/amd-iommu", &s) == 0) {
> +               mgaw = pci_device_amd_iommu_support_va(addr);
> +               if (mgaw > 0) {
> +                       rte_mem_set_dma_mask(mgaw);
> +                       return true;
> +               }
> +               return false;
> +       }
>
>         snprintf(filename, sizeof(filename),
>                  "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",
> --
> 2.34.1
>

Sorry, I picked you guys with @amd.com mail addresses.
If there is someone with knowledge of this piece of AMD hw available,
can this patch be reviewed?
Thanks.
  
Vipin Varghese Oct. 10, 2022, 1:12 p.m. UTC | #2
[Public]

Hi Michael,

Thank you for looking into the change, please find my comments shared below

<snipped>

> >
> > Add code to read the virtual address width for AMD processors.
> >
> > Signed-off-by: Michael Piszczek <mpiszczek@ddn.com>
> > ---
> >  drivers/bus/pci/linux/pci.c | 43
> > +++++++++++++++++++++++++++++++++++++
> >  1 file changed, 43 insertions(+)
> >
> > diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> > index e521459870..0c6d79ca74 100644
> > --- a/drivers/bus/pci/linux/pci.c
> > +++ b/drivers/bus/pci/linux/pci.c
> > @@ -4,6 +4,7 @@
> >
> >  #include <string.h>
> >  #include <dirent.h>
> > +#include <sys/stat.h>
> >
> >  #include <rte_log.h>
> >  #include <rte_bus.h>
> > @@ -492,6 +493,38 @@ rte_pci_scan(void)  }
> >
> >  #if defined(RTE_ARCH_X86)
> > +
> > +static uint64_t
> > +pci_device_amd_iommu_support_va(const struct rte_pci_addr *addr) {
> > +#define RD_AMD_CAP_VASIZE_SHIFT 15 #define
> RD_AMD_CAP_VASIZE_MASK
> > +(0x7F << RD_AMD_CAP_VASIZE_SHIFT)
> > +
> > +       char filename[PATH_MAX];
> > +       FILE *fp;
> > +       uint64_t amd_cap_reg = 0;
> > +
> > +       snprintf(filename, sizeof(filename),
> > +               "%s/" PCI_PRI_FMT "/iommu/amd-iommu/cap",
> > +               rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
> > +               addr->function);
> > +
> > +       fp = fopen(filename, "r");
> > +       if (fp == NULL)
> > +               return 0;
> > +
> > +       /* We have an Amd IOMMU */
> > +       if (fscanf(fp, "%" PRIx64, &amd_cap_reg) != 1) {
> > +               RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename);
> > +               fclose(fp);
> > +               return 0;
> > +       }
> > +
> > +       fclose(fp);
> > +
> > +       return ((amd_cap_reg & RD_AMD_CAP_VASIZE_MASK) >>
> > +RD_AMD_CAP_VASIZE_SHIFT) + 1; }
> > +
> >  bool
> >  pci_device_iommu_support_va(const struct rte_pci_device *dev)  { @@
> > -501,6 +534,16 @@ pci_device_iommu_support_va(const struct
> rte_pci_device *dev)
> >         char filename[PATH_MAX];
> >         FILE *fp;
> >         uint64_t mgaw, vtd_cap_reg = 0;
> > +       struct stat s;
> > +
> > +       if (stat("/sys/class/iommu/ivhd2/amd-iommu", &s) == 0) {

AMD EPYC can be configured into various Logical NUMA divisions for both 1P and 2P (sockets) for Naples, Rome and Milan.  This leads various combinations as 1, 2, 4 for a single socket. Hence hard coding the check for ` ivhd2` could fail even for a valid NPS configuration.

> > +               mgaw = pci_device_amd_iommu_support_va(addr);

Instead of checking the device availability, one can exercise the iommu capability directly. For example

```
# ls /sys/bus/pci/devices/0000\:41\:00.0/iommu/amd-iommu/cap -l
-r--r--r-- 1 root root 4096 Oct 10 06:10 /sys/bus/pci/devices/0000:41:00.0/iommu/amd-iommu/cap
```

> > +               if (mgaw > 0) {
> > +                       rte_mem_set_dma_mask(mgaw);
> > +                       return true;
> > +               }
> > +               return false;
> > +       }
> >
> >         snprintf(filename, sizeof(filename),
> >                  "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",

Hence my suggestion would be retaining the same code flow as original function with added check for `amd-iommu` along with `intel-iommu` for x86 devices. The additional function call ` pci_device_amd_iommu_support_va` can be avoided.

> > --
> > 2.34.1
> >
> 
> Sorry, I picked you guys with @amd.com mail addresses.
> If there is someone with knowledge of this piece of AMD hw available, can
> this patch be reviewed?
> Thanks.
> 
> 
> --
> David Marchand
  

Patch

diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index e521459870..0c6d79ca74 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -4,6 +4,7 @@ 
 
 #include <string.h>
 #include <dirent.h>
+#include <sys/stat.h>
 
 #include <rte_log.h>
 #include <rte_bus.h>
@@ -492,6 +493,38 @@  rte_pci_scan(void)
 }
 
 #if defined(RTE_ARCH_X86)
+
+static uint64_t
+pci_device_amd_iommu_support_va(const struct rte_pci_addr *addr)
+{
+#define RD_AMD_CAP_VASIZE_SHIFT 15
+#define RD_AMD_CAP_VASIZE_MASK (0x7F << RD_AMD_CAP_VASIZE_SHIFT)
+
+	char filename[PATH_MAX];
+	FILE *fp;
+	uint64_t amd_cap_reg = 0;
+
+	snprintf(filename, sizeof(filename),
+		"%s/" PCI_PRI_FMT "/iommu/amd-iommu/cap",
+		rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
+		addr->function);
+
+	fp = fopen(filename, "r");
+	if (fp == NULL)
+		return 0;
+
+	/* We have an Amd IOMMU */
+	if (fscanf(fp, "%" PRIx64, &amd_cap_reg) != 1) {
+		RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename);
+		fclose(fp);
+		return 0;
+	}
+
+	fclose(fp);
+
+	return ((amd_cap_reg & RD_AMD_CAP_VASIZE_MASK) >> RD_AMD_CAP_VASIZE_SHIFT) + 1;
+}
+
 bool
 pci_device_iommu_support_va(const struct rte_pci_device *dev)
 {
@@ -501,6 +534,16 @@  pci_device_iommu_support_va(const struct rte_pci_device *dev)
 	char filename[PATH_MAX];
 	FILE *fp;
 	uint64_t mgaw, vtd_cap_reg = 0;
+	struct stat s;
+
+	if (stat("/sys/class/iommu/ivhd2/amd-iommu", &s) == 0) {
+		mgaw = pci_device_amd_iommu_support_va(addr);
+		if (mgaw > 0) {
+			rte_mem_set_dma_mask(mgaw);
+			return true;
+		}
+		return false;
+	}
 
 	snprintf(filename, sizeof(filename),
 		 "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",