pci: support both PIO and MMIO BAR for legacy virtio on x86

Message ID 19840b6a-b08f-4877-a530-15ced259bedf.huawei.xhw@alibaba-inc.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series pci: support both PIO and MMIO BAR for legacy virtio on x86 |

Checks

Context Check Description
ci/travis-robot success Travis build: passed
ci/Intel-compilation fail apply issues

Commit Message

谢华伟(此时此刻) Sept. 15, 2020, 8:18 a.m. UTC
From d0138f24037d8df14cac04c2c24831e4b5d27b8c Mon Sep 17 00:00:00 2001
From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
Date: Mon, 14 Sep 2020 23:44:56 +0800
Subject: [PATCH] pci:  support both PIO and MMIO BAR for legacy virtio on x86

In previous implementation, with igb_uio we get PIO address from igb_uio
sysfs entry; with uio_pci_generic, we get PIO address from
/proc/ioports.
For PIO/MMIO RW, there is different path for different drivers and arch.
For VFIO, PIO/MMIO RW is through syscall, which has big performance
issue.
On X86, it assumes only PIO is supported.

This is too much twisted.
This patch unifies the way to get both PIO and MMIO address for different driver
and arch, all from standard resource attr under pci sysfs.

We distinguish PIO and MMIO by their address. It is ugly but works.

Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
---
 drivers/bus/pci/linux/pci.c     |  89 +---------------------
 drivers/bus/pci/linux/pci_uio.c | 163 +++++++++++++++++++++++++++-------------
 2 files changed, 116 insertions(+), 136 deletions(-)
  

Comments

谢华伟(此时此刻) Sept. 15, 2020, 3:21 p.m. UTC | #1
Hi Ferruh:
Legacy virtio only supports PIO BAR resource. As we need to create lots 
of virtio devices and PIO resource on x86 is very limited, we expose 
MMIO BAR.
Kernel support both PIO and MMIO BAR for legacy virtio device. This 
patch deals with two cases in the similar way.

On 2020/9/15 16:18, 谢华伟(此时此刻) wrote:
>  From d0138f24037d8df14cac04c2c24831e4b5d27b8c Mon Sep 17 00:00:00 2001
> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
> Date: Mon, 14 Sep 2020 23:44:56 +0800
> Subject: [PATCH] pci:  support both PIO and MMIO BAR for legacy virtio on x86
>
> In previous implementation, with igb_uio we get PIO address from igb_uio
> sysfs entry; with uio_pci_generic, we get PIO address from
> /proc/ioports.
> For PIO/MMIO RW, there is different path for different drivers and arch.
> For VFIO, PIO/MMIO RW is through syscall, which has big performance
> issue.
> On X86, it assumes only PIO is supported.
>
> This is too much twisted.
> This patch unifies the way to get both PIO and MMIO address for different driver
> and arch, all from standard resource attr under pci sysfs.
>
> We distinguish PIO and MMIO by their address. It is ugly but works.
>
> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
> ---
>   drivers/bus/pci/linux/pci.c     |  89 +---------------------
>   drivers/bus/pci/linux/pci_uio.c | 163 +++++++++++++++++++++++++++-------------
>   2 files changed, 116 insertions(+), 136 deletions(-)
>
> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> index a2198ab..11cc7df 100644
> --- a/drivers/bus/pci/linux/pci.c
> +++ b/drivers/bus/pci/linux/pci.c
> @@ -687,71 +687,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>   	}
>   }
>   
> -#if defined(RTE_ARCH_X86)
> -static int
> -pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
> -		struct rte_pci_ioport *p)
> -{
> -	uint16_t start, end;
> -	FILE *fp;
> -	char *line = NULL;
> -	char pci_id[16];
> -	int found = 0;
> -	size_t linesz;
> -
> -	if (rte_eal_iopl_init() != 0) {
> -		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
> -			__func__, dev->name);
> -		return -1;
> -	}
> -
> -	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
> -		 dev->addr.domain, dev->addr.bus,
> -		 dev->addr.devid, dev->addr.function);
> -
> -	fp = fopen("/proc/ioports", "r");
> -	if (fp == NULL) {
> -		RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
> -		return -1;
> -	}
> -
> -	while (getdelim(&line, &linesz, '\n', fp) > 0) {
> -		char *ptr = line;
> -		char *left;
> -		int n;
> -
> -		n = strcspn(ptr, ":");
> -		ptr[n] = 0;
> -		left = &ptr[n + 1];
> -
> -		while (*left && isspace(*left))
> -			left++;
> -
> -		if (!strncmp(left, pci_id, strlen(pci_id))) {
> -			found = 1;
> -
> -			while (*ptr && isspace(*ptr))
> -				ptr++;
> -
> -			sscanf(ptr, "%04hx-%04hx", &start, &end);
> -
> -			break;
> -		}
> -	}
> -
> -	free(line);
> -	fclose(fp);
> -
> -	if (!found)
> -		return -1;
> -
> -	p->base = start;
> -	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
> -
> -	return 0;
> -}
> -#endif
> -
>   int
>   rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
>   		struct rte_pci_ioport *p)
> @@ -762,18 +697,12 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>   #ifdef VFIO_PRESENT
>   	case RTE_KDRV_VFIO:
>   		if (pci_vfio_is_enabled())
> -			ret = pci_vfio_ioport_map(dev, bar, p);
> +			ret = pci_uio_ioport_map(dev, bar, p);
>   		break;
>   #endif
>   	case RTE_KDRV_IGB_UIO:
> -		ret = pci_uio_ioport_map(dev, bar, p);
> -		break;
>   	case RTE_KDRV_UIO_GENERIC:
> -#if defined(RTE_ARCH_X86)
> -		ret = pci_ioport_map(dev, bar, p);
> -#else
>   		ret = pci_uio_ioport_map(dev, bar, p);
> -#endif
>   		break;
>   	default:
>   		break;
> @@ -792,12 +721,10 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>   	switch (p->dev->kdrv) {
>   #ifdef VFIO_PRESENT
>   	case RTE_KDRV_VFIO:
> -		pci_vfio_ioport_read(p, data, len, offset);
> +		pci_uio_ioport_read(p, data, len, offset);
>   		break;
>   #endif
>   	case RTE_KDRV_IGB_UIO:
> -		pci_uio_ioport_read(p, data, len, offset);
> -		break;
>   	case RTE_KDRV_UIO_GENERIC:
>   		pci_uio_ioport_read(p, data, len, offset);
>   		break;
> @@ -813,12 +740,10 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>   	switch (p->dev->kdrv) {
>   #ifdef VFIO_PRESENT
>   	case RTE_KDRV_VFIO:
> -		pci_vfio_ioport_write(p, data, len, offset);
> +		pci_uio_ioport_write(p, data, len, offset);
>   		break;
>   #endif
>   	case RTE_KDRV_IGB_UIO:
> -		pci_uio_ioport_write(p, data, len, offset);
> -		break;
>   	case RTE_KDRV_UIO_GENERIC:
>   		pci_uio_ioport_write(p, data, len, offset);
>   		break;
> @@ -836,18 +761,12 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>   #ifdef VFIO_PRESENT
>   	case RTE_KDRV_VFIO:
>   		if (pci_vfio_is_enabled())
> -			ret = pci_vfio_ioport_unmap(p);
> +			ret = pci_uio_ioport_unmap(p);
>   		break;
>   #endif
>   	case RTE_KDRV_IGB_UIO:
> -		ret = pci_uio_ioport_unmap(p);
> -		break;
>   	case RTE_KDRV_UIO_GENERIC:
> -#if defined(RTE_ARCH_X86)
> -		ret = 0;
> -#else
>   		ret = pci_uio_ioport_unmap(p);
> -#endif
>   		break;
>   	default:
>   		break;
> diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
> index 097dc19..83374e8 100644
> --- a/drivers/bus/pci/linux/pci_uio.c
> +++ b/drivers/bus/pci/linux/pci_uio.c
> @@ -372,52 +372,82 @@
>   pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
>   		   struct rte_pci_ioport *p)
>   {
> +	FILE *f = NULL;
>   	char dirname[PATH_MAX];
>   	char filename[PATH_MAX];
> +	char buf[BUFSIZ];
> +	int64_t phys_addr, end_addr, flags;
>   	int uio_num;
> -	unsigned long start;
> +	unsigned long base;
> +	bool iobar;
> +	int i;
>   
> -	if (rte_eal_iopl_init() != 0) {
> -		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
> -			__func__, dev->name);
> +	/* open and read addresses of the corresponding resource in sysfs */
> +	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
> +		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
> +		dev->addr.devid, dev->addr.function);
> +	f = fopen(filename, "r");
> +	if (f == NULL) {
> +		RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
> +			strerror(errno));
>   		return -1;
>   	}
>   
> -	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
> -	if (uio_num < 0)
> -		return -1;
> +	for (i = 0; i < bar + 1; i++) {
> +		if (fgets(buf, sizeof(buf), f) == NULL) {
> +			RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
> +			goto error;
> +		}
> +	}
> +	if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
> +		&end_addr, &flags) < 0)
> +		goto error;
>   
> -	/* get portio start */
> -	snprintf(filename, sizeof(filename),
> -		 "%s/portio/port%d/start", dirname, bar);
> -	if (eal_parse_sysfs_value(filename, &start) < 0) {
> -		RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
> -			__func__);
> -		return -1;
> +	if (flags & IORESOURCE_IO) {
> +		iobar = 1;
> +		base = (unsigned long)phys_addr;
> +		RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
> +	} else if (flags & IORESOURCE_MEM) {
> +		iobar = 0;
> +		base = (unsigned long)dev->mem_resource[bar].addr;
> +		RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n", __func__, base);
> +	} else {
> +		RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
> +		goto error;
> +	}
> +
> +	if (iobar && rte_eal_iopl_init() != 0) {
> +		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
> +			__func__, dev->name);
> +		goto error;
>   	}
> -	/* ensure we don't get anything funny here, read/write will cast to
> -	 * uin16_t */
> -	if (start > UINT16_MAX)
> -		return -1;
>   
>   	/* FIXME only for primary process ? */
>   	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
> +		uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
> +		if (uio_num < 0)
> +			goto error;
>   
>   		snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
>   		dev->intr_handle.fd = open(filename, O_RDWR);
>   		if (dev->intr_handle.fd < 0) {
>   			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
>   				filename, strerror(errno));
> -			return -1;
> +			goto error;
>   		}
>   		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
>   	}
>   
> -	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
> +	RTE_LOG(DEBUG, EAL, "PCI IO port found start=0x%lx\n", base);
>   
> -	p->base = start;
> +	p->base = base;
>   	p->len = 0;
> +	fclose(f);
>   	return 0;
> +error:
> +	if (f)
> +		fclose(f);
> +	return -1;
>   }
>   #else
>   int
> @@ -488,6 +518,61 @@
>   }
>   #endif
>   
> +#define PIO_MAX 0x10000
> +static inline uint8_t ioread8(void *addr)
> +{
> +	uint8_t val;
> +
> +	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint8_t *)addr :
> +		inb((unsigned long)addr);
> +
> +	return val;
> +}
> +
> +static inline uint16_t ioread16(void *addr)
> +{
> +	uint16_t val;
> +
> +	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint16_t *)addr :
> +		inw((unsigned long)addr);
> +
> +	return val;
> +}
> +
> +static inline uint32_t ioread32(void *addr)
> +{
> +	uint32_t val;
> +
> +	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint32_t *)addr :
> +	inl((unsigned long)addr);
> +
> +	return val;
> +}
> +
> +static inline void iowrite8(uint8_t val, void *addr)
> +{
> +	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint8_t *)addr = val :
> +		outb(val, (unsigned long)addr);
> +}
> +
> +static inline void iowrite16(uint16_t val, void *addr)
> +{
> +	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint16_t *)addr = val :
> +		outw(val, (unsigned long)addr);
> +}
> +
> +static inline void iowrite32(uint32_t val, void *addr)
> +{
> +	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint32_t *)addr = val :
> +		outl(val, (unsigned long)addr);
> +}
> +
>   void
>   pci_uio_ioport_read(struct rte_pci_ioport *p,
>   		    void *data, size_t len, off_t offset)
> @@ -499,25 +584,13 @@
>   	for (d = data; len > 0; d += size, reg += size, len -= size) {
>   		if (len >= 4) {
>   			size = 4;
> -#if defined(RTE_ARCH_X86)
> -			*(uint32_t *)d = inl(reg);
> -#else
> -			*(uint32_t *)d = *(volatile uint32_t *)reg;
> -#endif
> +			*(uint32_t *)d = ioread32((void *)reg);
>   		} else if (len >= 2) {
>   			size = 2;
> -#if defined(RTE_ARCH_X86)
> -			*(uint16_t *)d = inw(reg);
> -#else
> -			*(uint16_t *)d = *(volatile uint16_t *)reg;
> -#endif
> +			*(uint16_t *)d = ioread16((void *)reg);
>   		} else {
>   			size = 1;
> -#if defined(RTE_ARCH_X86)
> -			*d = inb(reg);
> -#else
> -			*d = *(volatile uint8_t *)reg;
> -#endif
> +			*d = ioread8((void *)reg);
>   		}
>   	}
>   }
> @@ -533,25 +606,13 @@
>   	for (s = data; len > 0; s += size, reg += size, len -= size) {
>   		if (len >= 4) {
>   			size = 4;
> -#if defined(RTE_ARCH_X86)
> -			outl_p(*(const uint32_t *)s, reg);
> -#else
> -			*(volatile uint32_t *)reg = *(const uint32_t *)s;
> -#endif
> +			iowrite32(*(const uint32_t *)s, (void *)reg);
>   		} else if (len >= 2) {
>   			size = 2;
> -#if defined(RTE_ARCH_X86)
> -			outw_p(*(const uint16_t *)s, reg);
> -#else
> -			*(volatile uint16_t *)reg = *(const uint16_t *)s;
> -#endif
> +			iowrite16(*(const uint16_t *)s, (void *)reg);
>   		} else {
>   			size = 1;
> -#if defined(RTE_ARCH_X86)
> -			outb_p(*s, reg);
> -#else
> -			*(volatile uint8_t *)reg = *s;
> -#endif
> +			iowrite8(*s, (void *)reg);
>   		}
>   	}
>   }
  
Ferruh Yigit Sept. 24, 2020, 6:02 p.m. UTC | #2
On 9/15/2020 4:21 PM, 谢华伟(此时此刻) wrote:
> Hi Ferruh:
> Legacy virtio only supports PIO BAR resource. As we need to create lots 
> of virtio devices and PIO resource on x86 is very limited, we expose 
> MMIO BAR.
> Kernel support both PIO and MMIO BAR for legacy virtio device. This 
> patch deals with two cases in the similar way.
> 

Thanks Huawei,

I was about to ask the actual motivation for the patch, better to put 
this explenation into the commit log.

If legacy virtio device only supports PIO BAR, how exposing MMIO BAR 
helps? Or if there is MMIO BAR available in the device, why not mapping 
them instead of using PIO?
Most probably I am missing something but can you please help to understand.

Also below code unifies vfio & uio cases for PIO into same function. Are 
you using igb_uio or both? I am not sure about unifiying the vfio case, 
what do you think not touching that case in this patch?

There are a few more comments inline, please check.

Btw, the patch doesn't apply cleanly, can you please send a new version 
on top of latest head of repo?



> On 2020/9/15 16:18, 谢华伟(此时此刻) wrote:
>>  From d0138f24037d8df14cac04c2c24831e4b5d27b8c Mon Sep 17 00:00:00 2001
>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>> Date: Mon, 14 Sep 2020 23:44:56 +0800
>> Subject: [PATCH] pci:  support both PIO and MMIO BAR for legacy virtio 
>> on x86
>>
>> In previous implementation, with igb_uio we get PIO address from igb_uio
>> sysfs entry; with uio_pci_generic, we get PIO address from
>> /proc/ioports.
>> For PIO/MMIO RW, there is different path for different drivers and arch.
>> For VFIO, PIO/MMIO RW is through syscall, which has big performance
>> issue.
>> On X86, it assumes only PIO is supported.
>>
>> This is too much twisted.
>> This patch unifies the way to get both PIO and MMIO address for 
>> different driver
>> and arch, all from standard resource attr under pci sysfs.
>>
>> We distinguish PIO and MMIO by their address. It is ugly but works.
>>
>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>

<...>

>> --- a/drivers/bus/pci/linux/pci_uio.c
>> +++ b/drivers/bus/pci/linux/pci_uio.c
>> @@ -372,52 +372,82 @@
>>   pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
>>              struct rte_pci_ioport *p)
>>   {

There are two 'pci_uio_ioport_map()', one for x86 and other for rest. 
This one is for x86, since read/write functions for multiple arch 
combined together, should non-x86 version of this function updated or 
removed completely?

<...>

>> +    if (flags & IORESOURCE_IO) {
>> +        iobar = 1;
>> +        base = (unsigned long)phys_addr;
>> +        RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", 
>> __func__, base);
>> +    } else if (flags & IORESOURCE_MEM) {
>> +        iobar = 0;
>> +        base = (unsigned long)dev->mem_resource[bar].addr;

Isn't the 'mem_resource[bar].addr' value set only when it is mapped? For 
this case I guess it will be 0x0. If not who maps it first?

>> +        RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n", 
>> __func__, base);
>> +    } else {
>> +        RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
>> +        goto error;
>> +    }
>> +
>> +    if (iobar && rte_eal_iopl_init() != 0) {
>> +        RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for 
>> PCI device %s\n",
>> +            __func__, dev->name);
>> +        goto error;
>>       }
>> -    /* ensure we don't get anything funny here, read/write will cast to
>> -     * uin16_t */
>> -    if (start > UINT16_MAX)
>> -        return -1;
>>       /* FIXME only for primary process ? */
>>       if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
>> +        uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
>> +        if (uio_num < 0)
>> +            goto error;

How this will work with vfio?
  
谢华伟(此时此刻) Sept. 27, 2020, 12:03 p.m. UTC | #3
On 2020/9/25 2:02, Ferruh Yigit wrote:
> On 9/15/2020 4:21 PM, 谢华伟(此时此刻) wrote:
>> Hi Ferruh:
>> Legacy virtio only supports PIO BAR resource. As we need to create 
>> lots of virtio devices and PIO resource on x86 is very limited, we 
>> expose MMIO BAR.
>> Kernel support both PIO and MMIO BAR for legacy virtio device. This 
>> patch deals with two cases in the similar way.
>>
>
> Thanks Huawei,
>
> I was about to ask the actual motivation for the patch, better to put 
> this explenation into the commit log.
>

OK.


> If legacy virtio device only supports PIO BAR, how exposing MMIO BAR 
> helps? Or if there is MMIO BAR available in the device, why not 
> mapping them instead of using PIO?
> Most probably I am missing something but can you please help to 
> understand.
>
DPDK virtio-pmd for legacy device only looks for PIO resource, which 
doesn't work if we expose MMIO bar.

virtio-pci kernel driver doesn't have this requirement. It gets IO 
address either from PIO port or mapped bar. It uses address range to 
distinguish PIO bar and MMIO bar.

That is what this patch fixes.


> Also below code unifies vfio & uio cases for PIO into same function. 
> Are you using igb_uio or both? I am not sure about unifiying the vfio 
> case, what do you think not touching that case in this patch?

All virtio PMD need is to get bar address from pci resouce attribute 
under sysfs like what kernel driver does. UIO/IGB_UIO/VFIO driver path 
for virtio bar mapping is too complicated.

Actually i think we shouldn't put those mapping functions into PCI 
layer, because they are only for virtio legacy device, not generic. I 
don't remember if it is me who put them there.

Besides, VFIO path uses vfio fd to read/write IO bar which has 
performance issue. Syscall is needed to notify virtio backend.


I see IGB_UIO is to be removed in this release.  UIO_PCI_GENERIC doesn't 
support msix. If we expose MMIO bar, we have to use VFIO driver before 
uio_pci_generic kernel driver is enhanced. So i have to fix vfio path.

>
> There are a few more comments inline, please check.
>
> Btw, the patch doesn't apply cleanly, can you please send a new 
> version on top of latest head of repo?
>
>
I will check if we recently applied some pci related patches.

>
>> On 2020/9/15 16:18, 谢华伟(此时此刻) wrote:
>>>  From d0138f24037d8df14cac04c2c24831e4b5d27b8c Mon Sep 17 00:00:00 2001
>>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>> Date: Mon, 14 Sep 2020 23:44:56 +0800
>>> Subject: [PATCH] pci:  support both PIO and MMIO BAR for legacy 
>>> virtio on x86
>>>
>>> In previous implementation, with igb_uio we get PIO address from 
>>> igb_uio
>>> sysfs entry; with uio_pci_generic, we get PIO address from
>>> /proc/ioports.
>>> For PIO/MMIO RW, there is different path for different drivers and 
>>> arch.
>>> For VFIO, PIO/MMIO RW is through syscall, which has big performance
>>> issue.
>>> On X86, it assumes only PIO is supported.
>>>
>>> This is too much twisted.
>>> This patch unifies the way to get both PIO and MMIO address for 
>>> different driver
>>> and arch, all from standard resource attr under pci sysfs.
>>>
>>> We distinguish PIO and MMIO by their address. It is ugly but works.
>>>
>>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
>
> <...>
>
>>> --- a/drivers/bus/pci/linux/pci_uio.c
>>> +++ b/drivers/bus/pci/linux/pci_uio.c
>>> @@ -372,52 +372,82 @@
>>>   pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
>>>              struct rte_pci_ioport *p)
>>>   {
>
> There are two 'pci_uio_ioport_map()', one for x86 and other for rest. 
> This one is for x86, since read/write functions for multiple arch 
> combined together, should non-x86 version of this function updated or 
> removed completely?
>
non x86 version has weird behavior. It requires bar with  IO attribute 
but does the mmap. Besides, it adds mmap address with physical address.

Fortunately, all other devices on non x86 platform seems to use MMIO bar.
> <...>
>
>>> +    if (flags & IORESOURCE_IO) {
>>> +        iobar = 1;
>>> +        base = (unsigned long)phys_addr;
>>> +        RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", 
>>> __func__, base);
>>> +    } else if (flags & IORESOURCE_MEM) {
>>> +        iobar = 0;
>>> +        base = (unsigned long)dev->mem_resource[bar].addr;
>
> Isn't the 'mem_resource[bar].addr' value set only when it is mapped? 
> For this case I guess it will be 0x0. If not who maps it first?

Yes, it is set only when it is MMIO bar which gets mapped. So when it is 
MMIO bar, we use the already maped address in else path.

>
>>> +        RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n", 
>>> __func__, base);
>>> +    } else {
>>> +        RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
>>> +        goto error;
>>> +    }
>>> +
>>> +    if (iobar && rte_eal_iopl_init() != 0) {
>>> +        RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions 
>>> for PCI device %s\n",
>>> +            __func__, dev->name);
>>> +        goto error;
>>>       }
>>> -    /* ensure we don't get anything funny here, read/write will 
>>> cast to
>>> -     * uin16_t */
>>> -    if (start > UINT16_MAX)
>>> -        return -1;
>>>       /* FIXME only for primary process ? */
>>>       if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
>>> +        uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
>>> +        if (uio_num < 0)
>>> +            goto error;
>
> How this will work with vfio?

In original implementation, only uio fixes intr handle. I could add UIO 
driver check to keep the same behavior.
  

Patch

diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index a2198ab..11cc7df 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -687,71 +687,6 @@  int rte_pci_write_config(const struct rte_pci_device *device,
 	}
 }
 
-#if defined(RTE_ARCH_X86)
-static int
-pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
-		struct rte_pci_ioport *p)
-{
-	uint16_t start, end;
-	FILE *fp;
-	char *line = NULL;
-	char pci_id[16];
-	int found = 0;
-	size_t linesz;
-
-	if (rte_eal_iopl_init() != 0) {
-		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
-			__func__, dev->name);
-		return -1;
-	}
-
-	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
-		 dev->addr.domain, dev->addr.bus,
-		 dev->addr.devid, dev->addr.function);
-
-	fp = fopen("/proc/ioports", "r");
-	if (fp == NULL) {
-		RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
-		return -1;
-	}
-
-	while (getdelim(&line, &linesz, '\n', fp) > 0) {
-		char *ptr = line;
-		char *left;
-		int n;
-
-		n = strcspn(ptr, ":");
-		ptr[n] = 0;
-		left = &ptr[n + 1];
-
-		while (*left && isspace(*left))
-			left++;
-
-		if (!strncmp(left, pci_id, strlen(pci_id))) {
-			found = 1;
-
-			while (*ptr && isspace(*ptr))
-				ptr++;
-
-			sscanf(ptr, "%04hx-%04hx", &start, &end);
-
-			break;
-		}
-	}
-
-	free(line);
-	fclose(fp);
-
-	if (!found)
-		return -1;
-
-	p->base = start;
-	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
-
-	return 0;
-}
-#endif
-
 int
 rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
 		struct rte_pci_ioport *p)
@@ -762,18 +697,12 @@  int rte_pci_write_config(const struct rte_pci_device *device,
 #ifdef VFIO_PRESENT
 	case RTE_KDRV_VFIO:
 		if (pci_vfio_is_enabled())
-			ret = pci_vfio_ioport_map(dev, bar, p);
+			ret = pci_uio_ioport_map(dev, bar, p);
 		break;
 #endif
 	case RTE_KDRV_IGB_UIO:
-		ret = pci_uio_ioport_map(dev, bar, p);
-		break;
 	case RTE_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
-		ret = pci_ioport_map(dev, bar, p);
-#else
 		ret = pci_uio_ioport_map(dev, bar, p);
-#endif
 		break;
 	default:
 		break;
@@ -792,12 +721,10 @@  int rte_pci_write_config(const struct rte_pci_device *device,
 	switch (p->dev->kdrv) {
 #ifdef VFIO_PRESENT
 	case RTE_KDRV_VFIO:
-		pci_vfio_ioport_read(p, data, len, offset);
+		pci_uio_ioport_read(p, data, len, offset);
 		break;
 #endif
 	case RTE_KDRV_IGB_UIO:
-		pci_uio_ioport_read(p, data, len, offset);
-		break;
 	case RTE_KDRV_UIO_GENERIC:
 		pci_uio_ioport_read(p, data, len, offset);
 		break;
@@ -813,12 +740,10 @@  int rte_pci_write_config(const struct rte_pci_device *device,
 	switch (p->dev->kdrv) {
 #ifdef VFIO_PRESENT
 	case RTE_KDRV_VFIO:
-		pci_vfio_ioport_write(p, data, len, offset);
+		pci_uio_ioport_write(p, data, len, offset);
 		break;
 #endif
 	case RTE_KDRV_IGB_UIO:
-		pci_uio_ioport_write(p, data, len, offset);
-		break;
 	case RTE_KDRV_UIO_GENERIC:
 		pci_uio_ioport_write(p, data, len, offset);
 		break;
@@ -836,18 +761,12 @@  int rte_pci_write_config(const struct rte_pci_device *device,
 #ifdef VFIO_PRESENT
 	case RTE_KDRV_VFIO:
 		if (pci_vfio_is_enabled())
-			ret = pci_vfio_ioport_unmap(p);
+			ret = pci_uio_ioport_unmap(p);
 		break;
 #endif
 	case RTE_KDRV_IGB_UIO:
-		ret = pci_uio_ioport_unmap(p);
-		break;
 	case RTE_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
-		ret = 0;
-#else
 		ret = pci_uio_ioport_unmap(p);
-#endif
 		break;
 	default:
 		break;
diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index 097dc19..83374e8 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -372,52 +372,82 @@ 
 pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
 		   struct rte_pci_ioport *p)
 {
+	FILE *f = NULL;
 	char dirname[PATH_MAX];
 	char filename[PATH_MAX];
+	char buf[BUFSIZ];
+	int64_t phys_addr, end_addr, flags;
 	int uio_num;
-	unsigned long start;
+	unsigned long base;
+	bool iobar;
+	int i;
 
-	if (rte_eal_iopl_init() != 0) {
-		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
-			__func__, dev->name);
+	/* open and read addresses of the corresponding resource in sysfs */
+	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
+		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
+		dev->addr.devid, dev->addr.function);
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
+			strerror(errno));
 		return -1;
 	}
 
-	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
-	if (uio_num < 0)
-		return -1;
+	for (i = 0; i < bar + 1; i++) {
+		if (fgets(buf, sizeof(buf), f) == NULL) {
+			RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
+			goto error;
+		}
+	}
+	if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
+		&end_addr, &flags) < 0)
+		goto error;
 
-	/* get portio start */
-	snprintf(filename, sizeof(filename),
-		 "%s/portio/port%d/start", dirname, bar);
-	if (eal_parse_sysfs_value(filename, &start) < 0) {
-		RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
-			__func__);
-		return -1;
+	if (flags & IORESOURCE_IO) {
+		iobar = 1;
+		base = (unsigned long)phys_addr;
+		RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
+	} else if (flags & IORESOURCE_MEM) {
+		iobar = 0;
+		base = (unsigned long)dev->mem_resource[bar].addr;
+		RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n", __func__, base);
+	} else {
+		RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
+		goto error;
+	}
+
+	if (iobar && rte_eal_iopl_init() != 0) {
+		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
+			__func__, dev->name);
+		goto error;
 	}
-	/* ensure we don't get anything funny here, read/write will cast to
-	 * uin16_t */
-	if (start > UINT16_MAX)
-		return -1;
 
 	/* FIXME only for primary process ? */
 	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
+		uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
+		if (uio_num < 0)
+			goto error;
 
 		snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
 		dev->intr_handle.fd = open(filename, O_RDWR);
 		if (dev->intr_handle.fd < 0) {
 			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
 				filename, strerror(errno));
-			return -1;
+			goto error;
 		}
 		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
 	}
 
-	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
+	RTE_LOG(DEBUG, EAL, "PCI IO port found start=0x%lx\n", base);
 
-	p->base = start;
+	p->base = base;
 	p->len = 0;
+	fclose(f);
 	return 0;
+error:
+	if (f)
+		fclose(f);
+	return -1;
 }
 #else
 int
@@ -488,6 +518,61 @@ 
 }
 #endif
 
+#define PIO_MAX 0x10000
+static inline uint8_t ioread8(void *addr)
+{
+	uint8_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint8_t *)addr :
+		inb((unsigned long)addr);
+
+	return val;
+}
+
+static inline uint16_t ioread16(void *addr)
+{
+	uint16_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint16_t *)addr :
+		inw((unsigned long)addr);
+
+	return val;
+}
+
+static inline uint32_t ioread32(void *addr)
+{
+	uint32_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint32_t *)addr :
+	inl((unsigned long)addr);
+
+	return val;
+}
+
+static inline void iowrite8(uint8_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint8_t *)addr = val :
+		outb(val, (unsigned long)addr);
+}
+
+static inline void iowrite16(uint16_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint16_t *)addr = val :
+		outw(val, (unsigned long)addr);
+}
+
+static inline void iowrite32(uint32_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint32_t *)addr = val :
+		outl(val, (unsigned long)addr);
+}
+
 void
 pci_uio_ioport_read(struct rte_pci_ioport *p,
 		    void *data, size_t len, off_t offset)
@@ -499,25 +584,13 @@ 
 	for (d = data; len > 0; d += size, reg += size, len -= size) {
 		if (len >= 4) {
 			size = 4;
-#if defined(RTE_ARCH_X86)
-			*(uint32_t *)d = inl(reg);
-#else
-			*(uint32_t *)d = *(volatile uint32_t *)reg;
-#endif
+			*(uint32_t *)d = ioread32((void *)reg);
 		} else if (len >= 2) {
 			size = 2;
-#if defined(RTE_ARCH_X86)
-			*(uint16_t *)d = inw(reg);
-#else
-			*(uint16_t *)d = *(volatile uint16_t *)reg;
-#endif
+			*(uint16_t *)d = ioread16((void *)reg);
 		} else {
 			size = 1;
-#if defined(RTE_ARCH_X86)
-			*d = inb(reg);
-#else
-			*d = *(volatile uint8_t *)reg;
-#endif
+			*d = ioread8((void *)reg);
 		}
 	}
 }
@@ -533,25 +606,13 @@ 
 	for (s = data; len > 0; s += size, reg += size, len -= size) {
 		if (len >= 4) {
 			size = 4;
-#if defined(RTE_ARCH_X86)
-			outl_p(*(const uint32_t *)s, reg);
-#else
-			*(volatile uint32_t *)reg = *(const uint32_t *)s;
-#endif
+			iowrite32(*(const uint32_t *)s, (void *)reg);
 		} else if (len >= 2) {
 			size = 2;
-#if defined(RTE_ARCH_X86)
-			outw_p(*(const uint16_t *)s, reg);
-#else
-			*(volatile uint16_t *)reg = *(const uint16_t *)s;
-#endif
+			iowrite16(*(const uint16_t *)s, (void *)reg);
 		} else {
 			size = 1;
-#if defined(RTE_ARCH_X86)
-			outb_p(*s, reg);
-#else
-			*(volatile uint8_t *)reg = *s;
-#endif
+			iowrite8(*s, (void *)reg);
 		}
 	}
 }