[v11,2/2] bus/pci: support MMIO in PCI ioport accessors

Message ID 1615397790-16169-3-git-send-email-huawei.xhw@alibaba-inc.com (mailing list archive)
State Accepted, archived
Delegated to: David Marchand
Headers
Series support both PIO and MMIO BAR for legacy virito device |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS
ci/travis-robot success travis build: passed
ci/Intel-compilation success Compilation OK
ci/github-robot success github build: passed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-mellanox-Functional success Functional Testing PASS
ci/iol-testing success Testing PASS
ci/intel-Testing success Testing PASS

Commit Message

谢华伟(此时此刻) March 10, 2021, 5:36 p.m. UTC
With I/O BAR, we get PIO(port-mapped I/O) address.
With MMIO(memory-mapped I/O) BAR, we get mapped virtual address.
We distinguish PIO and MMIO by their address range like how kernel does,
i.e, address below 64K is PIO.
ioread/write8/16/32 is provided to access PIO/MMIO.
By the way, for virtio on arch other than x86, BAR flag indicates PIO
but is mapped.

Signed-off-by: huawei xie <huawei.xhw@alibaba-inc.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/bus/pci/linux/pci.c     |   4 --
 drivers/bus/pci/linux/pci_uio.c | 156 +++++++++++++++++++++++++++++-----------
 2 files changed, 113 insertions(+), 47 deletions(-)
  

Comments

Wang, Haiyue March 11, 2021, 6:42 a.m. UTC | #1
Hi Huawei,

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of ???(????)
> Sent: Thursday, March 11, 2021 01:37
> To: david.marchand@redhat.com; maxime.coquelin@redhat.com; Yigit, Ferruh <ferruh.yigit@intel.com>
> Cc: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>; xuemingl@nvidia.com; grive@u256.net;
> 谢华伟(此时此刻) <huawei.xhw@alibaba-inc.com>
> Subject: [dpdk-dev] [PATCH v11 2/2] bus/pci: support MMIO in PCI ioport accessors
> 
> With I/O BAR, we get PIO(port-mapped I/O) address.
> With MMIO(memory-mapped I/O) BAR, we get mapped virtual address.
> We distinguish PIO and MMIO by their address range like how kernel does,
> i.e, address below 64K is PIO.
> ioread/write8/16/32 is provided to access PIO/MMIO.
> By the way, for virtio on arch other than x86, BAR flag indicates PIO
> but is mapped.
> 
> Signed-off-by: huawei xie <huawei.xhw@alibaba-inc.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  drivers/bus/pci/linux/pci.c     |   4 --
>  drivers/bus/pci/linux/pci_uio.c | 156 +++++++++++++++++++++++++++++-----------
>  2 files changed, 113 insertions(+), 47 deletions(-)
> 


> 
> +#if defined(RTE_ARCH_X86)
> +static inline uint8_t ioread8(void *addr)
> +{
> +	uint8_t val;
> +
> +	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint8_t *)addr :
> +		inb_p((unsigned long)addr);
> +
> +	return val;
> +}
> +
> +static inline uint16_t ioread16(void *addr)
> +{
> +	uint16_t val;
> +
> +	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint16_t *)addr :
> +		inw_p((unsigned long)addr);
> +
> +	return val;
> +}
> +
> +static inline uint32_t ioread32(void *addr)
> +{
> +	uint32_t val;
> +
> +	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint32_t *)addr :
> +		inl_p((unsigned long)addr);
> +
> +	return val;
> +}
> +
> +static inline void iowrite8(uint8_t val, void *addr)
> +{
> +	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint8_t *)addr = val :
> +		outb_p(val, (unsigned long)addr);
> +}
> +
> +static inline void iowrite16(uint16_t val, void *addr)
> +{
> +	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint16_t *)addr = val :
> +		outw_p(val, (unsigned long)addr);
> +}
> +
> +static inline void iowrite32(uint32_t val, void *addr)
> +{
> +	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint32_t *)addr = val :
> +		outl_p(val, (unsigned long)addr);
> +}
> +#else
> +static inline uint8_t ioread8(void *addr)
> +{
> +	return *(volatile uint8_t *)addr;
> +}
> +
> +static inline uint16_t ioread16(void *addr)
> +{
> +	return *(volatile uint16_t *)addr;
> +}
> +
> +static inline uint32_t ioread32(void *addr)
> +{
> +	return *(volatile uint32_t *)addr;
> +}
> +
> +static inline void iowrite8(uint8_t val, void *addr)
> +{
> +	*(volatile uint8_t *)addr = val;
> +}
> +
> +static inline void iowrite16(uint16_t val, void *addr)
> +{
> +	*(volatile uint16_t *)addr = val;
> +}
> +
> +static inline void iowrite32(uint32_t val, void *addr)
> +{
> +	*(volatile uint32_t *)addr = val;
> +}
> +#endif
> +

Like kernel use macro to do pio and mmio, maybe we can also to do so for
making code clean:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/iomap.c

#define IO_COND(addr, is_pio, is_mmio) do {			\
	unsigned long port = (unsigned long __force)addr;	\
	if (port >= PIO_RESERVED) {				\
		is_mmio;					\
	} else if (port > PIO_OFFSET) {				\
		port &= PIO_MASK;				\
		is_pio;						\
	} else							\
		bad_io_access(port, #is_pio );			\
} while (0)


Like:

#if defined(RTE_ARCH_X86)
#define IO_COND(addr, is_pio, is_mmio) do {           \
	if ((uint64_t)(uintptr_t)addr >= PIO_MAX) {   \
		is_mmio;                              \
	} else {                                      \
		is_pio;                               \
	}                                             \
} while (0)
#else
#define IO_COND(addr, is_pio, is_mmio) do {           \
		is_mmio;                              \
} while (0)
#endif

static inline uint8_t ioread8(void *addr)
{
	uint8_t val;

	IO_COND(addr,
		val = inb_p((unsigned long)addr),
		val = *(volatile uint8_t *)addr);

	return val;
}

static inline uint16_t ioread16(void *addr)
{
	uint16_t val;

	IO_COND(addr,
		val = inw_p((unsigned long)addr),
		val = *(volatile uint16_t *)addr);

	return val;
}

static inline uint32_t ioread32(void *addr)
{
	uint32_t val;

	IO_COND(addr,
		val = inl_p((unsigned long)addr),
		val = *(volatile uint32_t *)addr);

	return val;
}

static inline void iowrite8(uint8_t val, void *addr)
{
	IO_COND(addr,
		outb_p(val, (unsigned long)addr),
		*(volatile uint8_t *)addr = val);
}

static inline void iowrite16(uint16_t val, void *addr)
{
	IO_COND(addr,
		outw_p(val, (unsigned long)addr),
		*(volatile uint16_t *)addr = val);
}

static inline void iowrite32(uint32_t val, void *addr)
{
	IO_COND(addr,
		outl_p(val, (unsigned long)addr),
		*(volatile uint32_t *)addr = val);
}

>  void
>  pci_uio_ioport_read(struct rte_pci_ioport *p,
>  		    void *data, size_t len, off_t offset)
> @@ -528,25 +622,13 @@
>  	for (d = data; len > 0; d += size, reg += size, len -= size) {
>  		if (len >= 4) {
>  			size = 4;
> -#if defined(RTE_ARCH_X86)
> -			*(uint32_t *)d = inl(reg);
> -#else
> -			*(uint32_t *)d = *(volatile uint32_t *)reg;
> -#endif
> +			*(uint32_t *)d = ioread32((void *)reg);
>  		} else if (len >= 2) {
>  			size = 2;
> -#if defined(RTE_ARCH_X86)
> -			*(uint16_t *)d = inw(reg);
> -#else
> -			*(uint16_t *)d = *(volatile uint16_t *)reg;
> -#endif
> +			*(uint16_t *)d = ioread16((void *)reg);
>  		} else {
>  			size = 1;
> -#if defined(RTE_ARCH_X86)
> -			*d = inb(reg);
> -#else
> -			*d = *(volatile uint8_t *)reg;
> -#endif
> +			*d = ioread8((void *)reg);
>  		}
>  	}
>  }
> @@ -562,25 +644,13 @@
>  	for (s = data; len > 0; s += size, reg += size, len -= size) {
>  		if (len >= 4) {
>  			size = 4;
> -#if defined(RTE_ARCH_X86)
> -			outl_p(*(const uint32_t *)s, reg);
> -#else
> -			*(volatile uint32_t *)reg = *(const uint32_t *)s;
> -#endif
> +			iowrite32(*(const uint32_t *)s, (void *)reg);
>  		} else if (len >= 2) {
>  			size = 2;
> -#if defined(RTE_ARCH_X86)
> -			outw_p(*(const uint16_t *)s, reg);
> -#else
> -			*(volatile uint16_t *)reg = *(const uint16_t *)s;
> -#endif
> +			iowrite16(*(const uint16_t *)s, (void *)reg);
>  		} else {
>  			size = 1;
> -#if defined(RTE_ARCH_X86)
> -			outb_p(*s, reg);
> -#else
> -			*(volatile uint8_t *)reg = *s;
> -#endif
> +			iowrite8(*s, (void *)reg);
>  		}
>  	}
>  }
> --
> 1.8.3.1
  
David Marchand March 15, 2021, 10:19 a.m. UTC | #2
On Thu, Mar 11, 2021 at 7:43 AM Wang, Haiyue <haiyue.wang@intel.com> wrote:
> Like kernel use macro to do pio and mmio, maybe we can also to do so for
> making code clean:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/iomap.c
>
> #define IO_COND(addr, is_pio, is_mmio) do {                     \
>         unsigned long port = (unsigned long __force)addr;       \
>         if (port >= PIO_RESERVED) {                             \
>                 is_mmio;                                        \
>         } else if (port > PIO_OFFSET) {                         \
>                 port &= PIO_MASK;                               \
>                 is_pio;                                         \
>         } else                                                  \
>                 bad_io_access(port, #is_pio );                  \
> } while (0)
>
>
> Like:
>
> #if defined(RTE_ARCH_X86)
> #define IO_COND(addr, is_pio, is_mmio) do {           \
>         if ((uint64_t)(uintptr_t)addr >= PIO_MAX) {   \
>                 is_mmio;                              \
>         } else {                                      \
>                 is_pio;                               \
>         }                                             \
> } while (0)
> #else
> #define IO_COND(addr, is_pio, is_mmio) do {           \
>                 is_mmio;                              \
> } while (0)
> #endif

We should not just copy/paste kernel code.

Plus here, this seems a bit overkill.
And there are other parts in this code that could use some polishing.

What do you think of merging this series as is (now that we got non
regression reports) and doing such cleanups in followup patches?
  
谢华伟(此时此刻) March 15, 2021, 11:25 a.m. UTC | #3
On 2021/3/15 18:19, David Marchand wrote:
>> #else
>> #define IO_COND(addr, is_pio, is_mmio) do {           \
>>                  is_mmio;                              \
>> } while (0)
>> #endif
> We should not just copy/paste kernel code.
>
> Plus here, this seems a bit overkill.
> And there are other parts in this code that could use some polishing.
>
> What do you think of merging this series as is (now that we got non
> regression reports) and doing such cleanups in followup patches?

I am OK. Yes, we could do some cleanup after it is merged, for example 
against vfio, if it is really necessary for virtio PMD only to use vfio 
to access IO port.
  
Wang, Haiyue March 15, 2021, 1:11 p.m. UTC | #4
> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Monday, March 15, 2021 18:20
> To: Wang, Haiyue <haiyue.wang@intel.com>; 谢华伟(此时此刻) <huawei.xhw@alibaba-inc.com>
> Cc: maxime.coquelin@redhat.com; Yigit, Ferruh <ferruh.yigit@intel.com>; dev@dpdk.org; Burakov, Anatoly
> <anatoly.burakov@intel.com>; xuemingl@nvidia.com; grive@u256.net
> Subject: Re: [dpdk-dev] [PATCH v11 2/2] bus/pci: support MMIO in PCI ioport accessors
> 
> On Thu, Mar 11, 2021 at 7:43 AM Wang, Haiyue <haiyue.wang@intel.com> wrote:
> > Like kernel use macro to do pio and mmio, maybe we can also to do so for
> > making code clean:
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/iomap.c
> >
> > #define IO_COND(addr, is_pio, is_mmio) do {                     \
> >         unsigned long port = (unsigned long __force)addr;       \
> >         if (port >= PIO_RESERVED) {                             \
> >                 is_mmio;                                        \
> >         } else if (port > PIO_OFFSET) {                         \
> >                 port &= PIO_MASK;                               \
> >                 is_pio;                                         \
> >         } else                                                  \
> >                 bad_io_access(port, #is_pio );                  \
> > } while (0)
> >
> >
> > Like:
> >
> > #if defined(RTE_ARCH_X86)
> > #define IO_COND(addr, is_pio, is_mmio) do {           \
> >         if ((uint64_t)(uintptr_t)addr >= PIO_MAX) {   \
> >                 is_mmio;                              \
> >         } else {                                      \
> >                 is_pio;                               \
> >         }                                             \
> > } while (0)
> > #else
> > #define IO_COND(addr, is_pio, is_mmio) do {           \
> >                 is_mmio;                              \
> > } while (0)
> > #endif
> 
> We should not just copy/paste kernel code.
> 

Got it ;-)

> 
> 
> --
> David Marchand
  

Patch

diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 0f38abf..0dc99e9 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -715,8 +715,6 @@  int rte_pci_write_config(const struct rte_pci_device *device,
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		pci_uio_ioport_read(p, data, len, offset);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
 		pci_uio_ioport_read(p, data, len, offset);
 		break;
@@ -736,8 +734,6 @@  int rte_pci_write_config(const struct rte_pci_device *device,
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		pci_uio_ioport_write(p, data, len, offset);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
 		pci_uio_ioport_write(p, data, len, offset);
 		break;
diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index 01f2a40..0907051 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -368,6 +368,8 @@ 
 	return -1;
 }
 
+#define PIO_MAX 0x10000
+
 #if defined(RTE_ARCH_X86)
 int
 pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
@@ -381,12 +383,6 @@ 
 	unsigned long base;
 	int i;
 
-	if (rte_eal_iopl_init() != 0) {
-		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
-			__func__, dev->name);
-		return -1;
-	}
-
 	/* open and read addresses of the corresponding resource in sysfs */
 	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
 		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
@@ -408,15 +404,27 @@ 
 		&end_addr, &flags) < 0)
 		goto error;
 
-	if (!(flags & IORESOURCE_IO)) {
-		RTE_LOG(ERR, EAL, "%s(): bar resource other than IO is not supported\n", __func__);
-		goto error;
-	}
-	base = (unsigned long)phys_addr;
-	RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
+	if (flags & IORESOURCE_IO) {
+		if (rte_eal_iopl_init()) {
+			RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
+				__func__, dev->name);
+			goto error;
+		}
 
-	if (base > UINT16_MAX)
+		base = (unsigned long)phys_addr;
+		if (base > PIO_MAX) {
+			RTE_LOG(ERR, EAL, "%s(): %08lx too large PIO resource\n", __func__, base);
+			goto error;
+		}
+
+		RTE_LOG(DEBUG, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
+	} else if (flags & IORESOURCE_MEM) {
+		base = (unsigned long)dev->mem_resource[bar].addr;
+		RTE_LOG(DEBUG, EAL, "%s(): MMIO BAR %08lx detected\n", __func__, base);
+	} else {
+		RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
 		goto error;
+	}
 
 	/* FIXME only for primary process ? */
 	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
@@ -517,6 +525,92 @@ 
 }
 #endif
 
+#if defined(RTE_ARCH_X86)
+static inline uint8_t ioread8(void *addr)
+{
+	uint8_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint8_t *)addr :
+		inb_p((unsigned long)addr);
+
+	return val;
+}
+
+static inline uint16_t ioread16(void *addr)
+{
+	uint16_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint16_t *)addr :
+		inw_p((unsigned long)addr);
+
+	return val;
+}
+
+static inline uint32_t ioread32(void *addr)
+{
+	uint32_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint32_t *)addr :
+		inl_p((unsigned long)addr);
+
+	return val;
+}
+
+static inline void iowrite8(uint8_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint8_t *)addr = val :
+		outb_p(val, (unsigned long)addr);
+}
+
+static inline void iowrite16(uint16_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint16_t *)addr = val :
+		outw_p(val, (unsigned long)addr);
+}
+
+static inline void iowrite32(uint32_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint32_t *)addr = val :
+		outl_p(val, (unsigned long)addr);
+}
+#else
+static inline uint8_t ioread8(void *addr)
+{
+	return *(volatile uint8_t *)addr;
+}
+
+static inline uint16_t ioread16(void *addr)
+{
+	return *(volatile uint16_t *)addr;
+}
+
+static inline uint32_t ioread32(void *addr)
+{
+	return *(volatile uint32_t *)addr;
+}
+
+static inline void iowrite8(uint8_t val, void *addr)
+{
+	*(volatile uint8_t *)addr = val;
+}
+
+static inline void iowrite16(uint16_t val, void *addr)
+{
+	*(volatile uint16_t *)addr = val;
+}
+
+static inline void iowrite32(uint32_t val, void *addr)
+{
+	*(volatile uint32_t *)addr = val;
+}
+#endif
+
 void
 pci_uio_ioport_read(struct rte_pci_ioport *p,
 		    void *data, size_t len, off_t offset)
@@ -528,25 +622,13 @@ 
 	for (d = data; len > 0; d += size, reg += size, len -= size) {
 		if (len >= 4) {
 			size = 4;
-#if defined(RTE_ARCH_X86)
-			*(uint32_t *)d = inl(reg);
-#else
-			*(uint32_t *)d = *(volatile uint32_t *)reg;
-#endif
+			*(uint32_t *)d = ioread32((void *)reg);
 		} else if (len >= 2) {
 			size = 2;
-#if defined(RTE_ARCH_X86)
-			*(uint16_t *)d = inw(reg);
-#else
-			*(uint16_t *)d = *(volatile uint16_t *)reg;
-#endif
+			*(uint16_t *)d = ioread16((void *)reg);
 		} else {
 			size = 1;
-#if defined(RTE_ARCH_X86)
-			*d = inb(reg);
-#else
-			*d = *(volatile uint8_t *)reg;
-#endif
+			*d = ioread8((void *)reg);
 		}
 	}
 }
@@ -562,25 +644,13 @@ 
 	for (s = data; len > 0; s += size, reg += size, len -= size) {
 		if (len >= 4) {
 			size = 4;
-#if defined(RTE_ARCH_X86)
-			outl_p(*(const uint32_t *)s, reg);
-#else
-			*(volatile uint32_t *)reg = *(const uint32_t *)s;
-#endif
+			iowrite32(*(const uint32_t *)s, (void *)reg);
 		} else if (len >= 2) {
 			size = 2;
-#if defined(RTE_ARCH_X86)
-			outw_p(*(const uint16_t *)s, reg);
-#else
-			*(volatile uint16_t *)reg = *(const uint16_t *)s;
-#endif
+			iowrite16(*(const uint16_t *)s, (void *)reg);
 		} else {
 			size = 1;
-#if defined(RTE_ARCH_X86)
-			outb_p(*s, reg);
-#else
-			*(volatile uint8_t *)reg = *s;
-#endif
+			iowrite8(*s, (void *)reg);
 		}
 	}
 }