[v11,2/2] bus/pci: support MMIO in PCI ioport accessors
Checks
Commit Message
With I/O BAR, we get PIO(port-mapped I/O) address.
With MMIO(memory-mapped I/O) BAR, we get mapped virtual address.
We distinguish PIO and MMIO by their address range like how kernel does,
i.e, address below 64K is PIO.
ioread/write8/16/32 is provided to access PIO/MMIO.
By the way, for virtio on arch other than x86, BAR flag indicates PIO
but is mapped.
Signed-off-by: huawei xie <huawei.xhw@alibaba-inc.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
drivers/bus/pci/linux/pci.c | 4 --
drivers/bus/pci/linux/pci_uio.c | 156 +++++++++++++++++++++++++++++-----------
2 files changed, 113 insertions(+), 47 deletions(-)
Comments
Hi Huawei,
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of ???(????)
> Sent: Thursday, March 11, 2021 01:37
> To: david.marchand@redhat.com; maxime.coquelin@redhat.com; Yigit, Ferruh <ferruh.yigit@intel.com>
> Cc: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>; xuemingl@nvidia.com; grive@u256.net;
> 谢华伟(此时此刻) <huawei.xhw@alibaba-inc.com>
> Subject: [dpdk-dev] [PATCH v11 2/2] bus/pci: support MMIO in PCI ioport accessors
>
> With I/O BAR, we get PIO(port-mapped I/O) address.
> With MMIO(memory-mapped I/O) BAR, we get mapped virtual address.
> We distinguish PIO and MMIO by their address range like how kernel does,
> i.e, address below 64K is PIO.
> ioread/write8/16/32 is provided to access PIO/MMIO.
> By the way, for virtio on arch other than x86, BAR flag indicates PIO
> but is mapped.
>
> Signed-off-by: huawei xie <huawei.xhw@alibaba-inc.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> drivers/bus/pci/linux/pci.c | 4 --
> drivers/bus/pci/linux/pci_uio.c | 156 +++++++++++++++++++++++++++++-----------
> 2 files changed, 113 insertions(+), 47 deletions(-)
>
>
> +#if defined(RTE_ARCH_X86)
> +static inline uint8_t ioread8(void *addr)
> +{
> + uint8_t val;
> +
> + val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> + *(volatile uint8_t *)addr :
> + inb_p((unsigned long)addr);
> +
> + return val;
> +}
> +
> +static inline uint16_t ioread16(void *addr)
> +{
> + uint16_t val;
> +
> + val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> + *(volatile uint16_t *)addr :
> + inw_p((unsigned long)addr);
> +
> + return val;
> +}
> +
> +static inline uint32_t ioread32(void *addr)
> +{
> + uint32_t val;
> +
> + val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> + *(volatile uint32_t *)addr :
> + inl_p((unsigned long)addr);
> +
> + return val;
> +}
> +
> +static inline void iowrite8(uint8_t val, void *addr)
> +{
> + (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> + *(volatile uint8_t *)addr = val :
> + outb_p(val, (unsigned long)addr);
> +}
> +
> +static inline void iowrite16(uint16_t val, void *addr)
> +{
> + (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> + *(volatile uint16_t *)addr = val :
> + outw_p(val, (unsigned long)addr);
> +}
> +
> +static inline void iowrite32(uint32_t val, void *addr)
> +{
> + (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> + *(volatile uint32_t *)addr = val :
> + outl_p(val, (unsigned long)addr);
> +}
> +#else
> +static inline uint8_t ioread8(void *addr)
> +{
> + return *(volatile uint8_t *)addr;
> +}
> +
> +static inline uint16_t ioread16(void *addr)
> +{
> + return *(volatile uint16_t *)addr;
> +}
> +
> +static inline uint32_t ioread32(void *addr)
> +{
> + return *(volatile uint32_t *)addr;
> +}
> +
> +static inline void iowrite8(uint8_t val, void *addr)
> +{
> + *(volatile uint8_t *)addr = val;
> +}
> +
> +static inline void iowrite16(uint16_t val, void *addr)
> +{
> + *(volatile uint16_t *)addr = val;
> +}
> +
> +static inline void iowrite32(uint32_t val, void *addr)
> +{
> + *(volatile uint32_t *)addr = val;
> +}
> +#endif
> +
Like kernel use macro to do pio and mmio, maybe we can also to do so for
making code clean:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/iomap.c
#define IO_COND(addr, is_pio, is_mmio) do { \
unsigned long port = (unsigned long __force)addr; \
if (port >= PIO_RESERVED) { \
is_mmio; \
} else if (port > PIO_OFFSET) { \
port &= PIO_MASK; \
is_pio; \
} else \
bad_io_access(port, #is_pio ); \
} while (0)
Like:
#if defined(RTE_ARCH_X86)
#define IO_COND(addr, is_pio, is_mmio) do { \
if ((uint64_t)(uintptr_t)addr >= PIO_MAX) { \
is_mmio; \
} else { \
is_pio; \
} \
} while (0)
#else
#define IO_COND(addr, is_pio, is_mmio) do { \
is_mmio; \
} while (0)
#endif
static inline uint8_t ioread8(void *addr)
{
uint8_t val;
IO_COND(addr,
val = inb_p((unsigned long)addr),
val = *(volatile uint8_t *)addr);
return val;
}
static inline uint16_t ioread16(void *addr)
{
uint16_t val;
IO_COND(addr,
val = inw_p((unsigned long)addr),
val = *(volatile uint16_t *)addr);
return val;
}
static inline uint32_t ioread32(void *addr)
{
uint32_t val;
IO_COND(addr,
val = inl_p((unsigned long)addr),
val = *(volatile uint32_t *)addr);
return val;
}
static inline void iowrite8(uint8_t val, void *addr)
{
IO_COND(addr,
outb_p(val, (unsigned long)addr),
*(volatile uint8_t *)addr = val);
}
static inline void iowrite16(uint16_t val, void *addr)
{
IO_COND(addr,
outw_p(val, (unsigned long)addr),
*(volatile uint16_t *)addr = val);
}
static inline void iowrite32(uint32_t val, void *addr)
{
IO_COND(addr,
outl_p(val, (unsigned long)addr),
*(volatile uint32_t *)addr = val);
}
> void
> pci_uio_ioport_read(struct rte_pci_ioport *p,
> void *data, size_t len, off_t offset)
> @@ -528,25 +622,13 @@
> for (d = data; len > 0; d += size, reg += size, len -= size) {
> if (len >= 4) {
> size = 4;
> -#if defined(RTE_ARCH_X86)
> - *(uint32_t *)d = inl(reg);
> -#else
> - *(uint32_t *)d = *(volatile uint32_t *)reg;
> -#endif
> + *(uint32_t *)d = ioread32((void *)reg);
> } else if (len >= 2) {
> size = 2;
> -#if defined(RTE_ARCH_X86)
> - *(uint16_t *)d = inw(reg);
> -#else
> - *(uint16_t *)d = *(volatile uint16_t *)reg;
> -#endif
> + *(uint16_t *)d = ioread16((void *)reg);
> } else {
> size = 1;
> -#if defined(RTE_ARCH_X86)
> - *d = inb(reg);
> -#else
> - *d = *(volatile uint8_t *)reg;
> -#endif
> + *d = ioread8((void *)reg);
> }
> }
> }
> @@ -562,25 +644,13 @@
> for (s = data; len > 0; s += size, reg += size, len -= size) {
> if (len >= 4) {
> size = 4;
> -#if defined(RTE_ARCH_X86)
> - outl_p(*(const uint32_t *)s, reg);
> -#else
> - *(volatile uint32_t *)reg = *(const uint32_t *)s;
> -#endif
> + iowrite32(*(const uint32_t *)s, (void *)reg);
> } else if (len >= 2) {
> size = 2;
> -#if defined(RTE_ARCH_X86)
> - outw_p(*(const uint16_t *)s, reg);
> -#else
> - *(volatile uint16_t *)reg = *(const uint16_t *)s;
> -#endif
> + iowrite16(*(const uint16_t *)s, (void *)reg);
> } else {
> size = 1;
> -#if defined(RTE_ARCH_X86)
> - outb_p(*s, reg);
> -#else
> - *(volatile uint8_t *)reg = *s;
> -#endif
> + iowrite8(*s, (void *)reg);
> }
> }
> }
> --
> 1.8.3.1
On Thu, Mar 11, 2021 at 7:43 AM Wang, Haiyue <haiyue.wang@intel.com> wrote:
> Like kernel use macro to do pio and mmio, maybe we can also to do so for
> making code clean:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/iomap.c
>
> #define IO_COND(addr, is_pio, is_mmio) do { \
> unsigned long port = (unsigned long __force)addr; \
> if (port >= PIO_RESERVED) { \
> is_mmio; \
> } else if (port > PIO_OFFSET) { \
> port &= PIO_MASK; \
> is_pio; \
> } else \
> bad_io_access(port, #is_pio ); \
> } while (0)
>
>
> Like:
>
> #if defined(RTE_ARCH_X86)
> #define IO_COND(addr, is_pio, is_mmio) do { \
> if ((uint64_t)(uintptr_t)addr >= PIO_MAX) { \
> is_mmio; \
> } else { \
> is_pio; \
> } \
> } while (0)
> #else
> #define IO_COND(addr, is_pio, is_mmio) do { \
> is_mmio; \
> } while (0)
> #endif
We should not just copy/paste kernel code.
Plus here, this seems a bit overkill.
And there are other parts in this code that could use some polishing.
What do you think of merging this series as is (now that we got non
regression reports) and doing such cleanups in followup patches?
On 2021/3/15 18:19, David Marchand wrote:
>> #else
>> #define IO_COND(addr, is_pio, is_mmio) do { \
>> is_mmio; \
>> } while (0)
>> #endif
> We should not just copy/paste kernel code.
>
> Plus here, this seems a bit overkill.
> And there are other parts in this code that could use some polishing.
>
> What do you think of merging this series as is (now that we got non
> regression reports) and doing such cleanups in followup patches?
I am OK. Yes, we could do some cleanup after it is merged, for example
against vfio, if it is really necessary for virtio PMD only to use vfio
to access IO port.
> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Monday, March 15, 2021 18:20
> To: Wang, Haiyue <haiyue.wang@intel.com>; 谢华伟(此时此刻) <huawei.xhw@alibaba-inc.com>
> Cc: maxime.coquelin@redhat.com; Yigit, Ferruh <ferruh.yigit@intel.com>; dev@dpdk.org; Burakov, Anatoly
> <anatoly.burakov@intel.com>; xuemingl@nvidia.com; grive@u256.net
> Subject: Re: [dpdk-dev] [PATCH v11 2/2] bus/pci: support MMIO in PCI ioport accessors
>
> On Thu, Mar 11, 2021 at 7:43 AM Wang, Haiyue <haiyue.wang@intel.com> wrote:
> > Like kernel use macro to do pio and mmio, maybe we can also to do so for
> > making code clean:
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/iomap.c
> >
> > #define IO_COND(addr, is_pio, is_mmio) do { \
> > unsigned long port = (unsigned long __force)addr; \
> > if (port >= PIO_RESERVED) { \
> > is_mmio; \
> > } else if (port > PIO_OFFSET) { \
> > port &= PIO_MASK; \
> > is_pio; \
> > } else \
> > bad_io_access(port, #is_pio ); \
> > } while (0)
> >
> >
> > Like:
> >
> > #if defined(RTE_ARCH_X86)
> > #define IO_COND(addr, is_pio, is_mmio) do { \
> > if ((uint64_t)(uintptr_t)addr >= PIO_MAX) { \
> > is_mmio; \
> > } else { \
> > is_pio; \
> > } \
> > } while (0)
> > #else
> > #define IO_COND(addr, is_pio, is_mmio) do { \
> > is_mmio; \
> > } while (0)
> > #endif
>
> We should not just copy/paste kernel code.
>
Got it ;-)
>
>
> --
> David Marchand
@@ -715,8 +715,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
break;
#endif
case RTE_PCI_KDRV_IGB_UIO:
- pci_uio_ioport_read(p, data, len, offset);
- break;
case RTE_PCI_KDRV_UIO_GENERIC:
pci_uio_ioport_read(p, data, len, offset);
break;
@@ -736,8 +734,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
break;
#endif
case RTE_PCI_KDRV_IGB_UIO:
- pci_uio_ioport_write(p, data, len, offset);
- break;
case RTE_PCI_KDRV_UIO_GENERIC:
pci_uio_ioport_write(p, data, len, offset);
break;
@@ -368,6 +368,8 @@
return -1;
}
+#define PIO_MAX 0x10000
+
#if defined(RTE_ARCH_X86)
int
pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
@@ -381,12 +383,6 @@
unsigned long base;
int i;
- if (rte_eal_iopl_init() != 0) {
- RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
- __func__, dev->name);
- return -1;
- }
-
/* open and read addresses of the corresponding resource in sysfs */
snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
@@ -408,15 +404,27 @@
&end_addr, &flags) < 0)
goto error;
- if (!(flags & IORESOURCE_IO)) {
- RTE_LOG(ERR, EAL, "%s(): bar resource other than IO is not supported\n", __func__);
- goto error;
- }
- base = (unsigned long)phys_addr;
- RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
+ if (flags & IORESOURCE_IO) {
+ if (rte_eal_iopl_init()) {
+ RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
+ __func__, dev->name);
+ goto error;
+ }
- if (base > UINT16_MAX)
+ base = (unsigned long)phys_addr;
+ if (base > PIO_MAX) {
+ RTE_LOG(ERR, EAL, "%s(): %08lx too large PIO resource\n", __func__, base);
+ goto error;
+ }
+
+ RTE_LOG(DEBUG, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
+ } else if (flags & IORESOURCE_MEM) {
+ base = (unsigned long)dev->mem_resource[bar].addr;
+ RTE_LOG(DEBUG, EAL, "%s(): MMIO BAR %08lx detected\n", __func__, base);
+ } else {
+ RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
goto error;
+ }
/* FIXME only for primary process ? */
if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
@@ -517,6 +525,92 @@
}
#endif
+#if defined(RTE_ARCH_X86)
+static inline uint8_t ioread8(void *addr)
+{
+ uint8_t val;
+
+ val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+ *(volatile uint8_t *)addr :
+ inb_p((unsigned long)addr);
+
+ return val;
+}
+
+static inline uint16_t ioread16(void *addr)
+{
+ uint16_t val;
+
+ val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+ *(volatile uint16_t *)addr :
+ inw_p((unsigned long)addr);
+
+ return val;
+}
+
+static inline uint32_t ioread32(void *addr)
+{
+ uint32_t val;
+
+ val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+ *(volatile uint32_t *)addr :
+ inl_p((unsigned long)addr);
+
+ return val;
+}
+
+static inline void iowrite8(uint8_t val, void *addr)
+{
+ (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+ *(volatile uint8_t *)addr = val :
+ outb_p(val, (unsigned long)addr);
+}
+
+static inline void iowrite16(uint16_t val, void *addr)
+{
+ (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+ *(volatile uint16_t *)addr = val :
+ outw_p(val, (unsigned long)addr);
+}
+
+static inline void iowrite32(uint32_t val, void *addr)
+{
+ (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+ *(volatile uint32_t *)addr = val :
+ outl_p(val, (unsigned long)addr);
+}
+#else
+static inline uint8_t ioread8(void *addr)
+{
+ return *(volatile uint8_t *)addr;
+}
+
+static inline uint16_t ioread16(void *addr)
+{
+ return *(volatile uint16_t *)addr;
+}
+
+static inline uint32_t ioread32(void *addr)
+{
+ return *(volatile uint32_t *)addr;
+}
+
+static inline void iowrite8(uint8_t val, void *addr)
+{
+ *(volatile uint8_t *)addr = val;
+}
+
+static inline void iowrite16(uint16_t val, void *addr)
+{
+ *(volatile uint16_t *)addr = val;
+}
+
+static inline void iowrite32(uint32_t val, void *addr)
+{
+ *(volatile uint32_t *)addr = val;
+}
+#endif
+
void
pci_uio_ioport_read(struct rte_pci_ioport *p,
void *data, size_t len, off_t offset)
@@ -528,25 +622,13 @@
for (d = data; len > 0; d += size, reg += size, len -= size) {
if (len >= 4) {
size = 4;
-#if defined(RTE_ARCH_X86)
- *(uint32_t *)d = inl(reg);
-#else
- *(uint32_t *)d = *(volatile uint32_t *)reg;
-#endif
+ *(uint32_t *)d = ioread32((void *)reg);
} else if (len >= 2) {
size = 2;
-#if defined(RTE_ARCH_X86)
- *(uint16_t *)d = inw(reg);
-#else
- *(uint16_t *)d = *(volatile uint16_t *)reg;
-#endif
+ *(uint16_t *)d = ioread16((void *)reg);
} else {
size = 1;
-#if defined(RTE_ARCH_X86)
- *d = inb(reg);
-#else
- *d = *(volatile uint8_t *)reg;
-#endif
+ *d = ioread8((void *)reg);
}
}
}
@@ -562,25 +644,13 @@
for (s = data; len > 0; s += size, reg += size, len -= size) {
if (len >= 4) {
size = 4;
-#if defined(RTE_ARCH_X86)
- outl_p(*(const uint32_t *)s, reg);
-#else
- *(volatile uint32_t *)reg = *(const uint32_t *)s;
-#endif
+ iowrite32(*(const uint32_t *)s, (void *)reg);
} else if (len >= 2) {
size = 2;
-#if defined(RTE_ARCH_X86)
- outw_p(*(const uint16_t *)s, reg);
-#else
- *(volatile uint16_t *)reg = *(const uint16_t *)s;
-#endif
+ iowrite16(*(const uint16_t *)s, (void *)reg);
} else {
size = 1;
-#if defined(RTE_ARCH_X86)
- outb_p(*s, reg);
-#else
- *(volatile uint8_t *)reg = *s;
-#endif
+ iowrite8(*s, (void *)reg);
}
}
}