[v4,2/4] bus/pci: avoid depending on private value in kernel source

Message ID 20230531053743.129442-3-miao.li@intel.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Headers
Series Support VFIO sparse mmap in PCI bus |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Li, Miao May 31, 2023, 5:37 a.m. UTC
  From: Chenbo Xia <chenbo.xia@intel.com>

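The value 40 used in VFIO_GET_REGION_ADDR() is a private value
(VFIO_PCI_OFFSET_SHIFT) defined in the Linux kernel source [1]. It
is not part of the VFIO API, and we should not depend on it. Instead,
cache the size and offset of each region as reported by VFIO and use
the cached offset when accessing a region.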

[1] https://github.com/torvalds/linux/blob/v6.2/include/linux/vfio_pci_core.h

Signed-off-by: Chenbo Xia <chenbo.xia@intel.com>
Acked-by: Sunil Kumar Kori <skori@marvell.com>
Acked-by: Yahui Cao <yahui.cao@intel.com>
---
 drivers/bus/pci/linux/pci.c      |   4 +-
 drivers/bus/pci/linux/pci_init.h |   4 +-
 drivers/bus/pci/linux/pci_vfio.c | 197 +++++++++++++++++++++++--------
 drivers/bus/pci/private.h        |   9 ++
 lib/eal/include/rte_vfio.h       |   1 -
 5 files changed, 159 insertions(+), 56 deletions(-)
  

Patch

diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 4c2c5ba382..04e21ae20f 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -645,7 +645,7 @@  int rte_pci_read_config(const struct rte_pci_device *device,
 		return pci_uio_read_config(intr_handle, buf, len, offset);
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
-		return pci_vfio_read_config(intr_handle, buf, len, offset);
+		return pci_vfio_read_config(device, buf, len, offset);
 #endif
 	default:
 		rte_pci_device_name(&device->addr, devname,
@@ -669,7 +669,7 @@  int rte_pci_write_config(const struct rte_pci_device *device,
 		return pci_uio_write_config(intr_handle, buf, len, offset);
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
-		return pci_vfio_write_config(intr_handle, buf, len, offset);
+		return pci_vfio_write_config(device, buf, len, offset);
 #endif
 	default:
 		rte_pci_device_name(&device->addr, devname,
diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h
index dcea726186..9f6659ba6e 100644
--- a/drivers/bus/pci/linux/pci_init.h
+++ b/drivers/bus/pci/linux/pci_init.h
@@ -66,9 +66,9 @@  int pci_uio_ioport_unmap(struct rte_pci_ioport *p);
 #endif
 
 /* access config space */
-int pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
+int pci_vfio_read_config(const struct rte_pci_device *dev,
 			 void *buf, size_t len, off_t offs);
-int pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
+int pci_vfio_write_config(const struct rte_pci_device *dev,
 			  const void *buf, size_t len, off_t offs);
 
 int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index fab3483d9f..5aef84b7d0 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -43,45 +43,82 @@  static struct rte_tailq_elem rte_vfio_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_vfio_tailq)
 
+static int
+pci_vfio_get_region(const struct rte_pci_device *dev, int index,
+		    uint64_t *size, uint64_t *offset)
+{
+	const struct rte_pci_device_internal *pdev =
+		RTE_PCI_DEVICE_INTERNAL_CONST(dev);
+
+	if (index >= VFIO_PCI_NUM_REGIONS || index >= RTE_MAX_PCI_REGIONS)
+		return -1;
+
+	if (pdev->region[index].size == 0 && pdev->region[index].offset == 0)
+		return -1;
+
+	*size   = pdev->region[index].size;
+	*offset = pdev->region[index].offset;
+
+	return 0;
+}
+
 int
-pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
+pci_vfio_read_config(const struct rte_pci_device *dev,
 		    void *buf, size_t len, off_t offs)
 {
-	int vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
+	uint64_t size, offset;
+	int fd;
 
-	if (vfio_dev_fd < 0)
+	fd = rte_intr_dev_fd_get(dev->intr_handle);
+
+	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+				&size, &offset) != 0)
+		return -1;
+
+	if ((uint64_t)len + offs > size)
 		return -1;
 
-	return pread64(vfio_dev_fd, buf, len,
-	       VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
+	return pread64(fd, buf, len, offset + offs);
 }
 
 int
-pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
+pci_vfio_write_config(const struct rte_pci_device *dev,
 		    const void *buf, size_t len, off_t offs)
 {
-	int vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
+	uint64_t size, offset;
+	int fd;
 
-	if (vfio_dev_fd < 0)
+	fd = rte_intr_dev_fd_get(dev->intr_handle);
+
+	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+				&size, &offset) != 0)
 		return -1;
 
-	return pwrite64(vfio_dev_fd, buf, len,
-	       VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
+	if ((uint64_t)len + offs > size)
+		return -1;
+
+	return pwrite64(fd, buf, len, offset + offs);
 }
 
 /* get PCI BAR number where MSI-X interrupts are */
 static int
-pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
+pci_vfio_get_msix_bar(const struct rte_pci_device *dev, int fd,
+	struct pci_msix_table *msix_table)
 {
 	int ret;
 	uint32_t reg;
 	uint16_t flags;
 	uint8_t cap_id, cap_offset;
+	uint64_t size, offset;
+
+	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+		&size, &offset) != 0) {
+		RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n");
+		return -1;
+	}
 
 	/* read PCI capability pointer from config space */
-	ret = pread64(fd, &reg, sizeof(reg),
-			VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-			PCI_CAPABILITY_LIST);
+	ret = pread64(fd, &reg, sizeof(reg), offset + PCI_CAPABILITY_LIST);
 	if (ret != sizeof(reg)) {
 		RTE_LOG(ERR, EAL,
 			"Cannot read capability pointer from PCI config space!\n");
@@ -94,9 +131,7 @@  pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
 	while (cap_offset) {
 
 		/* read PCI capability ID */
-		ret = pread64(fd, &reg, sizeof(reg),
-				VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-				cap_offset);
+		ret = pread64(fd, &reg, sizeof(reg), offset + cap_offset);
 		if (ret != sizeof(reg)) {
 			RTE_LOG(ERR, EAL,
 				"Cannot read capability ID from PCI config space!\n");
@@ -108,9 +143,7 @@  pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
 
 		/* if we haven't reached MSI-X, check next capability */
 		if (cap_id != PCI_CAP_ID_MSIX) {
-			ret = pread64(fd, &reg, sizeof(reg),
-					VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-					cap_offset);
+			ret = pread64(fd, &reg, sizeof(reg), offset + cap_offset);
 			if (ret != sizeof(reg)) {
 				RTE_LOG(ERR, EAL,
 					"Cannot read capability pointer from PCI config space!\n");
@@ -125,18 +158,14 @@  pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
 		/* else, read table offset */
 		else {
 			/* table offset resides in the next 4 bytes */
-			ret = pread64(fd, &reg, sizeof(reg),
-					VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-					cap_offset + 4);
+			ret = pread64(fd, &reg, sizeof(reg), offset + cap_offset + 4);
 			if (ret != sizeof(reg)) {
 				RTE_LOG(ERR, EAL,
 					"Cannot read table offset from PCI config space!\n");
 				return -1;
 			}
 
-			ret = pread64(fd, &flags, sizeof(flags),
-					VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-					cap_offset + 2);
+			ret = pread64(fd, &flags, sizeof(flags), offset + cap_offset + 2);
 			if (ret != sizeof(flags)) {
 				RTE_LOG(ERR, EAL,
 					"Cannot read table flags from PCI config space!\n");
@@ -156,14 +185,19 @@  pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
 
 /* enable PCI bus memory space */
 static int
-pci_vfio_enable_bus_memory(int dev_fd)
+pci_vfio_enable_bus_memory(struct rte_pci_device *dev, int dev_fd)
 {
+	uint64_t size, offset;
 	uint16_t cmd;
 	int ret;
 
-	ret = pread64(dev_fd, &cmd, sizeof(cmd),
-		      VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-		      PCI_COMMAND);
+	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+		&size, &offset) != 0) {
+		RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n");
+		return -1;
+	}
+
+	ret = pread64(dev_fd, &cmd, sizeof(cmd), offset + PCI_COMMAND);
 
 	if (ret != sizeof(cmd)) {
 		RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n");
@@ -174,9 +208,7 @@  pci_vfio_enable_bus_memory(int dev_fd)
 		return 0;
 
 	cmd |= PCI_COMMAND_MEMORY;
-	ret = pwrite64(dev_fd, &cmd, sizeof(cmd),
-		       VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-		       PCI_COMMAND);
+	ret = pwrite64(dev_fd, &cmd, sizeof(cmd), offset + PCI_COMMAND);
 
 	if (ret != sizeof(cmd)) {
 		RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n");
@@ -188,14 +220,19 @@  pci_vfio_enable_bus_memory(int dev_fd)
 
 /* set PCI bus mastering */
 static int
-pci_vfio_set_bus_master(int dev_fd, bool op)
+pci_vfio_set_bus_master(const struct rte_pci_device *dev, int dev_fd, bool op)
 {
+	uint64_t size, offset;
 	uint16_t reg;
 	int ret;
 
-	ret = pread64(dev_fd, &reg, sizeof(reg),
-			VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-			PCI_COMMAND);
+	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+		&size, &offset) != 0) {
+		RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n");
+		return -1;
+	}
+
+	ret = pread64(dev_fd, &reg, sizeof(reg), offset + PCI_COMMAND);
 	if (ret != sizeof(reg)) {
 		RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n");
 		return -1;
@@ -207,9 +244,7 @@  pci_vfio_set_bus_master(int dev_fd, bool op)
 	else
 		reg &= ~(PCI_COMMAND_MASTER);
 
-	ret = pwrite64(dev_fd, &reg, sizeof(reg),
-			VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-			PCI_COMMAND);
+	ret = pwrite64(dev_fd, &reg, sizeof(reg), offset + PCI_COMMAND);
 
 	if (ret != sizeof(reg)) {
 		RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n");
@@ -458,14 +493,21 @@  pci_vfio_disable_notifier(struct rte_pci_device *dev)
 #endif
 
 static int
-pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
+pci_vfio_is_ioport_bar(const struct rte_pci_device *dev, int vfio_dev_fd,
+	int bar_index)
 {
+	uint64_t size, offset;
 	uint32_t ioport_bar;
 	int ret;
 
+	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+		&size, &offset) != 0) {
+		RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n");
+		return -1;
+	}
+
 	ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
-			  VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
-			  + PCI_BASE_ADDRESS_0 + bar_index*4);
+			  offset + PCI_BASE_ADDRESS_0 + bar_index * 4);
 	if (ret != sizeof(ioport_bar)) {
 		RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n",
 			PCI_BASE_ADDRESS_0 + bar_index*4);
@@ -483,13 +525,13 @@  pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd)
 		return -1;
 	}
 
-	if (pci_vfio_enable_bus_memory(vfio_dev_fd)) {
+	if (pci_vfio_enable_bus_memory(dev, vfio_dev_fd)) {
 		RTE_LOG(ERR, EAL, "Cannot enable bus memory!\n");
 		return -1;
 	}
 
 	/* set bus mastering for the device */
-	if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
+	if (pci_vfio_set_bus_master(dev, vfio_dev_fd, true)) {
 		RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
 		return -1;
 	}
@@ -704,7 +746,7 @@  pci_vfio_info_cap(struct vfio_region_info *info, int cap)
 static int
 pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
 {
-	struct vfio_region_info *info;
+	struct vfio_region_info *info = NULL;
 	int ret;
 
 	ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region);
@@ -719,11 +761,40 @@  pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
 	return ret;
 }
 
+static int
+pci_vfio_fill_regions(struct rte_pci_device *dev, int vfio_dev_fd,
+		      struct vfio_device_info *device_info)
+{
+	struct rte_pci_device_internal *pdev = RTE_PCI_DEVICE_INTERNAL(dev);
+	struct vfio_region_info *reg = NULL;
+	int nb_maps, i, ret;
+
+	nb_maps = RTE_MIN((int)device_info->num_regions,
+			VFIO_PCI_CONFIG_REGION_INDEX + 1);
+
+	for (i = 0; i < nb_maps; i++) {
+		ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
+		if (ret < 0) {
+			RTE_LOG(DEBUG, EAL, "%s cannot get device region info error %i (%s)\n",
+				dev->name, errno, strerror(errno));
+			return -1;
+		}
+
+		pdev->region[i].size = reg->size;
+		pdev->region[i].offset = reg->offset;
+
+		free(reg);
+	}
+
+	return 0;
+}
 
 static int
 pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 {
+	struct rte_pci_device_internal *pdev = RTE_PCI_DEVICE_INTERNAL(dev);
 	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+	struct vfio_region_info *reg = NULL;
 	char pci_addr[PATH_MAX] = {0};
 	int vfio_dev_fd;
 	struct rte_pci_addr *loc = &dev->addr;
@@ -767,11 +838,22 @@  pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 	/* map BARs */
 	maps = vfio_res->maps;
 
+	ret = pci_vfio_get_region_info(vfio_dev_fd, &reg,
+		VFIO_PCI_CONFIG_REGION_INDEX);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL, "%s cannot get device region info error %i (%s)\n",
+			dev->name, errno, strerror(errno));
+		goto err_vfio_res;
+	}
+	pdev->region[VFIO_PCI_CONFIG_REGION_INDEX].size = reg->size;
+	pdev->region[VFIO_PCI_CONFIG_REGION_INDEX].offset = reg->offset;
+	free(reg);
+
 	vfio_res->msix_table.bar_index = -1;
 	/* get MSI-X BAR, if any (we have to know where it is because we can't
 	 * easily mmap it when using VFIO)
 	 */
-	ret = pci_vfio_get_msix_bar(vfio_dev_fd, &vfio_res->msix_table);
+	ret = pci_vfio_get_msix_bar(dev, vfio_dev_fd, &vfio_res->msix_table);
 	if (ret < 0) {
 		RTE_LOG(ERR, EAL, "%s cannot get MSI-X BAR number!\n",
 				pci_addr);
@@ -792,7 +874,6 @@  pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 	}
 
 	for (i = 0; i < vfio_res->nb_maps; i++) {
-		struct vfio_region_info *reg = NULL;
 		void *bar_addr;
 
 		ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
@@ -803,8 +884,11 @@  pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 			goto err_vfio_res;
 		}
 
+		pdev->region[i].size = reg->size;
+		pdev->region[i].offset = reg->offset;
+
 		/* chk for io port region */
-		ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
+		ret = pci_vfio_is_ioport_bar(dev, vfio_dev_fd, i);
 		if (ret < 0) {
 			free(reg);
 			goto err_vfio_res;
@@ -916,6 +1000,10 @@  pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
 	if (ret)
 		return ret;
 
+	ret = pci_vfio_fill_regions(dev, vfio_dev_fd, &device_info);
+	if (ret)
+		return ret;
+
 	/* map BARs */
 	maps = vfio_res->maps;
 
@@ -1031,7 +1119,7 @@  pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
 	if (vfio_dev_fd < 0)
 		return -1;
 
-	if (pci_vfio_set_bus_master(vfio_dev_fd, false)) {
+	if (pci_vfio_set_bus_master(dev, vfio_dev_fd, false)) {
 		RTE_LOG(ERR, EAL, "%s cannot unset bus mastering for PCI device!\n",
 				pci_addr);
 		return -1;
@@ -1111,14 +1199,21 @@  int
 pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
 		    struct rte_pci_ioport *p)
 {
+	uint64_t size, offset;
+
 	if (bar < VFIO_PCI_BAR0_REGION_INDEX ||
 	    bar > VFIO_PCI_BAR5_REGION_INDEX) {
 		RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar);
 		return -1;
 	}
 
+	if (pci_vfio_get_region(dev, bar, &size, &offset) != 0) {
+		RTE_LOG(ERR, EAL, "Cannot get offset of region %d.\n", bar);
+		return -1;
+	}
+
 	p->dev = dev;
-	p->base = VFIO_GET_REGION_ADDR(bar);
+	p->base = offset;
 	return 0;
 }
 
diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h
index b564646e03..2d6991ccb7 100644
--- a/drivers/bus/pci/private.h
+++ b/drivers/bus/pci/private.h
@@ -13,6 +13,8 @@ 
 #include <rte_os_shim.h>
 #include <rte_pci.h>
 
+#define RTE_MAX_PCI_REGIONS    9
+
 /*
  * Convert struct rte_pci_device to struct rte_pci_device_internal
  */
@@ -42,8 +44,15 @@  extern struct rte_pci_bus rte_pci_bus;
 struct rte_pci_driver;
 struct rte_pci_device;
 
+struct rte_pci_region {
+	uint64_t size;
+	uint64_t offset;
+};
+
 struct rte_pci_device_internal {
 	struct rte_pci_device device;
+	/* PCI regions provided by e.g. VFIO. */
+	struct rte_pci_region region[RTE_MAX_PCI_REGIONS];
 };
 
 /**
diff --git a/lib/eal/include/rte_vfio.h b/lib/eal/include/rte_vfio.h
index 7bdb8932b2..3487c4f2a2 100644
--- a/lib/eal/include/rte_vfio.h
+++ b/lib/eal/include/rte_vfio.h
@@ -38,7 +38,6 @@  extern "C" {
 #define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
 #define VFIO_GROUP_FMT "/dev/vfio/%u"
 #define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
-#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
 #define VFIO_GET_REGION_IDX(x) (x >> 40)
 #define VFIO_NOIOMMU_MODE      \
 	"/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"