[v10,05/21] net/ntnic: add VFIO module

Message ID 20240717133313.3104239-5-sil-plv@napatech.com (mailing list archive)
State Accepted, archived
Delegated to: Ferruh Yigit
Headers
Series [v10,01/21] net/ntnic: add ethdev and makes PMD available |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Serhii Iliushyk July 17, 2024, 1:32 p.m. UTC
Adds VFIO functionality and the DMA it requires.
The VFIO context is initialized during ntnic ethdev startup.

Signed-off-by: Serhii Iliushyk <sil-plv@napatech.com>
---
v10
* Use 8 spaces as indentation in meson
---
 drivers/net/ntnic/meson.build      |   1 +
 drivers/net/ntnic/ntnic_ethdev.c   |  22 +++
 drivers/net/ntnic/ntnic_vfio.c     | 235 +++++++++++++++++++++++++++++
 drivers/net/ntnic/ntnic_vfio.h     |  29 ++++
 drivers/net/ntnic/ntutil/nt_util.c |  65 ++++++++
 5 files changed, 352 insertions(+)
 create mode 100644 drivers/net/ntnic/ntnic_vfio.c
 create mode 100644 drivers/net/ntnic/ntnic_vfio.h
  

Patch

diff --git a/drivers/net/ntnic/meson.build b/drivers/net/ntnic/meson.build
index 6f645320b9..deeb0aca09 100644
--- a/drivers/net/ntnic/meson.build
+++ b/drivers/net/ntnic/meson.build
@@ -18,5 +18,6 @@  includes = [
 sources = files(
         'ntlog/ntlog.c',
         'ntutil/nt_util.c',
+        'ntnic_vfio.c',
         'ntnic_ethdev.c',
 )
diff --git a/drivers/net/ntnic/ntnic_ethdev.c b/drivers/net/ntnic/ntnic_ethdev.c
index 02b55e2780..b838eb4d7a 100644
--- a/drivers/net/ntnic/ntnic_ethdev.c
+++ b/drivers/net/ntnic/ntnic_ethdev.c
@@ -8,10 +8,17 @@ 
 #include <rte_bus_pci.h>
 #include <ethdev_pci.h>
 
+#include <rte_eal.h>
+#include <rte_dev.h>
+#include <rte_vfio.h>
+
 #include "ntlog.h"
 
+#include "ntnic_vfio.h"
 #include "nt_util.h"
 
+#define EXCEPTION_PATH_HID 0
+
 static const struct rte_pci_id nthw_pci_id_map[] = {
 	{
 		.vendor_id = 0,
@@ -21,12 +28,24 @@  static const struct rte_pci_id nthw_pci_id_map[] = {
 static int
 nthw_pci_dev_init(struct rte_pci_device *pci_dev)
 {
+	nt_vfio_init();
+
 	uint32_t n_port_mask = -1;	/* All ports enabled by default */
 	int n_phy_ports;
 	NT_LOG_DBGX(DEBUG, NTNIC, "Dev %s PF #%i Init : %02x:%02x:%i\n", pci_dev->name,
 		pci_dev->addr.function, pci_dev->addr.bus, pci_dev->addr.devid,
 		pci_dev->addr.function);
 
+
+	/* Setup VFIO context */
+	int vfio = nt_vfio_setup(pci_dev);
+
+	if (vfio < 0) {
+		NT_LOG_DBGX(ERR, TNIC, "%s: vfio_setup error %d\n",
+			(pci_dev->name[0] ? pci_dev->name : "NA"), -1);
+		return -1;
+	}
+
 	n_phy_ports = 0;
 
 	for (int n_intf_no = 0; n_intf_no < n_phy_ports; n_intf_no++) {
@@ -67,6 +86,8 @@  static int
 nthw_pci_dev_deinit(struct rte_eth_dev *eth_dev __rte_unused)
 {
 	NT_LOG_DBGX(DEBUG, NTNIC, "PCI device deinitialization\n");
+
+	nt_vfio_remove(EXCEPTION_PATH_HID);
 	return 0;
 }
 
@@ -131,3 +152,4 @@  static struct rte_pci_driver rte_nthw_pmd = {
 };
 
 RTE_PMD_REGISTER_PCI(net_ntnic, rte_nthw_pmd);
+RTE_PMD_REGISTER_KMOD_DEP(net_ntnic, "* vfio-pci");
diff --git a/drivers/net/ntnic/ntnic_vfio.c b/drivers/net/ntnic/ntnic_vfio.c
new file mode 100644
index 0000000000..f4433152b7
--- /dev/null
+++ b/drivers/net/ntnic/ntnic_vfio.c
@@ -0,0 +1,235 @@ 
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Napatech A/S
+ */
+
+#include <sys/ioctl.h>
+
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_vfio.h>
+#include <rte_dev.h>
+#include <rte_bus_pci.h>
+#include <rte_spinlock.h>
+
+#include <ntlog.h>
+#include <nt_util.h>
+#include "ntnic_vfio.h"
+
+#define ONE_G_SIZE 0x40000000
+#define ONE_G_MASK (ONE_G_SIZE - 1)
+#define START_VF_IOVA 0x220000000000
+
+/*
+ * Build the driver-internal device number from a PCI address: the low five
+ * bits of the device id land in bits 7..3 and the low three bits of the
+ * function number in bits 2..0.
+ */
+int
+nt_vfio_vf_num(const struct rte_pci_device *pdev)
+{
+	const int dev_part = (pdev->addr.devid & 0x1f) << 3;
+	const int func_part = pdev->addr.function & 0x7;
+
+	return dev_part + func_part;
+}
+
+/* Internal API */
+/*
+ * VFIO bookkeeping for one device. Entries live in vfio_list and are indexed
+ * by the device number computed by nt_vfio_vf_num().
+ */
+struct vfio_dev {
+	/* Container used for this device: the default one or a newly created one. */
+	int container_fd;
+	/* Result of binding the device's IOMMU group to the container. */
+	int group_fd;
+	/* Device fd read from the PCI device's interrupt handle. */
+	int dev_fd;
+	/* Next IOVA to hand out; starts at START_VF_IOVA, advanced per mapping. */
+	uint64_t iova_addr;
+};
+
+/* VFIO state table, one slot per device number (0..255). */
+static struct vfio_dev vfio_list[256];
+
+/*
+ * Return the VFIO state slot for a device number, or NULL when vf_num is
+ * outside 0..255.
+ */
+static struct vfio_dev *
+vfio_get(int vf_num)
+{
+	const int in_range = (vf_num >= 0) && (vf_num <= 255);
+
+	return in_range ? &vfio_list[vf_num] : NULL;
+}
+
+/* External API */
+/*
+ * Set up the VFIO context for a PCI device.
+ *
+ * Resets the device's slot in vfio_list, resolves its IOMMU group, selects a
+ * container (the default container for device number 0, a newly created one
+ * for any other device), binds the group to that container and records the
+ * device fd taken from the interrupt handle. Devices other than number 0 are
+ * additionally mapped with rte_pci_map_device().
+ *
+ * Returns the device number (>= 0) on success, -1 on failure.
+ */
+int
+nt_vfio_setup(struct rte_pci_device *dev)
+{
+	char devname[RTE_DEV_NAME_MAX_LEN] = { 0 };
+	int iommu_group_num = -1;
+	int vf_num;
+	int ret;
+	struct vfio_dev *vfio;
+
+	NT_LOG(INF, NTNIC, "NT VFIO device setup %s\n", dev->name);
+
+	vf_num = nt_vfio_vf_num(dev);
+
+	vfio = vfio_get(vf_num);
+
+	if (vfio == NULL) {
+		NT_LOG(ERR, NTNIC, "VFIO device setup failed. Illegal device id\n");
+		return -1;
+	}
+
+	vfio->dev_fd = -1;
+	vfio->group_fd = -1;
+	vfio->container_fd = -1;
+	vfio->iova_addr = START_VF_IOVA;
+
+	rte_pci_device_name(&dev->addr, devname, RTE_DEV_NAME_MAX_LEN);
+	ret = rte_vfio_get_group_num(rte_pci_get_sysfs_path(), devname, &iommu_group_num);
+
+	/*
+	 * Per the rte_vfio API only a positive result means the group number
+	 * was written; 0 (device not managed by VFIO) and negative values
+	 * (error) leave iommu_group_num unusable, so stop before binding.
+	 */
+	if (ret <= 0) {
+		NT_LOG(ERR, NTNIC,
+			"VFIO device setup failed. Could not get IOMMU group number.\n");
+		return -1;
+	}
+
+	if (vf_num == 0) {
+		/* use default container for pf0 */
+		vfio->container_fd = RTE_VFIO_DEFAULT_CONTAINER_FD;
+
+	} else {
+		vfio->container_fd = rte_vfio_container_create();
+
+		if (vfio->container_fd < 0) {
+			NT_LOG(ERR, NTNIC,
+				"VFIO device setup failed. VFIO container creation failed.\n");
+			return -1;
+		}
+	}
+
+	vfio->group_fd = rte_vfio_container_group_bind(vfio->container_fd, iommu_group_num);
+
+	if (vfio->group_fd < 0) {
+		NT_LOG(ERR, NTNIC,
+			"VFIO device setup failed. VFIO container group bind failed.\n");
+		goto err;
+	}
+
+	if (vf_num > 0) {
+		if (rte_pci_map_device(dev)) {
+			NT_LOG(ERR, NTNIC,
+				"Map VFIO device failed. is the vfio-pci driver loaded?\n");
+			goto err;
+		}
+	}
+
+	vfio->dev_fd = rte_intr_dev_fd_get(dev->intr_handle);
+
+	NT_LOG(DBG, NTNIC,
+		"%s: VFIO id=%d, dev_fd=%d, container_fd=%d, group_fd=%d, iommu_group_num=%d\n",
+		dev->name, vf_num, vfio->dev_fd, vfio->container_fd, vfio->group_fd,
+		iommu_group_num);
+
+	return vf_num;
+
+err:
+
+	/* Only a container created here is destroyed; the default one is left alone. */
+	if (vfio->container_fd != RTE_VFIO_DEFAULT_CONTAINER_FD)
+		rte_vfio_container_destroy(vfio->container_fd);
+
+	return -1;
+}
+
+/*
+ * Destroy the VFIO container recorded for a device number.
+ *
+ * Returns 0 once the destroy call has been issued (its result is not
+ * checked), or -1 when vf_num has no slot in the table.
+ */
+int
+nt_vfio_remove(int vf_num)
+{
+	struct vfio_dev *entry;
+
+	NT_LOG(DBG, NTNIC, "NT VFIO device remove VF=%d\n", vf_num);
+
+	entry = vfio_get(vf_num);
+
+	if (entry != NULL) {
+		rte_vfio_container_destroy(entry->container_fd);
+		return 0;
+	}
+
+	NT_LOG(ERR, NTNIC, "VFIO device remove failed. Illegal device id\n");
+	return -1;
+}
+
+/*
+ * Map a buffer for DMA in the VFIO container of device vf_num.
+ *
+ * When size equals ONE_G_SIZE the 1 GB-aligned region containing virt_addr is
+ * mapped and the IOVA returned through iova_addr carries virt_addr's offset
+ * inside that region; for any other size virt_addr is mapped as given.
+ * IOVAs are handed out from a per-device cursor that starts at START_VF_IOVA.
+ *
+ * Returns 0 on success (with *iova_addr set), -1 when vf_num is invalid or the
+ * container mapping fails.
+ */
+int
+nt_vfio_dma_map(int vf_num, void *virt_addr, uint64_t *iova_addr, uint64_t size)
+{
+	uint64_t gp_virt_base;
+	uint64_t gp_offset;
+	uint64_t iova_step;
+	struct vfio_dev *vfio;
+	int res;
+
+	if (size == ONE_G_SIZE) {
+		gp_virt_base = (uint64_t)virt_addr & ~ONE_G_MASK;
+		gp_offset = (uint64_t)virt_addr & ONE_G_MASK;
+
+	} else {
+		gp_virt_base = (uint64_t)virt_addr;
+		gp_offset = 0;
+	}
+
+	vfio = vfio_get(vf_num);
+
+	if (vfio == NULL) {
+		NT_LOG(ERR, NTNIC, "VFIO MAP: VF number %d invalid\n", vf_num);
+		return -1;
+	}
+
+	NT_LOG(DBG, NTNIC,
+		"VFIO MMAP VF=%d VirtAddr=%p HPA=%" PRIX64 " VirtBase=%" PRIX64
+		" IOVA Addr=%" PRIX64 " size=%" PRIX64 "\n",
+		vf_num, virt_addr, rte_malloc_virt2iova(virt_addr), gp_virt_base, vfio->iova_addr,
+		size);
+
+	res = rte_vfio_container_dma_map(vfio->container_fd, gp_virt_base, vfio->iova_addr,
+			size);
+
+	NT_LOG(DBG, NTNIC, "VFIO MMAP res %i, container_fd %i, vf_num %i\n", res,
+		vfio->container_fd, vf_num);
+
+	if (res) {
+		NT_LOG(ERR, NTNIC, "rte_vfio_container_dma_map failed: res %d\n", res);
+		return -1;
+	}
+
+	*iova_addr = vfio->iova_addr + gp_offset;
+
+	/*
+	 * Reserve whole 1 GB slots of IOVA space covering this mapping. Moving
+	 * the cursor by a fixed 1 GB would let a mapping larger than 1 GB
+	 * overlap the next one. At least one slot is always consumed.
+	 */
+	iova_step = (size + ONE_G_MASK) & ~(uint64_t)ONE_G_MASK;
+
+	if (iova_step < ONE_G_SIZE)
+		iova_step = ONE_G_SIZE;
+
+	vfio->iova_addr += iova_step;
+
+	return 0;
+}
+
+/*
+ * Undo a mapping made by nt_vfio_dma_map() for device vf_num.
+ *
+ * For a size of ONE_G_SIZE the virtual address is rounded down to its 1 GB
+ * base and the same offset is subtracted from iova_addr, mirroring how the
+ * mapping was created; other sizes are unmapped as given.
+ *
+ * Returns 0 on success or when the slot's container_fd is -1 (nothing is
+ * unmapped in that case), -1 when vf_num is invalid or the unmap call fails.
+ *
+ * NOTE(review): nt_vfio_setup() stores RTE_VFIO_DEFAULT_CONTAINER_FD for
+ * device number 0. If that constant is also -1, this function never unmaps
+ * for that device; confirm against rte_vfio.h.
+ */
+int
+nt_vfio_dma_unmap(int vf_num, void *virt_addr, uint64_t iova_addr, uint64_t size)
+{
+	const uint64_t va = (uint64_t)virt_addr;
+	uint64_t map_base = va;
+	uint64_t map_iova = iova_addr;
+	struct vfio_dev *entry = vfio_get(vf_num);
+	int res;
+
+	if (entry == NULL) {
+		NT_LOG(ERR, NTNIC, "VFIO UNMAP: VF number %d invalid\n", vf_num);
+		return -1;
+	}
+
+	if (entry->container_fd == -1)
+		return 0;
+
+	if (size == ONE_G_SIZE) {
+		map_base = va & ~ONE_G_MASK;
+		map_iova -= va & ONE_G_MASK;
+	}
+
+	res = rte_vfio_container_dma_unmap(entry->container_fd, map_base, map_iova, size);
+
+	if (res == 0)
+		return 0;
+
+	NT_LOG(ERR, NTNIC,
+		"VFIO UNMMAP FAILED! res %i, container_fd %i, vf_num %i, virt_base=%" PRIX64
+		", IOVA=%" PRIX64 ", size=%" PRIX64 "\n",
+		res, entry->container_fd, vf_num, map_base, map_iova, size);
+	return -1;
+}
+
+/* Register this module's DMA map/unmap routines through nt_util_vfio_init(). */
+void
+nt_vfio_init(void)
+{
+	struct nt_util_vfio_impl impl = {
+		.vfio_dma_map = nt_vfio_dma_map,
+		.vfio_dma_unmap = nt_vfio_dma_unmap,
+	};
+
+	nt_util_vfio_init(&impl);
+}
diff --git a/drivers/net/ntnic/ntnic_vfio.h b/drivers/net/ntnic/ntnic_vfio.h
new file mode 100644
index 0000000000..69fef7923d
--- /dev/null
+++ b/drivers/net/ntnic/ntnic_vfio.h
@@ -0,0 +1,29 @@ 
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Napatech A/S
+ */
+
+#ifndef _NTNIC_VFIO_H_
+#define _NTNIC_VFIO_H_
+
+#include <rte_dev.h>
+#include <rte_bus_pci.h>
+#include <ethdev_pci.h>
+
+/* Register the VFIO DMA map/unmap routines with nt_util. */
+void
+nt_vfio_init(void);
+
+/* Set up the VFIO context for dev; returns its device number, or -1 on failure. */
+int
+nt_vfio_setup(struct rte_pci_device *dev);
+/* Destroy the container recorded for vf_num; returns 0, or -1 if vf_num is invalid. */
+int
+nt_vfio_remove(int vf_num);
+
+/*
+ * Map virt_addr/size for DMA on device vf_num and store the resulting IOVA in
+ * *iova_addr. Returns 0 on success, -1 on failure.
+ */
+int
+nt_vfio_dma_map(int vf_num, void *virt_addr, uint64_t *iova_addr, uint64_t size);
+/* Unmap a region previously mapped for device vf_num. Returns 0 on success, -1 on failure. */
+int
+nt_vfio_dma_unmap(int vf_num, void *virt_addr, uint64_t iova_addr, uint64_t size);
+
+/* Find device (PF/VF) number from device address */
+int
+nt_vfio_vf_num(const struct rte_pci_device *dev);
+#endif	/* _NTNIC_VFIO_H_ */
diff --git a/drivers/net/ntnic/ntutil/nt_util.c b/drivers/net/ntnic/ntutil/nt_util.c
index 5395bf6993..53c39ef112 100644
--- a/drivers/net/ntnic/ntutil/nt_util.c
+++ b/drivers/net/ntnic/ntutil/nt_util.c
@@ -15,6 +15,8 @@ 
 #include "ntlog.h"
 #include "nt_util.h"
 
+static struct nt_util_vfio_impl vfio_cb;
+
 /* uses usleep which schedules out the calling thread */
 void nt_os_wait_usec(int val)
 {
@@ -31,3 +33,66 @@  uint64_t nt_util_align_size(uint64_t size)
 {
 	return 1 << rte_log2_u64(size);
 }
+
+/*
+ * Install the VFIO DMA callbacks used by nt_dma_alloc() and nt_dma_free().
+ * The structure is copied by value into the file-local vfio_cb, so the
+ * caller's copy does not have to stay alive after this call.
+ */
+void nt_util_vfio_init(struct nt_util_vfio_impl *impl)
+{
+	vfio_cb = *impl;
+}
+
+/*
+ * Allocate a DMA buffer and map it through the registered VFIO map callback.
+ *
+ * A descriptor and a buffer of `size` bytes (aligned to `align`, on NUMA
+ * socket `numa`) are allocated with the rte_malloc family. The buffer is then
+ * mapped for device number 0 using the size returned by nt_util_align_size().
+ *
+ * Returns the descriptor (addr, iova and mapped size filled in), or NULL when
+ * no map callback is registered, an allocation fails or the mapping fails.
+ * Release the result with nt_dma_free().
+ */
+struct nt_dma_s *nt_dma_alloc(uint64_t size, uint64_t align, int numa)
+{
+	const uint64_t map_size = nt_util_align_size(size);
+	struct nt_dma_s *vfio_addr;
+	void *addr;
+	int res;
+
+	/* Calling through an unset callback would crash; fail cleanly instead. */
+	if (vfio_cb.vfio_dma_map == NULL) {
+		NT_LOG(ERR, GENERAL, "VFIO DMA map callback not registered\n");
+		return NULL;
+	}
+
+	vfio_addr = rte_malloc(NULL, sizeof(*vfio_addr), 0);
+
+	if (!vfio_addr) {
+		NT_LOG(ERR, GENERAL, "VFIO rte_malloc failed\n");
+		return NULL;
+	}
+
+	addr = rte_malloc_socket(NULL, size, align, numa);
+
+	if (!addr) {
+		rte_free(vfio_addr);
+		NT_LOG(ERR, GENERAL, "VFIO rte_malloc_socket failed\n");
+		return NULL;
+	}
+
+	res = vfio_cb.vfio_dma_map(0, addr, &vfio_addr->iova, map_size);
+
+	if (res != 0) {
+		rte_free(addr);
+		rte_free(vfio_addr);
+		NT_LOG(ERR, GENERAL, "VFIO nt_dma_map failed\n");
+		return NULL;
+	}
+
+	vfio_addr->addr = (uint64_t)addr;
+	vfio_addr->size = map_size;
+
+	/* The size and align fields are separated by ", " like the other fields. */
+	NT_LOG(DBG, GENERAL,
+		"VFIO DMA alloc addr=%" PRIX64 ", iova=%" PRIX64
+		", size=%" PRIX64 ", align=0x%" PRIX64 "\n",
+		vfio_addr->addr, vfio_addr->iova, vfio_addr->size, align);
+
+	return vfio_addr;
+}
+
+/*
+ * Release a buffer obtained from nt_dma_alloc(): unmap it through the
+ * registered VFIO unmap callback (device number 0), then free both the buffer
+ * and its descriptor. A failed unmap is logged as a warning and the memory is
+ * freed regardless.
+ */
+void nt_dma_free(struct nt_dma_s *vfio_addr)
+{
+	void *buf = (void *)vfio_addr->addr;
+
+	NT_LOG(DBG, GENERAL, "VFIO DMA free addr=%" PRIX64 ", iova=%" PRIX64 ", size=%" PRIX64 "\n",
+		vfio_addr->addr, vfio_addr->iova, vfio_addr->size);
+
+	if (vfio_cb.vfio_dma_unmap(0, buf, vfio_addr->iova, vfio_addr->size) != 0) {
+		NT_LOG(WRN, GENERAL,
+			"VFIO DMA free FAILED addr=%" PRIX64 ", iova=%" PRIX64 ", size=%" PRIX64 "\n",
+			vfio_addr->addr, vfio_addr->iova, vfio_addr->size);
+	}
+
+	rte_free(buf);
+	rte_free(vfio_addr);
+}