@@ -18,5 +18,6 @@ includes = [
sources = files(
'ntlog/ntlog.c',
'ntutil/nt_util.c',
+ 'ntnic_vfio.c',
'ntnic_ethdev.c',
)
@@ -8,10 +8,17 @@
#include <rte_bus_pci.h>
#include <ethdev_pci.h>
+#include <rte_eal.h>
+#include <rte_dev.h>
+#include <rte_vfio.h>
+
#include "ntlog.h"
+#include "ntnic_vfio.h"
#include "nt_util.h"
+#define EXCEPTION_PATH_HID 0
+
static const struct rte_pci_id nthw_pci_id_map[] = {
{
.vendor_id = 0,
@@ -21,12 +28,24 @@ static const struct rte_pci_id nthw_pci_id_map[] = {
static int
nthw_pci_dev_init(struct rte_pci_device *pci_dev)
{
+ nt_vfio_init();
+
uint32_t n_port_mask = -1; /* All ports enabled by default */
int n_phy_ports;
NT_LOG_DBGX(DEBUG, NTNIC, "Dev %s PF #%i Init : %02x:%02x:%i\n", pci_dev->name,
pci_dev->addr.function, pci_dev->addr.bus, pci_dev->addr.devid,
pci_dev->addr.function);
+
+ /* Setup VFIO context */
+ int vfio = nt_vfio_setup(pci_dev);
+
+ if (vfio < 0) {
+ NT_LOG_DBGX(ERR, TNIC, "%s: vfio_setup error %d\n",
+ (pci_dev->name[0] ? pci_dev->name : "NA"), -1);
+ return -1;
+ }
+
n_phy_ports = 0;
for (int n_intf_no = 0; n_intf_no < n_phy_ports; n_intf_no++) {
@@ -67,6 +86,8 @@ static int
nthw_pci_dev_deinit(struct rte_eth_dev *eth_dev __rte_unused)
{
NT_LOG_DBGX(DEBUG, NTNIC, "PCI device deinitialization\n");
+
+ nt_vfio_remove(EXCEPTION_PATH_HID);
return 0;
}
@@ -131,3 +152,4 @@ static struct rte_pci_driver rte_nthw_pmd = {
};
RTE_PMD_REGISTER_PCI(net_ntnic, rte_nthw_pmd);
+RTE_PMD_REGISTER_KMOD_DEP(net_ntnic, "* vfio-pci");
new file mode 100644
@@ -0,0 +1,235 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Napatech A/S
+ */
+
+#include <sys/ioctl.h>
+
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_vfio.h>
+#include <rte_dev.h>
+#include <rte_bus_pci.h>
+#include <rte_spinlock.h>
+
+#include <ntlog.h>
+#include <nt_util.h>
+#include "ntnic_vfio.h"
+
+#define ONE_G_SIZE 0x40000000 /* 1 GiB: size that selects 1 GiB base alignment, and the IOVA step per mapping */
+#define ONE_G_MASK (ONE_G_SIZE - 1) /* low 30 bits: offset inside a 1 GiB-aligned region */
+#define START_VF_IOVA 0x220000000000 /* initial IOVA given to each device slot in nt_vfio_setup() */
+
+/*
+ * Derive the PF/VF index of a PCI device from its address.
+ *
+ * The low five bits of the device id are placed above the low three bits of
+ * the function number, so the result is always in the range 0..255.
+ */
+int
+nt_vfio_vf_num(const struct rte_pci_device *pdev)
+{
+	const int dev_part = (pdev->addr.devid & 0x1f) << 3;
+	const int func_part = pdev->addr.function & 0x7;
+
+	return dev_part + func_part;
+}
+
+/* Internal API */
+struct vfio_dev { /* VFIO bookkeeping for one PF/VF slot */
+ int container_fd; /* container passed to the rte_vfio_container_* calls for this slot */
+ int group_fd; /* result of rte_vfio_container_group_bind() */
+ int dev_fd; /* result of rte_intr_dev_fd_get() on the device's interrupt handle */
+ uint64_t iova_addr; /* IOVA that the next nt_vfio_dma_map() call will use */
+};
+
+static struct vfio_dev vfio_list[256]; /* one slot per PF/VF index; accessed through vfio_get() */
+
+/*
+ * Look up the VFIO state slot for a PF/VF index.
+ *
+ * Returns a pointer into vfio_list, or NULL when vf_num lies outside the
+ * table.
+ */
+static struct vfio_dev *
+vfio_get(int vf_num)
+{
+	/* Take the bound from the table itself so the two cannot drift apart. */
+	const int n_slots = (int)(sizeof(vfio_list) / sizeof(vfio_list[0]));
+
+	if (vf_num >= 0 && vf_num < n_slots)
+		return &vfio_list[vf_num];
+
+	return NULL;
+}
+
+/* External API */
+/*
+ * Prepare VFIO state for a PCI device.
+ *
+ * The device's PF/VF index selects a slot in vfio_list. Index 0 uses
+ * RTE_VFIO_DEFAULT_CONTAINER_FD; every other index gets a newly created
+ * container and has the device mapped with rte_pci_map_device(). The
+ * device's IOMMU group is then bound to the selected container.
+ *
+ * Returns the PF/VF index (>= 0) on success and -1 on any failure.
+ */
+int
+nt_vfio_setup(struct rte_pci_device *dev)
+{
+	char devname[RTE_DEV_NAME_MAX_LEN] = { 0 };
+	int iommu_group_num = -1;
+	int vf_num;
+	struct vfio_dev *vfio;
+
+	NT_LOG(INF, NTNIC, "NT VFIO device setup %s\n", dev->name);
+
+	vf_num = nt_vfio_vf_num(dev);
+
+	vfio = vfio_get(vf_num);
+
+	if (vfio == NULL) {
+		NT_LOG(ERR, NTNIC, "VFIO device setup failed. Illegal device id\n");
+		return -1;
+	}
+
+	vfio->dev_fd = -1;
+	vfio->group_fd = -1;
+	vfio->container_fd = -1;
+	vfio->iova_addr = START_VF_IOVA;
+
+	rte_pci_device_name(&dev->addr, devname, RTE_DEV_NAME_MAX_LEN);
+
+	/*
+	 * rte_vfio_get_group_num() returns a positive value only when the group
+	 * number was found. Without this check an indeterminate group number
+	 * would be handed to the bind call below.
+	 */
+	if (rte_vfio_get_group_num(rte_pci_get_sysfs_path(), devname, &iommu_group_num) <= 0) {
+		NT_LOG(ERR, NTNIC,
+			"VFIO device setup failed. Could not get IOMMU group number.\n");
+		return -1;
+	}
+
+	if (vf_num == 0) {
+		/* use default container for pf0 */
+		vfio->container_fd = RTE_VFIO_DEFAULT_CONTAINER_FD;
+
+	} else {
+		vfio->container_fd = rte_vfio_container_create();
+
+		if (vfio->container_fd < 0) {
+			NT_LOG(ERR, NTNIC,
+				"VFIO device setup failed. VFIO container creation failed.\n");
+			return -1;
+		}
+	}
+
+	vfio->group_fd = rte_vfio_container_group_bind(vfio->container_fd, iommu_group_num);
+
+	if (vfio->group_fd < 0) {
+		NT_LOG(ERR, NTNIC,
+			"VFIO device setup failed. VFIO container group bind failed.\n");
+		goto err;
+	}
+
+	if (vf_num > 0) {
+		if (rte_pci_map_device(dev)) {
+			NT_LOG(ERR, NTNIC,
+				"Map VFIO device failed. is the vfio-pci driver loaded?\n");
+			goto err;
+		}
+	}
+
+	vfio->dev_fd = rte_intr_dev_fd_get(dev->intr_handle);
+
+	NT_LOG(DBG, NTNIC,
+		"%s: VFIO id=%d, dev_fd=%d, container_fd=%d, group_fd=%d, iommu_group_num=%d\n",
+		dev->name, vf_num, vfio->dev_fd, vfio->container_fd, vfio->group_fd,
+		iommu_group_num);
+
+	return vf_num;
+
+err:
+
+	/* Only containers created here are destroyed; the default one is left alone. */
+	if (vfio->container_fd != RTE_VFIO_DEFAULT_CONTAINER_FD)
+		rte_vfio_container_destroy(vfio->container_fd);
+
+	return -1;
+}
+
+/*
+ * Destroy the VFIO container recorded for a PF/VF index.
+ *
+ * Returns 0 once the destroy call has been issued (its result is not
+ * inspected), or -1 when vf_num does not name a valid slot.
+ */
+int
+nt_vfio_remove(int vf_num)
+{
+	NT_LOG(DBG, NTNIC, "NT VFIO device remove VF=%d\n", vf_num);
+
+	struct vfio_dev *const slot = vfio_get(vf_num);
+
+	if (slot != NULL) {
+		rte_vfio_container_destroy(slot->container_fd);
+		return 0;
+	}
+
+	NT_LOG(ERR, NTNIC, "VFIO device remove failed. Illegal device id\n");
+	return -1;
+}
+
+/*
+ * Map a virtual memory region into the VFIO container of a PF/VF slot.
+ *
+ * When size is exactly 1 GiB, the virtual base is rounded down to a 1 GiB
+ * boundary before mapping, and the IOVA reported to the caller carries the
+ * original offset inside that region. For any other size the address is
+ * mapped as given.
+ *
+ * vf_num     PF/VF index selecting the slot (see nt_vfio_vf_num()).
+ * virt_addr  start of the memory to map.
+ * iova_addr  out: IOVA corresponding to virt_addr; must not be NULL.
+ * size       number of bytes to map.
+ *
+ * Returns 0 on success, -1 on an invalid argument or mapping failure. The
+ * slot's IOVA cursor only advances on success.
+ */
+int
+nt_vfio_dma_map(int vf_num, void *virt_addr, uint64_t *iova_addr, uint64_t size)
+{
+	uint64_t gp_virt_base;
+	uint64_t gp_offset;
+	uint64_t iova_step;
+	struct vfio_dev *vfio;
+	int res;
+
+	if (iova_addr == NULL) {
+		NT_LOG(ERR, NTNIC, "VFIO MAP: no storage for resulting IOVA\n");
+		return -1;
+	}
+
+	if (size == ONE_G_SIZE) {
+		gp_virt_base = (uint64_t)virt_addr & ~(uint64_t)ONE_G_MASK;
+		gp_offset = (uint64_t)virt_addr & ONE_G_MASK;
+
+	} else {
+		gp_virt_base = (uint64_t)virt_addr;
+		gp_offset = 0;
+	}
+
+	vfio = vfio_get(vf_num);
+
+	if (vfio == NULL) {
+		NT_LOG(ERR, NTNIC, "VFIO MAP: VF number %d invalid\n", vf_num);
+		return -1;
+	}
+
+	NT_LOG(DBG, NTNIC,
+		"VFIO MMAP VF=%d VirtAddr=%p HPA=%" PRIX64 " VirtBase=%" PRIX64
+		" IOVA Addr=%" PRIX64 " size=%" PRIX64 "\n",
+		vf_num, virt_addr, rte_malloc_virt2iova(virt_addr), gp_virt_base, vfio->iova_addr,
+		size);
+
+	res = rte_vfio_container_dma_map(vfio->container_fd, gp_virt_base, vfio->iova_addr,
+			size);
+
+	NT_LOG(DBG, NTNIC, "VFIO MMAP res %i, container_fd %i, vf_num %i\n", res,
+		vfio->container_fd, vf_num);
+
+	if (res) {
+		NT_LOG(ERR, NTNIC, "rte_vfio_container_dma_map failed: res %d\n", res);
+		return -1;
+	}
+
+	*iova_addr = vfio->iova_addr + gp_offset;
+
+	/*
+	 * Reserve whole 1 GiB slots of IOVA space. A fixed 1 GiB step would let
+	 * a mapping larger than 1 GiB overlap the IOVA range of the next one.
+	 * Sizes up to 1 GiB still advance by exactly 1 GiB.
+	 */
+	iova_step = (size + ONE_G_MASK) & ~(uint64_t)ONE_G_MASK;
+
+	if (iova_step < ONE_G_SIZE)
+		iova_step = ONE_G_SIZE;
+
+	vfio->iova_addr += iova_step;
+
+	return 0;
+}
+
+/*
+ * Remove a mapping previously set up for a PF/VF slot.
+ *
+ * For a size of exactly 1 GiB the virtual base is rounded down to a 1 GiB
+ * boundary and the same offset is subtracted from iova_addr, mirroring the
+ * adjustment made in nt_vfio_dma_map().
+ *
+ * Returns 0 on success or when the slot's container_fd is -1, and -1 on an
+ * invalid vf_num or when the unmap call fails.
+ *
+ * NOTE(review): if RTE_VFIO_DEFAULT_CONTAINER_FD is also -1, the early
+ * return below skips the unmap for the slot that uses the default
+ * container - confirm this is intended.
+ */
+int
+nt_vfio_dma_unmap(int vf_num, void *virt_addr, uint64_t iova_addr, uint64_t size)
+{
+	struct vfio_dev *const slot = vfio_get(vf_num);
+
+	if (slot == NULL) {
+		NT_LOG(ERR, NTNIC, "VFIO UNMAP: VF number %d invalid\n", vf_num);
+		return -1;
+	}
+
+	if (slot->container_fd == -1)
+		return 0;
+
+	uint64_t map_base = (uint64_t)virt_addr;
+
+	if (size == ONE_G_SIZE) {
+		/* Undo the in-region offset that the map call added to the IOVA. */
+		iova_addr -= map_base & ONE_G_MASK;
+		map_base &= ~ONE_G_MASK;
+	}
+
+	const int rc = rte_vfio_container_dma_unmap(slot->container_fd, map_base, iova_addr, size);
+
+	if (rc == 0)
+		return 0;
+
+	NT_LOG(ERR, NTNIC,
+		"VFIO UNMMAP FAILED! res %i, container_fd %i, vf_num %i, virt_base=%" PRIX64
+		", IOVA=%" PRIX64 ", size=%" PRIX64 "\n",
+		rc, slot->container_fd, vf_num, map_base, iova_addr, size);
+	return -1;
+}
+
+/*
+ * Register this file's DMA map/unmap routines with nt_util by passing them
+ * to nt_util_vfio_init().
+ */
+void
+nt_vfio_init(void)
+{
+	struct nt_util_vfio_impl dma_ops = {
+		.vfio_dma_map = nt_vfio_dma_map,
+		.vfio_dma_unmap = nt_vfio_dma_unmap,
+	};
+
+	nt_util_vfio_init(&dma_ops);
+}
new file mode 100644
@@ -0,0 +1,29 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Napatech A/S
+ */
+
+#ifndef _NTNIC_VFIO_H_
+#define _NTNIC_VFIO_H_
+
+#include <rte_dev.h>
+#include <rte_bus_pci.h>
+#include <ethdev_pci.h>
+/*
+ * Hand the nt_vfio_dma_map()/nt_vfio_dma_unmap() routines to nt_util through
+ * nt_util_vfio_init().
+ */
+void
+nt_vfio_init(void);
+/*
+ * nt_vfio_setup(): prepare VFIO state for a PCI device; returns the device's
+ * PF/VF index on success and -1 on failure.
+ * nt_vfio_remove(): destroy the VFIO container recorded for vf_num; returns
+ * 0, or -1 when vf_num is not a valid index.
+ */
+int
+nt_vfio_setup(struct rte_pci_device *dev);
+int
+nt_vfio_remove(int vf_num);
+/*
+ * nt_vfio_dma_map(): map size bytes at virt_addr into the container of
+ * vf_num and store the resulting IOVA in *iova_addr.
+ * nt_vfio_dma_unmap(): remove such a mapping again.
+ * Both return 0 on success and -1 on failure.
+ */
+int
+nt_vfio_dma_map(int vf_num, void *virt_addr, uint64_t *iova_addr, uint64_t size);
+int
+nt_vfio_dma_unmap(int vf_num, void *virt_addr, uint64_t iova_addr, uint64_t size);
+
+/*
+ * Find device (PF/VF) number from device address:
+ * ((devid & 0x1f) << 3) + (function & 0x7)
+ */
+int
+nt_vfio_vf_num(const struct rte_pci_device *dev);
+#endif /* _NTNIC_VFIO_H_ */
@@ -15,6 +15,8 @@
#include "ntlog.h"
#include "nt_util.h"
+static struct nt_util_vfio_impl vfio_cb; /* DMA map/unmap callbacks stored by nt_util_vfio_init() */
+
/* uses usleep which schedules out the calling thread */
void nt_os_wait_usec(int val)
{
@@ -31,3 +33,66 @@ uint64_t nt_util_align_size(uint64_t size)
{
return 1 << rte_log2_u64(size);
}
+/*
+ * Store a copy of the caller's VFIO callback table in vfio_cb. The table is
+ * copied by value, so impl does not need to stay valid after the call.
+ * nt_dma_alloc() and nt_dma_free() call through the stored callbacks.
+ */
+void nt_util_vfio_init(struct nt_util_vfio_impl *impl)
+{
+ vfio_cb = *impl;
+}
+
+/*
+ * Allocate a buffer and map it for DMA through the registered VFIO callback.
+ *
+ * size   requested number of bytes; the mapped size is
+ *        nt_util_align_size(size).
+ * align  alignment passed to rte_malloc_socket().
+ * numa   socket id passed to rte_malloc_socket().
+ *
+ * The mapping is always requested for device index 0.
+ *
+ * Returns a descriptor holding the virtual address, IOVA and mapped size, or
+ * NULL when no map callback is registered, an allocation fails, or the
+ * mapping fails. Release the result with nt_dma_free().
+ */
+struct nt_dma_s *nt_dma_alloc(uint64_t size, uint64_t align, int numa)
+{
+	struct nt_dma_s *vfio_addr;
+	uint64_t map_size;
+	void *addr;
+
+	/* Calling through an unset callback would dereference a NULL pointer. */
+	if (vfio_cb.vfio_dma_map == NULL) {
+		NT_LOG(ERR, GENERAL, "VFIO DMA map callback not registered\n");
+		return NULL;
+	}
+
+	vfio_addr = rte_malloc(NULL, sizeof(struct nt_dma_s), 0);
+
+	if (!vfio_addr) {
+		NT_LOG(ERR, GENERAL, "VFIO rte_malloc failed\n");
+		return NULL;
+	}
+
+	addr = rte_malloc_socket(NULL, size, align, numa);
+
+	if (!addr) {
+		rte_free(vfio_addr);
+		NT_LOG(ERR, GENERAL, "VFIO rte_malloc_socket failed\n");
+		return NULL;
+	}
+
+	/* Compute once so the mapped size and the recorded size always agree. */
+	map_size = nt_util_align_size(size);
+
+	if (vfio_cb.vfio_dma_map(0, addr, &vfio_addr->iova, map_size) != 0) {
+		rte_free(addr);
+		rte_free(vfio_addr);
+		NT_LOG(ERR, GENERAL, "VFIO nt_dma_map failed\n");
+		return NULL;
+	}
+
+	vfio_addr->addr = (uint64_t)addr;
+	vfio_addr->size = map_size;
+
+	NT_LOG(DBG, GENERAL,
+		"VFIO DMA alloc addr=%" PRIX64 ", iova=%" PRIX64
+		", size=%" PRIX64 ", align=0x%" PRIX64 "\n",
+		vfio_addr->addr, vfio_addr->iova, vfio_addr->size, align);
+
+	return vfio_addr;
+}
+
+/*
+ * Unmap and release a buffer obtained from nt_dma_alloc().
+ *
+ * A NULL descriptor is ignored. The unmap is requested for device index 0.
+ * An unmap failure is logged as a warning, and the buffer and its descriptor
+ * are freed regardless.
+ */
+void nt_dma_free(struct nt_dma_s *vfio_addr)
+{
+	/* Follow free() semantics so cleanup paths need no extra NULL guard. */
+	if (vfio_addr == NULL)
+		return;
+
+	NT_LOG(DBG, GENERAL, "VFIO DMA free addr=%" PRIX64 ", iova=%" PRIX64 ", size=%" PRIX64 "\n",
+		vfio_addr->addr, vfio_addr->iova, vfio_addr->size);
+
+	/* Treat a missing unmap callback as an unmap failure instead of crashing. */
+	int res = -1;
+
+	if (vfio_cb.vfio_dma_unmap != NULL)
+		res = vfio_cb.vfio_dma_unmap(0, (void *)vfio_addr->addr, vfio_addr->iova,
+				vfio_addr->size);
+
+	if (res != 0) {
+		NT_LOG(WRN, GENERAL,
+			"VFIO DMA free FAILED addr=%" PRIX64 ", iova=%" PRIX64 ", size=%" PRIX64 "\n",
+			vfio_addr->addr, vfio_addr->iova, vfio_addr->size);
+	}
+
+	rte_free((void *)(vfio_addr->addr));
+	rte_free(vfio_addr);
+}