[v3,5/8] emu/iavf: add resource management and internal logic of iavf

Message ID 20210114062512.45462-6-chenbo.xia@intel.com (mailing list archive)
State Changes Requested, archived
Delegated to: Thomas Monjalon
Headers
Series Introduce emudev library and iavf emudev driver |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Chenbo Xia Jan. 14, 2021, 6:25 a.m. UTC
This patch adds allocation and release of the emulated device's
resources, which comprise the memory backing the PCI BARs and the
interrupt-related resources. The device's internal logic (handling
of config space and BAR accesses) is also added.

Signed-off-by: Chenbo Xia <chenbo.xia@intel.com>
Signed-off-by: Xiuchun Lu <xiuchun.lu@intel.com>
---
 drivers/emu/iavf/iavf_emu.c       |   1 +
 drivers/emu/iavf/iavf_emudev.c    |  20 +
 drivers/emu/iavf/iavf_vfio_user.c | 683 +++++++++++++++++++++++++++++-
 drivers/emu/iavf/iavf_vfio_user.h |  41 ++
 drivers/emu/iavf/meson.build      |   8 +
 5 files changed, 750 insertions(+), 3 deletions(-)
  

Patch

diff --git a/drivers/emu/iavf/iavf_emu.c b/drivers/emu/iavf/iavf_emu.c
index 2f1513137c..7506849e42 100644
--- a/drivers/emu/iavf/iavf_emu.c
+++ b/drivers/emu/iavf/iavf_emu.c
@@ -19,6 +19,7 @@  iavf_emu_dev_close(struct rte_emudev *dev)
 	}
 
 	iavf = (struct iavf_emudev *)dev->priv_data;
+	iavf_emu_uninit_vfio_user(iavf);
 	iavf_emu_unregister_vfio_user(iavf);
 	iavf_emu_uninit_device(iavf);
 	dev->priv_data = NULL;
diff --git a/drivers/emu/iavf/iavf_emudev.c b/drivers/emu/iavf/iavf_emudev.c
index 6ba1cc2a89..74f4829e7f 100644
--- a/drivers/emu/iavf/iavf_emudev.c
+++ b/drivers/emu/iavf/iavf_emudev.c
@@ -180,10 +180,30 @@  rte_emu_iavf_probe(struct rte_vdev_device *dev)
 		goto err_reg;
 	}
 
+	ret = iavf_emu_init_vfio_user(iavf);
+	if (ret) {
+		EMU_IAVF_LOG(ERR,
+			"Emulated iavf failed to init vfio user.\n");
+		ret = -1;
+		goto err_init;
+	}
+
+	ret = iavf_emu_start_vfio_user(iavf);
+	if (ret) {
+		EMU_IAVF_LOG(ERR,
+			"Emulated iavf failed to start vfio user.\n");
+		ret = -1;
+		goto err_start;
+	}
+
 	edev->started = 1;
 	rte_kvargs_free(kvlist);
 	return 0;
 
+err_start:
+	iavf_emu_uninit_vfio_user(iavf);
+err_init:
+	iavf_emu_unregister_vfio_user(iavf);
 err_reg:
 	iavf_emu_uninit_device(iavf);
 err_ndev:
diff --git a/drivers/emu/iavf/iavf_vfio_user.c b/drivers/emu/iavf/iavf_vfio_user.c
index 0cc4c433c2..5c690978bd 100644
--- a/drivers/emu/iavf/iavf_vfio_user.c
+++ b/drivers/emu/iavf/iavf_vfio_user.c
@@ -2,13 +2,36 @@ 
  * Copyright(c) 2020 Intel Corporation
  */
 
+#include <linux/pci.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
 #include <pthread.h>
+#include <sys/types.h>
 
 #include <rte_malloc.h>
+#include <rte_emudev.h>
+#include <rte_memcpy.h>
 
 #include "iavf_vfio_user.h"
 #include <iavf_type.h>
 
+/*
+ * Store a 16-bit/32-bit value at addr. No byte swapping is performed, so the
+ * value is written in host byte order. Both arguments are parenthesized so
+ * that expression arguments (e.g. "base + off") expand as intended.
+ */
+#define STORE_LE16(addr, val)   (*(__u16 *)(addr) = (val))
+#define STORE_LE32(addr, val)   (*(__u32 *)(addr) = (val))
+
+/* Sizes of the two implemented memory BARs */
+#define IAVF_EMU_BAR0_SIZE 0x10000
+#define IAVF_EMU_BAR3_SIZE 0x1000
+/* All-ones value that handle_pci_bar_write() treats as a size probe */
+#define IAVF_EMU_BAR_SIZE_MASK 0xffffffff
+/* Two's complement of the size, i.e. the address bits such a BAR decodes */
+#define IAVF_EMU_BAR_MASK(sz) (~(sz) + 1)
+#define IAVF_EMU_MSIX_TABLE_SIZE 0x20
+
+#define PCI_VENDOR_ID_INTEL 0x8086
+#define PCI_SUBDEVICE_ID 0x1100
+#define PCI_CLASS_ETHERNET 0x0200
+
 struct iavf_emu_sock_list {
 	TAILQ_ENTRY(iavf_emu_sock_list) next;
 	struct rte_emudev *emu_dev;
@@ -52,10 +75,8 @@  iavf_emu_setup_irq(struct iavf_emudev *dev)
 	intr = dev->intr;
 	intr->intr_num = irq->irq_info[VFIO_PCI_MSIX_IRQ_INDEX].count;
 
-	for (i = 0; i < count; i++) {
+	for (i = 0; i < count; i++)
 		intr->info[i].fd = fds[i];
-		intr->info[i].enable = 0;
-	}
 
 	rte_free(fds);
 
@@ -199,6 +220,591 @@  iavf_emu_reset_all_resources(struct iavf_emudev *dev)
 	iavf_emu_reset_regions(dev);
 }
 
+static int
+iavf_emu_init_dev(struct iavf_emudev *dev)
+{	/* Build dev->vfio: device info, region table and IRQ table */
+	struct iavf_emu_vfio_user *vfio;
+	struct vfio_device_info *dev_info;
+	struct rte_vfio_user_regions *reg;
+	struct rte_vfio_user_irq_info *irq;
+	struct vfio_region_info_cap_sparse_mmap *sparse;
+	int ret;
+	uint32_t i, j;	/* i is also read by the unwind labels below */
+
+	vfio = rte_zmalloc_socket("vfio", sizeof(*vfio), 0, dev->numa_node);
+	if (!vfio) {
+		EMU_IAVF_LOG(ERR, "Failed to alloc iavf_emu_vfio_user\n");
+		ret = -1;
+		goto exit;
+	}
+
+	dev_info = rte_zmalloc_socket("vfio_dev_info",
+		sizeof(*dev_info), 0, dev->numa_node);
+	if (!dev_info) {
+		EMU_IAVF_LOG(ERR, "Failed to alloc vfio dev_info\n");
+		ret = -1;
+		goto err_info;
+	}
+	dev_info->argsz = sizeof(*dev_info);
+	dev_info->flags = VFIO_DEVICE_FLAGS_PCI | VFIO_DEVICE_FLAGS_RESET;
+	dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
+	dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
+
+	reg = rte_zmalloc_socket("vfio_user_regions",	/* header + one entry per region */
+		sizeof(*reg) + dev_info->num_regions *
+		sizeof(struct rte_vfio_user_reg_info), 0, dev->numa_node);
+	if (!reg) {
+		EMU_IAVF_LOG(ERR, "Failed to alloc vfio_user_regions\n");
+		ret = -1;
+		goto err_reg;
+	}
+	reg->reg_num = dev_info->num_regions;
+
+	for (i = 0; i < reg->reg_num; i++) {
+		struct rte_vfio_user_reg_info *vinfo = &reg->reg_info[i];
+		size_t sz = sizeof(struct vfio_region_info);	/* grown below when a capability follows */
+
+		/* BAR0 carries a sparse-mmap capability with two areas */
+		if (i == VFIO_PCI_BAR0_REGION_INDEX)
+			sz += sizeof(*sparse) + 2 * sizeof(*sparse->areas);
+
+		vinfo->info = rte_zmalloc_socket("vfio_region_info",
+			sz, 0, dev->numa_node);
+		if (!vinfo->info) {
+			EMU_IAVF_LOG(ERR, "Failed to alloc region info "
+				"for region %d\n", i);
+			ret = -1;
+			goto err_reg_alloc;
+		}
+
+		vinfo->info->index = i;
+
+		switch (i) {
+		case VFIO_PCI_CONFIG_REGION_INDEX:
+			vinfo->info->argsz = sz;
+			vinfo->info->offset = 0;
+			vinfo->info->size = IAVF_EMU_CFG_SPACE_SIZE;
+			vinfo->info->flags = VFIO_REGION_INFO_FLAG_READ |
+				       VFIO_REGION_INFO_FLAG_WRITE;
+			break;
+		case VFIO_PCI_BAR0_REGION_INDEX:
+			vinfo->info->argsz = sz;
+			vinfo->info->offset = 0;
+			vinfo->info->size = IAVF_EMU_BAR0_SIZE;
+			vinfo->info->flags  = VFIO_REGION_INFO_FLAG_READ |
+				       VFIO_REGION_INFO_FLAG_WRITE |
+				       VFIO_REGION_INFO_FLAG_MMAP |
+				       VFIO_REGION_INFO_FLAG_CAPS;
+			vinfo->info->cap_offset =	/* capability sits right after the fixed struct */
+						sizeof(struct vfio_region_info);
+
+			sparse = (struct vfio_region_info_cap_sparse_mmap *)
+						((uint8_t *)vinfo->info +
+						vinfo->info->cap_offset);
+			sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
+			sparse->header.version = 1;
+			sparse->nr_areas = 2;
+			sparse->areas[0].offset = 0;
+			sparse->areas[0].size = 0x3000;	/* area 0: [0, 0x3000) */
+			sparse->areas[1].offset = 0x6000;
+			sparse->areas[1].size = IAVF_EMU_BAR0_SIZE - 0x6000;	/* area 1: [0x6000, end of BAR0) */
+
+			break;
+		case VFIO_PCI_BAR3_REGION_INDEX:
+			vinfo->info->argsz = sz;
+			vinfo->info->offset = 0;
+			vinfo->info->size = IAVF_EMU_BAR3_SIZE;
+			vinfo->info->flags  = VFIO_REGION_INFO_FLAG_READ |
+				       VFIO_REGION_INFO_FLAG_WRITE |
+				       VFIO_REGION_INFO_FLAG_MMAP;
+			break;
+		default:	/* every other region is reported with size 0 and no flags */
+			vinfo->info->argsz = sz;
+			vinfo->info->offset = 0;
+			vinfo->info->size = 0;
+			vinfo->info->flags = 0;
+		}
+	}
+
+	irq = rte_zmalloc_socket("vfio_user_irq_info", sizeof(*irq) +
+		VFIO_PCI_NUM_IRQS * sizeof(struct vfio_irq_info),
+		0, dev->numa_node);
+	if (!irq) {
+		EMU_IAVF_LOG(ERR, "Failed to alloc vfio_user_irqs\n");
+		ret = -1;
+		goto err_irq;
+	}
+	irq->irq_num = VFIO_PCI_NUM_IRQS;
+
+	for (i = 0; i < VFIO_PCI_NUM_IRQS; i++) {
+		irq->irq_info[i].index = i;
+		irq->irq_info[i].flags =
+			VFIO_IRQ_INFO_EVENTFD | VFIO_IRQ_INFO_NORESIZE;
+		if (i == VFIO_PCI_MSIX_IRQ_INDEX)
+			irq->irq_info[i].count =
+				IAVF_EMU_MSIX_TABLE_SIZE + 1;	/* NOTE(review): +1 presumably matches the N-1 encoded table size - confirm */
+		else if (i == VFIO_PCI_ERR_IRQ_INDEX)
+			irq->irq_info[i].count = 1;
+		else
+			irq->irq_info[i].count = 0;
+	}
+
+	vfio->dev_info = dev_info;
+	vfio->reg = reg;
+	vfio->irq = irq;
+	dev->vfio = vfio;	/* published only once everything is allocated */
+
+	return 0;
+
+err_irq:	/* the region loop completed, so i == reg->reg_num and all infos are freed */
+err_reg_alloc:	/* i is the region whose info allocation failed; free those before it */
+	for (j = 0; j < i; j++)
+		rte_free(reg->reg_info[j].info);
+	rte_free(reg);
+err_reg:
+	rte_free(dev_info);
+err_info:
+	rte_free(vfio);
+exit:
+	return ret;	/* -1 on every failure path */
+}
+
+/**
+ * Free the vfio-user description hanging off dev->vfio: the device info,
+ * every per-region info, the region table, the IRQ table and the container
+ * itself.
+ *
+ * @param dev
+ *   Emulated iavf device.
+ * @return
+ *   0 on success, -1 if dev->vfio is not set.
+ */
+static int
+iavf_emu_uninit_dev(struct iavf_emudev *dev)
+{
+	struct iavf_emu_vfio_user *vfio = dev->vfio;
+	struct rte_vfio_user_regions *reg;
+	uint32_t i;
+
+	if (!vfio)
+		return -1;
+
+	reg = vfio->reg;
+	for (i = 0; i < reg->reg_num; i++)
+		rte_free(reg->reg_info[i].info);
+	rte_free(reg);
+
+	rte_free(vfio->dev_info);
+	rte_free(vfio->irq);
+	rte_free(vfio);
+
+	/* Drop the stale pointer so a repeated call is caught by the guard */
+	dev->vfio = NULL;
+
+	return 0;
+}
+
+/**
+ * Apply a write to the PCI command register of the emulated header.
+ *
+ * @param hdr
+ *   PCI header whose cmd field is updated.
+ * @param buf
+ *   Source of the 16-bit value to write.
+ * @param count
+ *   Number of bytes in buf; must be 2.
+ * @return
+ *   2 on success, -1 if count is not 2.
+ */
+static int
+handle_pci_cmd_write(struct iavf_emu_pci_hdr *hdr,
+	char *buf, size_t count)
+{
+	/* Below are all R/W bits in command register */
+	const uint16_t rw_bitmask = PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
+		PCI_COMMAND_MASTER | PCI_COMMAND_PARITY |
+		PCI_COMMAND_SERR | PCI_COMMAND_INTX_DISABLE;
+	uint16_t val;
+
+	if (count != sizeof(val)) {
+		EMU_IAVF_LOG(ERR, "Wrong write count (%zu) for PCI_COMMAND\n",
+			count);
+		return -1;
+	}
+
+	/* buf has no alignment guarantee, so copy instead of casting */
+	memcpy(&val, buf, sizeof(val));
+	/* Only write the R/W bits, every other bit keeps its old value */
+	hdr->cmd = (rw_bitmask & val) | (~rw_bitmask & hdr->cmd);
+
+	return 2;
+}
+
+/**
+ * Apply a write to the PCI status register of the emulated header.
+ *
+ * Bits covered by the write-1-to-clear mask are cleared when the written
+ * value has them set; all other bits are left untouched.
+ *
+ * @param hdr
+ *   PCI header whose status field is updated.
+ * @param buf
+ *   Source of the 16-bit value to write.
+ * @param count
+ *   Number of bytes in buf; must be 2.
+ * @return
+ *   2 on success, -1 if count is not 2.
+ */
+static int
+handle_pci_status_write(struct iavf_emu_pci_hdr *hdr,
+	char *buf, size_t count)
+{
+	/* Below are all write-1-to-clear bits in status register */
+	const uint16_t rw1c_bitmask = PCI_STATUS_PARITY |
+		PCI_STATUS_SIG_TARGET_ABORT |
+		PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_REC_MASTER_ABORT |
+		PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_DETECTED_PARITY;
+	uint16_t val;
+
+	if (count != sizeof(val)) {
+		EMU_IAVF_LOG(ERR, "Wrong write count (%zu) for PCI_STATUS\n",
+			count);
+		return -1;
+	}
+
+	/* buf has no alignment guarantee, so copy instead of casting */
+	memcpy(&val, buf, sizeof(val));
+	/* Clear the write-1-to-clear bits */
+	hdr->status = ~(rw1c_bitmask & val) & hdr->status;
+
+	return 2;
+}
+
+/**
+ * Apply a write to one of the six Base Address Registers.
+ *
+ * The low flag bits of a BAR (everything outside PCI_BASE_ADDRESS_MEM_MASK)
+ * are preserved. Of the written value only the address bits that a BAR of the
+ * emulated size decodes are stored, so writing all ones makes the register
+ * read back as the size mask. BARs other than BAR0 and BAR3 have size 0 and
+ * therefore never store any address bit.
+ *
+ * @param hdr
+ *   PCI header whose bar[] entry is updated.
+ * @param buf
+ *   Source of the 32-bit value to write.
+ * @param count
+ *   Number of bytes in buf; must be 4.
+ * @param pos
+ *   Config space offset of the BAR being written.
+ * @return
+ *   4 on success, -1 on a wrong count or an offset that is not a BAR.
+ */
+static int
+handle_pci_bar_write(struct iavf_emu_pci_hdr *hdr,
+	char *buf, size_t count, loff_t pos)
+{
+	const uint32_t flag_bits = ~(uint32_t)PCI_BASE_ADDRESS_MEM_MASK;
+	uint32_t val, size, addr_mask;
+	uint8_t idx;
+
+	if (count != sizeof(val)) {
+		EMU_IAVF_LOG(ERR, "Wrong write count (%zu) for "
+			"Base Address Register\n",
+			count);
+		return -1;
+	}
+
+	if (pos < PCI_BASE_ADDRESS_0 || pos > PCI_BASE_ADDRESS_5 ||
+		(pos - PCI_BASE_ADDRESS_0) % 4) {
+		EMU_IAVF_LOG(ERR, "Invalid Base Address Register "
+			"offset (%lld)\n", (long long)pos);
+		return -1;
+	}
+
+	/* buf has no alignment guarantee, so copy instead of casting */
+	memcpy(&val, buf, sizeof(val));
+
+	if (pos == PCI_BASE_ADDRESS_0)
+		size = IAVF_EMU_BAR0_SIZE;
+	else if (pos == PCI_BASE_ADDRESS_3)
+		size = IAVF_EMU_BAR3_SIZE;
+	else
+		size = 0;
+
+	/* Address bits decoded by a BAR of this size; 0 when size is 0 */
+	addr_mask = IAVF_EMU_BAR_MASK(size) & ~flag_bits;
+
+	idx = (pos - PCI_BASE_ADDRESS_0) / 0x4;
+	/* Replace the address bits, keep the flag bits as initialized */
+	hdr->bar[idx] = (val & addr_mask) | (hdr->bar[idx] & flag_bits);
+
+	return 4;
+}
+
+/**
+ * Dispatch a config space write to the handler of the addressed register.
+ *
+ * Only the command register, the status register, the interrupt line and
+ * the six BARs are writable. A write to any other offset is logged and
+ * ignored, and is reported as fully consumed.
+ *
+ * @param hdr
+ *   PCI header to update.
+ * @param buf
+ *   Data to write.
+ * @param count
+ *   Number of bytes in buf.
+ * @param pos
+ *   Config space offset being written.
+ * @return
+ *   Number of bytes consumed, or -1 if count does not match the register.
+ */
+static int
+handle_cfg_write(struct iavf_emu_pci_hdr *hdr,
+	char *buf, size_t count, loff_t pos)
+{
+	int ret = count;
+
+	switch (pos) {
+	case PCI_COMMAND:
+		ret = handle_pci_cmd_write(hdr, buf, count);
+		break;
+	case PCI_STATUS:
+		ret = handle_pci_status_write(hdr, buf, count);
+		break;
+	case PCI_INTERRUPT_LINE:
+		if (count != 1) {
+			EMU_IAVF_LOG(ERR, "Wrong write count (%zu) "
+				"for PCI_INTERRUPT_LINE\n",
+				count);
+			return -1;
+		}
+		hdr->intrl = *(uint8_t *)buf;
+		ret = 1;
+		break;
+	case PCI_BASE_ADDRESS_0:
+		/* FALLTHROUGH */
+	case PCI_BASE_ADDRESS_1:
+		/* FALLTHROUGH */
+	case PCI_BASE_ADDRESS_2:
+		/* FALLTHROUGH */
+	case PCI_BASE_ADDRESS_3:
+		/* FALLTHROUGH */
+	case PCI_BASE_ADDRESS_4:
+		/* FALLTHROUGH */
+	case PCI_BASE_ADDRESS_5:
+		ret = handle_pci_bar_write(hdr, buf, count, pos);
+		break;
+	default:
+		/* Not an error: ret still holds count */
+		EMU_IAVF_LOG(INFO,
+			"Write request for cfg (pos: %lld) ignored\n",
+			(long long)pos);
+		break;
+	}
+
+	return ret;
+}
+
+/**
+ * Read/write callback of the PCI configuration space region.
+ *
+ * Reads are served straight from the backing memory. Writes go through
+ * handle_cfg_write() so that only writable registers change.
+ *
+ * @param reg
+ *   Region info; reg->base is the config space backing memory.
+ * @param buf
+ *   Destination (read) or source (write) buffer of count bytes.
+ * @param count
+ *   Number of bytes to transfer.
+ * @param pos
+ *   Offset inside the config space.
+ * @param iswrite
+ *   True for a write, false for a read.
+ * @return
+ *   Number of bytes handled, -EFAULT if the region has no backing memory,
+ *   -EINVAL on an out-of-range access or a rejected write.
+ */
+static ssize_t
+iavf_emu_cfg_rw(struct rte_vfio_user_reg_info *reg, char *buf,
+	size_t count, loff_t pos, bool iswrite)
+{
+	struct iavf_emu_cfg_space *cfg;
+	uint64_t size;
+	int ret;
+
+	if (!reg->base) {
+		EMU_IAVF_LOG(ERR, "Config space not exist\n");
+		return -EFAULT;
+	}
+
+	/*
+	 * Written so that neither a negative offset nor pos + count wrapping
+	 * around can slip past the range check.
+	 */
+	size = reg->info->size;
+	if (pos < 0 || (uint64_t)pos > size || count > size - (uint64_t)pos) {
+		EMU_IAVF_LOG(ERR, "Access exceeds config space size\n");
+		return -EINVAL;
+	}
+
+	if (!iswrite) {
+		rte_memcpy(buf, (char *)reg->base + pos, count);
+		return count;
+	}
+
+	cfg = (struct iavf_emu_cfg_space *)reg->base;
+	ret = handle_cfg_write(&cfg->hdr, buf, count, pos);
+	if (ret < 0) {
+		EMU_IAVF_LOG(ERR, "Failed to write cfg space\n");
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+/**
+ * Allocate the zeroed backing memory of the PCI configuration space, hook
+ * up its read/write callback and fill in the initial register values:
+ * IDs, command/status, class code, BAR0/BAR3 type bits and an MSI-X
+ * capability at offset 0x70.
+ *
+ * @param vinfo
+ *   Region info of the config space region; base, rw, fd and priv are set.
+ * @param numa_node
+ *   Socket to allocate the backing memory on.
+ * @return
+ *   0 on success, -1 if the allocation fails.
+ */
+static int
+iavf_emu_init_cfg_space(struct rte_vfio_user_reg_info *vinfo,
+	unsigned int numa_node)
+{
+	/* Config space offset at which the MSI-X capability is placed */
+	const unsigned int msix_cap = 0x70;
+	/* BAR indicator shared by the MSI-X table and the PBA */
+	const uint32_t msix_bir = 0x3;
+	char *cfg;
+
+	cfg = rte_zmalloc_socket("cfg space", IAVF_EMU_CFG_SPACE_SIZE,
+		0, numa_node);
+	vinfo->base = cfg;
+	if (cfg == NULL) {
+		EMU_IAVF_LOG(ERR, "Failed to alloc cfg space\n");
+		return -1;
+	}
+
+	vinfo->priv = NULL;
+	vinfo->fd = -1;
+	vinfo->rw = iavf_emu_cfg_rw;
+
+	/* Device identification */
+	STORE_LE16((uint16_t *)&cfg[PCI_VENDOR_ID], PCI_VENDOR_ID_INTEL);
+	STORE_LE16((uint16_t *)&cfg[PCI_DEVICE_ID], IAVF_DEV_ID_ADAPTIVE_VF);
+	STORE_LE16((uint16_t *)&cfg[PCI_SUBSYSTEM_VENDOR_ID],
+		PCI_VENDOR_ID_INTEL);
+	STORE_LE16((uint16_t *)&cfg[PCI_SUBSYSTEM_ID], PCI_SUBDEVICE_ID);
+	STORE_LE16((uint16_t *)&cfg[PCI_CLASS_DEVICE], PCI_CLASS_ETHERNET);
+	cfg[PCI_CLASS_REVISION] = 0x01;
+
+	/* Command and status */
+	STORE_LE16((uint16_t *)&cfg[PCI_COMMAND],
+		PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+	STORE_LE16((uint16_t *)&cfg[PCI_STATUS], PCI_STATUS_CAP_LIST);
+
+	/* BAR0: 32-bit prefetchable memory, BAR3: 32-bit memory */
+	STORE_LE32((uint32_t *)&cfg[PCI_BASE_ADDRESS_0],
+		PCI_BASE_ADDRESS_SPACE_MEMORY |
+		PCI_BASE_ADDRESS_MEM_TYPE_32 |
+		PCI_BASE_ADDRESS_MEM_PREFETCH);
+	STORE_LE32((uint32_t *)&cfg[PCI_BASE_ADDRESS_3],
+		PCI_BASE_ADDRESS_SPACE_MEMORY |
+		PCI_BASE_ADDRESS_MEM_TYPE_32);
+
+	/* Capability list holding a single MSI-X capability */
+	STORE_LE16((uint16_t *)&cfg[PCI_CAPABILITY_LIST], msix_cap);
+	STORE_LE16((uint16_t *)&cfg[msix_cap], PCI_CAP_ID_MSIX);
+	STORE_LE16((uint16_t *)&cfg[msix_cap + PCI_MSIX_FLAGS],
+		(IAVF_EMU_MSIX_TABLE_SIZE & PCI_MSIX_FLAGS_QSIZE) |
+		PCI_MSIX_FLAGS_ENABLE);
+	STORE_LE32((uint32_t *)&cfg[msix_cap + PCI_MSIX_TABLE],
+		(msix_bir & PCI_MSIX_TABLE_BIR));
+	/*
+	 * NOTE(review): the PBA offset 0x100 looks like it may fall inside
+	 * the MSI-X table that starts at offset 0 of the same BAR - confirm.
+	 */
+	STORE_LE32((uint32_t *)&cfg[msix_cap + PCI_MSIX_PBA],
+		(msix_bir & PCI_MSIX_PBA_BIR) |
+		(0x100 & PCI_MSIX_PBA_OFFSET));
+
+	return 0;
+}
+
+/**
+ * Free the config space backing memory and reset the region bookkeeping.
+ *
+ * @param vinfo
+ *   Region info of the config space region.
+ */
+static inline void
+iavf_emu_uninit_cfg_space(struct rte_vfio_user_reg_info *vinfo)
+{
+	rte_free(vinfo->base);
+	/* Clear the pointer so the "!reg->base" guards see the region as gone */
+	vinfo->base = NULL;
+	vinfo->rw = NULL;
+	vinfo->info->size = 0;
+	vinfo->fd = -1;
+}
+
+/**
+ * Read/write callback of BAR0.
+ *
+ * Reads are served from the backing memory. Writes must be exactly 4 bytes;
+ * a write to one of the interrupt dynamic-control registers additionally
+ * updates the matching interrupt enable state and, if the device is ready,
+ * notifies the data path through dev->ops. The written value is always
+ * stored in the backing memory afterwards.
+ *
+ * @param reg
+ *   Region info of BAR0; reg->priv is the owning iavf_emudev.
+ * @param buf
+ *   Destination (read) or source (write) buffer of count bytes.
+ * @param count
+ *   Number of bytes to transfer.
+ * @param pos
+ *   Offset inside BAR0.
+ * @param iswrite
+ *   True for a write, false for a read.
+ * @return
+ *   count on success, -EFAULT if BAR0 has no backing memory, -EINVAL on an
+ *   out-of-range access, a write that is not 4 bytes or a misaligned
+ *   interrupt control register offset.
+ */
+static ssize_t
+iavf_emu_bar0_rw(struct rte_vfio_user_reg_info *reg, char *buf,
+	size_t count, loff_t pos, bool iswrite)
+{
+	struct iavf_emudev *dev = (struct iavf_emudev *)reg->priv;
+	char *reg_pos;
+	uint64_t size;
+	uint32_t val;
+	int enable = 0;
+	int idx = -1;
+
+	if (!reg->base) {
+		EMU_IAVF_LOG(ERR, "BAR 0 does not exist\n");
+		return -EFAULT;
+	}
+
+	/*
+	 * Written so that neither a negative offset nor pos + count wrapping
+	 * around can slip past the range check.
+	 */
+	size = reg->info->size;
+	if (pos < 0 || (uint64_t)pos > size || count > size - (uint64_t)pos) {
+		EMU_IAVF_LOG(ERR, "Access exceeds BAR 0 size\n");
+		return -EINVAL;
+	}
+
+	reg_pos = (char *)reg->base + pos;
+
+	if (!iswrite) {
+		rte_memcpy(buf, reg_pos, count);
+		return count;
+	}
+
+	if (count != sizeof(val))
+		return -EINVAL;
+
+	/* buf has no alignment guarantee, so copy instead of casting */
+	memcpy(&val, buf, sizeof(val));
+
+	/* Only handle interrupt enable/disable for now */
+	if (pos == IAVF_VFINT_DYN_CTL01) {
+		enable = val & IAVF_VFINT_DYN_CTL01_INTENA_MASK;
+		idx = 0;
+	} else if (pos >= IAVF_VFINT_DYN_CTLN1(0) &&
+		pos <= IAVF_VFINT_DYN_CTLN1(RTE_IAVF_EMU_MAX_INTR - 1)) {
+		enable = val & IAVF_VFINT_DYN_CTLN1_INTENA_MASK;
+		idx = pos - IAVF_VFINT_DYN_CTLN1(0);
+		if (idx % 4)
+			return -EINVAL;
+		/*
+		 * Slot 0 belongs to DYN_CTL01, so DYN_CTLN1(n) maps to n + 1.
+		 * NOTE(review): idx can reach RTE_IAVF_EMU_MAX_INTR here -
+		 * confirm that dev->intr->info[] has room for that index.
+		 */
+		idx = idx / 4 + 1;
+	}
+
+	if (idx != -1 &&
+		enable != dev->intr->info[idx].enable && dev->ready) {
+		dev->intr->info[idx].enable = enable;
+		rte_wmb();
+		dev->ops->lock_dp(dev->edev, 1);
+		dev->ops->update_status(dev->edev);
+		dev->ops->lock_dp(dev->edev, 0);
+	}
+
+	rte_memcpy(reg_pos, buf, count);
+
+	return count;
+}
+
+/* Unmap a shared-memory BAR, then close and unlink its backing object. */
+static void
+iavf_emu_release_bar(struct iavf_emudev *dev,
+	struct rte_vfio_user_reg_info *vinfo, uint32_t idx)
+{
+	char shm_str[64];
+
+	if (vinfo->base && vinfo->base != MAP_FAILED)
+		munmap(vinfo->base, vinfo->info->size);
+	vinfo->base = NULL;
+	vinfo->rw = NULL;
+
+	if (vinfo->fd >= 0) {
+		close(vinfo->fd);
+		snprintf(shm_str, sizeof(shm_str), "AVF%d_BAR%u",
+			dev->edev->dev_id, idx);
+		shm_unlink(shm_str);
+		vinfo->fd = -1;
+	}
+}
+
+/**
+ * Allocate the backing storage of every region described in dev->vfio->reg.
+ *
+ * The config space is allocated from the DPDK heap. BAR0 and BAR3 are each
+ * backed by a POSIX shared memory object named "AVF<dev_id>_BAR<index>" that
+ * is sized to the region, mapped and zeroed. Every other region is marked
+ * as absent. On failure everything set up so far is released again.
+ *
+ * @param dev
+ *   Emulated iavf device; dev->vfio must already be initialized.
+ * @return
+ *   0 on success, non-zero on failure.
+ */
+static int
+iavf_emu_alloc_reg(struct iavf_emudev *dev)
+{
+	struct rte_vfio_user_regions *reg = dev->vfio->reg;
+	struct rte_vfio_user_reg_info *vinfo;
+	char shm_str[64];
+	uint32_t i, j;
+	int ret = 0;
+
+	for (i = 0; i < reg->reg_num; i++) {
+		vinfo = &reg->reg_info[i];
+
+		switch (i) {
+		case VFIO_PCI_CONFIG_REGION_INDEX:
+			ret = iavf_emu_init_cfg_space(vinfo, dev->numa_node);
+			if (ret)
+				goto err;
+			break;
+		case VFIO_PCI_BAR0_REGION_INDEX:
+			/* FALLTHROUGH */
+		case VFIO_PCI_BAR3_REGION_INDEX:
+			snprintf(shm_str, sizeof(shm_str), "AVF%d_BAR%u",
+				dev->edev->dev_id, i);
+			vinfo->fd = shm_open(shm_str,
+				O_RDWR | O_CREAT, 0700);
+			if (vinfo->fd == -1) {
+				EMU_IAVF_LOG(ERR,
+					"Failed to open shm for BAR %u\n", i);
+				ret = -1;
+				goto err;
+			}
+
+			if (ftruncate(vinfo->fd, vinfo->info->size) == -1) {
+				EMU_IAVF_LOG(ERR,
+					"Failed to ftruncate BAR %u\n", i);
+				ret = -1;
+				goto err;
+			}
+
+			vinfo->base = mmap(NULL, vinfo->info->size,
+					PROT_READ | PROT_WRITE,
+					MAP_SHARED, vinfo->fd, 0);
+			if (vinfo->base == MAP_FAILED) {
+				EMU_IAVF_LOG(ERR,
+					"Failed to mmap BAR %u\n", i);
+				ret = -1;
+				goto err;
+			}
+			/*
+			 * The object may have existed before (no O_EXCL), so
+			 * clear it explicitly - only after mmap succeeded.
+			 */
+			memset(vinfo->base, 0, vinfo->info->size);
+			vinfo->priv = (void *)dev;
+			if (i == VFIO_PCI_BAR0_REGION_INDEX)
+				vinfo->rw = iavf_emu_bar0_rw;
+			else
+				vinfo->rw = NULL;
+			break;
+		default:
+			vinfo->base = NULL;
+			vinfo->rw = NULL;
+			vinfo->fd = -1;
+			vinfo->priv = NULL;
+			break;
+		}
+	}
+
+	return 0;
+
+err:
+	/* Unwind regions 0..i; region i is the one that failed part-way */
+	for (j = 0; j <= i; j++) {
+		vinfo = &reg->reg_info[j];
+
+		switch (j) {
+		case VFIO_PCI_CONFIG_REGION_INDEX:
+			/* A failed config space init left nothing to free */
+			if (j < i)
+				iavf_emu_uninit_cfg_space(vinfo);
+			break;
+		case VFIO_PCI_BAR0_REGION_INDEX:
+			/* FALLTHROUGH */
+		case VFIO_PCI_BAR3_REGION_INDEX:
+			iavf_emu_release_bar(dev, vinfo, j);
+			break;
+		default:
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * Release the backing storage of every region in dev->vfio->reg: free the
+ * config space, and for BAR0/BAR3 unmap the memory, close the descriptor
+ * and unlink the shared memory object. Released regions get size 0.
+ *
+ * @param dev
+ *   Emulated iavf device; dev->vfio must be set.
+ */
+static void
+iavf_emu_free_reg(struct iavf_emudev *dev)
+{
+	struct rte_vfio_user_regions *reg = dev->vfio->reg;
+	struct rte_vfio_user_reg_info *vinfo;
+	char shm_str[64];
+	uint32_t i;
+
+	for (i = 0; i < reg->reg_num; i++) {
+		vinfo = &reg->reg_info[i];
+
+		switch (i) {
+		case VFIO_PCI_CONFIG_REGION_INDEX:
+			iavf_emu_uninit_cfg_space(vinfo);
+			/* Never leave a pointer to the freed memory behind */
+			vinfo->base = NULL;
+			break;
+		case VFIO_PCI_BAR0_REGION_INDEX:
+			/* FALLTHROUGH */
+		case VFIO_PCI_BAR3_REGION_INDEX:
+			if (vinfo->base && vinfo->base != MAP_FAILED)
+				munmap(vinfo->base, vinfo->info->size);
+			/* Makes the "!reg->base" guard of the rw callback trip */
+			vinfo->base = NULL;
+			vinfo->rw = NULL;
+			if (vinfo->fd >= 0) {
+				close(vinfo->fd);
+				snprintf(shm_str, sizeof(shm_str),
+					"AVF%d_BAR%u",
+					dev->edev->dev_id, i);
+				shm_unlink(shm_str);
+			}
+			vinfo->fd = -1;
+			vinfo->info->size = 0;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
 static inline struct iavf_emu_sock_list *
 iavf_emu_find_sock_list(char *sock_addr)
 {
@@ -397,3 +1003,74 @@  iavf_emu_unregister_vfio_user(struct iavf_emudev *dev)
 
 	return 0;
 }
+
+/**
+ * Set up the vfio-user side of an emulated iavf device: build the device
+ * description, allocate the region storage, hand device/region/IRQ info to
+ * the vfio-user library and add the device to the socket list.
+ *
+ * @param dev
+ *   Emulated iavf device.
+ * @return
+ *   0 on success, non-zero on failure. On failure everything allocated
+ *   here has been released again.
+ */
+int
+iavf_emu_init_vfio_user(struct iavf_emudev *dev)
+{
+	int ret;
+	struct iavf_emu_sock_list *list;
+
+	if (iavf_emu_init_dev(dev)) {
+		EMU_IAVF_LOG(ERR, "Emulated iavf dev init failed.\n");
+		ret = -1;
+		goto exit;
+	}
+
+	if (iavf_emu_alloc_reg(dev)) {
+		EMU_IAVF_LOG(ERR, "Emulated iavf alloc region failed.\n");
+		ret = -1;
+		goto err_alloc_reg;
+	}
+
+	ret = rte_vfio_user_set_dev_info(dev->sock_addr, dev->vfio->dev_info);
+	if (ret) {
+		EMU_IAVF_LOG(ERR, "Failed to set vfio dev info\n");
+		goto err_set;
+	}
+
+	ret = rte_vfio_user_set_reg_info(dev->sock_addr, dev->vfio->reg);
+	if (ret) {
+		EMU_IAVF_LOG(ERR, "Failed to set vfio region info\n");
+		goto err_set;
+	}
+
+	ret = rte_vfio_user_set_irq_info(dev->sock_addr, dev->vfio->irq);
+	if (ret) {
+		EMU_IAVF_LOG(ERR, "Failed to set vfio irq info\n");
+		goto err_set;
+	}
+
+	list = rte_zmalloc_socket("list", sizeof(*list), 0, dev->numa_node);
+	if (!list) {
+		EMU_IAVF_LOG(ERR, "Failed to alloc sock list entry\n");
+		ret = -1;
+		goto err_set;
+	}
+	list->emu_dev = dev->edev;
+	pthread_mutex_lock(&sock_list_lock);
+	TAILQ_INSERT_TAIL(&sock_list, list, next);
+	pthread_mutex_unlock(&sock_list_lock);
+
+	return 0;
+
+err_set:
+	iavf_emu_free_reg(dev);
+err_alloc_reg:
+	iavf_emu_uninit_dev(dev);
+exit:
+	return ret;
+}
+
+/**
+ * Release the region storage and the vfio-user description of a device.
+ * Does nothing if dev->vfio is not set.
+ *
+ * NOTE(review): the socket list entry added by iavf_emu_init_vfio_user()
+ * is not removed here - confirm that it is released elsewhere.
+ *
+ * @param dev
+ *   Emulated iavf device.
+ */
+void
+iavf_emu_uninit_vfio_user(struct iavf_emudev *dev)
+{
+	/* iavf_emu_free_reg() dereferences dev->vfio unconditionally */
+	if (!dev->vfio)
+		return;
+
+	iavf_emu_free_reg(dev);
+	iavf_emu_uninit_dev(dev);
+}
+
+/**
+ * Start the vfio-user server bound to the device's socket address.
+ *
+ * @param dev
+ *   Emulated iavf device.
+ * @return
+ *   Whatever rte_vfio_user_start() returned; non-zero is logged as an error.
+ */
+int
+iavf_emu_start_vfio_user(struct iavf_emudev *dev)
+{
+	int rc = rte_vfio_user_start(dev->sock_addr);
+
+	if (rc != 0)
+		EMU_IAVF_LOG(ERR, "Start vfio user failed.\n");
+
+	return rc;
+}
diff --git a/drivers/emu/iavf/iavf_vfio_user.h b/drivers/emu/iavf/iavf_vfio_user.h
index aa2f3edc87..2ccb04eb48 100644
--- a/drivers/emu/iavf/iavf_vfio_user.h
+++ b/drivers/emu/iavf/iavf_vfio_user.h
@@ -5,12 +5,53 @@ 
 #ifndef _IAVF_VFIO_USER_H
 #define _IAVF_VFIO_USER_H
 
+#include <linux/pci_regs.h>
+
 #include <rte_vfio_user.h>
 
 #include "iavf_emu_internal.h"
 
+#define IAVF_EMU_CFG_SPACE_SIZE 0x100
+
+struct iavf_emu_pci_hdr {	/* Overlay for the first 64 bytes of config space */
+	uint16_t vid;		/* Vendor ID */
+	uint16_t did;		/* Device ID */
+	uint16_t cmd;		/* Command */
+	uint16_t status;	/* Status */
+	uint8_t rid;		/* Revision ID */
+	uint8_t cc_pi;		/* Programming Interface byte of the Class Code */
+	uint8_t cc_sub;		/* Sub-Class Code */
+	uint8_t cc_base;	/* Base Class Code */
+	uint8_t cl_size;	/* Cache Line Size */
+	uint8_t lt_timer;	/* Latency Timer */
+	uint8_t hdr_type;	/* Header Type */
+	uint8_t bist;		/* BIST */
+	uint32_t bar[6];	/* Base Address Registers */
+	uint32_t ccp;		/* CardBus CIS Pointer */
+	uint16_t sub_vid;	/* Subsystem Vendor ID */
+	uint16_t sub_sid;	/* Subsystem ID */
+	uint32_t rom;		/* Expansion ROM Base Address */
+	uint8_t cap;		/* Capabilities Pointer */
+	uint8_t rsvd[7];	/* Reserved */
+	uint8_t intrl;		/* Interrupt Line */
+	uint8_t intrp;		/* Interrupt Pin */
+	uint8_t min_gnt;	/* Min_Gnt Register */
+	uint8_t max_lat;	/* Max_Lat Register */
+} __attribute((packed));	/* packed: field offsets must equal register offsets */
+
+struct iavf_emu_cfg_space {	/* Whole emulated config space, IAVF_EMU_CFG_SPACE_SIZE bytes */
+	struct iavf_emu_pci_hdr hdr;	/* standard header at offset 0 */
+	uint8_t cfg_non_std[IAVF_EMU_CFG_SPACE_SIZE - PCI_STD_HEADER_SIZEOF];	/* everything after the standard header */
+} __attribute((packed));
+
 int iavf_emu_register_vfio_user(struct iavf_emudev *dev);
 
 int iavf_emu_unregister_vfio_user(struct iavf_emudev *dev);
 
+int iavf_emu_init_vfio_user(struct iavf_emudev *dev);
+
+void iavf_emu_uninit_vfio_user(struct iavf_emudev *dev);
+
+int iavf_emu_start_vfio_user(struct iavf_emudev *dev);
+
 #endif
diff --git a/drivers/emu/iavf/meson.build b/drivers/emu/iavf/meson.build
index 4f651258c2..3cab2226b7 100644
--- a/drivers/emu/iavf/meson.build
+++ b/drivers/emu/iavf/meson.build
@@ -1,6 +1,14 @@ 
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2020 Intel Corporation
 
+librt = cc.find_library('rt', required: false)	# optional lookup, absence is handled below
+if not librt.found()
+	build = false	# skip this driver instead of failing the whole build
+	subdir_done()
+endif
+
+ext_deps += librt	# presumably for shm_open()/shm_unlink() in iavf_vfio_user.c - confirm
+
 sources = files('iavf_emu.c', 'iavf_vfio_user.c',
 	'iavf_emudev.c')