@@ -10,6 +10,7 @@
#include <sys/un.h>
#include <sys/types.h>
#include <sys/ioctl.h>
+#include <sys/syscall.h>
#ifdef RTE_LIBRTE_VHOST_POSTCOPY
#include <linux/userfaultfd.h>
#endif
@@ -39,6 +40,9 @@ struct vhost_user_connection {
int slave_req_fd;
rte_spinlock_t slave_req_lock;
+ int postcopy_ufd;
+ int postcopy_listening;
+
TAILQ_ENTRY(vhost_user_connection) next;
};
@@ -261,6 +265,7 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
conn->slave_req_fd = -1;
conn->vsocket = vsocket;
rte_spinlock_init(&conn->slave_req_lock);
+ conn->postcopy_ufd = -1;
size = strnlen(vsocket->path, PATH_MAX);
vhost_set_ifname(dev->vid, vsocket->path, size);
@@ -772,6 +777,13 @@ af_unix_cleanup_device(struct virtio_net *dev, int destroy __rte_unused)
close(conn->slave_req_fd);
conn->slave_req_fd = -1;
}
+
+ if (conn->postcopy_ufd >= 0) {
+ close(conn->postcopy_ufd);
+ conn->postcopy_ufd = -1;
+ }
+
+ conn->postcopy_listening = 0;
}
static int
@@ -866,7 +878,7 @@ af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg)
alignment,
mmap_offset);
- if (dev->postcopy_listening) {
+ if (conn->postcopy_listening) {
/*
* We haven't a better way right now than sharing
* DPDK's virtual address with Qemu, so that Qemu can
@@ -877,7 +889,7 @@ af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg)
}
}
- if (dev->postcopy_listening) {
+ if (conn->postcopy_listening) {
/* Send the addresses back to qemu */
msg->fd_num = 0;
/* Send reply */
@@ -918,11 +930,11 @@ af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg)
reg_struct.range.len = reg->mmap_size;
reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER,
+ if (ioctl(conn->postcopy_ufd, UFFDIO_REGISTER,
&reg_struct)) {
RTE_LOG(ERR, VHOST_CONFIG,
"Failed to register ufd for region %d: (ufd = %d) %s\n",
- i, dev->postcopy_ufd,
+ i, conn->postcopy_ufd,
strerror(errno));
return -1;
}
@@ -990,6 +1002,77 @@ af_unix_set_log_base(struct virtio_net *dev, const struct VhostUserMsg *msg)
return 0;
}
+/**
+ * Create a userfaultfd for this connection and return it in the reply.
+ *
+ * When built with RTE_LIBRTE_VHOST_POSTCOPY, a non-blocking, close-on-exec
+ * userfaultfd is opened, the UFFDIO_API handshake is performed on it, and
+ * the descriptor is stored in conn->postcopy_ufd and placed in the message.
+ * Without RTE_LIBRTE_VHOST_POSTCOPY the request always fails.
+ *
+ * @param dev
+ *  vhost device, embedded in a struct vhost_user_connection
+ * @param msg
+ *  message; on success fds[0] carries the userfaultfd and fd_num is 1
+ * @return
+ *  RTE_VHOST_MSG_RESULT_REPLY on success,
+ *  RTE_VHOST_MSG_RESULT_ERR on failure
+ */
+static int
+af_unix_set_postcopy_advise(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+	struct vhost_user_connection *conn =
+		container_of(dev, struct vhost_user_connection, device);
+#ifdef RTE_LIBRTE_VHOST_POSTCOPY
+	struct uffdio_api api_struct;
+
+	/*
+	 * An earlier advise may have left a userfaultfd open. Close it
+	 * before the field is overwritten, otherwise the descriptor leaks.
+	 */
+	if (conn->postcopy_ufd >= 0) {
+		close(conn->postcopy_ufd);
+		conn->postcopy_ufd = -1;
+	}
+
+	conn->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+
+	if (conn->postcopy_ufd == -1) {
+		RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n",
+			strerror(errno));
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+
+	/* The API handshake must succeed before the fd is usable. */
+	api_struct.api = UFFD_API;
+	api_struct.features = 0;
+	if (ioctl(conn->postcopy_ufd, UFFDIO_API, &api_struct)) {
+		RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n",
+			strerror(errno));
+		close(conn->postcopy_ufd);
+		conn->postcopy_ufd = -1;
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+	msg->fds[0] = conn->postcopy_ufd;
+	msg->fd_num = 1;
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+#else
+	conn->postcopy_ufd = -1;
+	msg->fd_num = 0;
+
+	return RTE_VHOST_MSG_RESULT_ERR;
+#endif
+}
+
+/**
+ * Mark this connection as listening for postcopy.
+ *
+ * The request is refused if the device already has memory regions
+ * registered; otherwise conn->postcopy_listening is set.
+ *
+ * @param dev
+ *  vhost device, embedded in a struct vhost_user_connection
+ * @return
+ *  RTE_VHOST_MSG_RESULT_OK on success,
+ *  RTE_VHOST_MSG_RESULT_ERR if regions are already registered
+ */
+static int
+af_unix_set_postcopy_listen(struct virtio_net *dev)
+{
+	struct vhost_user_connection *uconn;
+
+	/* Reject the request once any memory region is present. */
+	if (dev->mem != NULL && dev->mem->nregions != 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Regions already registered at postcopy-listen\n");
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+
+	uconn = container_of(dev, struct vhost_user_connection, device);
+	uconn->postcopy_listening = 1;
+
+	return RTE_VHOST_MSG_RESULT_OK;
+}
+
+/**
+ * Finish postcopy on this connection and build the acknowledgement.
+ *
+ * Clears the listening flag, closes the userfaultfd if one is open, and
+ * fills the message with a zero u64 payload and no file descriptors.
+ *
+ * @param dev
+ *  vhost device, embedded in a struct vhost_user_connection
+ * @param msg
+ *  message, overwritten with the reply
+ * @return
+ *  RTE_VHOST_MSG_RESULT_REPLY
+ */
+static int
+af_unix_set_postcopy_end(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+	struct vhost_user_connection *uconn =
+		container_of(dev, struct vhost_user_connection, device);
+	int ufd;
+
+	uconn->postcopy_listening = 0;
+
+	/* Release the userfaultfd only when one is actually open. */
+	ufd = uconn->postcopy_ufd;
+	if (ufd >= 0) {
+		close(ufd);
+		uconn->postcopy_ufd = -1;
+	}
+
+	/* The reply carries a zero u64 and no descriptors. */
+	msg->payload.u64 = 0;
+	msg->size = sizeof(msg->payload.u64);
+	msg->fd_num = 0;
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
const struct vhost_transport_ops af_unix_trans_ops = {
.socket_size = sizeof(struct af_unix_socket),
.device_size = sizeof(struct vhost_user_connection),
@@ -1005,4 +1088,7 @@ const struct vhost_transport_ops af_unix_trans_ops = {
.map_mem_regions = af_unix_map_mem_regions,
.unmap_mem_regions = af_unix_unmap_mem_regions,
.set_log_base = af_unix_set_log_base,
+ .set_postcopy_advise = af_unix_set_postcopy_advise,
+ .set_postcopy_listen = af_unix_set_postcopy_listen,
+ .set_postcopy_end = af_unix_set_postcopy_end,
};
@@ -512,7 +512,6 @@ vhost_new_device(const struct vhost_transport_ops *trans_ops)
dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
dev->trans_ops = trans_ops;
dev->vdpa_dev_id = -1;
- dev->postcopy_ufd = -1;
return dev;
}
@@ -450,6 +450,44 @@ struct vhost_transport_ops {
*/
int (*set_log_base)(struct virtio_net *dev,
const struct VhostUserMsg *msg);
+
+ /**
+ * Register a userfault fd and send it to master.
+ *
+ * @param dev
+ * vhost device
+ * @param msg
+ * message
+ * @return
+ * RTE_VHOST_MSG_RESULT_REPLY on success,
+ * RTE_VHOST_MSG_RESULT_ERR on failure
+ */
+ int (*set_postcopy_advise)(struct virtio_net *dev,
+ struct VhostUserMsg *msg);
+
+ /**
+ * Change live migration mode (entering postcopy mode).
+ *
+ * @param dev
+ * vhost device
+ * @return
+ * RTE_VHOST_MSG_RESULT_OK on success,
+ * RTE_VHOST_MSG_RESULT_ERR on failure
+ */
+ int (*set_postcopy_listen)(struct virtio_net *dev);
+
+ /**
+ * Register completion of postcopy live migration.
+ *
+ * @param dev
+ * vhost device
+ * @param msg
+ * message
+ * @return
+ * RTE_VHOST_MSG_RESULT_REPLY
+ */
+ int (*set_postcopy_end)(struct virtio_net *dev,
+ struct VhostUserMsg *msg);
};
/** The traditional AF_UNIX vhost-user protocol transport. */
@@ -492,9 +530,6 @@ struct virtio_net {
uint32_t max_guest_pages;
struct guest_page *guest_pages;
- int postcopy_ufd;
- int postcopy_listening;
-
/*
* Device id to identify a specific backend device.
* It's set to -1 for the default software implementation.
@@ -29,14 +29,10 @@
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include <sys/syscall.h>
#include <assert.h>
#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif
-#ifdef RTE_LIBRTE_VHOST_POSTCOPY
-#include <linux/userfaultfd.h>
-#endif
#include <rte_common.h>
#include <rte_malloc.h>
@@ -136,13 +132,6 @@ vhost_backend_cleanup(struct virtio_net *dev)
free(dev->guest_pages);
dev->guest_pages = NULL;
-
- if (dev->postcopy_ufd >= 0) {
- close(dev->postcopy_ufd);
- dev->postcopy_ufd = -1;
- }
-
- dev->postcopy_listening = 0;
}
/*
@@ -1471,35 +1460,8 @@ vhost_user_set_postcopy_advise(struct virtio_net **pdev,
struct VhostUserMsg *msg)
{
struct virtio_net *dev = *pdev;
-#ifdef RTE_LIBRTE_VHOST_POSTCOPY
- struct uffdio_api api_struct;
-
- dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
-
- if (dev->postcopy_ufd == -1) {
- RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n",
- strerror(errno));
- return RTE_VHOST_MSG_RESULT_ERR;
- }
- api_struct.api = UFFD_API;
- api_struct.features = 0;
- if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
- RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n",
- strerror(errno));
- close(dev->postcopy_ufd);
- dev->postcopy_ufd = -1;
- return RTE_VHOST_MSG_RESULT_ERR;
- }
- msg->fds[0] = dev->postcopy_ufd;
- msg->fd_num = 1;
-
- return RTE_VHOST_MSG_RESULT_REPLY;
-#else
- dev->postcopy_ufd = -1;
- msg->fd_num = 0;
- return RTE_VHOST_MSG_RESULT_ERR;
-#endif
+ return dev->trans_ops->set_postcopy_advise(dev, msg);
}
static int
@@ -1508,14 +1470,7 @@ vhost_user_set_postcopy_listen(struct virtio_net **pdev,
{
struct virtio_net *dev = *pdev;
- if (dev->mem && dev->mem->nregions) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Regions already registered at postcopy-listen\n");
- return RTE_VHOST_MSG_RESULT_ERR;
- }
- dev->postcopy_listening = 1;
-
- return RTE_VHOST_MSG_RESULT_OK;
+ return dev->trans_ops->set_postcopy_listen(dev);
}
static int
@@ -1523,17 +1478,7 @@ vhost_user_postcopy_end(struct virtio_net **pdev, struct VhostUserMsg *msg)
{
struct virtio_net *dev = *pdev;
- dev->postcopy_listening = 0;
- if (dev->postcopy_ufd >= 0) {
- close(dev->postcopy_ufd);
- dev->postcopy_ufd = -1;
- }
-
- msg->payload.u64 = 0;
- msg->size = sizeof(msg->payload.u64);
- msg->fd_num = 0;
-
- return RTE_VHOST_MSG_RESULT_REPLY;
+ return dev->trans_ops->set_postcopy_end(dev, msg);
}
typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,