[1/9] lib: introduce vfio-user library

Message ID 20201218073851.93609-2-chenbo.xia@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series Introduce vfio-user library |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Chenbo Xia Dec. 18, 2020, 7:38 a.m. UTC
  This patch introduces vfio-user library, which follows vfio-user
protocol v1.0. As vfio-user has server and client implementaion,
this patch introduces basic structures and internal functions that
will be used by both server and client.

Signed-off-by: Chenbo Xia <chenbo.xia@intel.com>
Signed-off-by: Xiuchun Lu <xiuchun.lu@intel.com>
---
 MAINTAINERS                           |   4 +
 lib/librte_vfio_user/meson.build      |   9 ++
 lib/librte_vfio_user/version.map      |   3 +
 lib/librte_vfio_user/vfio_user_base.c | 205 ++++++++++++++++++++++++++
 lib/librte_vfio_user/vfio_user_base.h |  65 ++++++++
 lib/meson.build                       |   1 +
 6 files changed, 287 insertions(+)
 create mode 100644 lib/librte_vfio_user/meson.build
 create mode 100644 lib/librte_vfio_user/version.map
 create mode 100644 lib/librte_vfio_user/vfio_user_base.c
 create mode 100644 lib/librte_vfio_user/vfio_user_base.h
  

Comments

Stephen Hemminger Dec. 18, 2020, 5:13 p.m. UTC | #1
On Fri, 18 Dec 2020 15:38:43 +0800
Chenbo Xia <chenbo.xia@intel.com> wrote:

> +inline void vfio_user_close_msg_fds(VFIO_USER_MSG *msg)
> +{
> +	int i;
> +
> +	for (i = 0; i < msg->fd_num; i++)
> +		close(msg->fds[i]);
> +}
> +

Please don't use non-static inlines.
  
Stephen Hemminger Dec. 18, 2020, 5:17 p.m. UTC | #2
On Fri, 18 Dec 2020 15:38:43 +0800
Chenbo Xia <chenbo.xia@intel.com> wrote:

> +typedef struct vfio_user_msg {
> +	uint16_t msg_id;
> +	uint16_t cmd;
> +	uint32_t size;
> +#define VFIO_USER_TYPE_CMD	(0x0)		/* Message type is COMMAND */
> +#define VFIO_USER_TYPE_REPLY	(0x1 << 0)	/* Message type is REPLY */
> +#define VFIO_USER_NEED_NO_RP	(0x1 << 4)	/* Message needs no reply */
> +#define VFIO_USER_ERROR		(0x1 << 5)	/* Reply message has error */
> +	uint32_t flags;
> +	uint32_t err;				/* Valid in reply, optional */
> +	union {
> +		struct vfio_user_version ver;
> +	} payload;
> +	int fds[VFIO_USER_MAX_FD];
> +	int fd_num;
> +} __attribute((packed)) VFIO_USER_MSG;

Please don't introduce all capitals typedefs.
Don't use packed,  it generates slower code. Better to use tools
like pahole and make the layout of the structure naturally packed.
Also, if you put fds[] at the end you could use dynamic sized array
and not be constrained by VFIO_USER_MAX_FD.


Since this is DPDK library the symbols should all start with rte_
  
Chenbo Xia Dec. 19, 2020, 6:12 a.m. UTC | #3
Hi Stephen,

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Saturday, December 19, 2020 1:14 AM
> To: Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; thomas@monjalon.net; david.marchand@redhat.com; Liang,
> Cunming <cunming.liang@intel.com>; Lu, Xiuchun <xiuchun.lu@intel.com>; Li,
> Miao <miao.li@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>
> Subject: Re: [PATCH 1/9] lib: introduce vfio-user library
> 
> On Fri, 18 Dec 2020 15:38:43 +0800
> Chenbo Xia <chenbo.xia@intel.com> wrote:
> 
> > +inline void vfio_user_close_msg_fds(VFIO_USER_MSG *msg)
> > +{
> > +	int i;
> > +
> > +	for (i = 0; i < msg->fd_num; i++)
> > +		close(msg->fds[i]);
> > +}
> > +
> 
> Please don't use non-static inlines.

Got it. Will fix in v2.

Thanks!
Chenbo
  
Chenbo Xia Dec. 19, 2020, 6:25 a.m. UTC | #4
Hi Stephen,

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Saturday, December 19, 2020 1:18 AM
> To: Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; thomas@monjalon.net; david.marchand@redhat.com; Liang,
> Cunming <cunming.liang@intel.com>; Lu, Xiuchun <xiuchun.lu@intel.com>; Li,
> Miao <miao.li@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>
> Subject: Re: [PATCH 1/9] lib: introduce vfio-user library
> 
> On Fri, 18 Dec 2020 15:38:43 +0800
> Chenbo Xia <chenbo.xia@intel.com> wrote:
> 
> > +typedef struct vfio_user_msg {
> > +	uint16_t msg_id;
> > +	uint16_t cmd;
> > +	uint32_t size;
> > +#define VFIO_USER_TYPE_CMD	(0x0)		/* Message type is COMMAND */
> > +#define VFIO_USER_TYPE_REPLY	(0x1 << 0)	/* Message type is REPLY
> */
> > +#define VFIO_USER_NEED_NO_RP	(0x1 << 4)	/* Message needs no reply
> */
> > +#define VFIO_USER_ERROR		(0x1 << 5)	/* Reply message has error
> */
> > +	uint32_t flags;
> > +	uint32_t err;				/* Valid in reply, optional */
> > +	union {
> > +		struct vfio_user_version ver;
> > +	} payload;
> > +	int fds[VFIO_USER_MAX_FD];
> > +	int fd_num;
> > +} __attribute((packed)) VFIO_USER_MSG;
> 
> Please don't introduce all capitals typedefs.

OK. Will fix in v2.

> Don't use packed,  it generates slower code. Better to use tools
> like pahole and make the layout of the structure naturally packed.

Thanks for the heads up 😊. Will check pahole then.

> Also, if you put fds[] at the end you could use dynamic sized array
> and not be constrained by VFIO_USER_MAX_FD.

I put this constraint just because one vfio-user message in theory will not
send so many fds. And since I am just planning to optimize the message structure,
I will consider this to make it more flexible and reasonable.

> 
> 
> Since this is DPDK library the symbols should all start with rte_

I think structs in this file are not exposed. Do we add rte_ in this case?
If yes, I will fix it in v2 then.

Thanks for all the good catches!
Chenbo
  

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index eafe9f8c46..5fb4880758 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1540,6 +1540,10 @@  M: Nithin Dabilpuram <ndabilpuram@marvell.com>
 M: Pavan Nikhilesh <pbhagavatula@marvell.com>
 F: lib/librte_node/
 
+Vfio-user - EXPERIMENTAL
+M: Chenbo Xia <chenbo.xia@intel.com>
+M: Xiuchun Lu <xiuchun.lu@intel.com>
+F: lib/librte_vfio_user/
 
 Test Applications
 -----------------
diff --git a/lib/librte_vfio_user/meson.build b/lib/librte_vfio_user/meson.build
new file mode 100644
index 0000000000..0f6407b80f
--- /dev/null
+++ b/lib/librte_vfio_user/meson.build
@@ -0,0 +1,9 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Intel Corporation
+
+if not is_linux
+	build = false
+	reason = 'only supported on Linux'
+endif
+
+sources = files('vfio_user_base.c')
diff --git a/lib/librte_vfio_user/version.map b/lib/librte_vfio_user/version.map
new file mode 100644
index 0000000000..33c1b976f1
--- /dev/null
+++ b/lib/librte_vfio_user/version.map
@@ -0,0 +1,3 @@ 
+EXPERIMENTAL {
+	local: *;
+};
diff --git a/lib/librte_vfio_user/vfio_user_base.c b/lib/librte_vfio_user/vfio_user_base.c
new file mode 100644
index 0000000000..bbad553e0a
--- /dev/null
+++ b/lib/librte_vfio_user/vfio_user_base.c
@@ -0,0 +1,205 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <unistd.h>
+#include <sys/socket.h>
+#include <string.h>
+
+#include "vfio_user_base.h"
+
+int vfio_user_log_level;
+
+const char *vfio_user_msg_str[VFIO_USER_MAX] = {
+	[VFIO_USER_NONE] = "VFIO_USER_NONE",
+	[VFIO_USER_VERSION] = "VFIO_USER_VERSION",
+};
+
+inline void vfio_user_close_msg_fds(VFIO_USER_MSG *msg)
+{
+	int i;
+
+	for (i = 0; i < msg->fd_num; i++)
+		close(msg->fds[i]);
+}
+
+int vfio_user_check_msg_fdnum(VFIO_USER_MSG *msg, int expected_fds)
+{
+	if (msg->fd_num == expected_fds)
+		return 0;
+
+	VFIO_USER_LOG(ERR, "Expect %d FDs for request %s, received %d\n",
+		expected_fds, vfio_user_msg_str[msg->cmd], msg->fd_num);
+
+	vfio_user_close_msg_fds(msg);
+
+	return -1;
+}
+
+static int vfio_user_recv_fd_msg(int sockfd, char *buf, int buflen, int *fds,
+	int max_fds, int *fd_num)
+{
+	struct iovec iov;
+	struct msghdr msgh;
+	char control[CMSG_SPACE(max_fds * sizeof(int))];
+	struct cmsghdr *cmsg;
+	int fd_sz, got_fds = 0;
+	int ret, i;
+
+	*fd_num = 0;
+
+	memset(&msgh, 0, sizeof(msgh));
+	iov.iov_base = buf;
+	iov.iov_len  = buflen;
+
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+	msgh.msg_control = control;
+	msgh.msg_controllen = sizeof(control);
+
+	ret = recvmsg(sockfd, &msgh, 0);
+	if (ret <= 0) {
+		if (ret)
+			VFIO_USER_LOG(DEBUG, "recvmsg failed\n");
+		return ret;
+	}
+
+	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+		VFIO_USER_LOG(ERR, "Message is truncated\n");
+		return -1;
+	}
+
+	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
+		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+		if ((cmsg->cmsg_level == SOL_SOCKET) &&
+			(cmsg->cmsg_type == SCM_RIGHTS)) {
+			fd_sz = cmsg->cmsg_len - CMSG_LEN(0);
+			got_fds = fd_sz / sizeof(int);
+			if (got_fds >= max_fds) {
+				/* Invalid message, close fds */
+				int *close_fd = (int *)CMSG_DATA(cmsg);
+				for (i = 0; i < got_fds; i++) {
+					close_fd += i;
+					close(*close_fd);
+				}
+				VFIO_USER_LOG(ERR, "fd num exceeds max "
+					"in vfio-user msg\n");
+				return -1;
+			}
+			*fd_num = got_fds;
+			memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
+			break;
+		}
+	}
+
+	/* Make unused file descriptors invalid */
+	while (got_fds < max_fds)
+		fds[got_fds++] = -1;
+
+	return ret;
+}
+
+int vfio_user_recv_msg(int sockfd, VFIO_USER_MSG *msg)
+{
+	int ret;
+
+	ret = vfio_user_recv_fd_msg(sockfd, (char *)msg, VFIO_USER_MSG_HDR_SIZE,
+		msg->fds, VFIO_USER_MAX_FD, &msg->fd_num);
+	if (ret <= 0) {
+		return ret;
+	} else if (ret != VFIO_USER_MSG_HDR_SIZE) {
+		VFIO_USER_LOG(ERR, "Read unexpected header size\n");
+		ret = -1;
+		goto err;
+	}
+
+	if (msg->size > VFIO_USER_MSG_HDR_SIZE) {
+		if (msg->size > (sizeof(msg->payload) +
+			VFIO_USER_MSG_HDR_SIZE)) {
+			VFIO_USER_LOG(ERR, "Read invalid msg size: %d\n",
+				msg->size);
+			ret = -1;
+			goto err;
+		}
+
+		ret = read(sockfd, &msg->payload,
+			msg->size - VFIO_USER_MSG_HDR_SIZE);
+		if (ret <= 0)
+			goto err;
+		if (ret != (int)(msg->size - VFIO_USER_MSG_HDR_SIZE)) {
+			VFIO_USER_LOG(ERR, "Read payload failed\n");
+			ret = -1;
+			goto err;
+		}
+	}
+
+	return ret;
+err:
+	vfio_user_close_msg_fds(msg);
+	return ret;
+}
+
+static int
+vfio_user_send_fd_msg(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+
+	struct iovec iov;
+	struct msghdr msgh;
+	size_t fdsize = fd_num * sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	int ret;
+
+	memset(&msgh, 0, sizeof(msgh));
+	iov.iov_base = buf;
+	iov.iov_len = buflen;
+
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+
+	if (fds && fd_num > 0) {
+		msgh.msg_control = control;
+		msgh.msg_controllen = sizeof(control);
+		cmsg = CMSG_FIRSTHDR(&msgh);
+		cmsg->cmsg_len = CMSG_LEN(fdsize);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		memcpy(CMSG_DATA(cmsg), fds, fdsize);
+	} else {
+		msgh.msg_control = NULL;
+		msgh.msg_controllen = 0;
+	}
+
+	do {
+		ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0) {
+		VFIO_USER_LOG(ERR, "sendmsg error\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+int vfio_user_send_msg(int sockfd, VFIO_USER_MSG *msg)
+{
+	if (!msg)
+		return 0;
+
+	return vfio_user_send_fd_msg(sockfd, (char *)msg,
+		msg->size, msg->fds, msg->fd_num);
+}
+
+int vfio_user_reply_msg(int sockfd, VFIO_USER_MSG *msg)
+{
+	if (!msg)
+		return 0;
+
+	msg->flags |= VFIO_USER_NEED_NO_RP;
+	msg->flags |= VFIO_USER_TYPE_REPLY;
+
+	return vfio_user_send_msg(sockfd, msg);
+}
+
+RTE_LOG_REGISTER(vfio_user_log_level, lib.vfio, INFO);
diff --git a/lib/librte_vfio_user/vfio_user_base.h b/lib/librte_vfio_user/vfio_user_base.h
new file mode 100644
index 0000000000..6db45b1819
--- /dev/null
+++ b/lib/librte_vfio_user/vfio_user_base.h
@@ -0,0 +1,65 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _VFIO_USER_BASE_H
+#define _VFIO_USER_BASE_H
+
+#include <rte_log.h>
+
+#define VFIO_USER_MAX_FD 1024
+#define VFIO_USER_MAX_VERSION_DATA 512
+
+extern int vfio_user_log_level;
+extern const char *vfio_user_msg_str[];
+
+#define VFIO_USER_LOG(level, fmt, args...)			\
+	rte_log(RTE_LOG_ ## level, vfio_user_log_level,		\
+	"VFIO_USER: " fmt, ## args)
+
+struct vfio_user_socket {
+	char *sock_addr;
+	int sock_fd;
+	int dev_id;
+};
+
+typedef enum VFIO_USER_CMD_TYPE {
+	VFIO_USER_NONE = 0,
+	VFIO_USER_VERSION = 1,
+	VFIO_USER_MAX = 2,
+} VFIO_USER_CMD_TYPE;
+
+struct vfio_user_version {
+	uint16_t major;
+	uint16_t minor;
+	/* Version data (JSON), for now not supported */
+	uint8_t ver_data[VFIO_USER_MAX_VERSION_DATA];
+};
+
+typedef struct vfio_user_msg {
+	uint16_t msg_id;
+	uint16_t cmd;
+	uint32_t size;
+#define VFIO_USER_TYPE_CMD	(0x0)		/* Message type is COMMAND */
+#define VFIO_USER_TYPE_REPLY	(0x1 << 0)	/* Message type is REPLY */
+#define VFIO_USER_NEED_NO_RP	(0x1 << 4)	/* Message needs no reply */
+#define VFIO_USER_ERROR		(0x1 << 5)	/* Reply message has error */
+	uint32_t flags;
+	uint32_t err;				/* Valid in reply, optional */
+	union {
+		struct vfio_user_version ver;
+	} payload;
+	int fds[VFIO_USER_MAX_FD];
+	int fd_num;
+} __attribute((packed)) VFIO_USER_MSG;
+
+#define VFIO_USER_MSG_HDR_SIZE offsetof(VFIO_USER_MSG, payload.ver)
+
+void vfio_user_close_msg_fds(VFIO_USER_MSG *msg);
+int vfio_user_check_msg_fdnum(VFIO_USER_MSG *msg, int expected_fds);
+void vfio_user_close_msg_fds(VFIO_USER_MSG *msg);
+int vfio_user_recv_msg(int sockfd, VFIO_USER_MSG *msg);
+int vfio_user_send_msg(int sockfd, VFIO_USER_MSG *msg);
+int vfio_user_reply_msg(int sockfd, VFIO_USER_MSG *msg);
+
+#endif
diff --git a/lib/meson.build b/lib/meson.build
index ed00f89146..b7fbfcc95b 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -28,6 +28,7 @@  libraries = [
 	'rib', 'reorder', 'sched', 'security', 'stack', 'vhost',
 	# ipsec lib depends on net, crypto and security
 	'ipsec',
+	'vfio_user',
 	#fib lib depends on rib
 	'fib',
 	# add pkt framework libs which use other libs from above