Hi Stephen,
> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Saturday, December 19, 2020 1:18 AM
> To: Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; thomas@monjalon.net; david.marchand@redhat.com; Liang,
> Cunming <cunming.liang@intel.com>; Lu, Xiuchun <xiuchun.lu@intel.com>; Li,
> Miao <miao.li@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>
> Subject: Re: [PATCH 1/9] lib: introduce vfio-user library
>
> On Fri, 18 Dec 2020 15:38:43 +0800
> Chenbo Xia <chenbo.xia@intel.com> wrote:
>
> > +typedef struct vfio_user_msg {
> > + uint16_t msg_id;
> > + uint16_t cmd;
> > + uint32_t size;
> > +#define VFIO_USER_TYPE_CMD (0x0) /* Message type is COMMAND */
> > +#define VFIO_USER_TYPE_REPLY (0x1 << 0) /* Message type is REPLY
> */
> > +#define VFIO_USER_NEED_NO_RP (0x1 << 4) /* Message needs no reply
> */
> > +#define VFIO_USER_ERROR (0x1 << 5) /* Reply message has error
> */
> > + uint32_t flags;
> > + uint32_t err; /* Valid in reply, optional */
> > + union {
> > + struct vfio_user_version ver;
> > + } payload;
> > + int fds[VFIO_USER_MAX_FD];
> > + int fd_num;
> > +} __attribute((packed)) VFIO_USER_MSG;
>
> Please don't introduce all capitals typedefs.
OK. Will fix in v2.
> Don't use packed, it generates slower code. Better to use tools
> like pahole and make the layout of the structure naturally packed.
Thanks for the heads up 😊. Will check pahole then.
> Also, if you put fds[] at the end you could use dynamic sized array
> and not be constrained by VFIO_USER_MAX_FD.
I put this constraint just because one vfio-user message in theory will not
send so many fds. And since I am just planning to optimize the message structure,
I will consider this to make it more flexible and reasonable.
>
>
> Since this is DPDK library the symbols should all start with rte_
I think structs in this file are not exposed. Do we add rte_ in this case?
If yes, I will fix it in v2 then.
Thanks for all the good catches!
Chenbo
@@ -1540,6 +1540,10 @@ M: Nithin Dabilpuram <ndabilpuram@marvell.com>
M: Pavan Nikhilesh <pbhagavatula@marvell.com>
F: lib/librte_node/
+Vfio-user - EXPERIMENTAL
+M: Chenbo Xia <chenbo.xia@intel.com>
+M: Xiuchun Lu <xiuchun.lu@intel.com>
+F: lib/librte_vfio_user/
Test Applications
-----------------
new file mode 100644
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Intel Corporation
+
+if not is_linux
+ build = false
+ reason = 'only supported on Linux'
+endif
+
+sources = files('vfio_user_base.c')
new file mode 100644
@@ -0,0 +1,3 @@
+EXPERIMENTAL {
+ local: *;
+};
new file mode 100644
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <unistd.h>
+#include <sys/socket.h>
+#include <string.h>
+
+#include "vfio_user_base.h"
+
+int vfio_user_log_level;
+
+const char *vfio_user_msg_str[VFIO_USER_MAX] = {
+ [VFIO_USER_NONE] = "VFIO_USER_NONE",
+ [VFIO_USER_VERSION] = "VFIO_USER_VERSION",
+};
+
+inline void vfio_user_close_msg_fds(VFIO_USER_MSG *msg)
+{
+ int i;
+
+ for (i = 0; i < msg->fd_num; i++)
+ close(msg->fds[i]);
+}
+
+int vfio_user_check_msg_fdnum(VFIO_USER_MSG *msg, int expected_fds)
+{
+ if (msg->fd_num == expected_fds)
+ return 0;
+
+ VFIO_USER_LOG(ERR, "Expect %d FDs for request %s, received %d\n",
+ expected_fds, vfio_user_msg_str[msg->cmd], msg->fd_num);
+
+ vfio_user_close_msg_fds(msg);
+
+ return -1;
+}
+
+static int vfio_user_recv_fd_msg(int sockfd, char *buf, int buflen, int *fds,
+ int max_fds, int *fd_num)
+{
+ struct iovec iov;
+ struct msghdr msgh;
+ char control[CMSG_SPACE(max_fds * sizeof(int))];
+ struct cmsghdr *cmsg;
+ int fd_sz, got_fds = 0;
+ int ret, i;
+
+ *fd_num = 0;
+
+ memset(&msgh, 0, sizeof(msgh));
+ iov.iov_base = buf;
+ iov.iov_len = buflen;
+
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ msgh.msg_control = control;
+ msgh.msg_controllen = sizeof(control);
+
+ ret = recvmsg(sockfd, &msgh, 0);
+ if (ret <= 0) {
+ if (ret)
+ VFIO_USER_LOG(DEBUG, "recvmsg failed\n");
+ return ret;
+ }
+
+ if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+ VFIO_USER_LOG(ERR, "Message is truncated\n");
+ return -1;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+ if ((cmsg->cmsg_level == SOL_SOCKET) &&
+ (cmsg->cmsg_type == SCM_RIGHTS)) {
+ fd_sz = cmsg->cmsg_len - CMSG_LEN(0);
+ got_fds = fd_sz / sizeof(int);
+ if (got_fds >= max_fds) {
+ /* Invalid message, close fds */
+ int *close_fd = (int *)CMSG_DATA(cmsg);
+ for (i = 0; i < got_fds; i++) {
+ close_fd += i;
+ close(*close_fd);
+ }
+ VFIO_USER_LOG(ERR, "fd num exceeds max "
+ "in vfio-user msg\n");
+ return -1;
+ }
+ *fd_num = got_fds;
+ memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
+ break;
+ }
+ }
+
+ /* Make unused file descriptors invalid */
+ while (got_fds < max_fds)
+ fds[got_fds++] = -1;
+
+ return ret;
+}
+
+int vfio_user_recv_msg(int sockfd, VFIO_USER_MSG *msg)
+{
+ int ret;
+
+ ret = vfio_user_recv_fd_msg(sockfd, (char *)msg, VFIO_USER_MSG_HDR_SIZE,
+ msg->fds, VFIO_USER_MAX_FD, &msg->fd_num);
+ if (ret <= 0) {
+ return ret;
+ } else if (ret != VFIO_USER_MSG_HDR_SIZE) {
+ VFIO_USER_LOG(ERR, "Read unexpected header size\n");
+ ret = -1;
+ goto err;
+ }
+
+ if (msg->size > VFIO_USER_MSG_HDR_SIZE) {
+ if (msg->size > (sizeof(msg->payload) +
+ VFIO_USER_MSG_HDR_SIZE)) {
+ VFIO_USER_LOG(ERR, "Read invalid msg size: %d\n",
+ msg->size);
+ ret = -1;
+ goto err;
+ }
+
+ ret = read(sockfd, &msg->payload,
+ msg->size - VFIO_USER_MSG_HDR_SIZE);
+ if (ret <= 0)
+ goto err;
+ if (ret != (int)(msg->size - VFIO_USER_MSG_HDR_SIZE)) {
+ VFIO_USER_LOG(ERR, "Read payload failed\n");
+ ret = -1;
+ goto err;
+ }
+ }
+
+ return ret;
+err:
+ vfio_user_close_msg_fds(msg);
+ return ret;
+}
+
+static int
+vfio_user_send_fd_msg(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+
+ struct iovec iov;
+ struct msghdr msgh;
+ size_t fdsize = fd_num * sizeof(int);
+ char control[CMSG_SPACE(fdsize)];
+ struct cmsghdr *cmsg;
+ int ret;
+
+ memset(&msgh, 0, sizeof(msgh));
+ iov.iov_base = buf;
+ iov.iov_len = buflen;
+
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+
+ if (fds && fd_num > 0) {
+ msgh.msg_control = control;
+ msgh.msg_controllen = sizeof(control);
+ cmsg = CMSG_FIRSTHDR(&msgh);
+ cmsg->cmsg_len = CMSG_LEN(fdsize);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ memcpy(CMSG_DATA(cmsg), fds, fdsize);
+ } else {
+ msgh.msg_control = NULL;
+ msgh.msg_controllen = 0;
+ }
+
+ do {
+ ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0) {
+ VFIO_USER_LOG(ERR, "sendmsg error\n");
+ return ret;
+ }
+
+ return ret;
+}
+
+int vfio_user_send_msg(int sockfd, VFIO_USER_MSG *msg)
+{
+ if (!msg)
+ return 0;
+
+ return vfio_user_send_fd_msg(sockfd, (char *)msg,
+ msg->size, msg->fds, msg->fd_num);
+}
+
+int vfio_user_reply_msg(int sockfd, VFIO_USER_MSG *msg)
+{
+ if (!msg)
+ return 0;
+
+ msg->flags |= VFIO_USER_NEED_NO_RP;
+ msg->flags |= VFIO_USER_TYPE_REPLY;
+
+ return vfio_user_send_msg(sockfd, msg);
+}
+
+RTE_LOG_REGISTER(vfio_user_log_level, lib.vfio, INFO);
new file mode 100644
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _VFIO_USER_BASE_H
+#define _VFIO_USER_BASE_H
+
+#include <rte_log.h>
+
+#define VFIO_USER_MAX_FD 1024
+#define VFIO_USER_MAX_VERSION_DATA 512
+
+extern int vfio_user_log_level;
+extern const char *vfio_user_msg_str[];
+
+#define VFIO_USER_LOG(level, fmt, args...) \
+ rte_log(RTE_LOG_ ## level, vfio_user_log_level, \
+ "VFIO_USER: " fmt, ## args)
+
+struct vfio_user_socket {
+ char *sock_addr;
+ int sock_fd;
+ int dev_id;
+};
+
+typedef enum VFIO_USER_CMD_TYPE {
+ VFIO_USER_NONE = 0,
+ VFIO_USER_VERSION = 1,
+ VFIO_USER_MAX = 2,
+} VFIO_USER_CMD_TYPE;
+
+struct vfio_user_version {
+ uint16_t major;
+ uint16_t minor;
+ /* Version data (JSON), for now not supported */
+ uint8_t ver_data[VFIO_USER_MAX_VERSION_DATA];
+};
+
+typedef struct vfio_user_msg {
+ uint16_t msg_id;
+ uint16_t cmd;
+ uint32_t size;
+#define VFIO_USER_TYPE_CMD (0x0) /* Message type is COMMAND */
+#define VFIO_USER_TYPE_REPLY (0x1 << 0) /* Message type is REPLY */
+#define VFIO_USER_NEED_NO_RP (0x1 << 4) /* Message needs no reply */
+#define VFIO_USER_ERROR (0x1 << 5) /* Reply message has error */
+ uint32_t flags;
+ uint32_t err; /* Valid in reply, optional */
+ union {
+ struct vfio_user_version ver;
+ } payload;
+ int fds[VFIO_USER_MAX_FD];
+ int fd_num;
+} __attribute((packed)) VFIO_USER_MSG;
+
+#define VFIO_USER_MSG_HDR_SIZE offsetof(VFIO_USER_MSG, payload.ver)
+
+void vfio_user_close_msg_fds(VFIO_USER_MSG *msg);
+int vfio_user_check_msg_fdnum(VFIO_USER_MSG *msg, int expected_fds);
+void vfio_user_close_msg_fds(VFIO_USER_MSG *msg);
+int vfio_user_recv_msg(int sockfd, VFIO_USER_MSG *msg);
+int vfio_user_send_msg(int sockfd, VFIO_USER_MSG *msg);
+int vfio_user_reply_msg(int sockfd, VFIO_USER_MSG *msg);
+
+#endif
@@ -28,6 +28,7 @@ libraries = [
'rib', 'reorder', 'sched', 'security', 'stack', 'vhost',
# ipsec lib depends on net, crypto and security
'ipsec',
+ 'vfio_user',
#fib lib depends on rib
'fib',
# add pkt framework libs which use other libs from above