[dpdk-dev,v5] vfio: change to use generic multi-process channel

Message ID 1520175456-52990-1-git-send-email-jianfeng.tan@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Jianfeng Tan March 4, 2018, 2:57 p.m. UTC
  Previously, vfio uses its own private channel for the secondary
process to get container fd and group fd from the primary process.

This patch changes to use the generic mp channel.

Test:
  1. Bind two NICs to vfio-pci.

  2. Start the primary and secondary process.
    $ (symmetric_mp) -c 2 -- -p 3 --num-procs=2 --proc-id=0
    $ (symmetric_mp) -c 4 --proc-type=auto -- -p 3 \
				--num-procs=2 --proc-id=1

Cc: anatoly.burakov@intel.com

Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
---
 lib/librte_eal/linuxapp/eal/eal.c              |  14 +-
 lib/librte_eal/linuxapp/eal/eal_vfio.c         | 172 +++++------
 lib/librte_eal/linuxapp/eal/eal_vfio.h         |  15 +-
 lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c | 409 ++++---------------------
 4 files changed, 136 insertions(+), 474 deletions(-)
  

Comments

Anatoly Burakov March 14, 2018, 1:27 p.m. UTC | #1
On 04-Mar-18 2:57 PM, Jianfeng Tan wrote:
> Previously, vfio uses its own private channel for the secondary
> process to get container fd and group fd from the primary process.
> 
> This patch changes to use the generic mp channel.
> 
> Test:
>    1. Bind two NICs to vfio-pci.
> 
>    2. Start the primary and secondary process.
>      $ (symmetric_mp) -c 2 -- -p 3 --num-procs=2 --proc-id=0
>      $ (symmetric_mp) -c 4 --proc-type=auto -- -p 3 \
> 				--num-procs=2 --proc-id=1
> 
> Cc: anatoly.burakov@intel.com
> 
> Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
> ---

<...>

> -		ret = vfio_mp_sync_receive_request(socket_fd);
> -		switch (ret) {
> -		case SOCKET_NO_FD:
> -			close(socket_fd);
> -			return 0;
> -		case SOCKET_OK:
> -			vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
> -			/* if we got the fd, store it and return it */
> -			if (vfio_group_fd > 0) {
> -				close(socket_fd);
> -				cur_grp->group_no = iommu_group_no;
> -				cur_grp->fd = vfio_group_fd;
> -				vfio_cfg.vfio_active_groups++;
> -				return vfio_group_fd;
> -			}
> -			/* fall-through on error */
> -		default:
> -			RTE_LOG(ERR, EAL, "  cannot get container fd!\n");
> -			close(socket_fd);
> -			return -1;
> +	p->req = SOCKET_REQ_GROUP;
> +	p->group_no = iommu_group_no;
> +	strcpy(mp_req.name, "vfio");

"vfio" should probably be a #define. Also, i think the identifier is too 
short. Maybe "eal_vfio_mp_sync" or at least "eal_vfio" would be better?

> +	mp_req.len_param = sizeof(*p);
> +	mp_req.num_fds = 0;
> +
> +	vfio_group_fd = -1;
> +	if (rte_mp_request(&mp_req, &mp_reply, &ts) == 0 &&
> +	    mp_reply.nb_received == 1) {
> +		mp_rep = &mp_reply.msgs[0];
> +		p = (struct vfio_mp_param *)mp_rep->param;
> +		if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
> +			cur_grp->group_no = iommu_group_no;
> +			vfio_group_fd = mp_rep->fds[0];
> +			cur_grp->fd = vfio_group_fd;
> +			vfio_cfg.vfio_active_groups++;
>   		}
> +		free(mp_reply.msgs);
>   	}
> -	return -1;
> +
> +	if (vfio_group_fd < 0)
> +		RTE_LOG(ERR, EAL, "  cannot request group fd\n");
> +	return vfio_group_fd;

p->result can be SOCKET_NO_FD, in which case returned value should be 
zero. I think this is missing from this code. There probably should be 
an "else if (p->result == SOCKET_NO_FD)" clause that sets return value to 0.

You should be able to test this by trying to set up a device for VFIO 
that isn't bound to VFIO driver, in a secondary process.

>   }
>   
>   
> @@ -200,7 +185,10 @@ int
>   rte_vfio_clear_group(int vfio_group_fd)
>   {
>   	int i;
> -	int socket_fd, ret;
> +	struct rte_mp_msg mp_req, *mp_rep;
> +	struct rte_mp_reply mp_reply;
> +	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
> +	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
>   
>   	if (internal_config.process_type == RTE_PROC_PRIMARY) {
>   
> @@ -214,43 +202,24 @@ rte_vfio_clear_group(int vfio_group_fd)
>   		return 0;
>   	}
>   
> -	/* This is just for SECONDARY processes */
> -	socket_fd = vfio_mp_sync_connect_to_primary();
> -
> -	if (socket_fd < 0) {
> -		RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
> -		return -1;
> -	}
> -
> -	if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) {
> -		RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
> -		close(socket_fd);
> -		return -1;
> -	}
> +	p->req = SOCKET_CLR_GROUP;
> +	p->group_no = vfio_group_fd;
> +	strcpy(mp_req.name, "vfio");

Same here, please use a #define here.

> +	mp_req.len_param = sizeof(*p);
> +	mp_req.num_fds = 0;
>   
> -	if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) {
> -		RTE_LOG(ERR, EAL, "  cannot send group fd!\n");
> -		close(socket_fd);
> -		return -1;
> +	if (rte_mp_request(&mp_req, &mp_reply, &ts) == 0 &&
> +	    mp_reply.nb_received == 1) {
> +		mp_rep = &mp_reply.msgs[0];
> +		p = (struct vfio_mp_param *)mp_rep->param;
> +		if (p->result == SOCKET_OK) {
> +			free(mp_reply.msgs);
> +			return 0;
> +		}
> +		free(mp_reply.msgs);
>   	}
>   
> -	ret = vfio_mp_sync_receive_request(socket_fd);
> -	switch (ret) {
> -	case SOCKET_NO_FD:
> -		RTE_LOG(ERR, EAL, "  BAD VFIO group fd!\n");
> -		close(socket_fd);
> -		break;
> -	case SOCKET_OK:
> -		close(socket_fd);
> -		return 0;
> -	case SOCKET_ERR:
> -		RTE_LOG(ERR, EAL, "  Socket error\n");
> -		close(socket_fd);
> -		break;
> -	default:
> -		RTE_LOG(ERR, EAL, "  UNKNOWN reply, %d\n", ret);
> -		close(socket_fd);
> -	}
> +	RTE_LOG(ERR, EAL, "  BAD VFIO group fd!\n");

Old error messages distinguished between "bad VFIO group fd" and other 
errors. You should probably only output this message of response was 
SOCKET_NO_FD, and output another message in case of other errors.

>   	return -1;
>   }
>   
> @@ -561,6 +530,11 @@ int
>   vfio_get_container_fd(void)
>   {

<...>

> -		vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd);
> -		if (vfio_container_fd < 0) {
> -			RTE_LOG(ERR, EAL, "  cannot get container fd!\n");
> -			close(socket_fd);
> -			return -1;
> +	}
> +	/*
> +	 * if we're in a secondary process, request container fd from the
> +	 * primary process via mp channel
> +	 */
> +	p->req = SOCKET_REQ_CONTAINER;
> +	strcpy(mp_req.name, "vfio");

Same here, please use #define here.

> +	mp_req.len_param = sizeof(*p);
> +	mp_req.num_fds = 0;
> +
> +	vfio_container_fd = -1;
> +	if (rte_mp_request(&mp_req, &mp_reply, &ts) == 0 &&
> +	    mp_reply.nb_received == 1) {
> +		mp_rep = &mp_reply.msgs[0];
> +		p = (struct vfio_mp_param *)mp_rep->param;
> +		if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
> +			free(mp_reply.msgs);
> +			return mp_rep->fds[0];
>   		}
> -		close(socket_fd);
> -		return vfio_container_fd;
> +		free(mp_reply.msgs);

<...>

> -	/* set up a socket */
> -	socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
> -	if (socket_fd < 0) {
> -		RTE_LOG(ERR, EAL, "Failed to create socket!\n");
> -		return -1;
> -	}
> -
> -	get_socket_path(addr.sun_path, sizeof(addr.sun_path));
> -	addr.sun_family = AF_UNIX;
> -
> -	sockaddr_len = sizeof(struct sockaddr_un);
> -
> -	unlink(addr.sun_path);
> -
> -	ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len);
> -	if (ret) {
> -		RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno));
> -		close(socket_fd);
> +		break;
> +	case SOCKET_CLR_GROUP:
> +		r->req = SOCKET_CLR_GROUP;
> +		r->group_no = m->group_no;
> +		if (rte_vfio_clear_group(m->group_no) < 0)
> +			r->result = SOCKET_NO_FD;
> +		else
> +			r->result = SOCKET_OK;
> +		break;
> +	case SOCKET_REQ_CONTAINER:
> +		r->req = SOCKET_REQ_CONTAINER;
> +		fd = vfio_get_container_fd();
> +		if (fd < 0)
> +			r->result = SOCKET_ERR;
> +		else {
> +			r->result = SOCKET_OK;
> +			num = 1;
> +		}
> +		break;
> +	default:
> +		RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
>   		return -1;
>   	}
>   
> -	ret = listen(socket_fd, 50);
> -	if (ret) {
> -		RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno));
> -		close(socket_fd);
> -		return -1;
> +	if (num == 1) {
> +		reply.num_fds = 1;
> +		reply.fds[0] = fd;
>   	}

You're not saving any lines of code with this, but you are sacrificing 
code clarity :) I think this should be done inline, e.g. in "else" 
clause of SOCKET_REQ_CONTAINER and SOCKET_REQ_GROUP.

> +	strcpy(reply.name, "vfio");

Same here, please use #define.

> +	reply.len_param = sizeof(*r);
>   
> -	/* save the socket in local configuration */
> -	mp_socket_fd = socket_fd;
> -
> -	return 0;
> +	ret = rte_mp_reply(&reply, peer);
> +	if (m->req == SOCKET_REQ_CONTAINER && num == 1)

Why not just "fd >= 0"? No need for num variable then.

> +		close(fd);
> +	return ret;
>   }
>   
> -/*
> - * set up a local socket and tell it to listen for incoming connections
> - */
>   int
>   vfio_mp_sync_setup(void)
>   {
> -	int ret;
> -	char thread_name[RTE_MAX_THREAD_NAME_LEN];
> -
> -	if (vfio_mp_sync_socket_setup() < 0) {
> -		RTE_LOG(ERR, EAL, "Failed to set up local socket!\n");
> -		return -1;
> -	}
> -
> -	ret = pthread_create(&socket_thread, NULL,
> -			vfio_mp_sync_thread, NULL);
> -	if (ret) {
> -		RTE_LOG(ERR, EAL,
> -			"Failed to create thread for communication with secondary processes!\n");
> -		close(mp_socket_fd);
> -		return -1;
> -	}
> -
> -	/* Set thread_name for aid in debugging. */
> -	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "vfio-sync");
> -	ret = rte_thread_setname(socket_thread, thread_name);
> -	if (ret)
> -		RTE_LOG(DEBUG, EAL,
> -			"Failed to set thread name for secondary processes!\n");
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> +		return rte_mp_action_register("vfio", vfio_mp_primary);

Same here, please use #define.

>   
>   	return 0;
>   }
> 

Thanks for doing this patch!
  
Jianfeng Tan March 19, 2018, 6:53 a.m. UTC | #2
Hi Anatoly,

Thank you for the review. All your comments will be addressed in next version, except for below concern which might be taken care of in another patch if it also concerns you.

> -----Original Message-----

> From: Burakov, Anatoly

> Sent: Wednesday, March 14, 2018 9:27 PM

> To: Tan, Jianfeng; dev@dpdk.org

> Cc: Richardson, Bruce; Ananyev, Konstantin; thomas@monjalon.net

> Subject: Re: [PATCH v5] vfio: change to use generic multi-process channel

[...]
> 

> > +	mp_req.len_param = sizeof(*p);

> > +	mp_req.num_fds = 0;

> > +

> > +	vfio_group_fd = -1;

> > +	if (rte_mp_request(&mp_req, &mp_reply, &ts) == 0 &&

> > +	    mp_reply.nb_received == 1) {

> > +		mp_rep = &mp_reply.msgs[0];

> > +		p = (struct vfio_mp_param *)mp_rep->param;

> > +		if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {

> > +			cur_grp->group_no = iommu_group_no;

> > +			vfio_group_fd = mp_rep->fds[0];

> > +			cur_grp->fd = vfio_group_fd;

> > +			vfio_cfg.vfio_active_groups++;

> >   		}

> > +		free(mp_reply.msgs);

> >   	}

> > -	return -1;

> > +

> > +	if (vfio_group_fd < 0)

> > +		RTE_LOG(ERR, EAL, "  cannot request group fd\n");

> > +	return vfio_group_fd;

> 

> p->result can be SOCKET_NO_FD, in which case returned value should be

> zero. I think this is missing from this code. There probably should be

> an "else if (p->result == SOCKET_NO_FD)" clause that sets return value to 0.

> 

> You should be able to test this by trying to set up a device for VFIO

> that isn't bound to VFIO driver, in a secondary process.


OK, I will fix this.

But really, "zero" could be ambiguous as a fd could, theoretically, be zero too.

Thanks,
Jianfeng
  
Anatoly Burakov March 20, 2018, 10:33 a.m. UTC | #3
On 19-Mar-18 6:53 AM, Tan, Jianfeng wrote:
> Hi Anatoly,
> 
> Thank you for the review. All your comments will be addressed in next version, except for below concern which might be taken care of in another patch if it also concerns you.
> 
>> -----Original Message-----
>> From: Burakov, Anatoly
>> Sent: Wednesday, March 14, 2018 9:27 PM
>> To: Tan, Jianfeng; dev@dpdk.org
>> Cc: Richardson, Bruce; Ananyev, Konstantin; thomas@monjalon.net
>> Subject: Re: [PATCH v5] vfio: change to use generic multi-process channel
> [...]
>>
>>> +	mp_req.len_param = sizeof(*p);
>>> +	mp_req.num_fds = 0;
>>> +
>>> +	vfio_group_fd = -1;
>>> +	if (rte_mp_request(&mp_req, &mp_reply, &ts) == 0 &&
>>> +	    mp_reply.nb_received == 1) {
>>> +		mp_rep = &mp_reply.msgs[0];
>>> +		p = (struct vfio_mp_param *)mp_rep->param;
>>> +		if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
>>> +			cur_grp->group_no = iommu_group_no;
>>> +			vfio_group_fd = mp_rep->fds[0];
>>> +			cur_grp->fd = vfio_group_fd;
>>> +			vfio_cfg.vfio_active_groups++;
>>>    		}
>>> +		free(mp_reply.msgs);
>>>    	}
>>> -	return -1;
>>> +
>>> +	if (vfio_group_fd < 0)
>>> +		RTE_LOG(ERR, EAL, "  cannot request group fd\n");
>>> +	return vfio_group_fd;
>>
>> p->result can be SOCKET_NO_FD, in which case returned value should be
>> zero. I think this is missing from this code. There probably should be
>> an "else if (p->result == SOCKET_NO_FD)" clause that sets return value to 0.
>>
>> You should be able to test this by trying to set up a device for VFIO
>> that isn't bound to VFIO driver, in a secondary process.
> 
> OK, I will fix this.
> 
> But really, "zero" could be ambiguous as a fd could, theoretically, be zero too.

You're correct. Maybe return 0/-1 in case of success/failure and put fd 
into a pointer? i.e.

int func(int *vfio_group_fd) {
<...>
*vfio_group_fd = fd;
return 0;
}

> 
> Thanks,
> Jianfeng
>
  
Anatoly Burakov March 20, 2018, 10:56 a.m. UTC | #4
On 20-Mar-18 10:33 AM, Burakov, Anatoly wrote:
> On 19-Mar-18 6:53 AM, Tan, Jianfeng wrote:
>> Hi Anatoly,
>>
>> Thank you for the review. All your comments will be addressed in next 
>> version, except for below concern which might be taken care of in 
>> another patch if it also concerns you.
>>
>>> -----Original Message-----
>>> From: Burakov, Anatoly
>>> Sent: Wednesday, March 14, 2018 9:27 PM
>>> To: Tan, Jianfeng; dev@dpdk.org
>>> Cc: Richardson, Bruce; Ananyev, Konstantin; thomas@monjalon.net
>>> Subject: Re: [PATCH v5] vfio: change to use generic multi-process 
>>> channel
>> [...]
>>>
>>>> +    mp_req.len_param = sizeof(*p);
>>>> +    mp_req.num_fds = 0;
>>>> +
>>>> +    vfio_group_fd = -1;
>>>> +    if (rte_mp_request(&mp_req, &mp_reply, &ts) == 0 &&
>>>> +        mp_reply.nb_received == 1) {
>>>> +        mp_rep = &mp_reply.msgs[0];
>>>> +        p = (struct vfio_mp_param *)mp_rep->param;
>>>> +        if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
>>>> +            cur_grp->group_no = iommu_group_no;
>>>> +            vfio_group_fd = mp_rep->fds[0];
>>>> +            cur_grp->fd = vfio_group_fd;
>>>> +            vfio_cfg.vfio_active_groups++;
>>>>            }
>>>> +        free(mp_reply.msgs);
>>>>        }
>>>> -    return -1;
>>>> +
>>>> +    if (vfio_group_fd < 0)
>>>> +        RTE_LOG(ERR, EAL, "  cannot request group fd\n");
>>>> +    return vfio_group_fd;
>>>
>>> p->result can be SOCKET_NO_FD, in which case returned value should be
>>> zero. I think this is missing from this code. There probably should be
>>> an "else if (p->result == SOCKET_NO_FD)" clause that sets return 
>>> value to 0.
>>>
>>> You should be able to test this by trying to set up a device for VFIO
>>> that isn't bound to VFIO driver, in a secondary process.
>>
>> OK, I will fix this.
>>
>> But really, "zero" could be ambiguous as a fd could, theoretically, be 
>> zero too.
> 
> You're correct. Maybe return 0/-1 in case of success/failure and put fd 
> into a pointer? i.e.
> 
> int func(int *vfio_group_fd) {
> <...>
> *vfio_group_fd = fd;
> return 0;
> }

Or rather return 1/0/-1 depending on whether we got SOCKET_OK, 
SOCKET_NO_FD or SOCKET_ERR.
> 
>>
>> Thanks,
>> Jianfeng
>>
> 
>
  

Patch

diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 38306bf..4ca06f4 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -695,18 +695,8 @@  static int rte_eal_vfio_setup(void)
 		return -1;
 	vfio_enabled = rte_vfio_is_enabled("vfio");
 
-	if (vfio_enabled) {
-
-		/* if we are primary process, create a thread to communicate with
-		 * secondary processes. the thread will use a socket to wait for
-		 * requests from secondary process to send open file descriptors,
-		 * because VFIO does not allow multiple open descriptors on a group or
-		 * VFIO container.
-		 */
-		if (internal_config.process_type == RTE_PROC_PRIMARY &&
-				vfio_mp_sync_setup() < 0)
-			return -1;
-	}
+	if (vfio_enabled && vfio_mp_sync_setup() < 0)
+		return -1;
 
 	return 0;
 }
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index e44ae4d..d905e8e 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -1,5 +1,5 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
  */
 
 #include <string.h>
@@ -42,6 +42,10 @@  vfio_get_group_fd(int iommu_group_no)
 	int vfio_group_fd;
 	char filename[PATH_MAX];
 	struct vfio_group *cur_grp;
+	struct rte_mp_msg mp_req, *mp_rep;
+	struct rte_mp_reply mp_reply;
+	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
 
 	/* check if we already have the group descriptor open */
 	for (i = 0; i < VFIO_MAX_GROUPS; i++)
@@ -101,50 +105,31 @@  vfio_get_group_fd(int iommu_group_no)
 		return vfio_group_fd;
 	}
 	/* if we're in a secondary process, request group fd from the primary
-	 * process via our socket
+	 * process via mp channel
 	 */
-	else {
-		int socket_fd, ret;
-
-		socket_fd = vfio_mp_sync_connect_to_primary();
-
-		if (socket_fd < 0) {
-			RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
-			return -1;
-		}
-		if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) {
-			RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
-			close(socket_fd);
-			return -1;
-		}
-		if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) {
-			RTE_LOG(ERR, EAL, "  cannot send group number!\n");
-			close(socket_fd);
-			return -1;
-		}
-		ret = vfio_mp_sync_receive_request(socket_fd);
-		switch (ret) {
-		case SOCKET_NO_FD:
-			close(socket_fd);
-			return 0;
-		case SOCKET_OK:
-			vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
-			/* if we got the fd, store it and return it */
-			if (vfio_group_fd > 0) {
-				close(socket_fd);
-				cur_grp->group_no = iommu_group_no;
-				cur_grp->fd = vfio_group_fd;
-				vfio_cfg.vfio_active_groups++;
-				return vfio_group_fd;
-			}
-			/* fall-through on error */
-		default:
-			RTE_LOG(ERR, EAL, "  cannot get container fd!\n");
-			close(socket_fd);
-			return -1;
+	p->req = SOCKET_REQ_GROUP;
+	p->group_no = iommu_group_no;
+	strcpy(mp_req.name, "vfio");
+	mp_req.len_param = sizeof(*p);
+	mp_req.num_fds = 0;
+
+	vfio_group_fd = -1;
+	if (rte_mp_request(&mp_req, &mp_reply, &ts) == 0 &&
+	    mp_reply.nb_received == 1) {
+		mp_rep = &mp_reply.msgs[0];
+		p = (struct vfio_mp_param *)mp_rep->param;
+		if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+			cur_grp->group_no = iommu_group_no;
+			vfio_group_fd = mp_rep->fds[0];
+			cur_grp->fd = vfio_group_fd;
+			vfio_cfg.vfio_active_groups++;
 		}
+		free(mp_reply.msgs);
 	}
-	return -1;
+
+	if (vfio_group_fd < 0)
+		RTE_LOG(ERR, EAL, "  cannot request group fd\n");
+	return vfio_group_fd;
 }
 
 
@@ -200,7 +185,10 @@  int
 rte_vfio_clear_group(int vfio_group_fd)
 {
 	int i;
-	int socket_fd, ret;
+	struct rte_mp_msg mp_req, *mp_rep;
+	struct rte_mp_reply mp_reply;
+	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
 
 	if (internal_config.process_type == RTE_PROC_PRIMARY) {
 
@@ -214,43 +202,24 @@  rte_vfio_clear_group(int vfio_group_fd)
 		return 0;
 	}
 
-	/* This is just for SECONDARY processes */
-	socket_fd = vfio_mp_sync_connect_to_primary();
-
-	if (socket_fd < 0) {
-		RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
-		return -1;
-	}
-
-	if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) {
-		RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
-		close(socket_fd);
-		return -1;
-	}
+	p->req = SOCKET_CLR_GROUP;
+	p->group_no = vfio_group_fd;
+	strcpy(mp_req.name, "vfio");
+	mp_req.len_param = sizeof(*p);
+	mp_req.num_fds = 0;
 
-	if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) {
-		RTE_LOG(ERR, EAL, "  cannot send group fd!\n");
-		close(socket_fd);
-		return -1;
+	if (rte_mp_request(&mp_req, &mp_reply, &ts) == 0 &&
+	    mp_reply.nb_received == 1) {
+		mp_rep = &mp_reply.msgs[0];
+		p = (struct vfio_mp_param *)mp_rep->param;
+		if (p->result == SOCKET_OK) {
+			free(mp_reply.msgs);
+			return 0;
+		}
+		free(mp_reply.msgs);
 	}
 
-	ret = vfio_mp_sync_receive_request(socket_fd);
-	switch (ret) {
-	case SOCKET_NO_FD:
-		RTE_LOG(ERR, EAL, "  BAD VFIO group fd!\n");
-		close(socket_fd);
-		break;
-	case SOCKET_OK:
-		close(socket_fd);
-		return 0;
-	case SOCKET_ERR:
-		RTE_LOG(ERR, EAL, "  Socket error\n");
-		close(socket_fd);
-		break;
-	default:
-		RTE_LOG(ERR, EAL, "  UNKNOWN reply, %d\n", ret);
-		close(socket_fd);
-	}
+	RTE_LOG(ERR, EAL, "  BAD VFIO group fd!\n");
 	return -1;
 }
 
@@ -561,6 +530,11 @@  int
 vfio_get_container_fd(void)
 {
 	int ret, vfio_container_fd;
+	struct rte_mp_msg mp_req, *mp_rep;
+	struct rte_mp_reply mp_reply;
+	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+
 
 	/* if we're in a primary process, try to open the container */
 	if (internal_config.process_type == RTE_PROC_PRIMARY) {
@@ -591,33 +565,29 @@  vfio_get_container_fd(void)
 		}
 
 		return vfio_container_fd;
-	} else {
-		/*
-		 * if we're in a secondary process, request container fd from the
-		 * primary process via our socket
-		 */
-		int socket_fd;
-
-		socket_fd = vfio_mp_sync_connect_to_primary();
-		if (socket_fd < 0) {
-			RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
-			return -1;
-		}
-		if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) {
-			RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
-			close(socket_fd);
-			return -1;
-		}
-		vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd);
-		if (vfio_container_fd < 0) {
-			RTE_LOG(ERR, EAL, "  cannot get container fd!\n");
-			close(socket_fd);
-			return -1;
+	}
+	/*
+	 * if we're in a secondary process, request container fd from the
+	 * primary process via mp channel
+	 */
+	p->req = SOCKET_REQ_CONTAINER;
+	strcpy(mp_req.name, "vfio");
+	mp_req.len_param = sizeof(*p);
+	mp_req.num_fds = 0;
+
+	vfio_container_fd = -1;
+	if (rte_mp_request(&mp_req, &mp_reply, &ts) == 0 &&
+	    mp_reply.nb_received == 1) {
+		mp_rep = &mp_reply.msgs[0];
+		p = (struct vfio_mp_param *)mp_rep->param;
+		if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+			free(mp_reply.msgs);
+			return mp_rep->fds[0];
 		}
-		close(socket_fd);
-		return vfio_container_fd;
+		free(mp_reply.msgs);
 	}
 
+	RTE_LOG(ERR, EAL, "  cannot request container fd\n");
 	return -1;
 }
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 8059577..6b48969 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -88,15 +88,6 @@  struct vfio_iommu_spapr_tce_info {
 #define VFIO_MAX_GROUPS RTE_MAX_VFIO_GROUPS
 
 /*
- * Function prototypes for VFIO multiprocess sync functions
- */
-int vfio_mp_sync_send_request(int socket, int req);
-int vfio_mp_sync_receive_request(int socket);
-int vfio_mp_sync_send_fd(int socket, int fd);
-int vfio_mp_sync_receive_fd(int socket);
-int vfio_mp_sync_connect_to_primary(void);
-
-/*
  * we don't need to store device fd's anywhere since they can be obtained from
  * the group fd via an ioctl() call.
  */
@@ -157,6 +148,12 @@  int vfio_mp_sync_setup(void);
 #define SOCKET_NO_FD 0x1
 #define SOCKET_ERR 0xFF
 
+struct vfio_mp_param {
+	int req;
+	int result;
+	int group_no;
+};
+
 #endif /* VFIO_PRESENT */
 
 #endif /* EAL_VFIO_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
index 7cc3c15..c61cdb9 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
@@ -1,32 +1,15 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
  */
 
+#include <unistd.h>
 #include <string.h>
-#include <fcntl.h>
-#include <sys/socket.h>
-#include <pthread.h>
-
-/* sys/un.h with __USE_MISC uses strlen, which is unsafe */
-#ifdef __USE_MISC
-#define REMOVED_USE_MISC
-#undef __USE_MISC
-#endif
-#include <sys/un.h>
-/* make sure we redefine __USE_MISC only if it was previously undefined */
-#ifdef REMOVED_USE_MISC
-#define __USE_MISC
-#undef REMOVED_USE_MISC
-#endif
 
 #include <rte_log.h>
-#include <rte_eal_memconfig.h>
-#include <rte_malloc.h>
 #include <rte_vfio.h>
+#include <rte_eal.h>
 
-#include "eal_filesystem.h"
 #include "eal_vfio.h"
-#include "eal_thread.h"
 
 /**
  * @file
@@ -37,358 +20,80 @@ 
 
 #ifdef VFIO_PRESENT
 
-#define SOCKET_PATH_FMT "%s/.%s_mp_socket"
-#define CMSGLEN (CMSG_LEN(sizeof(int)))
-#define FD_TO_CMSGHDR(fd, chdr) \
-		do {\
-			(chdr).cmsg_len = CMSGLEN;\
-			(chdr).cmsg_level = SOL_SOCKET;\
-			(chdr).cmsg_type = SCM_RIGHTS;\
-			memcpy((chdr).__cmsg_data, &(fd), sizeof(fd));\
-		} while (0)
-#define CMSGHDR_TO_FD(chdr, fd) \
-			memcpy(&(fd), (chdr).__cmsg_data, sizeof(fd))
-
-static pthread_t socket_thread;
-static int mp_socket_fd;
-
-
-/* get socket path (/var/run if root, $HOME otherwise) */
-static void
-get_socket_path(char *buffer, int bufsz)
-{
-	const char *dir = "/var/run";
-	const char *home_dir = getenv("HOME");
-
-	if (getuid() != 0 && home_dir != NULL)
-		dir = home_dir;
-
-	/* use current prefix as file path */
-	snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir,
-			internal_config.hugefile_prefix);
-}
-
-
-
-/*
- * data flow for socket comm protocol:
- * 1. client sends SOCKET_REQ_CONTAINER or SOCKET_REQ_GROUP
- * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number
- * 2. server receives message
- * 2a. in case of invalid group, SOCKET_ERR is sent back to client
- * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client
- * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd
- *
- * in case of any error, socket is closed.
- */
-
-/* send a request, return -1 on error */
-int
-vfio_mp_sync_send_request(int socket, int req)
-{
-	struct msghdr hdr;
-	struct iovec iov;
-	int buf;
-	int ret;
-
-	memset(&hdr, 0, sizeof(hdr));
-
-	buf = req;
-
-	hdr.msg_iov = &iov;
-	hdr.msg_iovlen = 1;
-	iov.iov_base = (char *) &buf;
-	iov.iov_len = sizeof(buf);
-
-	ret = sendmsg(socket, &hdr, 0);
-	if (ret < 0)
-		return -1;
-	return 0;
-}
-
-/* receive a request and return it */
-int
-vfio_mp_sync_receive_request(int socket)
-{
-	int buf;
-	struct msghdr hdr;
-	struct iovec iov;
-	int ret, req;
-
-	memset(&hdr, 0, sizeof(hdr));
-
-	buf = SOCKET_ERR;
-
-	hdr.msg_iov = &iov;
-	hdr.msg_iovlen = 1;
-	iov.iov_base = (char *) &buf;
-	iov.iov_len = sizeof(buf);
-
-	ret = recvmsg(socket, &hdr, 0);
-	if (ret < 0)
-		return -1;
-
-	req = buf;
-
-	return req;
-}
-
-/* send OK in message, fd in control message */
-int
-vfio_mp_sync_send_fd(int socket, int fd)
+static int
+vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer)
 {
-	int buf;
-	struct msghdr hdr;
-	struct cmsghdr *chdr;
-	char chdr_buf[CMSGLEN];
-	struct iovec iov;
+	int fd;
+	int num;
 	int ret;
+	struct rte_mp_msg reply;
+	struct vfio_mp_param *r = (struct vfio_mp_param *)reply.param;
+	const struct vfio_mp_param *m = (const struct vfio_mp_param *)msg->param;
 
-	chdr = (struct cmsghdr *) chdr_buf;
-	memset(chdr, 0, sizeof(chdr_buf));
-	memset(&hdr, 0, sizeof(hdr));
-
-	hdr.msg_iov = &iov;
-	hdr.msg_iovlen = 1;
-	iov.iov_base = (char *) &buf;
-	iov.iov_len = sizeof(buf);
-	hdr.msg_control = chdr;
-	hdr.msg_controllen = CMSGLEN;
-
-	buf = SOCKET_OK;
-	FD_TO_CMSGHDR(fd, *chdr);
-
-	ret = sendmsg(socket, &hdr, 0);
-	if (ret < 0)
-		return -1;
-	return 0;
-}
-
-/* receive OK in message, fd in control message */
-int
-vfio_mp_sync_receive_fd(int socket)
-{
-	int buf;
-	struct msghdr hdr;
-	struct cmsghdr *chdr;
-	char chdr_buf[CMSGLEN];
-	struct iovec iov;
-	int ret, req, fd;
-
-	buf = SOCKET_ERR;
-
-	chdr = (struct cmsghdr *) chdr_buf;
-	memset(chdr, 0, sizeof(chdr_buf));
-	memset(&hdr, 0, sizeof(hdr));
-
-	hdr.msg_iov = &iov;
-	hdr.msg_iovlen = 1;
-	iov.iov_base = (char *) &buf;
-	iov.iov_len = sizeof(buf);
-	hdr.msg_control = chdr;
-	hdr.msg_controllen = CMSGLEN;
-
-	ret = recvmsg(socket, &hdr, 0);
-	if (ret < 0)
-		return -1;
-
-	req = buf;
-
-	if (req != SOCKET_OK)
-		return -1;
-
-	CMSGHDR_TO_FD(*chdr, fd);
-
-	return fd;
-}
-
-/* connect socket_fd in secondary process to the primary process's socket */
-int
-vfio_mp_sync_connect_to_primary(void)
-{
-	struct sockaddr_un addr;
-	socklen_t sockaddr_len;
-	int socket_fd;
-
-	/* set up a socket */
-	socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
-	if (socket_fd < 0) {
-		RTE_LOG(ERR, EAL, "Failed to create socket!\n");
+	if (msg->len_param != sizeof(*m)) {
+		RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
 		return -1;
 	}
 
-	get_socket_path(addr.sun_path, sizeof(addr.sun_path));
-	addr.sun_family = AF_UNIX;
-
-	sockaddr_len = sizeof(struct sockaddr_un);
-
-	if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0)
-		return socket_fd;
-
-	/* if connect failed */
-	close(socket_fd);
-	return -1;
-}
-
-
-
-/*
- * socket listening thread for primary process
- */
-static __attribute__((noreturn)) void *
-vfio_mp_sync_thread(void __rte_unused * arg)
-{
-	int ret, fd, vfio_data;
-
-	/* wait for requests on the socket */
-	for (;;) {
-		int conn_sock;
-		struct sockaddr_un addr;
-		socklen_t sockaddr_len = sizeof(addr);
-
-		/* this is a blocking call */
-		conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr,
-				&sockaddr_len);
-
-		/* just restart on error */
-		if (conn_sock == -1)
-			continue;
-
-		/* set socket to linger after close */
-		struct linger l;
-		l.l_onoff = 1;
-		l.l_linger = 60;
-
-		if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) < 0)
-			RTE_LOG(WARNING, EAL, "Cannot set SO_LINGER option "
-					"on listen socket (%s)\n", strerror(errno));
-
-		ret = vfio_mp_sync_receive_request(conn_sock);
-
-		switch (ret) {
-		case SOCKET_REQ_CONTAINER:
-			fd = vfio_get_container_fd();
-			if (fd < 0)
-				vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
-			else
-				vfio_mp_sync_send_fd(conn_sock, fd);
-			if (fd >= 0)
-				close(fd);
-			break;
-		case SOCKET_REQ_GROUP:
-			/* wait for group number */
-			vfio_data = vfio_mp_sync_receive_request(conn_sock);
-			if (vfio_data < 0) {
-				close(conn_sock);
-				continue;
-			}
-
-			fd = vfio_get_group_fd(vfio_data);
+	memset(&reply, 0, sizeof(reply));
 
-			if (fd < 0)
-				vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
+	switch (m->req) {
+	case SOCKET_REQ_GROUP:
+		r->req = SOCKET_REQ_GROUP;
+		r->group_no = m->group_no;
+		fd = vfio_get_group_fd(m->group_no);
+		if (fd < 0)
+			r->result = SOCKET_ERR;
+		else if (fd == 0)
 			/* if VFIO group exists but isn't bound to VFIO driver */
-			else if (fd == 0)
-				vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
+			r->result = SOCKET_NO_FD;
+		else {
 			/* if group exists and is bound to VFIO driver */
-			else {
-				vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
-				vfio_mp_sync_send_fd(conn_sock, fd);
-			}
-			break;
-		case SOCKET_CLR_GROUP:
-			/* wait for group fd */
-			vfio_data = vfio_mp_sync_receive_request(conn_sock);
-			if (vfio_data < 0) {
-				close(conn_sock);
-				continue;
-			}
-
-			ret = rte_vfio_clear_group(vfio_data);
-
-			if (ret < 0)
-				vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
-			else
-				vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
-			break;
-		default:
-			vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
-			break;
+			r->result = SOCKET_OK;
+			num = 1;
 		}
-		close(conn_sock);
-	}
-}
-
-static int
-vfio_mp_sync_socket_setup(void)
-{
-	int ret, socket_fd;
-	struct sockaddr_un addr;
-	socklen_t sockaddr_len;
-
-	/* set up a socket */
-	socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
-	if (socket_fd < 0) {
-		RTE_LOG(ERR, EAL, "Failed to create socket!\n");
-		return -1;
-	}
-
-	get_socket_path(addr.sun_path, sizeof(addr.sun_path));
-	addr.sun_family = AF_UNIX;
-
-	sockaddr_len = sizeof(struct sockaddr_un);
-
-	unlink(addr.sun_path);
-
-	ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len);
-	if (ret) {
-		RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno));
-		close(socket_fd);
+		break;
+	case SOCKET_CLR_GROUP:
+		r->req = SOCKET_CLR_GROUP;
+		r->group_no = m->group_no;
+		if (rte_vfio_clear_group(m->group_no) < 0)
+			r->result = SOCKET_NO_FD;
+		else
+			r->result = SOCKET_OK;
+		break;
+	case SOCKET_REQ_CONTAINER:
+		r->req = SOCKET_REQ_CONTAINER;
+		fd = vfio_get_container_fd();
+		if (fd < 0)
+			r->result = SOCKET_ERR;
+		else {
+			r->result = SOCKET_OK;
+			num = 1;
+		}
+		break;
+	default:
+		RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
 		return -1;
 	}
 
-	ret = listen(socket_fd, 50);
-	if (ret) {
-		RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno));
-		close(socket_fd);
-		return -1;
+	if (num == 1) {
+		reply.num_fds = 1;
+		reply.fds[0] = fd;
 	}
+	strcpy(reply.name, "vfio");
+	reply.len_param = sizeof(*r);
 
-	/* save the socket in local configuration */
-	mp_socket_fd = socket_fd;
-
-	return 0;
+	ret = rte_mp_reply(&reply, peer);
+	if (m->req == SOCKET_REQ_CONTAINER && num == 1)
+		close(fd);
+	return ret;
 }
 
-/*
- * set up a local socket and tell it to listen for incoming connections
- */
 int
 vfio_mp_sync_setup(void)
 {
-	int ret;
-	char thread_name[RTE_MAX_THREAD_NAME_LEN];
-
-	if (vfio_mp_sync_socket_setup() < 0) {
-		RTE_LOG(ERR, EAL, "Failed to set up local socket!\n");
-		return -1;
-	}
-
-	ret = pthread_create(&socket_thread, NULL,
-			vfio_mp_sync_thread, NULL);
-	if (ret) {
-		RTE_LOG(ERR, EAL,
-			"Failed to create thread for communication with secondary processes!\n");
-		close(mp_socket_fd);
-		return -1;
-	}
-
-	/* Set thread_name for aid in debugging. */
-	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "vfio-sync");
-	ret = rte_thread_setname(socket_thread, thread_name);
-	if (ret)
-		RTE_LOG(DEBUG, EAL,
-			"Failed to set thread name for secondary processes!\n");
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		return rte_mp_action_register("vfio", vfio_mp_primary);
 
 	return 0;
 }